# Using common and diff maps to predict labels

In [38]:
import numpy as np
import pandas as pd
import os

from algo import *

from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [39]:
# Input directories.
# `path` holds the .npy train/test arrays and the NaN mask; `beta_path` holds
# the common/diff beta maps. Both default to the original locations on the NFS
# share; set ATTENTION_DATA_DIR / ATTENTION_BETA_DIR in the environment to run
# the notebook elsewhere without editing this cell.
path = os.environ.get(
    'ATTENTION_DATA_DIR',
    '/nfs/s2/userhome/yanganmin/workingdir/attention_data_complete/train_test_data',
)
beta_path = os.environ.get(
    'ATTENTION_BETA_DIR',
    '/nfs/s2/userhome/yanganmin/workingdir/attention_predict/reorganized_results/common_diff_map',
)

In [40]:
# Load the sa train/test splits, the fa set (also used for testing), the NaN
# mask and the two sa beta maps from disk.
def _load_npy(directory, filename):
    """Return the array stored in `filename` inside `directory`."""
    return np.load(os.path.join(directory, filename))


train_X_sa = _load_npy(path, 'train_X_sa.npy')
test_X_sa = _load_npy(path, 'test_X_sa.npy')
train_Y_sa = _load_npy(path, 'train_Y_sa.npy')
test_Y_sa = _load_npy(path, 'test_Y_sa.npy')
X_fa = _load_npy(path, 'X_fa.npy')
Y_fa = _load_npy(path, 'Y_fa.npy')

mask_NaN = _load_npy(path, 'mask_NaN.npy')

beta_common = _load_npy(beta_path, 'common_beta_sa.npy')
beta_diff = _load_npy(beta_path, 'diff_beta_sa.npy')

# Index both beta maps with the mask. Per the original author's note this
# reduces the dimension to 140000 and eliminates the NaN entries.
beta_common = beta_common[mask_NaN]
beta_diff = beta_diff[mask_NaN]

# Reduce dimension of features

In [41]:
# Standardize the features. Each array is passed to scale() separately, so
# each one is standardized with its own statistics.
# NOTE(review): the test and fa sets are therefore not scaled with the
# training-set statistics -- confirm this is intended.
train_X_sa = scale(train_X_sa)
test_X_sa = scale(test_X_sa)
X_fa = scale(X_fa)

# Fit PCA on the scaled training set only. No n_components is given; per the
# original note this reduces the dimension to the number of observations.
pca = PCA().fit(train_X_sa)

# Project every feature matrix onto the components learned from the training set.
train_X_sa = pca.transform(train_X_sa)
test_X_sa = pca.transform(test_X_sa)
X_fa = pca.transform(X_fa)

# Project the beta maps with the same PCA; each map is first reshaped into a
# single row (1, n_features) because transform() expects a 2-D array.
beta_common, beta_diff = (
    pca.transform(np.reshape(beta_map, (1, -1)))
    for beta_map in (beta_common, beta_diff)
)

# Build Logistic Regression, replace beta

In [42]:
# Fit an L1-penalised logistic regression on the training features. Its
# coefficient vector (LR_beta_iszero) is what the feature-reduction step
# thresholds afterwards.
C_best = 0.95 # obtained from last prediction
# The liblinear solver shuffles the data while fitting; pinning the seed makes
# the fitted coefficients reproducible across re-runs of the notebook.
LR_RANDOM_STATE = 0
clf = LogisticRegression(
    penalty='l1',
    C=C_best,
    solver='liblinear',
    random_state=LR_RANDOM_STATE,
)
clf.fit(train_X_sa, train_Y_sa)
# First row of the fitted coefficient matrix (one weight per input feature).
LR_beta_iszero = clf.coef_[0]

In [43]:
# Feature reduction: keep only the columns whose L1-fit coefficient magnitude
# exceeds the cutoff, refit the classifier on the reduced features, and report
# its performance on sa train, sa test and fa.
beta_cutoff = 0.0003 # value acquired from previous analysis
mask_beta = (abs(LR_beta_iszero) > beta_cutoff)

# The three feature matrices are rebound to their reduced versions, so
# re-running this cell on its own would apply the mask to already-reduced arrays.
train_X_sa = train_X_sa[:,mask_beta]
test_X_sa = test_X_sa[:,mask_beta]
X_fa = X_fa[:,mask_beta]

# Refit the same estimator on the selected columns only.
clf.fit(train_X_sa,train_Y_sa)

predict_Y_ori_train_sa = clf.predict(train_X_sa)
predict_Y_ori_test_sa = clf.predict(test_X_sa)
predict_Y_ori_fa = clf.predict(X_fa)

report_ori_train_sa = classification_report(train_Y_sa,predict_Y_ori_train_sa)
report_ori_test_sa = classification_report(test_Y_sa,predict_Y_ori_test_sa)
report_ori_fa = classification_report(Y_fa,predict_Y_ori_fa)
# Backward-compatible alias: the report was originally stored under this
# misspelled name, so keep it available for any code that still reads it.
reprot_ori_fa = report_ori_fa

print('Train sa')
print(report_ori_train_sa)
print('\nTest sa')
print(report_ori_test_sa)
print('\nFa')
print(report_ori_fa)

Train sa
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       157
           1       1.00      1.00      1.00       157

    accuracy                           1.00       314
   macro avg       1.00      1.00      1.00       314
weighted avg       1.00      1.00      1.00       314


Test sa
              precision    recall  f1-score   support

           0       0.62      0.64      0.63        78
           1       0.63      0.60      0.61        78

    accuracy                           0.62       156
   macro avg       0.62      0.62      0.62       156
weighted avg       0.62      0.62      0.62       156


Fa
              precision    recall  f1-score   support

           0       0.53      0.52      0.52       235
           1       0.53      0.53      0.53       235

    accuracy                           0.53       470
   macro avg       0.53      0.53      0.53       470
weighted avg       0.53      0.53      0.53       470

## predict with common beta

In [44]:
# Overwrite the fitted weights with the PCA-projected common beta, restricted
# to the selected columns and shaped as a single row. Only coef_ is replaced
# here; clf.intercept_ is left as fitted.
clf.coef_ = beta_common[0][mask_beta].reshape(1, -1)

# Predict on the three data sets with the swapped-in coefficients.
predict_Y_common_train_sa, predict_Y_common_test_sa, predict_Y_common_fa = (
    clf.predict(features) for features in (train_X_sa, test_X_sa, X_fa)
)

# One classification report per data set (true labels vs. predictions).
report_common_train_sa, report_common_test_sa, report_common_fa = (
    classification_report(y_true, y_pred)
    for y_true, y_pred in (
        (train_Y_sa, predict_Y_common_train_sa),
        (test_Y_sa, predict_Y_common_test_sa),
        (Y_fa, predict_Y_common_fa),
    )
)

# Print each heading followed by its report, one item per line.
print(
    'Common Train sa', report_common_train_sa,
    '\nCommon Test sa', report_common_test_sa,
    '\nCommon fa', report_common_fa,
    sep='\n',
)

Common Train sa
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       157
           1       1.00      0.99      0.99       157

    accuracy                           0.99       314
   macro avg       0.99      0.99      0.99       314
weighted avg       0.99      0.99      0.99       314


Common Test sa
              precision    recall  f1-score   support

           0       0.61      0.60      0.61        78
           1       0.61      0.62      0.61        78

    accuracy                           0.61       156
   macro avg       0.61      0.61      0.61       156
weighted avg       0.61      0.61      0.61       156


Common fa
              precision    recall  f1-score   support

           0       0.53      0.52      0.52       235
           1       0.53      0.53      0.53       235

    accuracy                           0.53       470
   macro avg       0.53      0.53      0.53       470
weighted avg       0.53      0.53      0.53       470

## predict with diff beta 

In [45]:
# Overwrite the classifier weights with the PCA-projected diff beta, restricted
# to the selected columns and shaped as a single row. Only coef_ is replaced
# here; clf.intercept_ is not modified.
clf.coef_ = beta_diff[0][mask_beta].reshape(1, -1)

# Predict on the three data sets with the swapped-in coefficients.
predict_Y_diff_train_sa, predict_Y_diff_test_sa, predict_Y_diff_fa = (
    clf.predict(features) for features in (train_X_sa, test_X_sa, X_fa)
)

# One classification report per data set (true labels vs. predictions).
report_diff_train_sa, report_diff_test_sa, report_diff_fa = (
    classification_report(y_true, y_pred)
    for y_true, y_pred in (
        (train_Y_sa, predict_Y_diff_train_sa),
        (test_Y_sa, predict_Y_diff_test_sa),
        (Y_fa, predict_Y_diff_fa),
    )
)

# Print each heading followed by its report, one item per line.
print(
    'Diff Train sa', report_diff_train_sa,
    '\nDiff Test sa', report_diff_test_sa,
    '\nDiff fa', report_diff_fa,
    sep='\n',
)

Diff Train sa
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       157
           1       1.00      1.00      1.00       157

    accuracy                           1.00       314
   macro avg       1.00      1.00      1.00       314
weighted avg       1.00      1.00      1.00       314


Diff Test sa
              precision    recall  f1-score   support

           0       0.62      0.63      0.62        78
           1       0.62      0.62      0.62        78

    accuracy                           0.62       156
   macro avg       0.62      0.62      0.62       156
weighted avg       0.62      0.62      0.62       156


Diff fa
              precision    recall  f1-score   support

           0       0.56      0.58      0.57       235
           1       0.57      0.55      0.56       235

    accuracy                           0.57       470
   macro avg       0.57      0.57      0.57       470
weighted avg       0.57      0.57      0.57       470