# Using the integrated beta map of FA to predict labels

In [72]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns

from algo import *

from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.model_selection import LeaveOneOut,GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

import nibabel as nib

In [73]:
# Data locations.
# `path` holds the train/test .npy arrays and `beta_path` holds the common/diff
# beta maps that the loading cell reads. The defaults are the original cluster
# paths; set ATTENTION_DATA_DIR / ATTENTION_BETA_DIR in the environment to run
# the notebook on another machine without editing this cell.
path = os.environ.get(
    'ATTENTION_DATA_DIR',
    '/nfs/s2/userhome/yanganmin/workingdir/attention_data_complete/train_test_data',
)
beta_path = os.environ.get(
    'ATTENTION_BETA_DIR',
    '/nfs/s2/userhome/yanganmin/workingdir/attention_predict/reorganized_results/common_diff_map',
)

In [74]:
def _load_npy(directory, filename):
    """Load a single ``.npy`` array stored as ``filename`` under ``directory``."""
    return np.load(os.path.join(directory, filename))


# FA train/test splits, plus the SA set that the model is also tested on.
train_X_fa = _load_npy(path, 'train_X_fa.npy')
test_X_fa = _load_npy(path, 'test_X_fa.npy')
train_Y_fa = _load_npy(path, 'train_Y_fa.npy')
test_Y_fa = _load_npy(path, 'test_Y_fa.npy')
X_sa = _load_npy(path, 'X_sa.npy')
Y_sa = _load_npy(path, 'Y_sa.npy')

# Index used below to select entries of the beta maps.
mask_NaN = _load_npy(path, 'mask_NaN.npy')

# Integrated (common / diff) beta maps of FA.
beta_common = _load_npy(beta_path, 'common_beta_fa.npy')
beta_diff = _load_npy(beta_path, 'diff_beta_fa.npy')

# Keep only the entries selected by mask_NaN. Per the original note this
# reduces the dimension to 140000 and eliminates NaN values
# (presumably so the maps line up with the feature columns — TODO confirm).
beta_common, beta_diff = beta_common[mask_NaN], beta_diff[mask_NaN]

# Reduce dimension of features

In [75]:
# NOTE: this cell overwrites its own inputs, so it is not idempotent — run it
# exactly once after the loading cell.

# Standardise the data. Each set is scaled with its own statistics.
# NOTE(review): scaling the test and SA sets independently of the training set
# looks unusual — confirm this is intended rather than reusing the training
# statistics.
train_X_fa, test_X_fa, X_sa = (
    scale(features) for features in (train_X_fa, test_X_fa, X_sa)
)

# Reduce dimension with PCA, fitted on the FA training set only. With no
# n_components given, the number of components equals
# min(n_samples, n_features), i.e. the number of observations here.
pca = PCA().fit(train_X_fa)
train_X_fa, test_X_fa, X_sa = (
    pca.transform(features) for features in (train_X_fa, test_X_fa, X_sa)
)

# Project the beta maps into the same PCA space. transform() needs a 2-D
# input, so each map is reshaped into a single row first.
beta_common, beta_diff = (
    pca.transform(beta.reshape(1, -1)) for beta in (beta_common, beta_diff)
)

# Build logistic regression, then replace its beta (coefficients)

In [76]:
# Inverse regularisation strength; value obtained from the last prediction.
C_best = 0.05

# L1-penalised logistic regression on the PCA features.
# random_state is pinned because the liblinear solver shuffles the data
# internally; without it repeated runs of the notebook are not guaranteed to
# give the same coefficients (and therefore the same feature mask later on).
clf = LogisticRegression(
    penalty='l1',
    C=C_best,
    solver='liblinear',
    random_state=0,
)
clf.fit(train_X_fa, train_Y_fa)

# Row 0 of coef_: one coefficient per PCA component. The L1 penalty drives
# many of them to (near) zero; the next cell thresholds their magnitude to
# select features.
LR_beta_iszero = clf.coef_[0]

## Original Precision

In [77]:
# NOTE: this cell overwrites the feature matrices with their reduced versions,
# so it must be run exactly once after the logistic-regression cell.

# Feature reduction: keep only the components whose coefficient magnitude in
# the first fit exceeds the cutoff.
beta_cutoff = 0.01  # value acquired from previous analysis
mask_beta = np.abs(LR_beta_iszero) > beta_cutoff
train_X_fa, test_X_fa, X_sa = (
    features[:, mask_beta] for features in (train_X_fa, test_X_fa, X_sa)
)

# Refit the same classifier on the reduced feature set.
clf.fit(train_X_fa, train_Y_fa)

# Predictions of the refitted ("original") model on each data set.
predict_Y_ori_train_fa, predict_Y_ori_test_fa, predict_Y_ori_sa = (
    clf.predict(features) for features in (train_X_fa, test_X_fa, X_sa)
)

# Per-class precision / recall / F1 reports.
# The last name keeps its original spelling ("reprot") so that any other cell
# reading it keeps working.
report_ori_train_fa, report_ori_test_fa, reprot_ori_sa = (
    classification_report(y_true, y_pred)
    for y_true, y_pred in (
        (train_Y_fa, predict_Y_ori_train_fa),
        (test_Y_fa, predict_Y_ori_test_fa),
        (Y_sa, predict_Y_ori_sa),
    )
)

print('Train fa', report_ori_train_fa, sep='\n')
print('\nTest fa', report_ori_test_fa, sep='\n')
print('\nSa', reprot_ori_sa, sep='\n')

Train fa
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       157
           1       1.00      0.99      0.99       157

    accuracy                           0.99       314
   macro avg       0.99      0.99      0.99       314
weighted avg       0.99      0.99      0.99       314


Test fa
              precision    recall  f1-score   support

           0       0.95      1.00      0.97        78
           1       1.00      0.95      0.97        78

    accuracy                           0.97       156
   macro avg       0.98      0.97      0.97       156
weighted avg       0.98      0.97      0.97       156


Sa
              precision    recall  f1-score   support

           0       0.64      0.68      0.66       235
           1       0.66      0.63      0.64       235

    accuracy                           0.65       470
   macro avg       0.65      0.65      0.65       470
weighted avg       0.65      0.65      0.65       47

In [63]:
# Show the original model's report on the FA training set.
# This cell used to print `report_ori`, a name that no visible cell defines,
# so it raised NameError on a fresh kernel (Restart & Run All). Its saved
# output is identical to the FA training report, which is what is printed now.
print(report_ori_train_fa)

              precision    recall  f1-score   support

           0       0.99      1.00      0.99       157
           1       1.00      0.99      0.99       157

    accuracy                           0.99       314
   macro avg       0.99      0.99      0.99       314
weighted avg       0.99      0.99      0.99       314



## Predict with common beta

In [70]:
# Swap the fitted coefficients for the PCA-projected common beta map,
# restricted to the selected components and shaped (1, n_features) as
# predict() expects. Only coef_ is replaced; the intercept stays as fitted.
clf.coef_ = beta_common[0][mask_beta].reshape(1, -1)

# Predictions made with the common-beta coefficients.
predict_Y_common_train_fa, predict_Y_common_test_fa, predict_Y_common_sa = (
    clf.predict(features) for features in (train_X_fa, test_X_fa, X_sa)
)

# Per-class precision / recall / F1 reports.
report_common_train_fa, report_common_test_fa, report_common_sa = (
    classification_report(y_true, y_pred)
    for y_true, y_pred in (
        (train_Y_fa, predict_Y_common_train_fa),
        (test_Y_fa, predict_Y_common_test_fa),
        (Y_sa, predict_Y_common_sa),
    )
)

print('Common Train fa', report_common_train_fa, sep='\n')
print('\nCommon Test fa', report_common_test_fa, sep='\n')
print('\nCommon sa', report_common_sa, sep='\n')

Common Train fa
              precision    recall  f1-score   support

           0       0.96      1.00      0.98       157
           1       1.00      0.96      0.98       157

    accuracy                           0.98       314
   macro avg       0.98      0.98      0.98       314
weighted avg       0.98      0.98      0.98       314


Common Test fa
              precision    recall  f1-score   support

           0       0.95      0.97      0.96        78
           1       0.97      0.95      0.96        78

    accuracy                           0.96       156
   macro avg       0.96      0.96      0.96       156
weighted avg       0.96      0.96      0.96       156


Common sa
              precision    recall  f1-score   support

           0       0.63      0.67      0.65       235
           1       0.65      0.61      0.63       235

    accuracy                           0.64       470
   macro avg       0.64      0.64      0.64       470
weighted avg       0.64      0.

## Predict with diff beta

In [71]:
# Swap the classifier's coefficients for the PCA-projected diff beta map,
# restricted to the selected components and shaped (1, n_features) as
# predict() expects. Only coef_ is replaced; the intercept is left untouched.
# NOTE(review): the saved output of this cell shows accuracy far below chance
# on FA (0.02 train / 0.04 test), i.e. almost perfectly inverted predictions —
# presumably the sign convention of the diff map is opposite to the label
# coding; confirm before interpreting.
clf.coef_ = beta_diff[0][mask_beta].reshape(1, -1)

# Predictions made with the diff-beta coefficients.
predict_Y_diff_train_fa, predict_Y_diff_test_fa, predict_Y_diff_sa = (
    clf.predict(features) for features in (train_X_fa, test_X_fa, X_sa)
)

# Per-class precision / recall / F1 reports.
report_diff_train_fa, report_diff_test_fa, report_diff_sa = (
    classification_report(y_true, y_pred)
    for y_true, y_pred in (
        (train_Y_fa, predict_Y_diff_train_fa),
        (test_Y_fa, predict_Y_diff_test_fa),
        (Y_sa, predict_Y_diff_sa),
    )
)

print('Diff Train fa', report_diff_train_fa, sep='\n')
print('\nDiff Test fa', report_diff_test_fa, sep='\n')
print('\nDiff sa', report_diff_sa, sep='\n')

Diff Train fa
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       157
           1       0.04      0.04      0.04       157

    accuracy                           0.02       314
   macro avg       0.02      0.02      0.02       314
weighted avg       0.02      0.02      0.02       314


Diff Test fa
              precision    recall  f1-score   support

           0       0.03      0.03      0.03        78
           1       0.05      0.05      0.05        78

    accuracy                           0.04       156
   macro avg       0.04      0.04      0.04       156
weighted avg       0.04      0.04      0.04       156


Diff sa
              precision    recall  f1-score   support

           0       0.35      0.33      0.34       235
           1       0.37      0.39      0.38       235

    accuracy                           0.36       470
   macro avg       0.36      0.36      0.36       470
weighted avg       0.36      0.36    