# Train a logistic regression model for each subject

In [43]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import util

from sklearn import metrics
from sklearn import linear_model, datasets
from sklearn.model_selection import cross_val_score

# Useful functions

In [44]:
def load_move_nomove_each(move_csv, nomove_csv):
    """ Load the movement and no-movement feature tables and label them

        Each CSV is read with its first column as the index, the
        'start_time' and 'end_time' columns are discarded, and a binary
        'move' column is appended as the last column (1 for rows coming
        from move_csv, 0 for rows coming from nomove_csv). Rows that
        contain any missing value are removed and the result is then
        re-indexed 0..n-1.

        Parameter:
            move_csv (String): The name of the move csv
            nomove_csv (String): The name of the nomove csv

        Return:
            df (pandas dataframe): The movement rows followed by the
                no-movement rows, labelled through the 'move' column

        Raises:
            KeyError: If either csv has no 'start_time' or 'end_time'
                column (raised by DataFrame.drop)
    """
    non_feature_cols = ['end_time', 'start_time']

    labelled = []
    for csv_path, label in ((move_csv, 1), (nomove_csv, 0)):
        # Load the feature dataframe and strip the non-feature columns
        frame = pd.read_csv(csv_path, index_col=0)
        frame = frame.drop(non_feature_cols, axis=1)

        # The label goes in last so the feature columns stay in front of it
        frame['move'] = label
        labelled.append(frame)

    df = pd.concat(labelled)

    # Drop incomplete rows BEFORE resetting the index; resetting first would
    # leave holes in the index wherever a row was removed.
    df = df.dropna(axis=0)
    df = df.reset_index(drop=True)
    return df

In [46]:
def calculate_test_train_each(df, fold, subj):
    """ Cross-validate a logistic regression on one labelled dataframe and
        also score the model on the data it was trained on.

        Parameter:
            df (pandas dataframe): The data frame for both move and nomove.
                It must contain a 'move' column holding the class label;
                every other column is used as a feature.

            fold: The number of folds, passed to cross_val_score as cv

            subj: The subject we are calculating for. Currently unused;
                kept so existing callers do not have to change.

        Return:
            A dictionary with two entries:
                'cross_val': mean of the per-fold scores
                'training': fraction of rows predicted correctly by a
                    model fitted on all rows
            A confusion matrix of the true labels against the predictions
            made on the training data (returned, not printed)
    """
    label_col = 'move'

    # Pick the features by name rather than by position, so the label can
    # never end up inside X if 'move' is not the last column.
    X = df.drop(label_col, axis=1).values
    Y = df[label_col].values

    # Calculate cross validation
    logreg = linear_model.LogisticRegression(C=1e5)
    scores = cross_val_score(logreg, X, Y, cv=fold)

    result = {}
    result['cross_val'] = np.mean(scores)  # Average score over the folds

    # Refit on every row, then measure accuracy and the confusion matrix
    # on that same training set
    logreg.fit(X, Y)
    y_pred = logreg.predict(X)

    result['training'] = np.mean(Y == y_pred)
    return result, metrics.confusion_matrix(Y, y_pred)

In [53]:
# Get all subjects
all_subjects = util.get_subjects_all('C:/data2/dg')

# Maps each subject to its result dictionary
result_all = {}
for subj in all_subjects:
    # Feature files for this subject
    move = 'neural_move_' + subj + '.csv'
    nomove = 'neural_nomove_' + subj + '.csv'

    df = load_move_nomove_each(move, nomove)

    # 5-fold cross validation. This calls calculate_test_train_each because
    # the previously used name, calculate_cross_val, is not defined here.
    result_each, metric = calculate_test_train_each(df, 5, subj)

    print(subj)
    print(metric)
    result_all[subj] = result_each
    

bp
[[102  40]
 [ 35 115]]
cc
[[111  24]
 [ 22 128]]
ht
[[111  28]
 [ 23 127]]
jc
[[89 40]
 [33 98]]
jp
[[ 45  17]
 [  9 106]]
wc
[[ 75  39]
 [ 30 120]]
wm
[[48 25]
 [16 93]]
zt
[[ 72  36]
 [ 28 122]]


In [54]:
# Show the result dictionary collected for every subject
result_all

{'bp': {'cross_val': 0.64061952074810047, 'training': 0.74315068493150682},
 'cc': {'cross_val': 0.69473684210526321, 'training': 0.83859649122807023},
 'ht': {'cross_val': 0.66448880822746514, 'training': 0.82352941176470584},
 'jc': {'cross_val': 0.56117134807478875, 'training': 0.71923076923076923},
 'jp': {'cross_val': 0.58158730158730154, 'training': 0.85310734463276838},
 'wc': {'cross_val': 0.5456458635703918, 'training': 0.73863636363636365},
 'wm': {'cross_val': 0.5227198627198627, 'training': 0.77472527472527475},
 'zt': {'cross_val': 0.56990950226244341, 'training': 0.75193798449612403}}