# Machine Learning models on connectivity data

In this notebook: 
- Necessary imports
- SVM model 
- Logistic Regression model
- Decision Tree model

## Imports

In [1]:
import os       # using operating system dependent functionality (folders)
import pandas as pd # data analysis and manipulation
import numpy as np    # numerical computing (manipulating and performing operations on arrays of data)
import copy     # Can Copy and Deepcopy files so original file is untouched.
import seaborn as sn
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, '../eegyolk') # path to helper functions
import helper_functions as hf # library useful for eeg and erp data cleaning
#import initialization_functions #library to import data
import epod_helper

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn import tree

In [2]:
df = pd.read_csv('df_connectivity.csv', sep = ',')

In [3]:
df

Unnamed: 0,32,64,65,96,97,98,128,129,130,131,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,Group_AccToParents
0,0.15203,0.069506,0.028244,0.216019,0.23654,0.110768,0.146072,0.234775,0.123786,0.224846,...,0.092895,0.109223,0.09113,0.110989,0.126434,0.059135,0.116726,0.058032,0.164387,1
1,0.049602,0.027627,0.060067,0.021557,0.016116,0.090414,0.022394,0.05337,0.050649,0.114483,...,0.114692,0.096693,0.043533,0.169527,0.042905,0.028882,0.031812,0.038928,0.098577,0
2,0.380034,0.336761,0.322622,0.404456,0.434876,0.360326,0.452442,0.464439,0.064267,0.492716,...,0.339332,0.338046,0.392888,0.127249,0.395458,0.368038,0.422022,0.341902,0.32862,1
3,0.044057,0.06373,0.048361,0.123975,0.148566,0.029508,0.107992,0.155943,0.071107,0.08709,...,0.045082,0.057582,0.068238,0.159016,0.117418,0.093033,0.115574,0.086475,0.15041,1
4,0.137384,0.124695,0.082479,0.244021,0.198389,0.036115,0.171791,0.170327,0.102733,0.15837,...,0.027086,0.053197,0.029771,0.063202,0.110786,0.061249,0.121279,0.102245,0.085652,1
5,0.216787,0.13414,0.031411,0.230433,0.149331,0.042225,0.172245,0.203399,0.120494,0.208805,...,0.17482,0.091401,0.09964,0.096035,0.070288,0.056128,0.077755,0.062049,0.145211,1
6,0.174103,0.048556,0.073053,0.139326,0.125984,0.102581,0.132765,0.160105,0.137795,0.134733,...,0.036964,0.024934,0.029528,0.031277,0.101925,0.08224,0.122266,0.106955,0.114173,1
7,0.081009,0.046555,0.019688,0.105824,0.091879,0.039992,0.092289,0.126743,0.068909,0.122026,...,0.016202,0.025226,0.011075,0.084495,0.073216,0.031583,0.069524,0.055783,0.096185,1
8,0.076415,0.013631,0.024783,0.12784,0.078067,0.045642,0.067121,0.092524,0.044816,0.074556,...,0.036762,0.051219,0.011979,0.074969,0.049979,0.03449,0.030359,0.027881,0.063817,1
9,0.112211,0.052805,0.031353,0.173267,0.124587,0.049505,0.124587,0.086634,0.020627,0.066007,...,0.089934,0.067657,0.075908,0.126238,0.073432,0.040429,0.05363,0.061056,0.066832,1


## Split data

In [49]:
y = df['Group_AccToParents'].values # dependant variable
X = df.drop(['Group_AccToParents'],axis=1).values   # independant features


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

In [50]:
y

array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
       1, 1, 0, 1, 0, 0], dtype=int64)

## Scale data

In [51]:
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## SVM model

In [52]:
svm = SVC(kernel= 'linear', random_state=1, C=0.1)
svm.fit(X_train, y_train)

SVC(C=0.1, kernel='linear', random_state=1)

In [53]:
y_pred = svm.predict(X_test)
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))

Accuracy: 0.520


In [54]:
y_pred

array([0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       1, 0, 1], dtype=int64)

## Logistic Regression model

In [55]:
lr = LogisticRegression(solver='liblinear', random_state=0)
lr.fit(X_train, y_train)

LogisticRegression(random_state=0, solver='liblinear')

In [56]:
y_pred = lr.predict(X_test)

In [57]:
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))

Accuracy: 0.600


In [58]:
y_pred

array([0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0,
       1, 0, 1], dtype=int64)

In [59]:
print(lr.coef_)

[[ 3.49832060e-01  7.41420820e-03 -9.44756171e-02  1.63716654e-01
   6.31767851e-02 -5.70631861e-02  5.02131988e-02  5.38075875e-02
   1.22148277e-02  9.98888851e-02  2.48324504e-02 -3.48248007e-02
   5.21602080e-02 -4.43008297e-02 -6.61879233e-02 -5.80973542e-02
  -1.52677065e-01 -9.72599207e-02 -1.32980958e-01 -6.85891720e-02
  -1.24796645e-01 -1.86541528e-01 -6.92349239e-02  3.08967417e-03
  -4.77276121e-02 -6.88906729e-02  6.32444659e-02 -1.09676132e-01
  -7.84676878e-03 -7.63610899e-02 -2.83555892e-02 -1.15807115e-01
  -1.65530059e-03 -2.56191505e-02 -1.21230663e-01 -4.70578122e-02
   2.74504193e-02 -9.53224174e-02 -3.90876413e-02 -7.07418359e-02
  -4.09793863e-02 -6.13790928e-02  1.24973374e-01 -9.11582696e-02
  -1.27669536e-01  6.30695055e-02  7.25998430e-02  1.38931283e-01
   7.88349192e-02 -5.61206936e-02  4.97515681e-02  2.33517032e-01
  -4.54465945e-02 -1.36022266e-01  1.20136227e-01  1.40005987e-02
  -2.84234028e-02  3.29366295e-02 -5.70904104e-02  4.47317251e-02
   7.09167

## Decision Tree model

In [60]:
dt = tree.DecisionTreeClassifier()
dt.fit(X_train, y_train)

DecisionTreeClassifier()

In [61]:
y_pred = dt.predict(X_test)

In [62]:
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))

Accuracy: 0.640


In [63]:
y_pred

array([1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1,
       0, 1, 1], dtype=int64)