# Machine Learning models on connectivity data

In this notebook: 
- Necessary imports
- SVM model 
- Logistic Regression model
- Decision Tree model

## Imports

In [1]:
import os       # using operating system dependent functionality (folders)
import pandas as pd # data analysis and manipulation
import numpy as np    # numerical computing (manipulating and performing operations on arrays of data)
import copy     # Can Copy and Deepcopy files so original file is untouched.
import seaborn as sn
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, '../eegyolk') # path to helper functions
import helper_functions as hf # library useful for eeg and erp data cleaning
#import initialization_functions #library to import data
import epod_helper

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn import tree

In [2]:
df = pd.read_csv('df_connectivity.csv', sep = ',')

In [3]:
df

Unnamed: 0,32,64,65,96,97,98,128,129,130,131,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,Group_AccToParents
0,0.190203,0.086275,0.038614,0.242277,0.177405,0.117387,0.183142,0.203663,0.144748,0.185349,...,0.015446,0.025154,0.033981,0.026037,0.055825,0.027582,0.129523,0.108782,0.116284,1
1,0.019674,0.078275,0.05337,0.050021,0.084345,0.046463,0.082671,0.104646,0.123273,0.055881,...,0.172666,0.0563,0.101716,0.117832,0.106111,0.039975,0.096275,0.083926,0.065299,0


## Split data

In [4]:
y = df['Group_AccToParents'].values # dependant variable
X = df.drop(['Group_AccToParents'],axis=1).values   # independant features


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
y

array([1, 0], dtype=int64)

## Scale data

In [6]:
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## SVM model

In [7]:
svm = SVC(kernel= 'linear', random_state=1, C=0.1)
svm.fit(X_train, y_train)

ValueError: The number of classes has to be greater than one; got 1 class

In [None]:
y_pred = svm.predict(X_test)
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))

In [8]:
y_pred

NameError: name 'y_pred' is not defined

## Logistic Regression model

In [10]:
lr = LogisticRegression(solver='liblinear', random_state=0)
lr.fit(X_train, y_train)

LogisticRegression(random_state=0, solver='liblinear')

In [11]:
y_pred = lr.predict(X_test)

In [12]:
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))

Accuracy: 0.833


In [13]:
y_pred

array([0, 0, 1, 1, 0, 1], dtype=int64)

In [18]:
print(lr.coef_)

[[-5.14953576e-03 -2.77462255e-03 -1.00069983e-02 -1.18191132e-02
  -9.94070994e-03 -1.30945375e-02 -2.58100193e-02 -1.69386666e-02
  -8.08934859e-03 -1.50886081e-02 -1.80055660e-02 -2.03021165e-02
  -4.78572162e-03 -8.53964509e-03 -6.53431313e-03 -7.96875862e-03
  -1.20840989e-02  1.53384910e-03 -4.33872643e-02 -2.19073796e-02
  -2.50808209e-02 -1.51817318e-02 -2.15998313e-02 -2.05969932e-02
  -1.80484757e-02 -1.14540060e-02 -4.06972570e-03 -1.36692870e-02
  -6.10947256e-03 -5.70386988e-03 -1.11813631e-02 -2.03130421e-02
  -1.40701783e-02  1.83243945e-02  1.25360948e-02  2.16307959e-02
   5.51880275e-02  6.53926746e-03  2.46286596e-02  4.74691494e-02
   7.68423470e-02  2.51163328e-02  4.38036359e-02  7.28593404e-02
   4.26400108e-02  4.39943164e-02  3.74946574e-02  2.62622062e-04
  -1.26153010e-02  8.32849246e-03 -4.37237176e-02  2.17883665e-02
   2.95457171e-02  3.46799062e-03  3.04534779e-02  3.44774526e-02
   4.47323045e-02  5.16134815e-02 -3.69683342e-02  3.92147799e-02
  -1.39614

## Decision Tree model

In [14]:
dt = tree.DecisionTreeClassifier()
dt.fit(X_train, y_train)

DecisionTreeClassifier()

In [15]:
y_pred = dt.predict(X_test)

In [16]:
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))

Accuracy: 0.167


In [17]:
y_pred

array([0, 1, 0, 0, 0, 0], dtype=int64)