In [16]:
# Imports
import os

# Data science and vis tools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Nilearn: Python module for statistical processing of neuroimaging data.
from nilearn import datasets, plotting, image, interfaces

# NiftiMasker is for a single region of interest
# NiftiMapsMasker is for a probabilistic atlas
# NiftiLabelsMasker is for a discrete atlas
from nilearn.maskers import NiftiMasker, NiftiMapsMasker, NiftiLabelsMasker 

import nibabel as nib # Read and write access to common neuroimaging file formats
import nitime as nit # library for the analysis of time series in the field of neuroimaging. 

# Custom python files
from hcp_data import get_hcp_cleaned_data
import component_plotting as cp

## Get Cleaned Data

In [17]:
# Load the cleaned HCP data through the project-local helper imported from hcp_data.
data = get_hcp_cleaned_data()

In [18]:
# Show the loaded table; a bare last expression is rendered with the notebook's rich display.
data

Unnamed: 0_level_0,Gender,Age,netmat
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
100206,M,26-30,"[[0.0, 0.61676, 9.5727, -5.4959, 0.34639, 3.00..."
100307,F,26-30,"[[0.0, -0.29664, 17.317, -9.0467, -0.28723, 1...."
100408,M,31-35,"[[0.0, 1.6486, 6.6189, -8.8877, 1.4337, 1.006,..."
100610,M,26-30,"[[0.0, -0.90275, 7.7215, -8.3907, 3.3144, 2.93..."
101006,F,31-35,"[[0.0, -0.088768, 9.4979, -10.412, 1.0646, 4.3..."
...,...,...,...
992673,F,31-35,"[[0.0, -0.11536, 7.1338, -5.5322, 0.34004, 1.6..."
992774,M,31-35,"[[0.0, 0.25353, 8.0265, -6.2072, 4.1589, 1.582..."
993675,F,26-30,"[[0.0, -1.0378, 10.709, -3.3224, -0.090704, 2...."
994273,M,26-30,"[[0.0, 0.64613, 11.471, -5.5137, 1.9357, 4.259..."


## KNN Implementation

In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [8]:
# Features: one flattened copy of each subject's 'netmat' entry (via its .flatten() method).
X = [netmat.flatten() for netmat in data['netmat']]
# Labels: the matching 'Gender' value for each subject, in the same row order.
y = data['Gender'].to_list()

In [9]:
# Split features and labels into train and test parts.
# The fixed random_state keeps the split identical across reruns.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

In [10]:
# k-nearest-neighbours classifier configured with n_neighbors=10
knn10 = KNeighborsClassifier(n_neighbors=10)

In [11]:
# Fit on the training split, then predict labels for the test split.
# fit() returns the estimator itself, so the two calls can be chained.
y_pred_2 = knn10.fit(X_train, y_train).predict(X_test)

In [12]:
# Fraction of test labels predicted correctly, scaled to a percentage for display.
knn10_accuracy_pct = accuracy_score(y_test, y_pred_2) * 100
print("Accuracy with k=10", knn10_accuracy_pct)

Accuracy with k=10 84.86055776892431


In [13]:
# Sweep n_neighbors over 1..24: refit on the training split and report test-split accuracy.
# NOTE(review): every k is scored on the same test split, so choosing the "best" k
# from these numbers would give an optimistic estimate; confirm with a validation set.
for i in range(1, 25):
    # fit() returns the fitted estimator, so `knn` still holds the trained model.
    knn = KNeighborsClassifier(n_neighbors=i).fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    print("Accuracy with k=" + str(i), accuracy_score(y_test, y_pred) * 100)

Accuracy with k=1 80.87649402390437
Accuracy with k=2 83.26693227091634
Accuracy with k=3 79.6812749003984
Accuracy with k=4 85.65737051792829
Accuracy with k=5 81.27490039840637
Accuracy with k=6 86.85258964143426
Accuracy with k=7 80.0796812749004
Accuracy with k=8 82.07171314741036
Accuracy with k=9 80.47808764940238
Accuracy with k=10 84.86055776892431
Accuracy with k=11 80.47808764940238
Accuracy with k=12 85.65737051792829
Accuracy with k=13 81.27490039840637
Accuracy with k=14 84.06374501992032
Accuracy with k=15 81.67330677290838
Accuracy with k=16 83.26693227091634
Accuracy with k=17 80.0796812749004
Accuracy with k=18 84.06374501992032
Accuracy with k=19 82.47011952191235
Accuracy with k=20 84.86055776892431
Accuracy with k=21 82.07171314741036
Accuracy with k=22 84.4621513944223
Accuracy with k=23 84.06374501992032
Accuracy with k=24 82.86852589641434


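**About 85 percent accuracy is not bad, but the choice of n (`n_neighbors`) makes little difference: accuracy stays between roughly 80 and 87 percent for k = 1 to 24.**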