In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [3]:
df = pd.read_csv("balanced_migraine_data.csv")

In [4]:
# Typical aura with migraine        0
# Migraine without aura             1
# Familial hemiplegic migraine      2
# Typical aura without migraine     3
# Basilar-type aura                 4
# Sporadic hemiplegic migraine      5
# Other                             6

In [5]:
def changeType(x):
    if(x=="Typical aura with migraine"): return 0
    elif(x=="Migraine without aura"): return 1
    elif(x=="Familial hemiplegic migraine"): return 2
    elif(x=="Typical aura without migraine"): return 3
    elif(x=="Basilar-type aura"): return 4
    elif(x=="Sporadic hemiplegic migraine"): return 5
    else: return 6

In [6]:
df["Type"] = df["Type"].apply(changeType)

In [7]:
df.sample(10)

Unnamed: 0,Age,Duration,Frequency,Location,Character,Intensity,Nausea,Vomit,Phonophobia,Photophobia,...,Vertigo,Tinnitus,Hypoacusis,Diplopia,Defect,Ataxia,Conscience,Paresthesia,DPF,Type
1433,19,1,2,1,1,1,1,0,1,1,...,0,1,0,0,0,0,0,0,0,5
1027,17,2,2,1,1,3,1,0,1,1,...,0,0,0,0,0,0,0,0,0,1
974,41,1,5,1,1,3,1,0,1,1,...,0,0,0,0,0,0,0,0,0,1
1128,51,2,3,2,2,3,1,0,0,0,...,0,0,0,0,0,0,0,0,1,6
970,22,2,5,1,1,3,1,0,1,1,...,0,0,0,0,0,0,0,0,1,1
1077,43,2,1,1,2,3,1,0,0,0,...,0,0,0,0,0,0,0,0,1,6
938,38,1,4,1,1,3,1,0,1,1,...,0,0,0,0,0,0,0,0,0,1
1078,50,2,3,2,1,3,1,0,0,0,...,0,0,0,0,0,0,0,0,0,6
183,50,1,1,1,1,3,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
829,16,1,1,1,1,2,1,0,1,1,...,0,1,0,0,0,0,0,0,1,2


In [8]:
X = df.iloc[:, 0:]
y = df.iloc[:,-1]

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [10]:
from sklearn.preprocessing import StandardScaler

In [11]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier()
param_grid = {'n_neighbors': range(1, 200)}

# Use GridSearchCV to find the best parameter
grid_search = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Print the best parameter
print(f"Best number of neighbors: {grid_search.best_params_['n_neighbors']}")

Best number of neighbors: 1


In [13]:
param_grid = {
    'n_neighbors': range(1, 31),
    'metric': ['euclidean', 'manhattan', 'minkowski']
}

grid_search = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

print(f"Best parameters: {grid_search.best_params_}")

Best parameters: {'metric': 'manhattan', 'n_neighbors': 1}


In [14]:
clf = KNeighborsClassifier(n_neighbors=1, weights="uniform")
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [15]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [16]:
accuracy_score(y_test, y_pred)

0.9826589595375722

In [17]:
precision_score(y_test, y_pred, average="macro")

0.9822166624896657

In [18]:
recall_score(y_test, y_pred, average="macro")

0.9831135272995739

In [19]:
f1_score(y_test, y_pred, average="macro")

0.9825144823747612

In [20]:
from sklearn.model_selection import cross_val_score, cross_validate
cv_results = cross_validate(clf, X, y, cv=5, scoring=['precision_macro', 'recall_macro', 'f1_macro', 'accuracy'])

# Print cross-validation results
print(f"Cross-validated Precision: {cv_results['test_precision_macro'].mean()}")
print(f"Cross-validated Recall: {cv_results['test_recall_macro'].mean()}")
print(f"Cross-validated F1 Score: {cv_results['test_f1_macro'].mean()}")
print(f"Cross-validated Accuracy: {cv_results['test_accuracy'].mean()}")

Cross-validated Precision: 0.9922073297077489
Cross-validated Recall: 0.9919183673469387
Cross-validated F1 Score: 0.9919030345153297
Cross-validated Accuracy: 0.9919008125994806
