In [92]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier


In [93]:
df = pd.read_csv("balanced_migraine_data.csv")

In [94]:
df.duplicated().sum()

734

In [95]:
df=df.drop_duplicates()

In [96]:
label_encoder = LabelEncoder()
df["Type"] = label_encoder.fit_transform(df["Type"])

In [97]:
X = df.drop(columns=['Intensity'])
y_intensity = df['Intensity']  

In [98]:
df.head()

Unnamed: 0,Age,Duration,Frequency,Location,Character,Intensity,Nausea,Vomit,Phonophobia,Photophobia,...,Vertigo,Tinnitus,Hypoacusis,Diplopia,Defect,Ataxia,Conscience,Paresthesia,DPF,Type
0,30,1,5,1,1,2,1,0,1,1,...,0,0,0,0,0,0,0,0,0,5
1,50,3,5,1,1,3,1,1,1,1,...,1,0,0,0,0,0,0,0,0,5
2,53,2,1,1,1,2,1,1,1,1,...,0,0,0,0,0,0,0,0,0,5
3,45,3,5,1,1,3,1,0,1,1,...,1,0,0,0,0,0,0,0,0,5
4,53,1,1,1,1,2,1,0,1,1,...,0,0,0,0,0,0,0,0,1,5


In [99]:
X_train_int, X_test_int, y_train_int, y_test_int = train_test_split(X, y_intensity, test_size=0.2, random_state=42)

In [100]:
scaler = StandardScaler()
X_train_int = scaler.fit_transform(X_train_int)
X_test_int = scaler.transform(X_test_int)

In [101]:
log = LogisticRegression(C=1, penalty="l1", solver="saga", random_state=42)
log.fit(X_train_int, y_train_int)
y_pred = log.predict(X_test_int)
print("Accuracy:", accuracy_score(y_test_int, y_pred))
print("Precision:", precision_score(y_test_int, y_pred, average="macro"))
print("Recall:", recall_score(y_test_int, y_pred, average="macro"))
print("F1 Score:", f1_score(y_test_int, y_pred, average="macro"))

Accuracy: 0.8010471204188482
Precision: 0.6041666666666667
Recall: 0.6116071428571428
F1 Score: 0.6075483599663583


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [102]:
# Random Forest Classifier for Intensity
rf_model_int = RandomForestClassifier(bootstrap=True, max_depth=30, min_samples_leaf=1, min_samples_split=2, n_estimators=50, random_state=42)

In [103]:
rf_model_int.fit(X_train_int, y_train_int)

In [104]:
y_pred_int = rf_model_int.predict(X_test_int)
print("Accuracy:", accuracy_score(y_test_int, y_pred_int))
print("Precision:", precision_score(y_test_int, y_pred_int, average="macro"))
print("Recall:", recall_score(y_test_int, y_pred_int, average="macro"))
print("F1 Score:", f1_score(y_test_int, y_pred_int, average="macro"))
print(classification_report(y_test_int, y_pred_int))

Accuracy: 0.7696335078534031
Precision: 0.6648858160486067
Recall: 0.6659226190476191
F1 Score: 0.6653829244702365
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        31
           1       0.33      0.33      0.33         6
           2       0.49      0.50      0.49        42
           3       0.84      0.83      0.83       112

    accuracy                           0.77       191
   macro avg       0.66      0.67      0.67       191
weighted avg       0.77      0.77      0.77       191



In [105]:
# Gradient Boosting Classifier for Intensity
gb_model_int = GradientBoostingClassifier(n_estimators=100, learning_rate=0.2, max_depth=3, min_samples_leaf=1, min_samples_split=2, subsample=1.0, random_state=42)
gb_model_int.fit(X_train_int, y_train_int)
y_pred_int_gb = gb_model_int.predict(X_test_int)

print("Gradient Boosting Classifier for Intensity")
print("Accuracy:", accuracy_score(y_test_int, y_pred_int_gb))
print("Precision:", precision_score(y_test_int, y_pred_int_gb, average="macro"))
print("Recall:", recall_score(y_test_int, y_pred_int_gb, average="macro"))
print("F1 Score:", f1_score(y_test_int, y_pred_int_gb, average="macro"))
print(classification_report(y_test_int, y_pred_int_gb))

Gradient Boosting Classifier for Intensity
Accuracy: 0.8324607329842932
Precision: 0.7818813131313131
Recall: 0.8333333333333334
F1 Score: 0.8037299909392933
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        31
           1       0.62      0.83      0.71         6
           2       0.61      0.64      0.63        42
           3       0.89      0.86      0.87       112

    accuracy                           0.83       191
   macro avg       0.78      0.83      0.80       191
weighted avg       0.84      0.83      0.83       191



In [106]:
knn_model_int = KNeighborsClassifier()
# Hyperparameter tuning using Grid Search
param_grid = {'n_neighbors': np.arange(1, 25)}
knn_gs_int = GridSearchCV(knn_model_int, param_grid, cv=5)
knn_gs_int.fit(X_train_int, y_train_int)

In [107]:
print("Best parameters for Intensity:", knn_gs_int.best_params_)

Best parameters for Intensity: {'n_neighbors': 14}


In [108]:
knn = KNeighborsClassifier(n_neighbors=1)

In [109]:
knn.fit(X_train_int, y_train_int)
y_pred = knn.predict(X_test_int)
print("Accuracy:", accuracy_score(y_test_int, y_pred))
print("Precision:", precision_score(y_test_int, y_pred, average="macro"))
print("Recall:", recall_score(y_test_int, y_pred, average="macro"))
print("F1 Score:", f1_score(y_test_int, y_pred, average="macro"))
print("Classification Report:\n", classification_report(y_test_int, y_pred))

Accuracy: 0.7801047120418848
Precision: 0.6694677871148459
Recall: 0.6889880952380952
F1 Score: 0.6769883969914118
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        31
           1       0.29      0.33      0.31         6
           2       0.51      0.62      0.56        42
           3       0.88      0.80      0.84       112

    accuracy                           0.78       191
   macro avg       0.67      0.69      0.68       191
weighted avg       0.80      0.78      0.79       191



In [110]:
from sklearn.feature_selection import SelectKBest, f_classif

selector = SelectKBest(f_classif, k='all')
X_train_int = selector.fit_transform(X_train_int, y_train_int)
X_test_int = selector.transform(X_test_int)

  f = msb / msw


In [111]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
param_grid = {
    'C': [0.1, 1, 10, 100],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'],
    'coef0': [0, 0.1, 0.5, 1]
}

grid_search = GridSearchCV(SVC(kernel='poly'), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train_int, y_train_int)

best_params = grid_search.best_params_
print("Best parameters:", best_params)


Best parameters: {'C': 1, 'coef0': 0.1, 'degree': 2, 'gamma': 'auto'}


In [112]:
from sklearn.svm import SVC
model = SVC(kernel="poly", random_state=42, C=100, coef0 = 1, degree=3, gamma="scale")
model.fit(X_train_int, y_train_int)

In [113]:
y_pred = model.predict(X_test_int)

In [114]:
print("Accuracy:", accuracy_score(y_test_int, y_pred))
print("Precision:", precision_score(y_test_int, y_pred, average="macro"))
print("Recall:", recall_score(y_test_int, y_pred, average="macro"))
print("F1 Score:", f1_score(y_test_int, y_pred, average="macro"))
print("Classification Report:\n", classification_report(y_test_int, y_pred))

Accuracy: 0.7905759162303665
Precision: 0.6911056105610561
Recall: 0.7366071428571428
F1 Score: 0.7080067360685853
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        31
           1       0.33      0.50      0.40         6
           2       0.54      0.64      0.59        42
           3       0.89      0.80      0.85       112

    accuracy                           0.79       191
   macro avg       0.69      0.74      0.71       191
weighted avg       0.81      0.79      0.80       191



In [115]:
dst = DecisionTreeClassifier()
dst.fit(X_train_int, y_train_int)
y_pred = dst.predict(X_test_int)
print("Accuracy:", accuracy_score(y_test_int, y_pred))
print("Precision:", precision_score(y_test_int, y_pred, average="macro"))
print("Recall:", recall_score(y_test_int, y_pred, average="macro"))
print("F1 Score:", f1_score(y_test_int, y_pred, average="macro"))
print("Classification Report:\n", classification_report(y_test_int, y_pred))

Accuracy: 0.7643979057591623
Precision: 0.700382262996942
Recall: 0.703125
F1 Score: 0.7015720081135903
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        31
           1       0.50      0.50      0.50         6
           2       0.47      0.50      0.48        42
           3       0.83      0.81      0.82       112

    accuracy                           0.76       191
   macro avg       0.70      0.70      0.70       191
weighted avg       0.77      0.76      0.77       191

