In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

In [2]:
df = pd.read_csv("data.csv")
df.head()

Unnamed: 0,Age,Duration,Frequency,Location,Character,Intensity,Nausea,Vomit,Phonophobia,Photophobia,...,Vertigo,Tinnitus,Hypoacusis,Diplopia,Defect,Ataxia,Conscience,Paresthesia,DPF,Type
0,30,1,5,1,1,2,1,0,1,1,...,0,0,0,0,0,0,0,0,0,Typical aura with migraine
1,50,3,5,1,1,3,1,1,1,1,...,1,0,0,0,0,0,0,0,0,Typical aura with migraine
2,53,2,1,1,1,2,1,1,1,1,...,0,0,0,0,0,0,0,0,0,Typical aura with migraine
3,45,3,5,1,1,3,1,0,1,1,...,1,0,0,0,0,0,0,0,0,Typical aura with migraine
4,53,1,1,1,1,2,1,0,1,1,...,0,0,0,0,0,0,0,0,1,Typical aura with migraine


In [3]:
df["Location"].value_counts()

Location
1    371
0     20
2      9
Name: count, dtype: int64

In [4]:
df.shape

(400, 24)

In [None]:
# Attribute Information:
# 1) Age: Patient's age
# 2) Duration: duration of symptoms in last episode in days
# 3) Frequency: Frequency of episodes per month
# 4) Location: Unilateral or bilateral pain location (None - 0, Unilateral - 1, Bilateral - 2)
# 5) Character: Throbbing or constant pain (None - 0, Thobbing - 1, Constant - 2)
# 6) Intensity: Pain intensity, i.e., mild, medium, or severe (None - 0, Mild - 1, Medium - 2, Severe - 3)
# 7) Nausea: Nauseous feeling (Not - 0, Yes - 1)
# 8) Vomit: Vomiting (Not - 0, Yes - 1)
# 9) Phonophobia: Noise sensitivity (Not - 0, Yes - 1)
# 10) Photophobia: Light sensitivity (Not - 0, Yes - 1)
# 11) Visual: Number of reversible visual symptoms
# 12) Sensory: Number of reversible sensory symptoms
# 13) Dysphasia: Lack of speech coordination (Not - 0, Yes - 1)
# 14) Dysarthria: Disarticulated sounds and words (Not - 0, Yes - 1)
# 15) Vertigo: Dizziness (Not - 0, Yes - 1)
# 16) Tinnitus: Ringing in the ears (Not - 0, Yes - 1)
# 17) Hypoacusis: Hearing loss (Not - 0, Yes - 1)
# 18) Diplopia: Double vision (Not - 0, Yes - 1)
# 19) Visual defect: Simultaneous frontal eye field and nasal field defect and in both eyes (Not - 0, Yes - 1)
# 20) Ataxia: Lack of muscle control (Not - 0, Yes - 1)
# 21) Conscience: Jeopardized conscience (Not - 0, Yes - 1)
# 22) Paresthesia: Simultaneous bilateral paresthesia (Not - 0, Yes - 1)
# 23) DPF: Family background (Not - 0, Yes - 1)
# 24) Type: Diagnosis of migraine type (Typical aura with migraine, Migraine without aura, Typical aura without migraine, Familial hemiplegic migraine, Sporadic hemiplegic migraine, Basilar-type aura, Other) [247, 60, 20, 24, 14, 18, 17]


In [5]:
df.columns

Index(['Age', 'Duration', 'Frequency', 'Location', 'Character', 'Intensity',
       'Nausea', 'Vomit', 'Phonophobia', 'Photophobia', 'Visual', 'Sensory',
       'Dysphasia', 'Dysarthria', 'Vertigo', 'Tinnitus', 'Hypoacusis',
       'Diplopia', 'Defect', 'Ataxia', 'Conscience', 'Paresthesia', 'DPF',
       'Type'],
      dtype='object')

In [8]:
df["Type"].value_counts()

Type
Typical aura with migraine       247
Migraine without aura             60
Familial hemiplegic migraine      24
Typical aura without migraine     20
Basilar-type aura                 18
Other                             17
Sporadic hemiplegic migraine      14
Name: count, dtype: int64

In [10]:
X = df.drop(columns=['Type'])
X.head()

Unnamed: 0,Age,Duration,Frequency,Location,Character,Intensity,Nausea,Vomit,Phonophobia,Photophobia,...,Dysarthria,Vertigo,Tinnitus,Hypoacusis,Diplopia,Defect,Ataxia,Conscience,Paresthesia,DPF
0,30,1,5,1,1,2,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,50,3,5,1,1,3,1,1,1,1,...,0,1,0,0,0,0,0,0,0,0
2,53,2,1,1,1,2,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
3,45,3,5,1,1,3,1,0,1,1,...,0,1,0,0,0,0,0,0,0,0
4,53,1,1,1,1,2,1,0,1,1,...,0,0,0,0,0,0,0,0,0,1


In [12]:
y = df.iloc[:,-1]
y.head()

0    Typical aura with migraine
1    Typical aura with migraine
2    Typical aura with migraine
3    Typical aura with migraine
4    Typical aura with migraine
Name: Type, dtype: object

In [13]:
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)
resampled_data = pd.concat([pd.DataFrame(X_resampled, columns=X.columns), pd.DataFrame(y_resampled, columns=['Type'])], axis=1)

In [14]:
resampled_data.to_csv('balanced_migraine_data.csv', index=False)

In [15]:
migraine = pd.read_csv("balanced_migraine_data.csv")

In [16]:
migraine.shape

(1729, 24)

In [17]:
migraine.head()

Unnamed: 0,Age,Duration,Frequency,Location,Character,Intensity,Nausea,Vomit,Phonophobia,Photophobia,...,Vertigo,Tinnitus,Hypoacusis,Diplopia,Defect,Ataxia,Conscience,Paresthesia,DPF,Type
0,30,1,5,1,1,2,1,0,1,1,...,0,0,0,0,0,0,0,0,0,Typical aura with migraine
1,50,3,5,1,1,3,1,1,1,1,...,1,0,0,0,0,0,0,0,0,Typical aura with migraine
2,53,2,1,1,1,2,1,1,1,1,...,0,0,0,0,0,0,0,0,0,Typical aura with migraine
3,45,3,5,1,1,3,1,0,1,1,...,1,0,0,0,0,0,0,0,0,Typical aura with migraine
4,53,1,1,1,1,2,1,0,1,1,...,0,0,0,0,0,0,0,0,1,Typical aura with migraine


In [18]:
migraine["Type"].value_counts()

Type
Typical aura with migraine       247
Migraine without aura            247
Basilar-type aura                247
Sporadic hemiplegic migraine     247
Familial hemiplegic migraine     247
Other                            247
Typical aura without migraine    247
Name: count, dtype: int64

In [20]:
migraine.isnull().sum()

Age            0
Duration       0
Frequency      0
Location       0
Character      0
Intensity      0
Nausea         0
Vomit          0
Phonophobia    0
Photophobia    0
Visual         0
Sensory        0
Dysphasia      0
Dysarthria     0
Vertigo        0
Tinnitus       0
Hypoacusis     0
Diplopia       0
Defect         0
Ataxia         0
Conscience     0
Paresthesia    0
DPF            0
Type           0
dtype: int64

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [22]:
from sklearn.tree import DecisionTreeClassifier
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)

In [25]:
y_pred = dtc.predict(X_test)

In [40]:
from sklearn.metrics import confusion_matrix, precision_score, accuracy_score, recall_score, f1_score

In [44]:
accuracy_score(y_test, y_pred)

0.8375

In [45]:
y_pred

array(['Typical aura with migraine', 'Migraine without aura',
       'Typical aura with migraine', 'Typical aura with migraine',
       'Typical aura without migraine', 'Migraine without aura',
       'Typical aura with migraine', 'Familial hemiplegic migraine',
       'Typical aura with migraine', 'Typical aura with migraine',
       'Typical aura with migraine', 'Typical aura with migraine',
       'Typical aura with migraine', 'Migraine without aura',
       'Typical aura with migraine', 'Typical aura with migraine',
       'Typical aura with migraine', 'Basilar-type aura',
       'Typical aura with migraine', 'Typical aura with migraine',
       'Typical aura with migraine', 'Familial hemiplegic migraine',
       'Typical aura with migraine', 'Basilar-type aura',
       'Typical aura with migraine', 'Typical aura with migraine',
       'Migraine without aura', 'Typical aura with migraine',
       'Typical aura with migraine', 'Typical aura with migraine',
       'Migraine without a

In [46]:
y_test.shape

(80,)

In [47]:
confusion_matrix(y_test, y_pred)

array([[ 2,  1,  0,  0,  0,  2,  0],
       [ 0,  0,  0,  1,  1,  3,  0],
       [ 0,  0, 12,  0,  0,  0,  0],
       [ 0,  1,  0,  0,  0,  1,  0],
       [ 0,  0,  0,  0,  1,  1,  0],
       [ 1,  1,  0,  0,  0, 50,  0],
       [ 0,  0,  0,  0,  0,  0,  2]], dtype=int64)

In [55]:
precision_score(y_test, y_pred, average="macro")

0.5776942355889724

In [56]:
recall_score(y_test, y_pred, average="macro")

0.5516483516483517

In [57]:
f1_score(y_test, y_pred, average="macro")

0.5596330275229358

In [58]:
confusion_matrix(y_train, dtc.predict(X_train))

array([[ 13,   0,   0,   0,   0,   0,   0],
       [  0,  19,   0,   0,   0,   0,   0],
       [  0,   0,  48,   0,   0,   0,   0],
       [  0,   0,   0,  15,   0,   0,   0],
       [  0,   0,   0,   0,  12,   0,   0],
       [  0,   0,   0,   0,   0, 195,   0],
       [  0,   0,   0,   0,   0,   0,  18]], dtype=int64)

In [34]:
accuracy_score(y_train, dtc.predict(X_train))

1.0

In [None]:
df.columns

Index(['Age', 'Duration', 'Frequency', 'Location', 'Character', 'Intensity',
       'Nausea', 'Vomit', 'Phonophobia', 'Photophobia', 'Visual', 'Sensory',
       'Dysphasia', 'Dysarthria', 'Vertigo', 'Tinnitus', 'Hypoacusis',
       'Diplopia', 'Defect', 'Ataxia', 'Conscience', 'Paresthesia', 'DPF',
       'Type'],
      dtype='object')

In [37]:
precision_score(y_train, dtc.predict(X_train), average=None)

array([1., 1., 1., 1., 1., 1., 1.])