Import Libraries

In [1]:
import pandas as pd
dataset = pd.read_csv("Datasets/Training.csv")


Printing the data that is inside the Training Dataset

In [2]:
print(dataset.head())

   itching  skin_rash  nodal_skin_eruptions  continuous_sneezing  shivering  \
0        1          1                     1                    0          0   
1        0          1                     1                    0          0   
2        1          0                     1                    0          0   
3        1          1                     0                    0          0   
4        1          1                     1                    0          0   

   chills  joint_pain  stomach_pain  acidity  ulcers_on_tongue  ...  \
0       0           0             0        0                 0  ...   
1       0           0             0        0                 0  ...   
2       0           0             0        0                 0  ...   
3       0           0             0        0                 0  ...   
4       0           0             0        0                 0  ...   

   blackheads  scurring  skin_peeling  silver_like_dusting  \
0           0         0             

In [3]:
dataset.shape

(4920, 133)

In [4]:
len(dataset['prognosis'].unique())

41

In [5]:
dataset['prognosis'].unique

<bound method Series.unique of 0                              Fungal infection
1                              Fungal infection
2                              Fungal infection
3                              Fungal infection
4                              Fungal infection
                         ...                   
4915    (vertigo) Paroymsal  Positional Vertigo
4916                                       Acne
4917                    Urinary tract infection
4918                                  Psoriasis
4919                                   Impetigo
Name: prognosis, Length: 4920, dtype: object>

Train Test Split

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [7]:
x = dataset.drop('prognosis', axis=1)
y = dataset['prognosis']

In [8]:
y

0                              Fungal infection
1                              Fungal infection
2                              Fungal infection
3                              Fungal infection
4                              Fungal infection
                         ...                   
4915    (vertigo) Paroymsal  Positional Vertigo
4916                                       Acne
4917                    Urinary tract infection
4918                                  Psoriasis
4919                                   Impetigo
Name: prognosis, Length: 4920, dtype: object

In [9]:
le = LabelEncoder()
le.fit(y)
y = le.transform(y)

In [10]:
y

array([15, 15, 15, ..., 38, 35, 27])

In [11]:
x_train,x_test,y_train,y_test = train_test_split(x,y, test_size=0.3, random_state = 20)

In [12]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((3444, 132), (1476, 132), (3444,), (1476,))

Training top models

In [14]:
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB

from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np


Create a dictionary to store models

In [17]:
models = {
    "SVC":SVC(kernel='linear'),
    "RandomForest": RandomForestClassifier(n_estimators=100, random_state=42),
    "GradientBoosting" : GradientBoostingClassifier(n_estimators=100,random_state=42),
    "KNeighbors" : KNeighborsClassifier(n_neighbors=5),
    "MultinomialNB": MultinomialNB()
}
for model_name, model in models.items():
    # train model
    model.fit(x_train, y_train)
    
    # test model
    predictions = model.predict(x_test)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_test,  predictions)
    
    # Calculate confusion matrix
    cm = confusion_matrix(y_test, predictions)
    
    print(f"{model_name} accuracy : {accuracy}")
    print(f"{model_name} Confusion Matrix: ")
    print(np.array2string(cm, separator=', '))

SVC accuracy : 1.0
SVC Confusion Matrix: 
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
RandomForest accuracy : 1.0
RandomForest Confusion Matrix: 
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
GradientBoosting accuracy : 1.0
GradientBoosting Confusion Matrix: 
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
KNeighbors accuracy : 1.0
KNeighbors Confusion Matrix: 
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, 

Single Prediction.

In [18]:
svc = SVC(kernel= 'linear')
svc.fit(x_train, y_train)
ypred = svc.predict(x_test)
accuracy_score(y_test, ypred)

1.0

In [21]:
import pickle
"""pickle.dump(svc, open("models/svc.pkl", 'wb'))"""

Load Model

In [22]:
svc = pickle.load(open("models/svc.pkl", 'rb'))


In [26]:
x_test.iloc[0].values


array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])