In [1]:
import warnings

warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np

In [3]:
# NOTE(review): "C:BMI.csv" has no path separator after the drive letter -
# confirm this is the intended location of the dataset.
bmi_csv_path = "C:BMI.csv"
data = pd.read_csv(bmi_csv_path)

# Preview the first rows of the freshly loaded frame.
data.head()

Unnamed: 0,Gender,Height,Weight,Index
0,Male,174,96,4
1,Male,189,87,2
2,Female,185,110,4
3,Female,195,104,3
4,Male,149,61,3


In [4]:
# Inspect column dtypes and non-null counts before any preprocessing.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Gender  500 non-null    object
 1   Height  500 non-null    int64 
 2   Weight  500 non-null    int64 
 3   Index   500 non-null    int64 
dtypes: int64(3), object(1)
memory usage: 15.8+ KB


In [5]:
# Summary statistics for the numeric columns (Height, Weight, Index).
data.describe()

Unnamed: 0,Height,Weight,Index
count,500.0,500.0,500.0
mean,169.944,106.0,3.748
std,16.375261,32.382607,1.355053
min,140.0,50.0,0.0
25%,156.0,80.0,3.0
50%,170.5,106.0,4.0
75%,184.0,136.0,5.0
max,199.0,160.0,5.0


In [6]:
from sklearn.preprocessing import LabelEncoder

# Replace the Gender strings with integer codes. LabelEncoder numbers the
# sorted class labels, so Female -> 0 and Male -> 1.
gender_encoder = LabelEncoder()
data["Gender"] = gender_encoder.fit_transform(data["Gender"])

In [7]:
# Re-inspect the first rows to confirm Gender now holds integer codes.
data.head()

Unnamed: 0,Gender,Height,Weight,Index
0,1,174,96,4
1,1,189,87,2
2,0,185,110,4
3,0,195,104,3
4,1,149,61,3


In [8]:
# Per-column flag: True if the column contains at least one missing value.
data.isna().any()

Gender    False
Height    False
Weight    False
Index     False
dtype: bool

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

from sklearn.model_selection import cross_val_score

In [10]:
from sklearn.model_selection import train_test_split

# "Index" is the label to predict; every other column is a feature.
y = data["Index"]
x = data.drop(columns="Index")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Candidate classifiers as (label, estimator) pairs, all left at their library
# defaults apart from the seed. The decision tree and random forest are
# randomised, so they are seeded (same value as the train/test split) to make
# their scores repeatable on a fresh Restart & Run All.
clf = [('NB ', GaussianNB()),
       ('LogReg ', LogisticRegression()),
       ('DT ', DecisionTreeClassifier(random_state=42)),
       ('KNN ', KNeighborsClassifier()),
       ('SVC', SVC()),
       ('RFC', RandomForestClassifier(random_state=42))]

In [12]:
# Fit every candidate on the training split and record its accuracy on the
# held-out test split; `res` ends up in the same order as `clf`.
res = []
for name, model in clf:
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    # Score once and reuse the value: each .score() call re-runs prediction
    # on the whole test set.
    accuracy = model.score(x_test, y_test)
    print(y_pred, ': Accuracy for ', name, ' = ', accuracy)
    res.append(accuracy)

[5 2 4 4 4 5 5 5 3 5 4 5 2 5 5 2 5 5 4 5 2 5 1 4 4 5 5 5 5 2 5 5 4 4 4 2 5
 2 2 3 5 1 4 3 5 3 4 2 4 4 3 4 2 1 5 3 4 5 5 5 5 5 5 3 0 3 3 4 2 2 2 5 3 2
 5 5 5 4 2 5 5 4 2 5 4 5 2 5 5 2 1 2 5 2 5 2 5 5 1 5] : Accuracy for  NB   =  0.76
[4 2 4 4 5 5 5 5 3 5 4 4 2 5 5 2 5 5 3 5 2 5 2 4 4 5 5 5 4 2 5 5 4 4 4 2 5
 1 3 2 5 2 4 3 5 4 4 3 4 4 4 4 2 1 5 3 3 5 5 4 4 5 5 4 0 2 3 4 2 2 1 4 2 3
 5 5 5 5 2 5 5 4 2 5 4 5 3 4 5 2 1 2 5 3 5 2 5 5 2 5] : Accuracy for  LogReg   =  0.78
[5 2 3 3 5 5 5 5 4 5 4 5 2 5 5 3 5 5 3 5 0 5 1 4 4 5 5 5 5 2 4 5 4 4 4 2 5
 2 4 2 5 2 4 3 5 4 3 4 4 4 5 5 2 1 5 3 3 5 5 4 5 5 4 5 0 2 2 4 2 2 2 5 2 4
 5 5 5 4 2 5 5 3 2 5 4 5 3 5 4 2 1 2 5 4 5 2 5 4 1 5] : Accuracy for  DT   =  0.87
[5 2 4 4 4 5 5 5 4 5 4 5 2 5 5 3 5 5 3 5 2 5 1 4 4 5 5 5 5 2 4 5 4 4 4 2 5
 2 4 2 5 1 4 3 5 4 4 4 4 4 4 4 2 1 5 3 3 5 5 4 5 5 5 5 0 2 3 4 2 2 2 4 2 3
 5 5 5 4 2 5 5 3 2 5 4 5 3 5 4 3 1 2 5 4 5 2 5 5 1 5] : Accuracy for  KNN   =  0.92
[4 2 4 4 4 5 5 5 3 5 4 5 2 5 5 2 5 5 3 5 2 5 2 4 4 5 5 5 5 2 4 

In [13]:
def score(clf, x, y, cv=5):
    """Print cross-validation scores for each named estimator.

    Parameters
    ----------
    clf : iterable of (label, estimator) pairs
        Each estimator is handed to ``cross_val_score``; the label is only
        used in the printed summary line.
    x, y
        Features and target, forwarded unchanged to ``cross_val_score``.
    cv : default 5
        Forwarded as the ``cv`` argument of ``cross_val_score``.

    Returns
    -------
    None
        Nothing is returned; for every pair the per-fold scores are printed,
        followed by their mean.
    """
    for name, estimator in clf:
        fold_scores = cross_val_score(estimator, x, y, cv=cv, n_jobs=1)
        print(fold_scores)
        print(f"Mean {name} = {np.mean(fold_scores)}\n")
        

In [14]:
# Cross-validate every classifier in `clf` on the full dataset (x, y),
# not just the training split.
score(clf, x, y)

[0.65 0.73 0.67 0.71 0.75]
Mean NB  = 0.702

[0.73 0.71 0.77 0.76 0.76]
Mean LogReg  = 0.7459999999999999

[0.89 0.88 0.82 0.82 0.86]
Mean DT  = 0.8539999999999999

[0.94 0.89 0.83 0.89 0.95]
Mean KNN  = 0.9

[0.79 0.81 0.81 0.79 0.85]
Mean SVC = 0.8099999999999999

[0.91 0.87 0.85 0.86 0.87]
Mean RFC = 0.8719999999999999

