In [78]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score,accuracy_score,confusion_matrix,classification_report
from sklearn.preprocessing import PolynomialFeatures
from sklearn.neighbors import KNeighborsClassifier

In [79]:
# Load the social-network ads data set from the working directory and preview it.
ads_csv_path = "Social_Network_Ads.csv"
df = pd.read_csv(ads_csv_path)
df

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


In [80]:
# Remove the "User ID" column; it is not used as a model input further down.
# Note: df is rebound here, so re-running this cell on the trimmed frame will
# fail because the column no longer exists.
df = df.drop(columns=["User ID"])

In [81]:
# Re-display the frame to confirm the "User ID" column has been removed.
df

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0
...,...,...,...,...
395,Female,46,41000,1
396,Male,51,23000,1
397,Female,50,20000,1
398,Male,36,33000,0


In [82]:
# Count the distinct values in each column.
df.nunique()

Gender               2
Age                 43
EstimatedSalary    117
Purchased            2
dtype: int64

In [83]:
# One-hot encode the categorical column (Gender) into indicator columns so that
# every remaining column is numeric/boolean.
df = pd.get_dummies(data=df)
df

Unnamed: 0,Age,EstimatedSalary,Purchased,Gender_Female,Gender_Male
0,19,19000,0,False,True
1,35,20000,0,False,True
2,26,43000,0,True,False
3,27,57000,0,True,False
4,19,76000,0,False,True
...,...,...,...,...,...
395,46,41000,1,True,False
396,51,23000,1,False,True
397,50,20000,1,True,False
398,36,33000,0,False,True


In [84]:
# Target is the purchase flag; predictors are age, salary and both gender dummies.
feature_columns = ['Age', 'EstimatedSalary', 'Gender_Female', 'Gender_Male']
y = df['Purchased']
x = df[feature_columns]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=10
)

In [85]:
# Baseline k-nearest-neighbours classifier with k = 3, scored on the held-out rows.
# NOTE(review): the features are passed in unscaled, and EstimatedSalary values
# (tens of thousands) are far larger than Age values, so salary presumably
# dominates the neighbour distance -- consider standardising the features.
model = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)
y_predict = model.predict(x_test)
ac = accuracy_score(y_test, y_predict)
print(ac)

0.85


In [86]:
# Sweep k = 1..21 and record the test accuracy for each neighbour count.
#
# The split uses fixed arguments and a fixed random_state, so it produces the
# same partition on every call. Build it once up front instead of recomputing
# it on each of the 21 iterations.
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size = 0.2, random_state=10)

accu_score = []  # (k, accuracy) pairs, in increasing k

for i in range(1,22):
    model = KNeighborsClassifier(n_neighbors=i)
    model.fit(x_train, y_train)
    y_predict = model.predict(x_test)
    ac = accuracy_score(y_test,y_predict)
    accu_score.append((i,ac))
    print(i, ac)

1 0.85
2 0.825
3 0.85
4 0.8
5 0.85
6 0.7875
7 0.875
8 0.85
9 0.85
10 0.8625
11 0.8625
12 0.85
13 0.8375
14 0.825
15 0.825
16 0.8125
17 0.825
18 0.825
19 0.85
20 0.825
21 0.8125


# Offline and MCQ: Important Code Below

In [87]:
# Hand-made ground truth and predictions over the two labels 9 and 10.
actual = [10, 10, 10, 9, 10, 10, 10, 9, 10, 10]
predict = [10, 10, 10, 10, 9, 9, 10, 10, 10, 10]

# Score the toy predictions; the matrix rows/columns follow the order [9, 10].
toy_accuracy = accuracy_score(actual, predict)
toy_matrix = confusion_matrix(actual, predict, labels=[9, 10])

print("Accuracy Score: ", toy_accuracy)
print("Confusion Matrix: \n", toy_matrix)

Accuracy Score:  0.6
Confusion Matrix: 
 [[0 2]
 [2 6]]


In [88]:
actual  = [10,10,10,9,10,10,10,9,10,10]
predict = [10,10,10,10,9,9,10,10,10,10]


def labeled_confusion_matrix(y_true, y_pred, labels):
    """Build a confusion matrix and a labelled table for one label order.

    The same ``labels`` list drives both the matrix layout and the table's
    row/column captions, so the captions cannot drift out of step with the
    matrix.

    Parameters:
        y_true: sequence of actual labels.
        y_pred: sequence of predicted labels.
        labels: label values in the order the rows/columns should appear.

    Returns:
        (matrix, table): the raw confusion matrix, and a DataFrame of it with
        rows named 'Actual <label>' and columns named 'Predicted <label>'.
    """
    matrix = confusion_matrix(y_true, y_pred, labels=labels)
    table = pd.DataFrame(
        matrix,
        index=[f'Actual {i}' for i in labels],
        columns=[f'Predicted {i}' for i in labels],
    )
    return matrix, table


# Calculate accuracy
accu_score = accuracy_score(actual, predict)
print("Accuracy Score = ", accu_score)

# Confusion matrix in ascending label order (9 first, then 10)
labels = sorted(set(actual + predict))
cm, cm_df = labeled_confusion_matrix(actual, predict, labels)
print("Confusion Matrix:\n", cm)
print("\nConfusion Matrix Table:\n", cm_df)


# Reversed confusion matrix: label 10 comes first, so it occupies row/column 0
print("_______________________________________________________________")
labels = [10,9]
cm_r, cm_df_r = labeled_confusion_matrix(actual, predict, labels)
print("Confusion Reversed Matrix:\n", cm_r)
print("\nConfusion Reversed Matrix Table:\n", cm_df_r)

Accuracy Score =  0.6
Confusion Matrix:
 [[0 2]
 [2 6]]

Confusion Matrix Table:
            Predicted 9  Predicted 10
Actual 9             0             2
Actual 10            2             6
_______________________________________________________________
Confusion Reversed Matrix:
 [[6 2]
 [2 0]]

Confusion Reversed Matrix Table:
            Predicted 10  Predicted 9
Actual 10             6            2
Actual 9              2            0


In [89]:
# Print the per-class precision, recall, F1-score and support for the toy
# labels, followed by the overall accuracy and the macro/weighted averages.
print(classification_report(actual,predict))

              precision    recall  f1-score   support

           9       0.00      0.00      0.00         2
          10       0.75      0.75      0.75         8

    accuracy                           0.60        10
   macro avg       0.38      0.38      0.38        10
weighted avg       0.60      0.60      0.60        10



In [90]:
# Unpack the reversed matrix (label order [10, 9]), treating 10 as the positive class:
#   row 0 = actual 10, row 1 = actual 9; column 0 = predicted 10, column 1 = predicted 9.
(Tp, Fn), (Fp, Tn) = cm_r

# Accuracy = correct predictions / all predictions.
total = Tp + Fn + Fp + Tn
accura_score = (Tp + Tn) / total
print("Accuracy Score: ", accura_score)

Accuracy Score:  0.6


In [91]:
# Precision: share of the predicted positives that are actually positive.
predicted_positive = Tp + Fp
prc = Tp / predicted_positive
print("Precision: ", prc)

Precision:  0.75


In [92]:
# Recall (sensitivity): share of the actual positives that were predicted positive.
actual_positive = Tp + Fn
recall = Tp / actual_positive
print("Recall/Sensitivity: ", recall)

Recall/Sensitivity:  0.75


In [93]:
# Specificity: share of the actual negatives that were predicted negative.
actual_negative = Tn + Fp
spec = Tn / actual_negative
print("Specificity: ", spec)

Specificity:  0.0


In [94]:
# F1: harmonic mean of precision and recall, 2 * P * R / (P + R).
pr_product = prc * recall
f1_score = (2 * pr_product) / (prc + recall)
print("F1_score: ", f1_score)

F1_score:  0.75


# Diabetes.csv

In [95]:
# Load the diabetes data set from the working directory and preview it.
# Note: this rebinds df, replacing the ads frame used in the earlier cells.
diabetes_csv_path = "diabetes.csv"
df = pd.read_csv(diabetes_csv_path)
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [96]:
# Count the rows that are exact duplicates of an earlier row.
df.duplicated().sum()

0

In [97]:
# Count the missing (NaN) values in each column.
df.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [98]:
# IMP: replace the numeric BMI with a category label.
#   BMI > 30 -> 'O',  BMI > 26 -> 'OW',  BMI > 18 -> 'NW',  otherwise -> 'UW'
# The conditions are checked in order and the first match wins, so each row
# gets exactly one label.
# Note: this overwrites the numeric column in place, so the cell cannot be
# re-run on the already-labelled frame.
bmi = df['BMI']
df['BMI'] = np.select(
    [bmi > 30, bmi > 26, bmi > 18],
    ['O', 'OW', 'NW'],
    default='UW',
)

In [99]:
# Display the frame to check that BMI now holds category labels instead of numbers.
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,O,0.627,50,1
1,1,85,66,29,0,OW,0.351,31,0
2,8,183,64,0,0,NW,0.672,32,1
3,1,89,66,23,94,OW,0.167,21,0
4,0,137,40,35,168,O,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,O,0.171,63,0
764,2,122,70,27,0,O,0.340,27,0
765,5,121,72,23,112,OW,0.245,30,0
766,1,126,60,0,0,O,0.349,47,1


In [100]:
# One-hot encode the BMI category labels into BMI_* indicator columns;
# the numeric columns pass through unchanged.
df = pd.get_dummies(data=df)
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,DiabetesPedigreeFunction,Age,Outcome,BMI_NW,BMI_O,BMI_OW,BMI_UW
0,6,148,72,35,0,0.627,50,1,False,True,False,False
1,1,85,66,29,0,0.351,31,0,False,False,True,False
2,8,183,64,0,0,0.672,32,1,True,False,False,False
3,1,89,66,23,94,0.167,21,0,False,False,True,False
4,0,137,40,35,168,2.288,33,1,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,0.171,63,0,False,True,False,False
764,2,122,70,27,0,0.340,27,0,False,True,False,False
765,5,121,72,23,112,0.245,30,0,False,False,True,False
766,1,126,60,0,0,0.349,47,1,False,True,False,False


In [101]:
# Predict Outcome from every other column with a 3-neighbour classifier.
y = df['Outcome']
x = df.drop(columns='Outcome')

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=10
)

# Fit on the training rows and score on the held-out rows.
model = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)
y_predict = model.predict(x_test)
ac = accuracy_score(y_test, y_predict)
print("Accuracy Score: ", ac)

Accuracy Score:  0.6558441558441559
