# Import Libraries

In [49]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [50]:
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, log_loss, confusion_matrix, f1_score 
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import StratifiedKFold, GridSearchCV, KFold 
from sklearn.preprocessing import MinMaxScaler

In [51]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

# Dataset

In [52]:
allen = pd.read_csv("Dataset.csv")

In [54]:
allen.head()

Unnamed: 0,Gender,Age,Salary,Purchase Iphone
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


# Label Encoding

In [55]:
l1 = preprocessing.LabelEncoder()
f1 = l1.fit_transform(allen['Gender']) 
f1 = pd.DataFrame(data=f1, columns=['Gender'])
allen['Gender'] = f1['Gender']

In [56]:
allen.head()

Unnamed: 0,Gender,Age,Salary,Purchase Iphone
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


# Train-test-split

In [57]:
x = allen.drop(['Purchase Iphone'],axis=1)

In [58]:
y = allen['Purchase Iphone']

In [59]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.15,random_state=30)

# Random Forest

In [60]:
model1 = RandomForestClassifier()
model1.fit(xtrain,ytrain)
p1 = model1.predict(xtest)

### Confusion Matrix 

In [71]:
print(confusion_matrix(ytest,p1))

[[38  0]
 [ 4 18]]


### Specificity and Sensitivity

In [72]:
c = confusion_matrix(ytest,p1)
print('Specificity: ', c[1,1]/(c[1,0]+c[1,1]))
print('Sensitivity: ', c[0,0]/(c[0,0]+c[0,1]))

Specificity:  0.8181818181818182
Sensitivity:  1.0


### Accuracy

In [73]:
c = confusion_matrix(ytest,p1)
print('Accuracy: {:.2%}'.format((c[0,0]+c[1,1])/sum(sum(c))))

Accuracy: 93.33%


### F1 Score

In [74]:
f11 = f1_score(ytest,p1)
print('F1 Score: {:.2%}'.format(f11))

F1 Score: 90.00%


# Decision Tree

In [75]:
model2 = DecisionTreeClassifier()
model2.fit(xtrain,ytrain)
p2 = model2.predict(xtest)

### Confusion Matrix 

In [77]:
print(confusion_matrix(ytest,p2))

[[37  1]
 [ 5 17]]


### Specificity and Sensitivity

In [79]:
c2 = confusion_matrix(ytest,p2)
print('Specificity: ', c2[1,1]/(c2[1,0]+c2[1,1]))
print('Sensitivity: ', c2[0,0]/(c2[0,0]+c2[0,1]))

Specificity:  0.7727272727272727
Sensitivity:  0.9736842105263158


### Accuracy

In [80]:
c2 = confusion_matrix(ytest,p2)
print('Accuracy: {:.2%}'.format((c2[0,0]+c2[1,1])/sum(sum(c2))))

Accuracy: 90.00%


### F1 Score

In [82]:
f12 = f1_score(ytest,p2)
print('F1 Score: {:.2%}'.format(f12))

F1 Score: 85.00%
