# **Case Study on Python Flask**

**Using the given dataset, create a web application to take feature inputs from the user in a web page and print whether a person will purchase the product or not.**

In [103]:
#importing essential libraries
import pandas as pd
import numpy as np

In [104]:
#loading dataset to data
data = pd.read_csv('/content/Social_Network_Ads (3).csv')

In [105]:
data.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [106]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB


In [102]:
#checking for null values
data.isnull().sum()

Gender             0
Age                0
EstimatedSalary    0
Purchased          0
dtype: int64

**There are no missing values in the given dataset.**

In [65]:
#user id is unique for all.it's a unique column. so we can drop it.
data = data.drop('User ID',axis=1)

In [66]:
data

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0
...,...,...,...,...
395,Female,46,41000,1
396,Male,51,23000,1
397,Female,50,20000,1
398,Male,36,33000,0


Here Gender column has categorical values. So we need to encode it into numerical values

# **Label Encoding**

In [67]:
from sklearn.preprocessing import LabelEncoder

In [68]:
le = LabelEncoder()

In [69]:
data['Gender']=le.fit_transform(data['Gender'])

In [71]:
data.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


Here column Gender Male is label encoded as 1 and Gender Female as 0.

---



In [73]:
X = data.drop(['Purchased'],axis = True)          #independent variable stored in X
y = data['Purchased']                             #Target variable stored in y

In [74]:
X

Unnamed: 0,Gender,Age,EstimatedSalary
0,1,19,19000
1,1,35,20000
2,0,26,43000
3,0,27,57000
4,1,19,76000
...,...,...,...
395,0,46,41000
396,1,51,23000
397,0,50,20000
398,1,36,33000


In [75]:
y

0      0
1      0
2      0
3      0
4      0
      ..
395    1
396    1
397    1
398    0
399    1
Name: Purchased, Length: 400, dtype: int64

In [76]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=42,test_size = .2)

**Scaling**

In [77]:
# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()

In [78]:
# Scaled_Xtrain = scaler.fit_transform(X_train)
# Scaled_Xtest = scaler.fit_transform(X_test)

**Using Logistic Regression**

In [79]:
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression()

In [80]:
log_reg.fit(X_train,y_train)
log_pred = log_reg.predict(X_test)

In [81]:
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score

In [82]:
confusion_matrix(y_test, log_pred)

array([[52,  0],
       [28,  0]])

In [83]:
print('Accuracy:',accuracy_score(y_test,log_pred))
print('Precision:',precision_score(y_test,log_pred))
print('recall:',recall_score(y_test,log_pred))
print('F1:',f1_score(y_test,log_pred))

Accuracy: 0.65
Precision: 0.0
recall: 0.0
F1: 0.0


  _warn_prf(average, modifier, msg_start, len(result))


# **Using KNN**

In [84]:
from sklearn.neighbors import KNeighborsClassifier
metric_k=[]
neighbors=np.arange(3,15)

In [85]:
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score

In [86]:
for k in neighbors:
  classifier=KNeighborsClassifier(n_neighbors=k,metric='minkowski',p=2)
  classifier.fit(X_train,y_train)
  knn_pred=classifier.predict(X_test)

In [87]:
classifier.predict([[ 1,19,19000]])



array([0])

In [88]:
confusion_matrix(y_test,knn_pred)

array([[48,  4],
       [15, 13]])

In [89]:
print('Accuracy: ',accuracy_score(y_test,knn_pred))
print('Precision: ',precision_score(y_test,knn_pred))
print('recall: ',recall_score(y_test,knn_pred))
print('f1: ',f1_score(y_test,knn_pred))

Accuracy:  0.7625
Precision:  0.7647058823529411
recall:  0.4642857142857143
f1:  0.5777777777777777


# **Random Forest Model**

In [90]:
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score

In [91]:
# Fitting Random Forest Classification to the Training set
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
model.fit(X_train,y_train)
# Predicting the Test set results
y_pred=model.predict(X_test)

In [92]:
y_pred

array([1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0])

In [93]:
# Making the Confusion Matrix
print(confusion_matrix(y_test,y_pred))

[[48  4]
 [ 3 25]]


In [94]:
print('Accuracy score is :',accuracy_score(y_test,y_pred))

Accuracy score is : 0.9125


In [95]:
X_test

Unnamed: 0,Gender,Age,EstimatedSalary
209,0,46,22000
280,0,59,88000
33,0,28,44000
210,0,48,96000
93,0,29,28000
...,...,...,...
246,0,35,50000
227,1,56,133000
369,0,54,26000
176,0,35,47000


Test Cases of prediction

In [96]:
model.predict([[1,19,19000]])



array([0])

In [107]:
model.predict([[0,46,41000]])



array([1])

The test cases are correct predicted.

**Saving the model in pickle format**

In [98]:
import pickle
pickle.dump(model,open('ad_model.pkl','wb'))

In [99]:
pickled_model = pickle.load(open('ad_model.pkl','rb'))

In [100]:
pickled_model.predict([[0,46,41000]])



array([1])