In [34]:
##Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import accuracy_score, classification_report, precision_score, confusion_matrix
import joblib

In [2]:
###Importing of data
def read_data():
    data = pd.read_csv('/content/iphone_purchase_records.csv')
    return data

In [12]:
data = read_data()
print(data)

##creating an ordinal encode object
enc = OrdinalEncoder()

     Gender  Age  Salary  Purchase Iphone
0      Male   19   19000                0
1      Male   35   20000                0
2    Female   26   43000                0
3    Female   27   57000                0
4      Male   19   76000                0
..      ...  ...     ...              ...
395  Female   46   41000                1
396    Male   51   23000                1
397  Female   50   20000                1
398    Male   36   33000                0
399  Female   49   36000                1

[400 rows x 4 columns]


In [64]:
def patterns():
  plt.figure(figsize=(10,7))
  sns.lmplot(data = data, x='Salary', y='Age', hue='Gender')
  sns.countplot(data = data, x='Age', y='Purchase Iphone', hue='Gender')
  sns.countplot(data=data, x='Purchase Iphone', hue='Gender')
  sns.countplot(data=data, x='Purchase Iphone', hue='Salary')
  return patterns

In [13]:
def data_prep(data, t):
  X = data[['Gender', 'Age', 'Salary']]
  y = data['Purchase Iphone']
  X[['Gender']] = enc.fit_transform(X[['Gender']])
  X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=t,random_state=42,stratify=y)
  return X_train, X_test, y_train, y_test


In [32]:
## calling the data prep function
X_train, X_test, y_train, y_test = data_prep(data, 0.2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[['Gender']] = enc.fit_transform(X[['Gender']])


In [49]:
def dec_model(X_train, y_train, X_test):
  dec_tree_model = DecisionTreeClassifier()
  dtc_model = dec_tree_model.fit(X_train, y_train)
  pred = dtc_model.predict(X_test)
  return pred

In [33]:
pred = dec_model(X_train, y_train, X_test)
pred

array([1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0])

In [26]:
##diagnostics of the model
def model_dtc_diag(y_test, pred):
  accuracy = accuracy_score(y_test, pred)
  precision = precision_score(y_test, pred)
  clf = classification_report(y_test, pred)
  conf_matrix = confusion_matrix(y_test, pred)
  return accuracy, precision, clf, conf_matrix

In [27]:
accuracy, precision, clf, conf_matrix =  model_dtc_diag(y_test, pred)
print(f"The accuracy score is: ",accuracy)
print(f"The precision score is: ",precision)
print(f"The CLASSIFICATION report is: ",clf)
print(f"The confusion matrix is: ",conf_matrix)

The accuracy score is:  0.8625
The precision score is:  0.7647058823529411
The CLASSIFICATION report is:                precision    recall  f1-score   support

           0       0.93      0.84      0.89        51
           1       0.76      0.90      0.83        29

    accuracy                           0.86        80
   macro avg       0.85      0.87      0.86        80
weighted avg       0.87      0.86      0.86        80

The confusion matrix is:  [[43  8]
 [ 3 26]]


In [46]:
def main_function():
  data = read_data()
  X_train, X_test, y_train, y_test = data_prep(data, 0.2)
  pred = dec_model(X_train, y_train, X_test)
  accuracy, precision, clf, conf_matrix =  model_dtc_diag(y_test, pred)
  return clf


In [31]:
%%time
results = main_function()
print(results)

              precision    recall  f1-score   support

           0       0.93      0.84      0.89        51
           1       0.76      0.90      0.83        29

    accuracy                           0.86        80
   macro avg       0.85      0.87      0.86        80
weighted avg       0.87      0.86      0.86        80

CPU times: user 29.9 ms, sys: 0 ns, total: 29.9 ms
Wall time: 35.7 ms


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[['Gender']] = enc.fit_transform(X[['Gender']])


In [52]:
###saving the model
dec_tree_model = DecisionTreeClassifier()
dtc_model = dec_tree_model.fit(X_train, y_train)
joblib.dump(dtc_model, 'iphone_purchase_model.pkl')

['iphone_purchase_model.pkl']