## Import required Libraries

In [1]:
pip install kmodes

Collecting kmodes
  Downloading kmodes-0.11.1-py2.py3-none-any.whl (19 kB)
Installing collected packages: kmodes
Successfully installed kmodes-0.11.1


In [2]:
import pickle
from kmodes.kprototypes import KPrototypes
import pandas as pd
import pickle
import torch
import torch.nn as nn

## Load and review  test data

In [8]:
# Read the test split from the working directory into the feature frame.
X = pd.read_csv(filepath_or_buffer="test.csv")

### Data Preprocessing starts

In [9]:
# Drop the four columns that are not carried into the later steps.
columns_to_remove = ['User_ID', 'Product_ID', 'Product_Category_2', 'Product_Category_3']
X = X.drop(columns=columns_to_remove)


In [10]:
# Preview the first five rows of the remaining feature columns.
X.head(n=5)

Unnamed: 0,Gender,Age,Occupation,City_Category,Stay_In_Current_City_Years,Marital_Status,Product_Category_1
0,M,46-50,7,B,2,1,1
1,M,26-35,17,C,0,0,3
2,F,36-45,1,B,4+,1,5
3,F,36-45,1,B,4+,1,4
4,F,26-35,1,C,1,0,4


In [11]:
# Map each Age bracket to an integer code (1 = youngest bracket, 7 = oldest)
# and rewrite the open-ended "4+" stay bucket as the number 5.
age_codes = {
    "0-17": 1,
    "18-25": 2,
    "26-35": 3,
    "36-45": 4,
    "46-50": 5,
    "51-55": 6,
    "55+": 7,
}
stay_codes = {"4+": 5}
dic_to_replace = {"Age": age_codes, "Stay_In_Current_City_Years": stay_codes}
X = X.replace(dic_to_replace)

In [12]:
# Cast all four columns in one pass. The stay duration becomes an integer;
# the three object casts are, per the original note, only needed for the
# clustering step.
X = X.astype({
    'Stay_In_Current_City_Years': 'int64',
    'Marital_Status': 'object',
    'Occupation': 'object',
    'Product_Category_1': 'object',
})

### Data Preprocessing ends

### Determine the cluster number of each record, then use the corresponding model for prediction

In [14]:
# Load the fitted clustering model. The context manager closes the file handle
# as soon as unpickling finishes (the bare open() call left it open).
# SECURITY: pickle.load executes arbitrary code from the file -- only load a
# clusteringModel.sav that comes from a trusted source.
with open('clusteringModel.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Positions of the categorical columns in X at this point:
# 0=Gender, 2=Occupation, 3=City_Category, 5=Marital_Status, 6=Product_Category_1
clusters_label = loaded_model.predict(X, categorical=[0, 2, 3,5, 6])

In [15]:
# Attach the predicted cluster id so rows can be routed to the matching model.
X['clusterNo']=clusters_label

# One-hot encode every categorical column in a single call; the dummy columns
# are appended in the order listed here, and the first level of each column is
# dropped (drop_first=True).
# dtype is pinned to uint8 (the default before pandas 2.0): newer pandas emits
# bool dummies, and a frame mixing bool and integer columns converts to an
# object array that torch.Tensor cannot consume.
categorical_cols = ['Gender','Marital_Status','City_Category','Occupation','Product_Category_1']
X = pd.get_dummies(
    X,
    columns=categorical_cols,
    prefix=categorical_cols,
    drop_first=True,
    dtype='uint8',
)
X.head()

Unnamed: 0,Age,Stay_In_Current_City_Years,clusterNo,Gender_M,Marital_Status_1,City_Category_B,City_Category_C,Occupation_1,Occupation_2,Occupation_3,Occupation_4,Occupation_5,Occupation_6,Occupation_7,Occupation_8,Occupation_9,Occupation_10,Occupation_11,Occupation_12,Occupation_13,Occupation_14,Occupation_15,Occupation_16,Occupation_17,Occupation_18,Occupation_19,Occupation_20,Product_Category_1_2,Product_Category_1_3,Product_Category_1_4,Product_Category_1_5,Product_Category_1_6,Product_Category_1_7,Product_Category_1_8,Product_Category_1_9,Product_Category_1_10,Product_Category_1_11,Product_Category_1_12,Product_Category_1_13,Product_Category_1_14,Product_Category_1_15,Product_Category_1_16,Product_Category_1_17,Product_Category_1_18
0,5,2,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,3,0,2,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,4,5,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,4,5,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,3,1,2,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [16]:
# The test data has no rows with Product_Category_1 equal to 19 or 20, so
# get_dummies produced no indicator columns for them. Add the missing ones as
# all-zero columns so the frame has the 45 feature columns the MLP expects.
# The membership check avoids overwriting a real indicator column with zeros
# if a different test file does contain those categories.
for missing_col in ('Product_Category_1_19', 'Product_Category_1_20'):
    if missing_col not in X.columns:
        X[missing_col] = 0

### Load model for each cluster and use them for prediction

In [18]:
class MLP(nn.Module):
  '''
    Multilayer Perceptron for regression.

    Architecture: in_features -> 20 -> 15 -> 1 with ReLU between the linear
    layers. The attribute name `layers` and the layer order are kept as-is so
    parameter names (layers.0.*, layers.2.*, layers.4.*) stay the same.
  '''
  def __init__(self, in_features=45):
    '''
      Build the network.

      in_features: number of input columns per sample. Defaults to 45, the
        width used by this notebook's feature frame, so MLP() is unchanged.
    '''
    super().__init__()
    self.layers = nn.Sequential(
      nn.Linear(in_features, 20),
      nn.ReLU(),
      nn.Linear(20, 15),
      nn.ReLU(),
      nn.Linear(15, 1)
    )


  def forward(self, x):
    '''
      Forward pass: run x through the sequential stack and return the result
      (one output value per input row).
    '''
    return self.layers(x)

In [24]:

# Load the regressor saved for cluster 0. The loaded object is called directly
# as a model, so the file presumably holds a whole pickled MLP and the MLP
# class must already be defined in this session.
# map_location keeps the weights on the CPU, where the input tensor is built.
# NOTE(review): on PyTorch releases where torch.load defaults to
# weights_only=True, unpickling a whole module needs weights_only=False.
model_cluster0=torch.load('MLP_cluster0.pt', map_location='cpu')

# Keep only the rows assigned to cluster 0; the cluster id is not a feature.
cluster0=X[X['clusterNo']==0]
cluster0=cluster0.drop(['clusterNo'],axis=1)
with torch.no_grad():
  # Convert to one float32 array up front: a frame mixing bool and integer
  # columns would otherwise become an object array that torch.Tensor rejects.
  pred_cluster0=model_cluster0(torch.Tensor(cluster0.to_numpy(dtype='float32')))

pd.DataFrame(pred_cluster0.numpy(),columns=["Purchase"]).head()

Unnamed: 0,Purchase
0,14035.305664
1,11682.691406
2,13988.981445
3,11682.691406
4,14636.104492


In [25]:

# Load the regressor saved for cluster 1. The loaded object is called directly
# as a model, so the file presumably holds a whole pickled MLP and the MLP
# class must already be defined in this session.
# map_location keeps the weights on the CPU, where the input tensor is built.
# NOTE(review): on PyTorch releases where torch.load defaults to
# weights_only=True, unpickling a whole module needs weights_only=False.
model_cluster1=torch.load('MLP_cluster1.pt', map_location='cpu')

# Keep only the rows assigned to cluster 1; the cluster id is not a feature.
cluster1=X[X['clusterNo']==1]
cluster1=cluster1.drop(['clusterNo'],axis=1)
with torch.no_grad():
  # Convert to one float32 array up front: a frame mixing bool and integer
  # columns would otherwise become an object array that torch.Tensor rejects.
  pred_cluster1=model_cluster1(torch.Tensor(cluster1.to_numpy(dtype='float32')))

pd.DataFrame(pred_cluster1.numpy(),columns=["Purchase"]).head()

Unnamed: 0,Purchase
0,5824.398438
1,2239.817871
2,5864.129883
3,11580.808594
4,13235.960938


In [26]:
# Load the regressor saved for cluster 2. The loaded object is called directly
# as a model, so the file presumably holds a whole pickled MLP and the MLP
# class must already be defined in this session.
# map_location keeps the weights on the CPU, where the input tensor is built.
# NOTE(review): on PyTorch releases where torch.load defaults to
# weights_only=True, unpickling a whole module needs weights_only=False.
model_cluster2=torch.load('MLP_cluster2.pt', map_location='cpu')

# Keep only the rows assigned to cluster 2; the cluster id is not a feature.
cluster2=X[X['clusterNo']==2]
cluster2=cluster2.drop(['clusterNo'],axis=1)
with torch.no_grad():
  # Convert to one float32 array up front: a frame mixing bool and integer
  # columns would otherwise become an object array that torch.Tensor rejects.
  pred_cluster2=model_cluster2(torch.Tensor(cluster2.to_numpy(dtype='float32')))

pd.DataFrame(pred_cluster2.numpy(),columns=["Purchase"]).head()

Unnamed: 0,Purchase
0,10357.825195
1,2480.595215
2,16454.892578
3,13390.629883
4,5827.830078


### Cloud Deployment (AWS Elastic Beanstalk)

Once the training is completed, we need to expose the trained model as an API for the user to consume it. For prediction, the saved model is loaded first and then the predictions are made using it. The same app is deployed to the cloud platform.

The best model for each cluster type is a Multilayer Perceptron. These models are saved in binary format and are loaded and used for prediction when a user submits a query through the web application, as shown below:

###### Flask App

Because the trained model is exposed as a web application to be consumed by clients, we build it with the Flask framework.


Create the project structure, as shown below:
<img src="flask.PNG" width= "1000">

#### Deployment to AWS Elastic Beanstalk


Web UI where the user can input the feature values.

Application URL: http://blackfridaypurchaseprediction-env.eba-qxmu82rp.us-east-1.elasticbeanstalk.com/

<img src="Inputs.PNG" width= "1000">

Prediction for the inputs above:

<img src="Output.PNG" width= "1000">