In [None]:
## Import packages and Read Dataset
import sys
import numpy as np
import pandas as pd
import xgboost as xgb

from sklearn.preprocessing import LabelEncoder

# Load the training data, report how many rows it has, and preview the first rows.
dataset = pd.read_csv("train.csv")
print(dataset.shape[0])
dataset.head()

In [None]:
## Baseline Performance Model
# Reference score every model has to beat: the RMSE obtained when the mean
# purchase amount is predicted for every row.
purchases = dataset['Purchase']
meanVal = purchases.mean()

# Vectorised computation instead of a row-by-row Python loop; `rmse` now holds
# the actual root-mean-squared error rather than the running sum of squares.
rmse = float(np.sqrt(np.mean(np.square(purchases - meanVal))))
print(rmse)

In [None]:
## Replace NaN with mean
# Each of the two product-category columns has its missing entries replaced by
# that column's own mean.
# NOTE(review): the mean is taken over all rows of `dataset`, i.e. before the
# train/test split further down — confirm this is acceptable.
for column in ('Product_Category_2', 'Product_Category_3'):
    dataset[column] = dataset[column].fillna(np.mean(dataset[column]))

In [None]:
## Set up Target Variable and Independent Variable
# The last column is the target; every column before it is kept as a feature.
X, Y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

In [None]:
## Split dataset into Training and Testing
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# split (and therefore every score computed below) reproducible across
# kernel restarts.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [None]:
# Renumber the held-out targets from 0 so they can be looked up by position,
# discarding the original row labels that reset_index() moves into 'index'.
Y_test = Y_test.reset_index().drop('index', axis=1)
Y_test.head()

In [None]:
# Label Encoding
# Work on explicit copies so the column assignments below modify these frames
# themselves rather than slices produced by the split.
X_train = X_train.copy()
X_test = X_test.copy()

for col in X_train.columns:
    # One encoder per column, fitted once on the train and test values together.
    # Fitting separately on each frame (as before) lets the same category
    # receive different integer codes in train and test, which silently
    # corrupts the test features seen by the model.
    le = LabelEncoder()
    le.fit(pd.concat([X_train[col], X_test[col]]))
    X_train[col] = le.transform(X_train[col])
    X_test[col] = le.transform(X_test[col])

In [None]:
# Wrap the training features and targets in an XGBoost DMatrix;
# NaN is declared as the missing-value marker.
dtrain = xgb.DMatrix(data=X_train, label=Y_train, missing=np.nan)

In [None]:
## Training the model with Default Parameters.
num_round = 200

# An empty params dict keeps every booster setting at the library default.
# num_round is passed explicitly: it was previously defined but never used,
# so xgb.train fell back to its own default number of boosting rounds.
clf = xgb.train(params={}, dtrain=dtrain, num_boost_round=num_round)

# Held-out features in the container format the booster expects for prediction.
dtest = xgb.DMatrix(X_test, missing=np.nan)

In [None]:
## Prediction
# Score the held-out DMatrix with the booster trained in the previous cell.
test_preds = clf.predict(data=dtest)

In [None]:
## Loss functions calculation
# RMSE of the predictions against the held-out targets. Predictions and targets
# are matched by position, which is why Y_test was re-indexed from 0 earlier.
# Vectorised instead of a Python loop with one Series lookup per prediction.
actual = np.asarray(Y_test['Purchase'])
rmse = float(np.sqrt(np.mean(np.square(test_preds - actual))))
print(rmse)

In [None]:
import pickle as pkl

# Persist the trained booster, then read it straight back to check that the
# saved artefact is usable. `with` guarantees both file handles are closed
# (the bare open() calls used before were never closed).
with open('xgb_model.pkl', 'wb') as model_file:
    pkl.dump(clf, model_file)

# Only unpickle files written by this notebook: pickle.load can execute
# arbitrary code from an untrusted file.
with open('xgb_model.pkl', 'rb') as model_file:
    xgb_model_api = pkl.load(model_file)

In [None]:
# Score the held-out features with the model that was restored from disk.
test = xgb.DMatrix(data=X_test, missing=np.nan)
test_predictions = xgb_model_api.predict(data=test)

In [None]:
## Loss functions calculation
# RMSE of the *reloaded* model. This cell previously scored `test_preds`
# (the in-memory model's output), so the pickle round trip was never verified;
# it now scores `test_predictions`, the output of the model read back from disk.
# Predictions and targets are matched by position.
actual = np.asarray(Y_test['Purchase'])
rmse = float(np.sqrt(np.mean(np.square(test_predictions - actual))))
print(rmse)

In [None]:
# Setting up Model's Parameters
# Number of boosting rounds used by the training cell that follows.
num_round = 200

# Training matrix: features plus targets, NaN marks missing values.
dtrain = xgb.DMatrix(data=X_train, label=Y_train, missing=np.nan)

# Booster configuration for the first tuned run (tree depth 100, min child weight 5).
# NOTE(review): 'reg:linear' and 'silent' are reported as deprecated by newer
# XGBoost releases ('reg:squarederror' / 'verbosity') — confirm against the
# installed version before renaming.
param = {
    'objective': 'reg:linear',
    'booster': 'gbtree',
    'silent': 1,
    'max_depth': 100,
    'eta': 0.1,
    'nthread': 4,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 5,
    'max_delta_step': 0,
    'gamma': 0,
}

In [None]:
## Training the model.
# Fit a booster with the parameter set and round count defined in the cell above,
# then rebuild the held-out DMatrix used for prediction.
clf = xgb.train(params=param, dtrain=dtrain, num_boost_round=num_round)
dtest = xgb.DMatrix(data=X_test, missing=np.nan)

In [None]:
## Prediction
# Predictions of the most recently trained booster on the held-out DMatrix.
test_preds = clf.predict(data=dtest)

In [None]:
## Loss functions calculation
# RMSE of the latest predictions against the held-out targets, matched by
# position. Vectorised instead of a Python loop with one Series lookup per
# prediction.
actual = np.asarray(Y_test['Purchase'])
rmse = float(np.sqrt(np.mean(np.square(test_preds - actual))))
print(rmse)


In [None]:
# Setting up Model's Parameters
# Number of boosting rounds used by the training cell that follows.
num_round = 200

# Training matrix: features plus targets, NaN marks missing values.
dtrain = xgb.DMatrix(data=X_train, label=Y_train, missing=np.nan)

# Booster configuration for the second tuned run (tree depth 150, min child weight 10).
# NOTE(review): 'reg:linear' and 'silent' are reported as deprecated by newer
# XGBoost releases ('reg:squarederror' / 'verbosity') — confirm against the
# installed version before renaming.
param = {
    'objective': 'reg:linear',
    'booster': 'gbtree',
    'silent': 1,
    'max_depth': 150,
    'eta': 0.1,
    'nthread': 4,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 10,
    'max_delta_step': 0,
    'gamma': 0,
}

In [None]:
## Training the model.
# Fit a booster with the parameter set and round count defined in the cell above,
# then rebuild the held-out DMatrix used for prediction.
clf = xgb.train(params=param, dtrain=dtrain, num_boost_round=num_round)
dtest = xgb.DMatrix(data=X_test, missing=np.nan)

In [None]:
## Prediction
# Predictions of the most recently trained booster on the held-out DMatrix.
test_preds = clf.predict(data=dtest)

In [None]:
## Loss functions calculation
# RMSE of the latest predictions against the held-out targets, matched by
# position. Vectorised instead of a Python loop with one Series lookup per
# prediction.
actual = np.asarray(Y_test['Purchase'])
rmse = float(np.sqrt(np.mean(np.square(test_preds - actual))))
print(rmse)


In [None]:
## custom root mean square function
from keras import backend as K
def root_mean_squared_error(y_true, y_pred):
    """Root-mean-squared error between targets and predictions, for use as a Keras loss.

    Args:
        y_true: tensor of target values.
        y_pred: tensor of model outputs, same shape as ``y_true``.

    Returns:
        A scalar tensor: the square root of the mean squared difference taken
        over every element of the batch.
    """
    # Average over ALL elements before the square root. Reducing only over
    # axis=-1 (as before) averages a single value per sample when the model has
    # one output unit, so sqrt(square(diff)) collapses to |diff| and the
    # network is effectively trained on mean absolute error, not RMSE.
    return K.sqrt(K.mean(K.square(y_pred - y_true)))

In [None]:
## Neural Network
import keras
from keras.models import Sequential
from keras.layers import Dense

# Fully connected regressor: an input-width layer, then 64 and 128 hidden
# units, then a single output unit.
regressor = Sequential()
regressor.add(Dense(units = X_train.shape[1], kernel_initializer = 'normal', 
                    activation = 'relu', input_dim = X_train.shape[1]))
regressor.add(Dense(units = 64, kernel_initializer = 'normal', activation = 'relu'))
regressor.add(Dense(units = 128, kernel_initializer = 'normal', activation = 'relu'))
# NOTE(review): a ReLU on the output unit can get stuck at 0; a linear output is
# the usual choice for regression — confirm before changing.
regressor.add(Dense(units = 1, kernel_initializer = 'normal', activation = 'relu'))

# "accuracy" is a classification metric and carries no information for a
# continuous target, so mean absolute error is tracked alongside the loss instead.
regressor.compile(optimizer = "rmsprop", loss = root_mean_squared_error, metrics =["mae"])
regressor.fit(X_train, Y_train, batch_size = 75, epochs = 50)

In [None]:
from keras.models import load_model

# Save the trained network. The model object in this notebook is `regressor`;
# the previous `classifier.save(...)` referenced a name that is never defined
# and raised NameError on a fresh kernel.
regressor.save("purchase_prediction.h5")

In [None]:
# Restore the saved network. The custom loss has to be supplied by name so the
# loader can resolve the reference stored in the file.
regressor = load_model(
    "purchase_prediction.h5",
    custom_objects={'root_mean_squared_error': root_mean_squared_error},
)