In [1]:
import ipynb
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.preprocessing import OrdinalEncoder
from numpy import asarray
from statistics import mean

In [2]:
import warnings
# Silence every warning for the rest of the session. The filter is global and
# unconditional, so deprecation/convergence warnings raised by later cells are hidden too.
warnings.filterwarnings('ignore')

In [3]:
from ipynb.fs.full.TrainingFunction import Train_Models_CV10
from ipynb.fs.full.TrainingFunction import SavePredictionsToFile

from ipynb.fs.full.ParameterTuning import ParameterTuning

In [4]:
# Load the preprocessed data set. The first CSV column is read as the index and
# immediately dropped again, leaving a fresh default integer index.
preprocessed_df = pd.read_csv(
    "../data/preprocessedMarketingCampaign.csv", index_col=0
).reset_index(drop=True)

# Indicator-style columns (they hold 0/1 values in the frame displayed below).
categorical_features = [
    'Education_2n_Cycle', 'Education_Basic', 'Education_Graduation',
    'Education_Master', 'Education_PhD',
    'AcceptedCmp5_0', 'AcceptedCmp5_1', 'AcceptedCmp1_0', 'AcceptedCmp1_1',
]
target = "Teenhome"

# Convert the categorical features and the target to the 'category' dtype in a
# single, non-mutating step so re-running the cell always starts from the CSV.
preprocessed_df = preprocessed_df.astype(
    {column: 'category' for column in [*categorical_features, target]}
)

# Class balance of the target. The label is derived from `target` so it cannot
# drift from the column that is actually counted (it used to read "Revenue").
print(f'{target} distribution:')
print(preprocessed_df[target].value_counts())

Revenue distribution:
1    1158
0    1158
Name: Teenhome, dtype: int64


In [5]:
# Display the prepared frame as the cell's output (pandas elides the middle
# rows/columns of a large frame with "...").
preprocessed_df

Unnamed: 0,MntMeatProducts,MntFishProducts,Income,MntFruits,MntSweetProducts,NumDealsPurchases,Dt_Customer,NumWebPurchases,NumCatalogPurchases,MntGoldProds,...,Teenhome,Education_2n_Cycle,Education_Basic,Education_Graduation,Education_Master,Education_PhD,AcceptedCmp5_0,AcceptedCmp5_1,AcceptedCmp1_0,AcceptedCmp1_1
0,1.679702,2.462147,0.235327,1.551577,1.476500,0.349414,-1.531185,1.409304,2.510890,0.843207,...,0,0,0,1,0,0,1,0,1,0
1,-0.177032,1.345274,0.773633,0.570804,-0.146905,-0.685887,0.205773,1.409304,-0.226541,-0.038766,...,0,0,0,1,0,0,1,0,1,0
2,-0.651187,-0.503974,-1.022732,-0.560857,-0.583043,-0.168236,1.061881,-0.750450,-0.910898,-0.748179,...,0,0,0,1,0,0,1,0,1,0
3,-0.216914,0.155164,0.241519,0.419916,-0.001525,1.384715,0.953012,0.329427,0.115638,-0.556446,...,0,0,0,0,0,1,1,0,1,0
4,-0.491658,-0.632140,-0.750763,-0.409969,-0.631503,-0.168236,-0.313830,-0.030532,-0.910898,-0.403059,...,0,0,0,0,0,1,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2311,-0.620167,-0.503974,0.088236,-0.661449,-0.655733,0.349414,0.002881,0.689386,-0.568720,-0.556446,...,1,0,0,0,0,1,1,0,1,0
2312,-0.425188,-0.412427,0.318020,-0.083045,-0.655733,4.490616,-1.367881,1.409304,0.115638,0.287180,...,1,0,0,1,0,0,1,0,1,0
2313,-0.699931,-0.650449,-1.541025,-0.611153,-0.631503,-0.168236,-0.417750,-1.110409,-0.910898,-0.748179,...,1,0,0,1,0,0,1,0,1,0
2314,-0.668912,-0.687068,0.000000,-0.636301,-0.655733,0.867064,-0.640437,-0.750450,-0.568720,-0.671486,...,1,0,0,1,0,0,1,0,1,0


# Models

The hyperparameters used below are taken from "MarketingCampaign_ParameterTuning".

In [6]:
# Classifiers to compare, configured with the hyper-parameters referenced in the
# note above this cell.
# NOTE(review): no estimator is given a random_state, so the stochastic ones
# (tree, forest, MLP, boosting) presumably produce different scores on each run.
knn_model = KNeighborsClassifier(
    n_neighbors=4,
    weights='distance',
    metric='manhattan',
)
svm_model = SVC(kernel='rbf')
dt_model = DecisionTreeClassifier(
    criterion='gini',
    max_depth=9,
    min_samples_split=8,
    min_samples_leaf=1,
)
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=110,
    max_features=8,
    min_samples_split=8,
    min_samples_leaf=3,
    bootstrap=True,
)
mlp_model = MLPClassifier(
    hidden_layer_sizes=(10, 30, 10),
    activation='tanh',
    solver='adam',
    alpha=0.0001,
    learning_rate='adaptive',
)
gde_model = GradientBoostingClassifier(
    n_estimators=500,
    learning_rate=0.1,
    max_depth=9,
)

# Order matters: the report cell's fixed header lists the models in this order.
models = [knn_model, svm_model, dt_model, rf_model, mlp_model, gde_model]

# Class name of each estimator, in the same order as `models`; used as keys into `stats`.
model_names = [type(estimator).__name__ for estimator in models]

In [7]:
# Run the project's Train_Models_CV10 helper on the prepared frame for every model.
# `stats` is later indexed as stats[<model class name>][<fold>] to read per-fold scores.
stats = Train_Models_CV10(preprocessed_df, target, models)

Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
Fold 6
Fold 7
Fold 8
Fold 9
Fold 10


In [8]:
# Dump the raw results: a dict keyed by model class name, each value a list of per-fold scores.
print(stats)

{'KNeighborsClassifier': [0.8706896551724138, 0.8793103448275862, 0.9008620689655172, 0.9094827586206896, 0.8793103448275862, 0.8663793103448276, 0.8528138528138528, 0.8484848484848485, 0.8831168831168831, 0.8658008658008658], 'SVC': [0.8405172413793104, 0.9094827586206896, 0.8879310344827587, 0.9267241379310345, 0.9008620689655172, 0.9181034482758621, 0.8831168831168831, 0.8614718614718615, 0.8831168831168831, 0.9134199134199135], 'DecisionTreeClassifier': [0.8448275862068966, 0.9051724137931034, 0.8706896551724138, 0.9353448275862069, 0.853448275862069, 0.9267241379310345, 0.8614718614718615, 0.8744588744588745, 0.8484848484848485, 0.8744588744588745], 'RandomForestClassifier': [0.875, 0.9396551724137931, 0.9181034482758621, 0.9525862068965517, 0.8879310344827587, 0.9267241379310345, 0.8874458874458875, 0.8831168831168831, 0.9264069264069265, 0.935064935064935], 'MLPClassifier': [0.8663793103448276, 0.9181034482758621, 0.896551724137931, 0.9051724137931034, 0.8879310344827587, 0.875,

# Overall Results

In [11]:
# To copy-paste into the report
#
# Prints the body of a LaTeX table: one row per cross-validation fold, followed
# by the per-model mean ("avg") and standard deviation ("std", via np.std).
# Columns follow the order of `model_names`; rows cover every fold in `stats`.

# Row terminator, written with fully escaped backslashes. The earlier spelling
# relied on the invalid escape sequence backslash-space, which Python warns
# about and plans to reject. The emitted text is unchanged: " \\ \hline " + newline.
ROW_END = ' \\\\ \\hline \n'


def format_latex_row(label, values):
    """Return one table row: `label` followed by each value rendered as ' & %.5f'."""
    return str(label) + ''.join(' & %.5f' % value for value in values) + ROW_END


# Per-model score lists, in column order.
scores_by_model = [stats[name] for name in model_names]

# NOTE: the header is a fixed string; edit it by hand if the model list changes.
print("      KNN       SVM       DT         RF        MLP       GDE")

# zip(*...) regroups the per-model lists into per-fold tuples (one tuple per row).
rows = [
    format_latex_row(fold_number, fold_scores)
    for fold_number, fold_scores in enumerate(zip(*scores_by_model), start=1)
]
rows.append(format_latex_row('avg', [mean(scores) for scores in scores_by_model]))
rows.append(format_latex_row('std', [np.std(scores) for scores in scores_by_model]))

outputString = ''.join(rows)
print(outputString)

      KNN       SVM       DT         RF        MLP       GDE
1 & 0.87069 & 0.84052 & 0.84483 & 0.87500 & 0.86638 & 0.91379 \\ \hline 
2 & 0.87931 & 0.90948 & 0.90517 & 0.93966 & 0.91810 & 0.96983 \\ \hline 
3 & 0.90086 & 0.88793 & 0.87069 & 0.91810 & 0.89655 & 0.93966 \\ \hline 
4 & 0.90948 & 0.92672 & 0.93534 & 0.95259 & 0.90517 & 0.96983 \\ \hline 
5 & 0.87931 & 0.90086 & 0.85345 & 0.88793 & 0.88793 & 0.90517 \\ \hline 
6 & 0.86638 & 0.91810 & 0.92672 & 0.92672 & 0.87500 & 0.92672 \\ \hline 
7 & 0.85281 & 0.88312 & 0.86147 & 0.88745 & 0.87879 & 0.90043 \\ \hline 
8 & 0.84848 & 0.86147 & 0.87446 & 0.88312 & 0.87013 & 0.91775 \\ \hline 
9 & 0.88312 & 0.88312 & 0.84848 & 0.92641 & 0.88312 & 0.93939 \\ \hline 
10 & 0.86580 & 0.91342 & 0.87446 & 0.93506 & 0.91775 & 0.93074 \\ \hline 
avg & 0.87563 & 0.89247 & 0.87951 & 0.91320 & 0.88989 & 0.93133 \\ \hline 
std & 0.01825 & 0.02547 & 0.03050 & 0.02603 & 0.01785 & 0.02291 \\ \hline 



In [10]:
# Hand the per-fold results in `stats` to the project's SavePredictionsToFile helper
# together with the target CSV path (presumably it writes them there — see TrainingFunction).
SavePredictionsToFile("../accuracy/accuracyMarketingCampaign.csv", stats)