In [419]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before
# each cell is executed, so edits to the project scripts are picked up without
# restarting the kernel.
# Re-running this cell in a live kernel only prints an "already loaded" notice.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [420]:
import sys

# Make the project's step folders importable. A single slice assignment puts
# them all at the front of sys.path: "Utils/" ends up first and "Loading/"
# last, which is exactly the order produced by seven successive
# sys.path.insert(0, ...) calls starting with "Loading/".
sys.path[:0] = [
    "Utils/",
    "Monitoring/",
    "Interpretability/",
    "Evaluation/",
    "Modeling/",
    "Preprocessing/",
    "Loading/",
]


import loading
import preprocessing
import modeling
import evaluation
import interpretability
import monitoring 
import utils as u

import json 
import pandas as pd
import numpy as np

# importlib allows reloading a package (or a script) without re-running the whole notebook.
#import importlib  # not needed as long as the autoreload magic commands in the first cell are working

In [421]:
# Parameters needed for the run. Execute this cell only when working from the
# notebook, i.e. when the main script is not being launched.

path_conf = "../params/conf/conf.json"
# Alternative location: path_conf = '../conf/conf.json'

# Read the JSON configuration through a context manager so the file handle is
# closed right away instead of being left open until garbage collection.
with open(path_conf, 'r') as conf_file:
    conf = json.load(conf_file)

path_log = conf['path_log'] # e.g. "../log/my_log_file.txt"
log_level = conf['log_level'] # e.g. "DEBUG"

# Be careful to create the logger only once, otherwise each line will be duplicated.
logger = u.my_get_logger(path_log, log_level, my_name="main_logger")

In [422]:
# Reload the conf file (useful after modifying it). path_conf must already be
# defined by the parameters cell. The context manager closes the file handle
# as soon as the JSON has been parsed.
with open(path_conf, 'r') as conf_file:
    conf = json.load(conf_file)

## Loading and Preprocessing

In [423]:
# Read the dataset selected in the conf file (raw, not yet preprocessed)
df = loading.read_csv_from_name(conf)

In [424]:
# Inspect the raw column names. In the output below, ' Income ' carries
# surrounding spaces and 'Unnamed: 0' is present (presumably an index column
# saved with the CSV - TODO confirm).
df.columns

Index(['Unnamed: 0', 'ID', 'Year_Birth', 'Education', 'Marital_Status',
       ' Income ', 'Kidhome', 'Teenhome', 'Dt_Customer', 'Recency', 'MntWines',
       'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts',
       'MntGoldProds', 'NumDealsPurchases', 'NumWebPurchases',
       'NumCatalogPurchases', 'NumStorePurchases', 'NumWebVisitsMonth',
       'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp1',
       'AcceptedCmp2', 'Response', 'Complain', 'Country'],
      dtype='object')

In [425]:
# Preprocess the selected dataset. Besides the preprocessed frame, the call
# returns X_columns and y_column (presumably the feature column names and the
# target column name - they are passed as such to basic_split later on).
df_preprocessed, X_columns, y_column = preprocessing.main_preprocessing_from_name(df,conf)

# Write the preprocessed dataset; the value returned by this call is what the cell displays ('OK')
loading.write_preprocessed_csv_from_name(df_preprocessed,conf)

'OK'

In [426]:
# Basic split between train and test. 0.25 is passed as the second argument
# (presumably the test-set fraction - confirm in preprocessing.basic_split).
# NOTE(review): no random seed is visible here; check whether basic_split is
# deterministic, since the Evaluation section repeats this split on reloaded data.
X_train, X_test, y_train, y_test = preprocessing.basic_split( df_preprocessed , 0.25 , X_columns, y_column)

## Modeling

In [427]:
# Fit the model selected in the conf file on the training split.
# The call returns the fitted estimator (clf) and best_params (presumably the
# best hyper-parameters of the search whose progress is logged below).
clf, best_params = modeling.main_modeling_from_name(X_train,y_train,conf)

# Save the fitted model; the value returned by this call is what the cell displays ('OK')
u.save_model(clf, conf)

Fitting 3 folds for each of 4 candidates, totalling 12 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  12 | elapsed:    5.8s remaining:    1.1s
[Parallel(n_jobs=-1)]: Done  12 out of  12 | elapsed:    5.8s finished


'OK'

## Evaluation

In [428]:
# This step is meant to be independent from the previous ones:
# the model and the data it needs are reloaded here.

# Fitted model, as saved by the Modeling section
clf = u.load_model(conf)
# Preprocessed dataset, as written by the Preprocessing section
df = loading.load_preprocessed_csv_from_name(conf)

# Rebuild the train/test split: the target name comes from the conf,
# every other column is treated as a feature.
y_column = u.get_y_column_from_conf(conf)
X_columns = [col for col in df.columns if col != y_column]
X_train, X_test, y_train, y_test = preprocessing.basic_split(
    df, 0.25, X_columns, y_column
)

../Outputs/Models/marketing_random_forest.sav


In [429]:
# Compute the evaluation metrics of the reloaded model on the test split.
# The bare expression on the last line displays the resulting dictionary.
dict_metrics = evaluation.main_evaluation(clf, X_test, y_test, conf)
dict_metrics

{'f1_score': 0.41379310344827586,
 'accuracy': 0.875,
 'recall': 0.32432432432432434,
 'precision': 0.5714285714285714,
 'confusion_matrix': {'tn': 226, 'fp': 9, 'fn': 25, 'tp': 12}}

In [430]:
# Parameters for the second run (conf_2.json). Execute this cell only when
# working from the notebook, i.e. when the main script is not being launched.

path_conf = "../params/conf/conf_2.json"

# Read the JSON configuration through a context manager so the file handle is
# closed right away instead of being left open until garbage collection.
with open(path_conf, 'r') as conf_file:
    conf = json.load(conf_file)

path_log = conf['path_log'] # e.g. "../log/my_log_file.txt"
log_level = conf['log_level'] # e.g. "DEBUG"

# Be careful to create the logger only once, otherwise each line will be duplicated.
# NOTE(review): a logger named "main_logger" is already requested by the first
# parameters cell; when the notebook is run top to bottom this is a second
# request for the same name - confirm in Utils that it does not duplicate log lines.
logger = u.my_get_logger(path_log, log_level, my_name="main_logger")

In [431]:
# Reload the conf file (useful after modifying it). path_conf must already be
# defined by the parameters cell. The context manager closes the file handle
# as soon as the JSON has been parsed.
with open(path_conf, 'r') as conf_file:
    conf = json.load(conf_file)

## Loading and Preprocessing

In [432]:

# Sanity check: display the path of the input file selected by the current configuration
conf["paths"]["Inputs_path"]+ conf["dict_info_files"][conf['selected_dataset']]["path_file"]

'../Inputs/marketing_data_2.csv'

In [433]:
# Read the dataset selected in the (second) conf file; kept in df2 so that df is not overwritten
df2 = loading.read_csv_from_name(conf)

In [434]:
# Preprocess the second dataset. Note that X_columns and y_column are
# overwritten by the values returned for this dataset.
df2_preprocessed, X_columns, y_column = preprocessing.main_preprocessing_from_name(df2,conf)

# Write the preprocessed dataset; the value returned by this call is what the cell displays ('OK')
loading.write_preprocessed_csv_from_name(df2_preprocessed,conf)

'OK'

## Evaluation

In [435]:
# This step is meant to be independent from the previous ones:
# the model and the data it needs are reloaded here.

# Load the model saved under the name "marketing_random_forest"
clf = u.load_model(conf,name="marketing_random_forest")
# Load the preprocessed dataset selected by the current conf
df2 = loading.load_preprocessed_csv_from_name(conf)

# Target and feature columns of the dataset just loaded.
# The feature list is derived from df2 itself rather than from `df`, a frame
# left over from an earlier section, so this cell no longer relies on kernel
# state it did not create.
y_column = u.get_y_column_from_conf(conf)
X_columns = [x for x in df2.columns if x != y_column ]



../Outputs/Models/marketing_random_forest.sav


In [445]:
# Compute the metrics of the reloaded model separately on each batch of df2.
# 4 is passed to create_batches (presumably the number of batches - four
# metric dictionaries are printed below).
batches= monitoring.create_batches(df2,4)
for batch in batches:
    # Split the batch into target and features
    y_monitored= batch[y_column]
    X_monitored= batch.drop(y_column,axis=1)
    # dict_metrics is overwritten on every iteration; only the printed output keeps all results
    dict_metrics = evaluation.main_evaluation(clf, X_monitored, y_monitored, conf)
    print(dict_metrics)


{'f1_score': 0.4222222222222222, 'accuracy': 0.8200692041522492, 'recall': 0.2878787878787879, 'precision': 0.7916666666666666, 'confusion_matrix': {'tn': 218, 'fp': 5, 'fn': 47, 'tp': 19}}
{'f1_score': 0.23728813559322037, 'accuracy': 0.84375, 'recall': 0.1590909090909091, 'precision': 0.4666666666666667, 'confusion_matrix': {'tn': 236, 'fp': 8, 'fn': 37, 'tp': 7}}
{'f1_score': 0.38095238095238093, 'accuracy': 0.9097222222222222, 'recall': 0.25, 'precision': 0.8, 'confusion_matrix': {'tn': 254, 'fp': 2, 'fn': 24, 'tp': 8}}
{'f1_score': 0.29411764705882354, 'accuracy': 0.9166666666666666, 'recall': 0.22727272727272727, 'precision': 0.4166666666666667, 'confusion_matrix': {'tn': 259, 'fp': 7, 'fn': 17, 'tp': 5}}


## Interpretability

In [446]:
# Keep a handle on the first batch (not used by the call below)
batch= batches[0]
# Run the monitoring on `df` against the batches built from df2. When the
# notebook is run top to bottom, `df` is the preprocessed first dataset
# reloaded in the first Evaluation section. The report printed below lists,
# per batch and per statistical test, a set of column names (presumably the
# columns flagged by that test - confirm in monitoring.main_monitoring).
monitoring.main_monitoring(df,batches)

batch 1
	 ->kolmogorov_smirnov:
	 	 -NumDealsPurchases
	 	 -Response
	 	 -Complain
	 ->t_test:
	 	 -NumDealsPurchases
	 	 -Complain
	 ->levene_test:
	 	 -NumDealsPurchases
	 	 -NumStorePurchases
	 	 -Complain
**************************************** 

batch 2
	 ->kolmogorov_smirnov:
	 	 -Response
	 	 -Complain
	 ->t_test:
	 	 -Response
	 	 -Complain
	 ->levene_test:
	 	 -Teenhome
	 	 -NumDealsPurchases
	 	 -Response
	 	 -Complain
**************************************** 

batch 3
	 ->kolmogorov_smirnov:
	 	 -NumDealsPurchases
	 	 -Response
	 	 -Complain
	 ->t_test:
	 	 -Complain
	 ->levene_test:
	 	 -NumDealsPurchases
	 	 -NumStorePurchases
	 	 -Response
	 	 -Complain
**************************************** 

batch 4
	 ->kolmogorov_smirnov:
	 	 -NumDealsPurchases
	 	 -Response
	 	 -Complain
	 ->t_test:
	 	 -NumDealsPurchases
	 	 -Complain
	 ->levene_test:
	 	 -Income
	 	 -NumDealsPurchases
	 	 -NumStorePurchases
	 	 -Complain
**************************************** 



## Monitoring