In [137]:
import pandas as pd
import numpy as np
import pickle
from pathlib import Path
from datetime import datetime
from tabulate import tabulate
import matplotlib.pyplot as plt
import seaborn as sns
import os
import json
pd.options.display.max_columns=None

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.feature_selection import SequentialFeatureSelector
import xgboost as xgb

from sklearn.model_selection import StratifiedKFold
from xgboost import XGBClassifier 
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from xgboost import plot_importance

from imblearn.over_sampling import SMOTE
from sklearn.utils import resample

In [66]:
# Location of the pickled grid-search result that is inspected below.
model_path = './models/stratified/2025-03-24_00_36_18_162408'
grid_search_model_file = 'grid_search_Sale_CC_xgboost.pkl'
#final_model_file = 'Sale_CL_svm.pkl'
# Directory and file name are joined with a single forward slash.
file_toopen = '/'.join((model_path, grid_search_model_file))

In [67]:
# Deserialize the saved search object and keep the mean test score stored in
# its cv_results_ for each evaluated candidate.
# NOTE: pickle.load can execute arbitrary code; only open trusted artefacts.
with open(file_toopen, 'rb') as pickle_handle:
    grid_search = pickle.load(pickle_handle)
cv_scores = grid_search.cv_results_['mean_test_score']

In [68]:
# Wrap the score array in a one-column frame so pandas summaries can be used.
cv_res = pd.DataFrame({'cv_scores': cv_scores})

In [69]:
# Summary statistics of the CV scores: report the mean and the coefficient of
# variation (std expressed as a percentage of the mean).
tmp = cv_res.describe()
mean_row = tmp.loc['mean']
std_row = tmp.loc['std']
print("mean cv : ", mean_row)
print("cv variation : ", 100 * std_row / mean_row)

mean cv :  cv_scores    0.83838
Name: mean, dtype: float64
cv variation :  cv_scores    5.688346
dtype: float64


In [75]:
# Same summary with a finer grid of percentiles, mostly in the lower tail.
score_percentiles = [0.05, 0.1, 0.15, 0.2, 0.25, 0.5, 0.75, 1]
cv_res.describe(percentiles=score_percentiles)

Unnamed: 0,cv_scores
count,192.0
mean,0.83838
std,0.04769
min,0.679536
5%,0.728129
10%,0.750851
15%,0.788653
20%,0.812838
25%,0.823498
50%,0.855979


## Load the data and select the unlabeled cases used as prediction input

In [127]:
# Load the modeling dataset from a pickle in the working directory.
# NOTE(review): read_pickle can execute arbitrary code — only load trusted files.
data = pd.read_pickle('data_for_modeling.pkl')

In [78]:
# Preview the first rows, then print the frame's shape and its column names.
display(data.head())
print(data.shape)
print(data.columns)

Unnamed: 0,Client,Sex,Age,Tenure,Count_CA,Count_SA,Count_MF,Count_OVD,Count_CC,Count_CL,ActBal_CA,ActBal_SA,ActBal_MF,ActBal_OVD,ActBal_CC,ActBal_CL,VolumeCred,VolumeCred_CA,TransactionsCred,TransactionsCred_CA,VolumeDeb,VolumeDeb_CA,VolumeDebCash_Card,VolumeDebCashless_Card,VolumeDeb_PaymentOrder,TransactionsDeb,TransactionsDeb_CA,TransactionsDebCash_Card,TransactionsDebCashless_Card,TransactionsDeb_PaymentOrder,Sale_MF,Sale_CC,Sale_CL,Revenue_MF,Revenue_CC,Revenue_CL
0,1217,M,38,165,1,0.0,0.0,0.0,0.0,0.0,6752.244643,0.0,0.0,0.0,0.0,0.0,0.002143,0.002143,1,1,714.285714,714.285714,0.0,0.0,714.285714,1,1,0,0,1,0,0,0,0.0,0.0,0.0
1,850,F,49,44,1,0.0,0.0,0.0,0.0,0.0,43.523214,0.0,0.0,0.0,0.0,0.0,1392.402857,1392.402857,3,3,1226.345357,1226.345357,0.0,0.0,121.928571,6,6,0,0,1,0,1,0,0.0,0.893929,0.0
3,1038,M,29,106,1,0.0,0.0,0.0,0.0,0.0,27.035714,0.0,0.0,0.0,0.0,0.0,0.006071,0.006071,1,1,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0
4,225,M,14,187,1,0.0,0.0,1.0,0.0,0.0,345.686071,0.0,0.0,618.3975,0.0,0.0,0.0075,0.0075,1,1,130.521429,130.521429,0.0,111.235714,19.285714,5,5,0,4,1,0,0,1,0.0,0.0,8.130714
5,827,M,57,50,1,0.0,0.0,1.0,1.0,0.0,49.193571,0.0,0.0,0.0,0.464286,0.0,1755.281786,1750.404286,9,6,1474.321429,1455.035714,607.142857,17.857143,843.25,26,23,4,1,17,0,1,1,0.0,7.928571,12.242143


(736, 36)
Index(['Client', 'Sex', 'Age', 'Tenure', 'Count_CA', 'Count_SA', 'Count_MF',
       'Count_OVD', 'Count_CC', 'Count_CL', 'ActBal_CA', 'ActBal_SA',
       'ActBal_MF', 'ActBal_OVD', 'ActBal_CC', 'ActBal_CL', 'VolumeCred',
       'VolumeCred_CA', 'TransactionsCred', 'TransactionsCred_CA', 'VolumeDeb',
       'VolumeDeb_CA', 'VolumeDebCash_Card', 'VolumeDebCashless_Card',
       'VolumeDeb_PaymentOrder', 'TransactionsDeb', 'TransactionsDeb_CA',
       'TransactionsDebCash_Card', 'TransactionsDebCashless_Card',
       'TransactionsDeb_PaymentOrder', 'Sale_MF', 'Sale_CC', 'Sale_CL',
       'Revenue_MF', 'Revenue_CC', 'Revenue_CL'],
      dtype='object')


In [128]:
# Keep the rows whose Sale_MF + Sale_CC + Sale_CL equals 0.
# .copy() detaches the subset so later edits never write back into `data`.
total_sales = data.Sale_MF + data.Sale_CC + data.Sale_CL
data_without_labels = data.loc[total_sales == 0].copy()

In [80]:
# Row/column count of the subset selected above.
data_without_labels.shape

(318, 36)

In [81]:
# List the columns still present in the subset (Sale_*/Revenue_* included at this point).
print(data_without_labels.columns)

Index(['Client', 'Sex', 'Age', 'Tenure', 'Count_CA', 'Count_SA', 'Count_MF',
       'Count_OVD', 'Count_CC', 'Count_CL', 'ActBal_CA', 'ActBal_SA',
       'ActBal_MF', 'ActBal_OVD', 'ActBal_CC', 'ActBal_CL', 'VolumeCred',
       'VolumeCred_CA', 'TransactionsCred', 'TransactionsCred_CA', 'VolumeDeb',
       'VolumeDeb_CA', 'VolumeDebCash_Card', 'VolumeDebCashless_Card',
       'VolumeDeb_PaymentOrder', 'TransactionsDeb', 'TransactionsDeb_CA',
       'TransactionsDebCash_Card', 'TransactionsDebCashless_Card',
       'TransactionsDeb_PaymentOrder', 'Sale_MF', 'Sale_CC', 'Sale_CL',
       'Revenue_MF', 'Revenue_CC', 'Revenue_CL'],
      dtype='object')


## Run regression model on this data

In [107]:
# Sale_* and Revenue_* columns are removed from the frame before scoring.
cols_to_drop = [
    'Sale_MF', 'Sale_CC', 'Sale_CL',
    'Revenue_MF', 'Revenue_CC', 'Revenue_CL',
]

In [108]:
# Remove the Sale_*/Revenue_* columns listed in cols_to_drop.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps the
# cell safe to re-run: a second execution finds the columns already gone and
# is a no-op rather than a KeyError.
# Trade-off: a misspelled or renamed column is skipped silently.
data_without_labels = data_without_labels.drop(columns=cols_to_drop, errors='ignore')

In [90]:
# Check which columns remain after the drop.
data_without_labels.columns

Index(['Client', 'Sex', 'Age', 'Tenure', 'Count_CA', 'Count_SA', 'Count_MF',
       'Count_OVD', 'Count_CC', 'Count_CL', 'ActBal_CA', 'ActBal_SA',
       'ActBal_MF', 'ActBal_OVD', 'ActBal_CC', 'ActBal_CL', 'VolumeCred',
       'VolumeCred_CA', 'TransactionsCred', 'TransactionsCred_CA', 'VolumeDeb',
       'VolumeDeb_CA', 'VolumeDebCash_Card', 'VolumeDebCashless_Card',
       'VolumeDeb_PaymentOrder', 'TransactionsDeb', 'TransactionsDeb_CA',
       'TransactionsDebCash_Card', 'TransactionsDebCashless_Card',
       'TransactionsDeb_PaymentOrder'],
      dtype='object')

In [109]:
# Encode categorical variable
cat_cols_to_encode = ['Sex']
data_encoded = pd.get_dummies(data_without_labels, columns=cat_cols_to_encode)

In [94]:
# Columns passed to the regression model, in this exact order.
model_features = [
    'ActBal_CC',
    'TransactionsDeb',
    'TransactionsCred',
    'Count_MF',
    'Sex_F',
    'VolumeDeb_CA',
    'ActBal_MF',
    'TransactionsDebCashless_Card',
    'VolumeCred_CA',
    'VolumeDeb_PaymentOrder',
    'ActBal_CL',
    'VolumeCred',
    'Sex_M',
    'TransactionsDeb_CA',
    'VolumeDeb',
]

In [110]:
# Use the Client id as the row index so it is not treated as a feature.
data_encoded = data_encoded.set_index('Client')

In [100]:
# Preview the encoded frame, now indexed by Client.
data_encoded.head()

Unnamed: 0_level_0,ActBal_CC,TransactionsDeb,TransactionsCred,Count_MF,Sex_F,VolumeDeb_CA,ActBal_MF,TransactionsDebCashless_Card,VolumeCred_CA,VolumeDeb_PaymentOrder,ActBal_CL,VolumeCred,Sex_M,TransactionsDeb_CA,VolumeDeb
Client,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1217,0.0,1,1,0.0,0,714.285714,0.0,0,0.002143,714.285714,0.0,0.002143,1,1,714.285714
1038,0.0,0,1,0.0,0,0.0,0.0,0,0.006071,0.0,0.0,0.006071,1,0,0.0
1219,0.0,13,2,0.0,0,368.492857,0.0,3,347.322143,246.321429,0.0,347.322143,1,13,368.492857
461,0.0,18,5,0.0,1,1590.598214,0.0,3,1463.800714,719.964286,0.0,1463.916071,0,18,1590.598214
1048,0.0,3,2,0.0,1,541.25,0.0,0,729.275714,0.0,0.0,729.275714,0,3,541.25


In [169]:
# Numeric columns handed to the regression scaler, in this exact order.
numeric_cols_to_standardize = [
    'Age', 'Tenure',
    'Count_CA', 'Count_SA', 'Count_MF', 'Count_OVD', 'Count_CC', 'Count_CL',
    'ActBal_CA', 'ActBal_SA', 'ActBal_MF', 'ActBal_OVD', 'ActBal_CC', 'ActBal_CL',
    'VolumeCred', 'VolumeCred_CA',
    'TransactionsCred', 'TransactionsCred_CA',
    'VolumeDeb', 'VolumeDeb_CA',
    'VolumeDebCash_Card', 'VolumeDebCashless_Card', 'VolumeDeb_PaymentOrder',
    'TransactionsDeb', 'TransactionsDeb_CA',
    'TransactionsDebCash_Card', 'TransactionsDebCashless_Card',
    'TransactionsDeb_PaymentOrder',
]

False

In [113]:
# Load the scalar
file_toopen = './models/Regression/scaler.pkl'
with open(file_toopen,'rb') as f:
        scaler = pickle.load(f)
data_encoded[numeric_cols_to_standardize] = scaler.transform(data_encoded[numeric_cols_to_standardize])

In [116]:
# Keep only the model's input columns, in model_features order, as an
# independent frame.
data_selected = data_encoded[model_features].copy()

In [124]:
# Preview the frame holding the regression inputs.
data_selected.head()

Unnamed: 0,Client,ActBal_CC,TransactionsDeb,TransactionsCred,Count_MF,Sex_F,VolumeDeb_CA,ActBal_MF,TransactionsDebCashless_Card,VolumeCred_CA,VolumeDeb_PaymentOrder,ActBal_CL,VolumeCred,Sex_M,TransactionsDeb_CA,VolumeDeb,Predicted_revenue
0,1217,-0.190496,-0.946779,-0.727636,-0.173651,0,-0.108203,-0.216118,-0.676363,-0.899303,0.929591,-0.221113,-0.870508,1,-1.018349,-0.190698,11.283205
1,1038,-0.190496,-1.010282,-0.727636,-0.173651,0,-0.973017,-0.216118,-0.676363,-0.899299,-0.700439,-0.221113,-0.870505,1,-1.09964,-0.895385,11.667446
2,1219,-0.190496,-0.184737,-0.566048,-0.173651,0,-0.526868,-0.216118,-0.287513,-0.525633,-0.138323,-0.221113,-0.543142,1,-0.042862,-0.531844,11.169191
3,461,-0.190496,0.13278,-0.081284,-0.173651,1,0.952782,-0.216118,-0.287513,0.675547,0.94255,-0.221113,0.509302,0,0.363591,0.673837,12.039372
4,1048,-0.190496,-0.819772,-0.566048,-0.173651,1,-0.317704,-0.216118,-0.676363,-0.114703,-0.700439,-0.221113,-0.183133,0,-0.855768,-0.361409,12.568632


In [119]:
# Load model
file_toopen = './models/Regression/best_model.pkl'
with open(file_toopen,'rb') as f:
        model = pickle.load(f)
# make predictions
predicted_revenue = model.predict(data_selected)

In [121]:
# Attach the predictions as a new column next to the inputs they came from.
data_selected = data_selected.assign(Predicted_revenue=predicted_revenue)

In [123]:
# Turn the Client index back into a regular column.
data_selected = data_selected.reset_index()

In [125]:
# Distribution of the predicted values.
data_selected['Predicted_revenue'].describe()

count    318.000000
mean      11.450184
std        1.038723
min        4.812787
25%       11.189282
50%       11.623941
75%       11.952269
max       15.352900
Name: Predicted_revenue, dtype: float64

In [126]:
# Persist the inputs together with Predicted_revenue.
# NOTE(review): to_csv defaults to index=True, so the RangeIndex produced by the
# earlier reset_index is also written as an unnamed first column — confirm
# whether downstream readers expect it before switching to index=False.
data_selected.to_csv('./models/Regression/PredictedRevenues.csv')

## Run classification models on this data

In [129]:
# Columns removed before classification: the Sale_*/Revenue_* columns plus
# the four *_CA volume/transaction columns.
cols_to_drop = [
    'Sale_MF', 'Sale_CC', 'Sale_CL',
    'Revenue_MF', 'Revenue_CC', 'Revenue_CL',
    'VolumeCred_CA', 'TransactionsCred_CA',
    'VolumeDeb_CA', 'TransactionsDeb_CA',
]

In [130]:
# Numeric columns handed to the classification scalers, in this exact order.
numeric_cols_to_standardize = [
    'Age', 'Tenure',
    'Count_CA', 'Count_SA', 'Count_MF', 'Count_OVD', 'Count_CC', 'Count_CL',
    'ActBal_CA', 'ActBal_SA', 'ActBal_MF', 'ActBal_OVD', 'ActBal_CC', 'ActBal_CL',
    'VolumeCred', 'TransactionsCred', 'VolumeDeb',
    'VolumeDebCash_Card', 'VolumeDebCashless_Card', 'VolumeDeb_PaymentOrder',
    'TransactionsDeb',
    'TransactionsDebCash_Card', 'TransactionsDebCashless_Card',
    'TransactionsDeb_PaymentOrder',
]
# Categorical columns that get one-hot encoded.
cat_cols_to_encode = ['Sex']

In [131]:
# Remove every column in cols_to_drop that is still present in the frame.
# An earlier cell already removed the Sale_*/Revenue_* columns from
# data_without_labels, so a strict drop raises KeyError when the notebook is
# run top to bottom on a fresh kernel. errors='ignore' drops what is still
# there (the *_CA columns) and skips what is already gone; rebinding instead
# of inplace=True also makes the cell safe to re-run.
# Trade-off: a misspelled or renamed column is skipped silently.
data_without_labels = data_without_labels.drop(columns=cols_to_drop, errors='ignore')

In [132]:
# Preview the frame after the classification-specific column drop.
data_without_labels.head()

Unnamed: 0,Client,Sex,Age,Tenure,Count_CA,Count_SA,Count_MF,Count_OVD,Count_CC,Count_CL,ActBal_CA,ActBal_SA,ActBal_MF,ActBal_OVD,ActBal_CC,ActBal_CL,VolumeCred,TransactionsCred,VolumeDeb,VolumeDebCash_Card,VolumeDebCashless_Card,VolumeDeb_PaymentOrder,TransactionsDeb,TransactionsDebCash_Card,TransactionsDebCashless_Card,TransactionsDeb_PaymentOrder
0,1217,M,38,165,1,0.0,0.0,0.0,0.0,0.0,6752.244643,0.0,0.0,0.0,0.0,0.0,0.002143,1,714.285714,0.0,0.0,714.285714,1,0,0,1
3,1038,M,29,106,1,0.0,0.0,0.0,0.0,0.0,27.035714,0.0,0.0,0.0,0.0,0.0,0.006071,1,0.0,0.0,0.0,0.0,0,0,0,0
9,1219,M,33,19,1,0.0,0.0,0.0,0.0,0.0,5164.206429,0.0,0.0,0.0,0.0,0.0,347.322143,2,368.492857,71.428571,19.671429,246.321429,13,2,3,5
12,461,F,17,64,1,1.0,0.0,0.0,0.0,0.0,6.537857,6128.227857,0.0,0.0,0.0,0.0,1463.916071,5,1590.598214,0.0,340.751786,719.964286,18,0,3,12
13,1048,F,65,71,1,0.0,0.0,1.0,0.0,0.0,1911.491429,0.0,0.0,0.0,0.0,0.0,729.275714,2,541.25,535.714286,0.0,0.0,3,1,0,0


In [133]:
# One-hot encode the categorical columns for the classification models.
data_encoded = pd.get_dummies(data_without_labels, columns=cat_cols_to_encode)
# get_dummies only creates an indicator for categories present in *this*
# frame. The classifier column lists in this notebook contain both Sex_F and
# Sex_M, so add any missing indicator as all zeros instead of failing later
# with a KeyError when the model inputs are selected.
for dummy_col in ('Sex_F', 'Sex_M'):
    if dummy_col not in data_encoded.columns:
        data_encoded[dummy_col] = 0

### Predicting Sale_MF

In [134]:
# Work on a private copy so the MF-specific scaling does not alter
# data_encoded, which the other product sections also copy from.
data_encoded_MF = data_encoded.copy()

In [135]:
# Load the scalar
folder_path = './models/Classification/Sale_MF/'
file_toopen = 'scaler.pkl'
with open(folder_path+file_toopen,'rb') as f:
        scaler = pickle.load(f)
data_encoded_MF[numeric_cols_to_standardize] = scaler.transform(data_encoded_MF[numeric_cols_to_standardize])

In [138]:
# Load the model
file_toopen = 'Sale_MF_xgboost_model.json'
loaded_xgb_model = xgb.XGBClassifier()
loaded_xgb_model.load_model(folder_path+file_toopen)

In [140]:
# Use the Client id as the row index so it is not treated as a feature.
data_encoded_MF = data_encoded_MF.set_index('Client')

In [143]:
# Column order used when selecting the classifier's input frame.
xgboost_column_order = [
    'ActBal_SA', 'VolumeCred', 'VolumeDebCash_Card', 'VolumeDeb_PaymentOrder',
    'ActBal_CC', 'Count_CC', 'TransactionsDeb', 'ActBal_MF', 'Count_MF',
    'TransactionsDebCashless_Card', 'TransactionsDebCash_Card', 'Count_CL',
    'Sex_F', 'Count_OVD', 'Sex_M', 'TransactionsDeb_PaymentOrder', 'Age',
    'ActBal_CL', 'VolumeDeb', 'VolumeDebCashless_Card', 'ActBal_CA',
    'Count_CA', 'Tenure', 'ActBal_OVD', 'TransactionsCred', 'Count_SA',
]

In [144]:
# Select the inputs in xgboost_column_order and predict Sale_MF for each row.
mf_model_inputs = data_encoded_MF[xgboost_column_order]
MF_predictions = loaded_xgb_model.predict(mf_model_inputs)

In [146]:
# Store the predictions as the Sale_MF column.
data_encoded_MF = data_encoded_MF.assign(Sale_MF=MF_predictions)

In [147]:
# Turn the Client index back into a regular column.
data_encoded_MF = data_encoded_MF.reset_index()

In [148]:
# Preview the scaled inputs together with the Sale_MF predictions.
data_encoded_MF.head()

Unnamed: 0,Client,Age,Tenure,Count_CA,Count_SA,Count_MF,Count_OVD,Count_CC,Count_CL,ActBal_CA,ActBal_SA,ActBal_MF,ActBal_OVD,ActBal_CC,ActBal_CL,VolumeCred,TransactionsCred,VolumeDeb,VolumeDebCash_Card,VolumeDebCashless_Card,VolumeDeb_PaymentOrder,TransactionsDeb,TransactionsDebCash_Card,TransactionsDebCashless_Card,TransactionsDeb_PaymentOrder,Sex_F,Sex_M,Sale_MF
0,1217,-0.151879,0.847268,-0.276266,-0.478657,-0.16359,-0.621202,-0.347571,-0.299283,3.274917,-0.341393,-0.21421,-0.234645,-0.206853,-0.226233,-0.908844,-0.769182,-0.205144,-0.664825,-0.587701,0.906674,-0.980229,-0.702096,-0.663151,-0.67995,0,1,0
1,1038,-0.639644,-0.04154,-0.276266,-0.478657,-0.16359,-0.621202,-0.347571,-0.299283,-0.64129,-0.341393,-0.21421,-0.234645,-0.206853,-0.226233,-0.90884,-0.769182,-0.897298,-0.664825,-0.587701,-0.727652,-1.046051,-0.702096,-0.663151,-0.896858,0,1,1
2,1219,-0.42286,-1.352156,-0.276266,-0.478657,-0.16359,-0.621202,-0.347571,-0.299283,2.350175,-0.341393,-0.21421,-0.234645,-0.206853,-0.226233,-0.576129,-0.600939,-0.540223,-0.446635,-0.489036,-0.164055,-0.19037,-0.064868,-0.266685,0.187684,0,1,0
3,461,-1.289998,-0.674251,-0.276266,1.470995,-0.16359,-0.621202,-0.347571,-0.299283,-0.653226,3.697771,-0.21421,-0.234645,-0.206853,-0.226233,0.49351,-0.096211,0.644016,-0.664825,1.12139,0.919666,0.138738,-0.702096,-0.266685,1.706044,1,0,0
4,1048,1.311416,-0.568799,-0.276266,-0.478657,-0.16359,1.609782,-0.347571,-0.299283,0.456062,-0.341393,-0.21421,-0.234645,-0.206853,-0.226233,-0.210238,-0.600939,-0.372819,0.9716,-0.587701,-0.727652,-0.848586,-0.383482,-0.663151,-0.896858,1,0,1


In [149]:
# Write the scored frame into the folder the MF scaler and model were loaded from.
# NOTE(review): to_csv defaults to index=True, so the RangeIndex left by the
# earlier reset_index is written as an unnamed first column — confirm whether
# downstream readers expect it.
data_encoded_MF.to_csv(folder_path+'Predictions_sale_MF.csv')

### Predicting Sale_CC

In [151]:
# Work on a private copy so the CC-specific scaling does not alter
# data_encoded, which the other product sections also copy from.
data_encoded_CC = data_encoded.copy()

In [152]:
# Load the scalar
folder_path = './models/Classification/Sale_CC/'
file_toopen = 'scaler.pkl'
with open(folder_path+file_toopen,'rb') as f:
        scaler = pickle.load(f)
data_encoded_CC[numeric_cols_to_standardize] = scaler.transform(data_encoded_CC[numeric_cols_to_standardize])

In [153]:
# Load the model
file_toopen = 'Sale_CC_xgboost_model.json'
loaded_xgb_model = xgb.XGBClassifier()
loaded_xgb_model.load_model(folder_path+file_toopen)

In [154]:
# Use the Client id as the row index so it is not treated as a feature.
data_encoded_CC = data_encoded_CC.set_index('Client')

In [155]:
# Column order used when selecting the classifier's input frame.
xgboost_column_order = [
    'ActBal_SA', 'VolumeCred', 'VolumeDebCash_Card', 'VolumeDeb_PaymentOrder',
    'ActBal_CC', 'Count_CC', 'TransactionsDeb', 'ActBal_MF', 'Count_MF',
    'TransactionsDebCashless_Card', 'TransactionsDebCash_Card', 'Count_CL',
    'Sex_F', 'Count_OVD', 'Sex_M', 'TransactionsDeb_PaymentOrder', 'Age',
    'ActBal_CL', 'VolumeDeb', 'VolumeDebCashless_Card', 'ActBal_CA',
    'Count_CA', 'Tenure', 'ActBal_OVD', 'TransactionsCred', 'Count_SA',
]

In [156]:
# Select the inputs in xgboost_column_order and predict Sale_CC for each row.
cc_model_inputs = data_encoded_CC[xgboost_column_order]
CC_predictions = loaded_xgb_model.predict(cc_model_inputs)

In [157]:
# Store the predictions as the Sale_CC column.
data_encoded_CC = data_encoded_CC.assign(Sale_CC=CC_predictions)

In [158]:
# Turn the Client index back into a regular column, then preview the result.
data_encoded_CC = data_encoded_CC.reset_index()
data_encoded_CC.head()

Unnamed: 0,Client,Age,Tenure,Count_CA,Count_SA,Count_MF,Count_OVD,Count_CC,Count_CL,ActBal_CA,ActBal_SA,ActBal_MF,ActBal_OVD,ActBal_CC,ActBal_CL,VolumeCred,TransactionsCred,VolumeDeb,VolumeDebCash_Card,VolumeDebCashless_Card,VolumeDeb_PaymentOrder,TransactionsDeb,TransactionsDebCash_Card,TransactionsDebCashless_Card,TransactionsDeb_PaymentOrder,Sex_F,Sex_M,Sale_CC
0,1217,-0.182683,0.845548,-0.250376,-0.470764,-0.179349,-0.607332,-0.33665,-0.296246,2.890179,-0.348975,-0.208101,-0.227724,-0.199689,-0.228696,-0.876937,-0.74312,-0.165915,-0.66,-0.618694,0.944452,-0.940391,-0.71479,-0.662251,-0.656065,0,1,1
1,1038,-0.676075,-0.037086,-0.250376,-0.470764,-0.179349,-0.607332,-0.33665,-0.296246,-0.629706,-0.348975,-0.208101,-0.227724,-0.199689,-0.228696,-0.876933,-0.74312,-0.923744,-0.66,-0.618694,-0.701094,-1.003284,-0.71479,-0.662251,-0.878077,0,1,0
2,1219,-0.45679,-1.338597,-0.250376,-0.470764,-0.179349,-0.607332,-0.33665,-0.296246,2.059021,-0.348975,-0.208101,-0.227724,-0.199689,-0.228696,-0.547288,-0.579506,-0.532788,-0.444264,-0.520051,-0.133628,-0.185668,-0.082367,-0.290916,0.231982,0,1,0
3,461,-1.333931,-0.665402,-0.250376,1.519554,-0.179349,-0.607332,-0.33665,-0.296246,-0.640434,3.395644,-0.208101,-0.227724,-0.199689,-0.228696,0.512496,-0.088665,0.763817,-0.66,1.090015,0.957534,0.1288,-0.71479,-0.290916,1.786065,1,0,0
4,1048,1.297493,-0.560683,-0.250376,-0.470764,-0.179349,1.646545,-0.33665,-0.296246,0.356593,-0.348975,-0.208101,-0.227724,-0.199689,-0.228696,-0.184767,-0.579506,-0.349499,0.958019,-0.618694,-0.701094,-0.814604,-0.398578,-0.662251,-0.878077,1,0,0


In [159]:
# Write the scored frame into the folder the CC scaler and model were loaded from.
# NOTE(review): to_csv defaults to index=True, so the RangeIndex left by the
# earlier reset_index is written as an unnamed first column — confirm whether
# downstream readers expect it.
data_encoded_CC.to_csv(folder_path+'Predictions_sale_CC.csv')

### Predicting Sale_CL

In [160]:
# Work on a private copy so the CL-specific scaling does not alter
# data_encoded, which the other product sections also copy from.
data_encoded_CL = data_encoded.copy()

In [161]:
# Load the scalar
folder_path = './models/Classification/Sale_CL/'
file_toopen = 'scaler.pkl'
with open(folder_path+file_toopen,'rb') as f:
        scaler = pickle.load(f)
data_encoded_CL[numeric_cols_to_standardize] = scaler.transform(data_encoded_CL[numeric_cols_to_standardize])

In [162]:
# Load the model
file_toopen = 'Sale_CL_xgboost_model.json'
loaded_xgb_model = xgb.XGBClassifier()
loaded_xgb_model.load_model(folder_path+file_toopen)

In [163]:
# Use the Client id as the row index so it is not treated as a feature.
data_encoded_CL = data_encoded_CL.set_index('Client')

In [164]:
# Column order used when selecting the classifier's input frame.
xgboost_column_order = [
    'ActBal_SA', 'VolumeCred', 'VolumeDebCash_Card', 'VolumeDeb_PaymentOrder',
    'ActBal_CC', 'Count_CC', 'TransactionsDeb', 'ActBal_MF', 'Count_MF',
    'TransactionsDebCashless_Card', 'TransactionsDebCash_Card', 'Count_CL',
    'Sex_F', 'Count_OVD', 'Sex_M', 'TransactionsDeb_PaymentOrder', 'Age',
    'ActBal_CL', 'VolumeDeb', 'VolumeDebCashless_Card', 'ActBal_CA',
    'Count_CA', 'Tenure', 'ActBal_OVD', 'TransactionsCred', 'Count_SA',
]

In [165]:
# Select the inputs in xgboost_column_order and predict Sale_CL for each row.
cl_model_inputs = data_encoded_CL[xgboost_column_order]
CL_predictions = loaded_xgb_model.predict(cl_model_inputs)

In [166]:
# Store the predictions as the Sale_CL column.
data_encoded_CL = data_encoded_CL.assign(Sale_CL=CL_predictions)

In [167]:
# Turn the Client index back into a regular column, then preview the result.
data_encoded_CL = data_encoded_CL.reset_index()
data_encoded_CL.head()

Unnamed: 0,Client,Age,Tenure,Count_CA,Count_SA,Count_MF,Count_OVD,Count_CC,Count_CL,ActBal_CA,ActBal_SA,ActBal_MF,ActBal_OVD,ActBal_CC,ActBal_CL,VolumeCred,TransactionsCred,VolumeDeb,VolumeDebCash_Card,VolumeDebCashless_Card,VolumeDeb_PaymentOrder,TransactionsDeb,TransactionsDebCash_Card,TransactionsDebCashless_Card,TransactionsDeb_PaymentOrder,Sex_F,Sex_M,Sale_CL
0,1217,-0.201989,0.856457,-0.261832,-0.474815,-0.234571,-0.625833,-0.347571,-0.27232,3.089319,-0.341032,-0.21642,-0.231193,-0.205397,-0.197389,-0.881738,-0.73353,-0.213904,-0.639547,-0.622229,0.855818,-0.935019,-0.706549,-0.675067,-0.67704,0,1,1
1,1038,-0.684644,-0.013958,-0.261832,-0.474815,-0.234571,-0.625833,-0.347571,-0.27232,-0.661882,-0.341032,-0.21642,-0.231193,-0.205397,-0.197389,-0.881734,-0.73353,-0.89778,-0.639547,-0.622229,-0.717345,-0.996909,-0.706549,-0.675067,-0.889569,0,1,0
2,1219,-0.470131,-1.29745,-0.261832,-0.474815,-0.234571,-0.625833,-0.347571,-0.27232,2.20354,-0.341032,-0.21642,-0.231193,-0.205397,-0.197389,-0.548132,-0.567878,-0.544975,-0.423736,-0.526611,-0.17484,-0.19234,-0.037081,-0.301562,0.173078,0,1,1
3,461,-1.328183,-0.633575,-0.261832,1.61187,-0.234571,-0.625833,-0.347571,-0.27232,-0.673316,3.621338,-0.21642,-0.231193,-0.205397,-0.197389,0.524371,-0.070923,0.625101,-0.639547,1.03408,0.868325,0.117109,-0.706549,-0.301562,1.660784,1,0,1
4,1048,1.245975,-0.530305,-0.261832,-0.474815,-0.234571,1.597871,-0.347571,-0.27232,0.389233,-0.341032,-0.21642,-0.231193,-0.205397,-0.197389,-0.181261,-0.567878,-0.379573,0.979034,-0.622229,-0.717345,-0.811239,-0.371815,-0.675067,-0.889569,1,0,0


In [168]:
# Write the scored frame into the folder the CL scaler and model were loaded from.
# NOTE(review): to_csv defaults to index=True, so the RangeIndex left by the
# earlier reset_index is written as an unnamed first column — confirm whether
# downstream readers expect it.
data_encoded_CL.to_csv(folder_path+'Predictions_sale_CL.csv')