In [1]:
import pandas as pd 
import numpy as np 
import re
import openpyxl
import datetime
import pickle
from functools import reduce
import os
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

## Read CSV

In [10]:
# sample customer
df_sample = pd.read_csv('sample_customer_ids.csv')
df_sample.columns = [i.lower() for i in df_sample.columns.values]

Unnamed: 0,customer_id
0,b4de0353429533924e779183adfb1cf9
1,07c32749f8e297d3231ac20c28afa537
2,07ec4f2a77cb99fd0af4495f6661fda4
3,c6ed93cbaa003d364994413cd53f2420
4,fd8a4b30b1600bec91561d00cb04f944
...,...
95,1add71a15419b5c976077637409da248
96,61139f330db231ea740f7a95fa27e2db
97,31f03194be06a4efe41e906740a52e3a
98,bee36dcd59d38d1ea86c2f6c94e369e6


## Read pickle

In [4]:
df_customer = pd.read_pickle("./trg/customer_details.pkl")

with open('./trg/model_pipeline.pkl', 'rb') as f:
    model_pipeline = pickle.load(f)

## get customer details of sample customers

In [None]:
cols_to_keep = ['age_grp','gender','country','product_type']
rename_dict = {'id':'customer_id'}
dfs = [df_sample, df_customer[cols_to_keep].rename(columns=rename_dict)]
df = reduce(lambda left,right: pd.merge(left,right,on='client_id',how='left'), dfs)

for col in ['gender','country','product_type']:
    df[col] = df[col].apply(lambda x: str(x).lower())

## Prepare eligible time to call for each country

In [32]:
## get day of week of Oct 4th to 10th

unique_dow = list(set([int(datetime.datetime(2021,10,x,0,0,0).isoweekday()) for x in range(4,11)]))

date_dict = {}
for num in range(4,11):
    key = datetime.datetime(2021,10,num,0,0,0)
    val = int(key.isoweekday())
    date_dict[key] = val

reverse_date_dict = {v: k for k, v in date_dict.items()}

## eligible time by country

# Canada: 9am to 5pm (No weekends)
# New Zealand: 9am to 6pm (No weekends)
# UK: 8am to 8pm (No weekends)

time_range_dict = {
    'ca':{'start':9,'end':17},
    'nz':{'start':9,'end':18},
    'uk':{'start':8,'end':20}
}

{1: datetime.datetime(2021, 10, 4, 0, 0),
 2: datetime.datetime(2021, 10, 5, 0, 0),
 3: datetime.datetime(2021, 10, 6, 0, 0),
 4: datetime.datetime(2021, 10, 7, 0, 0),
 5: datetime.datetime(2021, 10, 8, 0, 0),
 6: datetime.datetime(2021, 10, 9, 0, 0),
 7: datetime.datetime(2021, 10, 10, 0, 0)}

In [23]:
cols = [i for i in df.columns.values]
i=0
for idx,row in df.iterrows():
    time_start_end = time_range_dict[row['country']]
    time_range = range(time_start_end['start'],time_start_end['end']+1)
    for dow in unique_dow:
        for hour in time_range:
            df_temp = df.iloc[idx:idx+1].copy()
            df_temp['time_of_week'] = (dow*100)+hour
            if i==0:
                df_abt = df_temp.copy()
            else:
                df_abt = pd.concat([df_abt,df_temp],ignore_index=True).reset_index(drop=True)
            i=i+1

df_abt


Unnamed: 0,customer_id,age_grp,gender,country,product_type,time_of_week
0,b4de0353429533924e779183adfb1cf9,d.50up,m,uk,creditcard,108
1,b4de0353429533924e779183adfb1cf9,d.50up,m,uk,creditcard,109
2,b4de0353429533924e779183adfb1cf9,d.50up,m,uk,creditcard,110
3,b4de0353429533924e779183adfb1cf9,d.50up,m,uk,creditcard,111
4,b4de0353429533924e779183adfb1cf9,d.50up,m,uk,creditcard,112
...,...,...,...,...,...,...
7940,6efb3db2657800ef400953e39616795c,b.25-39,m,uk,bnpl,716
7941,6efb3db2657800ef400953e39616795c,b.25-39,m,uk,bnpl,717
7942,6efb3db2657800ef400953e39616795c,b.25-39,m,uk,bnpl,718
7943,6efb3db2657800ef400953e39616795c,b.25-39,m,uk,bnpl,719


## Score and pick best hour to call

In [43]:
df_abt[['prob_0','prob_1']] = model_pipeline.predict_proba(df_abt)

df_abt = df_abt.sort_values(by=['customer_id','prob_1'],ascending=[1,0])
df_best_time = df_abt.drop_duplicates(subset='customer_id',keep='first')
df_best_time['best_time'] = df_best_time['time_of_week'].apply(lambda x: reverse_date_dict[int(str(x)[0])].strftime('%d-%b-%Y')+' hour:'+str(x)[1:])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_best_time['best_time'] = df_best_time['time_of_week'].apply(lambda x: reverse_date_dict[int(str(x)[0])].strftime('%d-%b-%Y')+' hour:'+str(x)[1:])


In [44]:
df_best_time[['customer_id','best_time']].to_csv('sample_best_time_to_call.csv', index=False)