In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import sqlalchemy as sa
import csv
import datetime as dtt
from ortools.linear_solver import pywraplp as py

%matplotlib inline

In [2]:
df_master = pd.read_csv('master_calldata_feb17.csv')

In [3]:
df_master.dtypes

call type                object
call result              object
lead source              object
tutoring for             object
ew_gw                    object
hagrid who               object
hagrid when              object
lead age                 object
ordinal attempt          object
previous cc90s           object
attempts                float64
lead count              float64
connects                float64
cc90 count              float64
closed that call        float64
closed ever             float64
talk duration (mins)    float64
dtype: object

In [4]:
#df_master.rename(columns={'talk_duration_mins':'talk duration (mins)','lead_count':'lead count','cc90_count':'cc90 count',
#                         'closed_that_call':'closed that call','closed_ever':'closed ever'} , inplace=True)

df_master['talk duration (mins)'].fillna(0.0,inplace=True)

df_master[['attempts','connects','lead count', 'cc90 count','closed that call',
           'closed ever']]=df_master[['attempts','connects','lead count', 'cc90 count',
                         'closed that call','closed ever']].astype(float)

df_master.sort_values(by=['call type','call result', 'lead source', 
                          'ew_gw','tutoring for','hagrid who', 'hagrid when','lead age',
                          'ordinal attempt', 
                          'previous cc90s','attempts'],
                          ascending=False, 
                          inplace=True, 
                          kind='quicksort', 
                          na_position='last')

In [5]:
df_invalid=df_master.loc[(df_master['lead age'] =='INVALID')]

df_vmh_dataset=df_master.loc[
                         ((df_master['call result'] =='IB-VOICEMAIL') | 
                         (df_master['call result'] =='IB-HANGUP')) &
                         (df_master['lead age'] !='INVALID')
                    ]
df_IB_dataset=df_master.loc[(df_master['call result'] =='IB-ANSWERED') & (df_master['lead age'] !='INVALID')]
df_OB_dataset=df_master.loc[(df_master['call result'] =='OB-ALL') & (df_master['lead age'] !='INVALID')]

In [6]:
aggregation_functions = {'attempts': 'sum', 'connects': 'sum', 'lead count': 'sum','cc90 count': 'sum', 'closed that call':'sum', 'closed ever':'sum', 'talk duration (mins)':'sum'}
df_IB_temp = df_IB_dataset.drop(columns=['hagrid who','hagrid when']).groupby(['call type','call result','lead source','ew_gw','tutoring for','lead age','ordinal attempt','previous cc90s'],as_index=False).aggregate(aggregation_functions)
df_OB_temp = df_OB_dataset.drop(columns=['hagrid who','hagrid when']).groupby(['call type','call result','lead source','ew_gw','tutoring for','lead age','ordinal attempt','previous cc90s'],as_index=False).aggregate(aggregation_functions)
df_vmh_temp=df_vmh_dataset.drop(columns=['hagrid who','hagrid when']).groupby(['call type','call result','lead source','ew_gw','tutoring for','lead age','ordinal attempt','previous cc90s'],as_index=False).aggregate(aggregation_functions)

df_IB=df_IB_temp.copy(deep=True)
df_OB=df_OB_temp.copy(deep=True)
df_vmh=df_vmh_temp.copy(deep=True)

In [7]:
df_OB_specific=df_OB.drop(columns=['call type','call result', 'lead source','ordinal attempt', 
                          'ew_gw','tutoring for']).groupby(['lead age','previous cc90s'],as_index=False).aggregate(aggregation_functions)
df_IB_specific=df_OB.drop(columns=['call type','call result', 'lead source', 
                          'ew_gw','tutoring for','ordinal attempt']).groupby(['lead age','previous cc90s'],as_index=False).aggregate(aggregation_functions)



df_OB_specific['pGC']=df_OB_specific['closed that call'].divide(df_OB_specific['cc90 count'], fill_value=0)
df_OB_specific['CC90/connect']=df_OB_specific['cc90 count'].divide(df_OB_specific['connects'], fill_value=0)
df_OB_specific['connect rate']=df_OB_specific['connects'].divide(df_OB_specific['attempts'], fill_value=0)
df_OB_specific['talktime/connect']=df_OB_specific['talk duration (mins)'].divide(df_OB_specific['connects'], fill_value=0)

df_IB_specific['pGC']=df_IB_specific['closed that call'].divide(df_IB_specific['cc90 count'], fill_value=0)
df_IB_specific['CC90/connect']=df_IB_specific['cc90 count'].divide(df_IB_specific['connects'], fill_value=0)
df_IB_specific['connect rate']=df_IB_specific['connects'].divide(df_IB_specific['attempts'], fill_value=0)
df_IB_specific['talktime/connect']=df_IB_specific['talk duration (mins)'].divide(df_IB_specific['connects'], fill_value=0)

In [8]:
# Creating metrics for outbound dataframe

df_OB['pGC']=df_OB['closed that call'].divide(df_OB['cc90 count'], fill_value=0)
df_OB['CC90/connect']=df_OB['cc90 count'].divide(df_OB['connects'], fill_value=0)
df_OB['connect rate']=df_OB['connects'].divide(df_OB['attempts'], fill_value=0)
df_OB['talktime/connect']=df_OB['talk duration (mins)'].divide(df_OB['connects'], fill_value=0)

df_OB['connect rate'].replace(np.nan, 0.0,inplace=True)
df_OB['CC90/connect'].replace(np.nan, 0.0,inplace=True)
df_OB['pGC'].replace(np.nan, 0.0,inplace=True)

df_OB_TEMP=df_OB.copy(deep=True)

df_OB_FINAL=pd.merge(df_OB_TEMP, df_OB_specific,
                     how='left', 
                     on=['lead age','previous cc90s'])

df_OB_FINAL.drop(columns=['attempts_y','connects_y','lead count_y','cc90 count_y','closed that call_y',
                           'closed ever_y','talk duration (mins)_y'],inplace=True)

df_OB_FINAL.rename(index=str, columns={
                                       "attempts_x": "attempts",
                                       "lead count_x":"lead count",
                                       "connects_x": "connects",
                                       "cc90 count_x": "cc90 count",
                                       "closed that call_x": "closed that call",
                                       "closed ever_x": "closed ever",
                                       "talk duration (mins)_x": "talk duration (mins)"
                                       },inplace=True)


df_OB_FINAL['pGC_final']=np.where(df_OB_FINAL['pGC_x']==0,df_OB_FINAL['pGC_y'],df_OB_FINAL['pGC_x'])
df_OB_FINAL['CC90/connect_final']=np.where(df_OB_FINAL['CC90/connect_x']==0,df_OB_FINAL['CC90/connect_y'],df_OB_FINAL['CC90/connect_x'])
df_OB_FINAL['connect rate_final']=np.where(df_OB_FINAL['connect rate_x']==0,df_OB_FINAL['connect rate_y'],df_OB_FINAL['connect rate_x'])
df_OB_FINAL['talktime/connect_final']=np.where(df_OB_FINAL['attempts']<20,df_OB_FINAL['talktime/connect_y'],df_OB_FINAL['talktime/connect_x'])

df_OB_FINAL.drop(columns=['pGC_x','pGC_y',
                          'CC90/connect_x','CC90/connect_y',
                          'connect rate_x','connect rate_y',
                          'talktime/connect_x','talktime/connect_y'],inplace=True)


df_OB_FINAL.rename(index=str, columns={"pGC_final":"pGC",
                                       "CC90/connect_final":"CC90/connect",
                                       "connect rate_final":"connect rate",
                                       "talktime/connect_final":"talktime/connect"                                                                               
                                       },inplace=True)


df_OB_FINAL['temp']=1
df_OB_FINAL['no cc90/connect']=df_OB_FINAL['temp']-df_OB_FINAL['CC90/connect']
df_OB_FINAL['post call time']=df_OB_FINAL['CC90/connect']*2.5 + df_OB_FINAL['no cc90/connect']
df_OB_FINAL.drop(columns=['no cc90/connect','temp'],inplace=True)

pgc_OB_overall=(df_OB_FINAL['closed that call'].sum())/(df_OB_FINAL['cc90 count'].sum())
df_OB_FINAL['pGC'].replace(np.nan, pgc_OB_overall ,inplace=True)

print("OUTBOUND overall pGC-",pgc_OB_overall)

OUTBOUND overall pGC- 0.07850622646256904


In [9]:
#creating metrics for inbound dataframe

df_IB['pGC']=df_IB['closed that call'].divide(df_IB['cc90 count'], fill_value=0)
df_IB['CC90/connect']=df_IB['cc90 count'].divide(df_IB['connects'], fill_value=0)
df_IB['connect rate']=df_IB['connects'].divide(df_IB['attempts'], fill_value=0)
df_IB['talktime/connect']=df_IB['talk duration (mins)'].divide(df_IB['connects'], fill_value=0)

df_IB['connect rate'].replace(np.nan, 0.0,inplace=True)
df_IB['CC90/connect'].replace(np.nan, 0.0,inplace=True)
df_IB['pGC'].replace(np.nan, 0.0,inplace=True)

df_IB_TEMP=df_IB.copy(deep=True)

df_IB_FINAL=pd.merge(df_IB_TEMP, df_IB_specific,
                     how='left', 
                     on=['lead age','previous cc90s'])

df_IB_FINAL.drop(columns=['attempts_y','connects_y','lead count_y','cc90 count_y','closed that call_y',
                           'closed ever_y','talk duration (mins)_y'],inplace=True)

df_IB_FINAL.rename(index=str, columns={
                                       "attempts_x": "attempts",
                                       "lead count_x":"lead count",
                                       "connects_x": "connects",
                                       "cc90 count_x": "cc90 count",
                                       "closed that call_x": "closed that call",
                                       "closed ever_x": "closed ever",
                                       "talk duration (mins)_x": "talk duration (mins)"
                                       },inplace=True)


df_IB_FINAL['pGC_final']=np.where(df_IB_FINAL['pGC_x']==0,df_IB_FINAL['pGC_y'],df_IB_FINAL['pGC_x'])
df_IB_FINAL['CC90/connect_final']=np.where(df_IB_FINAL['CC90/connect_x']==0,df_IB_FINAL['CC90/connect_y'],df_IB_FINAL['CC90/connect_x'])
df_IB_FINAL['connect rate_final']=np.where(df_IB_FINAL['connect rate_x']==0,df_IB_FINAL['connect rate_y'],df_IB_FINAL['connect rate_x'])
df_IB_FINAL['talktime/connect_final']=np.where(df_IB_FINAL['attempts']<20,df_IB_FINAL['talktime/connect_y'],df_IB_FINAL['talktime/connect_x'])

df_IB_FINAL.drop(columns=['pGC_x','pGC_y',
                          'CC90/connect_x','CC90/connect_y',
                          'connect rate_x','connect rate_y',
                          'talktime/connect_x','talktime/connect_y'],inplace=True)


df_IB_FINAL.rename(index=str, columns={"pGC_final":"pGC",
                                       "CC90/connect_final":"CC90/connect",
                                       "connect rate_final":"connect rate",
                                       "talktime/connect_final":"talktime/connect"                                                                               
                                       },inplace=True)

df_IB_FINAL['temp']=1
df_IB_FINAL['no cc90/connect']=df_IB_FINAL['temp']-df_IB_FINAL['CC90/connect']
df_IB_FINAL['post call time']=df_IB_FINAL['CC90/connect']*2.5 + df_IB_FINAL['no cc90/connect']
df_IB_FINAL.drop(columns=['no cc90/connect','temp'],inplace=True)

pgc_IB_overall=(df_IB_FINAL['closed that call'].sum())/(df_IB_FINAL['cc90 count'].sum())
df_IB_FINAL['pGC'].replace(np.nan, pgc_IB_overall ,inplace=True)
print("INBOUND overall pGC-",pgc_IB_overall)

INBOUND overall pGC- 0.13727320780103383


In [10]:
df_vmh_full=pd.merge(df_vmh, df_IB_FINAL,
                     how='left', 
                     on=['call type', 'lead source',
                         'ew_gw','tutoring for','lead age','ordinal attempt', 
                         'previous cc90s'])

"""df_vmh_full=pd.merge(df_vmh, df_IB,
                     how='left', 
                     on=['call type', 'lead source',
                         'ew_gw','tutoring for','hagrid who', 'hagrid when','lead age','ordinal attempt', 
                         'previous cc90s'])"""

df_vmh_full.drop(columns=['call result_y','attempts_y','connects_y','lead count_y','cc90 count_y','closed that call_y',
           'closed ever_y','talk duration (mins)_y'],inplace=True)

df_vmh_full.rename(index=str, columns={
                                        "call result_x": "call result", 
                                       "attempts_x": "attempts",
                                       "lead count_x":"lead count",
                                       "connects_x": "connects",
                                       "cc90 count_x": "cc90 count",
                                       "closed that call_x": "closed that call",
                                       "closed ever_x": "closed ever",
                                       "talk duration (mins)_x": "talk duration (mins)"
                                       },inplace=True)

In [11]:
frames=[df_OB_FINAL,df_IB_FINAL,df_vmh_full]
df_FINAL= pd.concat(frames, axis=0, ignore_index=True)
df_FINAL['timespent/connect']=df_FINAL['talktime/connect']+df_FINAL['post call time']
df_FINAL.drop(columns=['lead count','talktime/connect','post call time'],inplace=True)
df_FINAL.rename(index=str, columns={"timespent/connect":"talktime/connect"},inplace=True)

In [12]:
df_final_percents=df_FINAL.copy(deep=True)
total_hist_attempts=df_final_percents['attempts'].sum()
df_final_percents['percentage of cohort']=df_final_percents['attempts']/total_hist_attempts

In [13]:
#df_attempt = pd.read_csv('attempt-sep21.csv')

In [14]:
# creating attempt dataframe
def attempt_creator(df_attempt , df_FINAL):
    
    df_attempt['talk duration (mins)'].replace(np.nan, 0.0,inplace=True)

    df_attempt[['attempts','connects','cc90 count','closed that call',
           'closed ever']]=df_attempt[['attempts','connects','cc90 count',
                         'closed that call','closed ever']].astype(float)

    """df_attempt.sort_values(by=['call type','call result', 'lead source', 
                          'ew_gw','hagrid who', 'hagrid when','lead age',
                          'ordinal attempt', 
                          'previous cc90s'],
                          ascending=False, 
                          inplace=True, 
                          kind='quicksort', 
                          na_position='last')"""

    df_attempt_temp = df_attempt.drop(columns=['hagrid who','hagrid when']).groupby(['call type','call result','lead source','ew_gw','tutoring for','lead age','ordinal attempt','previous cc90s'],as_index=False).aggregate(aggregation_functions)

    df_attempt_final=df_attempt_temp.copy(deep=True)

    """df_attempt_full=pd.merge(df_attempt_final, df_FINAL,
                     how='left', 
                     on=['call type','call result', 'lead source',
                         'ew_gw','tutoring for','hagrid who', 'hagrid when','lead age','ordinal attempt', 
                         'previous cc90s'])"""

    df_attempt_full=pd.merge(df_attempt_final, df_FINAL,
                         how='left', 
                         on=['call type','call result', 'lead source',
                             'ew_gw','tutoring for','lead age','ordinal attempt', 
                             'previous cc90s'])

    df_attempt_full.drop(columns=['attempts_y','connects_y','cc90 count_y','closed that call_y',
             'closed ever_y','talk duration (mins)_y'],inplace=True)

    df_attempt_full.rename(index=str, columns={
                                       "attempts_x": "attempts",
                                       "connects_x": "connects",
                                       "cc90 count_x": "cc90 count",
                                       "closed that call_x": "closed that call",
                                       "closed ever_x": "closed ever",
                                       "talk duration (mins)_x": "talk duration (mins)",
                                       },inplace=True)


    #df_char=pd.DataFrame({"separator": [ "|" for n in range(len(df_attempt_full.index))]})
      
    #df_attempt_full['identifier'] = df_attempt_full["call type"]+ "|"+ df_attempt_full["call result"]+ df_attempt_full["lead source"] + "|"+df_attempt_full["ew_gw"]+ "|"+df_attempt_full["hagrid who"]+ "|"+df_attempt_full["hagrid when"]+ "|"+df_attempt_full["lead age"]+ "|"+df_attempt_full["ordinal attempt"]+ "|"+ df_attempt_full["previous cc90s"]
    df_attempt_full['identifier'] = df_attempt_full["call result"]+ "|"+df_attempt_full["lead source"] + "|"+df_attempt_full["ew_gw"]+ "|"+df_attempt_full["tutoring for"]+ "|"+df_attempt_full["lead age"]+ "|"+df_attempt_full["ordinal attempt"]+ "|"+ df_attempt_full["previous cc90s"]
    return(df_attempt_full)

In [15]:
#df_todo_dataset=pd.read_csv('mytodo-sep21.csv')

In [16]:
# adding todo files

def todo_creator(df_todo_dataset):
    df_todomatch=pd.DataFrame(
            { "todo_type": ["1 Day Lead","2 Day Follow Up","2 Day Leads","6 Hour Lead","Lead Action","New Lead"],
              "identifier":["OB-ALL|Non-Phone|EW|ACADEMIC|1 day old|First time|No CC90",
                        "OB-ALL|Non-Phone|EW|ACADEMIC|2 day old|Between 1 and 5|<3 CC90s",
                        "OB-ALL|Non-Phone|EW|ACADEMIC|2 day old|First time|No CC90",
                        "OB-ALL|Non-Phone|EW|ACADEMIC|<24HR|First time|No CC90",
                        "OB-ALL|Non-Phone|EW|ACADEMIC|2 day old|Between 1 and 5|<3 CC90s",
                        "OB-ALL|Non-Phone|EW|ACADEMIC|<24HR|First time|No CC90"]})
    df_todo_final=pd.merge(df_todomatch,df_todo_dataset,
                     how='left', 
                     on=['todo_type'])

    df_todo_final.drop(columns=['date_orig_exec', 'todo_type', 'total_todos', 'completion_rate',
       'reschedule_rate'],inplace=True)
    df_todo_final["pending_todos"]=df_todo_final["pending_todos"].astype(float)
    df_todo_fin=df_todo_final.groupby(["identifier"], as_index=False).aggregate({'pending_todos':'sum'})
    return(df_todo_fin)

In [17]:
# craeting optimization dataframe and converting it to list

def opti_dataset_creator(df_attempt_full, df_todo_fin):
    cols = list(df_attempt_full.columns.values)
    df_optimization_dataset=pd.merge(df_attempt_full,df_todo_fin,
                     how='left', 
                     on=['identifier'])
    df_optimization_dataset['pending_todos'].replace(np.nan, 0.0,inplace=True)

    # This is where you disable/enable todo addition in optimization dataset or not
    df_optimization_dataset["upper bound"]=df_optimization_dataset["attempts"]+df_optimization_dataset["pending_todos"]



    df_optimization_dataset.drop(columns=['lead source','ew_gw','tutoring for','lead age','ordinal attempt',
                                      'previous cc90s','connects','cc90 count',
                                      'closed ever','talk duration (mins)','pending_todos'],inplace=True)

    df_optimization_dataset=df_optimization_dataset[['identifier','call type','call result','attempts','upper bound','pGC','CC90/connect','connect rate','talktime/connect','closed that call']]

    df_optimization_dataset.insert(loc=4, 
                               column='lower bound', 
                               value = np.where((df_optimization_dataset['call result']=='IB-ANSWERED') 
                                                , df_optimization_dataset['attempts'] , 0.0)
                               )
    df_optimization_dataset["ib_vmh coefficient"]=np.where((df_optimization_dataset['call result']!='IB-ANSWERED') & (df_optimization_dataset['call type']=='INBOUND'), 1, 0)
    df_optimization_dataset["outbound coefficient"]=np.where((df_optimization_dataset['call result']=='OB-ALL'), 1, 0)
    df_optimization_dataset["prob of conversion"]=df_optimization_dataset["pGC"]*df_optimization_dataset["CC90/connect"]*df_optimization_dataset["connect rate"]                              

    print("Number of unique values in attempt set ->",len(df_optimization_dataset['identifier'].unique()))

    df_optimization_dataset['pGC'].replace(np.nan, 0.0,inplace=True)
    df_optimization_dataset['talktime/connect'].replace(np.nan, 0.0,inplace=True)
    df_optimization_dataset['CC90/connect'].replace(np.nan, 0.0,inplace=True)
    df_optimization_dataset['connect rate'].replace(np.nan, 0.0,inplace=True)
    opti_dataset = df_optimization_dataset.values.tolist()
    return(df_optimization_dataset)

In [18]:
"""# estimating upper bound values for optimization

OBMAX=df_optimization_dataset.loc[df_optimization_dataset['call result'] == 'OB-ALL','attempts'].sum()
IBMH=df_optimization_dataset.loc[(df_optimization_dataset['call result'] != 'IB-ANSWERED') &
                                  (df_optimization_dataset['call type'] == 'INBOUND')
                                  ,'attempts'].sum()

df_temp=df_optimization_dataset.loc[(df_optimization_dataset['call result']=='OB-ALL') | (df_optimization_dataset['call result'] == 'IB-ANSWERED')].copy(deep=True)
df_temp["time taken"]=df_temp["attempts"]*df_temp["connect rate"]*df_temp["talktime/connect"]
max_time_data=df_temp["time taken"].sum()"""

'# estimating upper bound values for optimization\n\nOBMAX=df_optimization_dataset.loc[df_optimization_dataset[\'call result\'] == \'OB-ALL\',\'attempts\'].sum()\nIBMH=df_optimization_dataset.loc[(df_optimization_dataset[\'call result\'] != \'IB-ANSWERED\') &\n                                  (df_optimization_dataset[\'call type\'] == \'INBOUND\')\n                                  ,\'attempts\'].sum()\n\ndf_temp=df_optimization_dataset.loc[(df_optimization_dataset[\'call result\']==\'OB-ALL\') | (df_optimization_dataset[\'call result\'] == \'IB-ANSWERED\')].copy(deep=True)\ndf_temp["time taken"]=df_temp["attempts"]*df_temp["connect rate"]*df_temp["talktime/connect"]\nmax_time_data=df_temp["time taken"].sum()'

In [19]:
def optimizer(df_optimization_dataset):
    OBMAX=df_optimization_dataset.loc[df_optimization_dataset['call result'] == 'OB-ALL','attempts'].sum()
    IBMH=df_optimization_dataset.loc[(df_optimization_dataset['call result'] != 'IB-ANSWERED') &
                                  (df_optimization_dataset['call type'] == 'INBOUND')
                                  ,'attempts'].sum()

    df_temp=df_optimization_dataset.loc[(df_optimization_dataset['call result']=='OB-ALL') | (df_optimization_dataset['call result'] == 'IB-ANSWERED')].copy(deep=True)
    df_temp["time taken"]=df_temp["attempts"]*df_temp["connect rate"]*df_temp["talktime/connect"]
    max_time_data=df_temp["time taken"].sum()
    
    opti_dataset = df_optimization_dataset.values.tolist()

    solver=py.Solver('Optimal_Calls', py.Solver.GLOP_LINEAR_PROGRAMMING)

    # Set objective function and simultaneously defining its bounds
    optimized_calls=[[]]*len(opti_dataset)
    objective = solver.Objective()
    ctr=0

    for single_record in opti_dataset:
        #print("Now printing 1", single_record[2], "And now 2", single_record [0])
        optimized_calls[ctr] = solver.IntVar(single_record[4], single_record[5], single_record[0])
        objective.SetCoefficient(optimized_calls[ctr], single_record[6]*single_record[7]*single_record[8]) 
        ctr=ctr+1

    objective.SetMaximization()
   

    #constraints
    #max_time=min(21.25*7.65*160,60*418.59)
    max_IB_VMH=0.23*0.3*IBMH
    # 23% accounting for those whom we did not reach out the same day as per todo
    constraints_list=[]


    # Adding time constraint
    constraints_list.append(solver.Constraint(0.0,max_time_data)) 
    ctr=0
    #constraints_list[0].SetCoefficient(optimized_calls[],single_record[9]*single_record[8])

    for single_record in opti_dataset:
        constraints_list[0].SetCoefficient(optimized_calls[ctr],single_record[9]*single_record[8])
        ctr=ctr+1
 
    
    # Adding inbound voicemail and hangup constraint
    constraints_list.append(solver.Constraint(0.0,max_IB_VMH)) 
    ctr=0
    for single_record in opti_dataset:
        constraints_list[1].SetCoefficient(optimized_calls[ctr],single_record[11])
        ctr=ctr+1

    
    #Adding outbound constraint
    constraints_list.append(solver.Constraint(0.0,OBMAX)) 
    ctr=0
    for single_record in opti_dataset:
        constraints_list[2].SetCoefficient(optimized_calls[ctr],single_record[12])
        ctr=ctr+1

    # Solving

    status = solver.Solve()
    flag_value=0
    
    if (status == solver.OPTIMAL):
        flag_value=1
        ctr=0
        #print('Successful solve.')
        # The problem has an optimal solution.
        print(('\nAttempts successfully optimized in %f milliseconds' % solver.wall_time()))
        # The objective value of the solution.
        #print(('Optimal objective value = %f' % solver.Objective().Value()))
        # The value of each variable in the solution.
        for callm in opti_dataset:
        
            for call in optimized_calls:
                if callm[0]==call.name():
                    temp=[round(call.solution_value())]
                    #print(('%s = %f' % (call.name(), call.solution_value())))
                    opti_dataset[ctr].extend(temp)
                    break
            ctr=ctr+1
        
    elif (status!=solver.OPTIMAL):
        print("No optimal solution found")
    
    return(flag_value , opti_dataset)

In [20]:
"""if (status == solver.OPTIMAL):
    flag_value=1
    ctr=0
    #print('Successful solve.')
    # The problem has an optimal solution.
    print(('\nAttempts successfully optimized in %f milliseconds' % solver.wall_time()))
    # The objective value of the solution.
    #print(('Optimal objective value = %f' % solver.Objective().Value()))
    # The value of each variable in the solution.
    for callm in opti_dataset:
        
        for call in optimized_calls:
            if callm[0]==call.name():
                temp=[round(call.solution_value())]
                #print(('%s = %f' % (call.name(), call.solution_value())))
                opti_dataset[ctr].extend(temp)
                break
        ctr=ctr+1
        
elif (status!=solver.OPTIMAL):
    print("No optimal solution found")"""

'if (status == solver.OPTIMAL):\n    flag_value=1\n    ctr=0\n    #print(\'Successful solve.\')\n    # The problem has an optimal solution.\n    print((\'\nAttempts successfully optimized in %f milliseconds\' % solver.wall_time()))\n    # The objective value of the solution.\n    #print((\'Optimal objective value = %f\' % solver.Objective().Value()))\n    # The value of each variable in the solution.\n    for callm in opti_dataset:\n        \n        for call in optimized_calls:\n            if callm[0]==call.name():\n                temp=[round(call.solution_value())]\n                #print((\'%s = %f\' % (call.name(), call.solution_value())))\n                opti_dataset[ctr].extend(temp)\n                break\n        ctr=ctr+1\n        \nelif (status!=solver.OPTIMAL):\n    print("No optimal solution found")'

In [21]:
def optimized_results(df_attempt_full, opti_dataset , leads_that_day):
    
    labels = ['identifier', 'call type', 'call result', 'attempts actual',
              'lower bound','upper bound','pGC','cc90/connect','connect rate',
              'time/connect','closed that call','coeff-IBVMH','coeff-OB','probability of conversion','optimized attempts'
              ]
    df_optimized_results = pd.DataFrame.from_records(opti_dataset, columns=labels)
    df_optimized_results.drop(columns=['coeff-IBVMH','coeff-OB'],inplace=True)
    
    df_optimized_results=df_optimized_results[['identifier','call type','call result','attempts actual',
                                               'optimized attempts','lower bound','upper bound',
                                               'pGC','cc90/connect','connect rate','probability of conversion',
                                               'time/connect','closed that call']]
    
    df_optimized_results["predicted initial converts"]=np.where((df_optimized_results['call result'] == "IB-ANSWERED") |
                                                                (df_optimized_results['call result'] == "OB-ALL"),
                                                                round(df_optimized_results['attempts actual']*df_optimized_results['probability of conversion']),0)
    
    df_optimized_results["predicted final converts"]=round(df_optimized_results['optimized attempts']*df_optimized_results['probability of conversion'])
    df_optimized_results["attempts actual"]=np.where((df_optimized_results['call result'] == "IB-HANGUP") | 
                                                        (df_optimized_results['call result'] == "IB-VOICEMAIL"),
                                                        0.0,df_optimized_results["attempts actual"])
                                                                
    df_optimized_results["attempts reshuffled"]=abs(df_optimized_results["attempts actual"]-df_optimized_results["optimized attempts"])
    df_optimized_results.sort_values(by=['attempts reshuffled'], inplace=True, ascending=False)
    
    
    leads=leads_that_day
    convert_actual=df_optimized_results["closed that call"].sum()
    CTR_IB_actual=df_optimized_results.loc[(df_optimized_results["call result"]== 'IB-ANSWERED')
                                   ,'attempts actual'].sum()
    CTR_IBVMH_actual=df_attempt_full.loc[(df_attempt_full["call result"]!= 'IB-ANSWERED') &
                                                (df_attempt_full["call type"]== 'INBOUND')
                                   ,'attempts'].sum()
    CTR_OB_actual=df_optimized_results.loc[(df_optimized_results["call result"]== 'OB-ALL')
                                   ,'attempts actual'].sum()
    
    CTR_IB_optimized=df_optimized_results.loc[(df_optimized_results["call type"]== 'INBOUND')
                                   ,'optimized attempts'].sum()
    CTR_OB_optimized=df_optimized_results.loc[(df_optimized_results["call type"]== 'OUTBOUND')
                                   ,'optimized attempts'].sum()
    convert_initial=round(df_optimized_results["predicted initial converts"].sum())
    convert_opti=round(df_optimized_results["predicted final converts"].sum())
    
    print("\n\n-----Summary of results -----\n")
    print("Inbounds initial ->",CTR_IB_actual)
    print("Inbounds missed ->",CTR_IBVMH_actual)
    print("Inbounds Optimized ->",round(CTR_IB_optimized))
    print("Outbounds initial ->",CTR_OB_actual)
    print("Outbounds optimised ->",round(CTR_OB_optimized))
    print("Clients converted as per records ->",convert_actual)
    #print("Converts by model values ->",convert_initial)
    #print("Converts by model values optimized ->",convert_opti)
    print("Model error % in estimating conversions->",round((convert_initial-convert_actual)*100/convert_actual,2))
   # print("Original conversion actual % ->",round(convert_actual*100/leads,2))
    #print("Model estimated original conversion % ->",round(convert_initial*100/leads,2))
    #print("Optimised conversion by model % ->",round(convert_opti*100/leads,2))
    print("Lift predicted {} bps".format(round((convert_opti-convert_initial)*10000/leads,2)))
    
    pd.set_option('display.max_colwidth', -1)
    #print(df_optimized_results[['identifier','attempts actual','optimized attempts']].head(20))
    return(df_optimized_results)

In [22]:
df_master_attempts=pd.read_csv('attempt_dataset-March07.csv')
df_master_attempts.date_call=pd.to_datetime(df_master_attempts.date_call)


df_master_todos=pd.read_excel('todos_dataset-Mar07.xlsx')
df_master_todos.rename(columns={'1.) Todos Original Execution Date':'date_orig_exec',
       '2.) Todo Descriptions Todo Description':'todo_type', 
        '1.) Todos Todo Count':'total_todos',
       '1.) Todos Same Day Completion %':'completion_rate', 
        '1.) Todos Reschedule %':'reschedule_rate'} , inplace=True)
df_master_todos=df_master_todos.assign(pending_todos=df_master_todos.apply(lambda x: np.around(x.total_todos - 
                                                    x.total_todos*x.completion_rate - 
                                                    x.total_todos*x.reschedule_rate, decimals=0) , axis=1))
df_master_todos.pending_todos=df_master_todos.pending_todos.astype('int64')

df_all_leads=pd.read_excel('all_leads-Mar07.xlsx')
df_all_leads.columns=['date','leads','clients']

In [23]:
#date=np.datetime64('2019-02-04')

In [24]:
df_master_todos.todo_type.unique()

array(['Missed Call', '1 Day Lead', 'Lead Action', 'Lead Voicemail',
       'Bad 1st Session', 'No First Session Review', 'Not Met 2 Weeks',
       'Client Action - Revenue', 'Not on NAT', 'Bad Session',
       'Tutor Voicemail', 'Client Action - Other',
       'Tutor Location Change', 'Tutor Online Session No Show',
       'Note From Tutor', 'Client Check In', 'Client Low Hours',
       'No First Session', '2 Day Leads', 'No TOU', '2 Day Follow Up',
       'Client Action - Placememt', 'Client Welcome Call', '6 Hour Lead',
       'Schedule Welcome Call - Tutor', 'Inbound Email', 'Payment Error',
       'New Lead', 'Client TOU Signed', 'Lead Self Converted',
       'Client Voicemail', 'New Instant Free Trial', 'Inbound Call',
       'Former Clients'], dtype=object)

In [25]:
df_all_optimized = pd.DataFrame(columns=['identifier','attempts actual','optimized attempts','date_call'])
df_all_optimized2 = pd.DataFrame(columns=['identifier','attempts actual','optimized attempts','date_call'])
for date in np.arange('2019-01-01', '2019-03-01', dtype='datetime64'):
    df_attempts = attempt_creator(df_master_attempts[df_master_attempts.date_call==date] , df_FINAL)
    df_todoset = todo_creator(df_master_todos[(df_master_todos.date_orig_exec==date) & 
                                          (df_master_todos.todo_type.isin(['Lead Action', '6 Hour Lead','1 Day Lead']))])

    df_ODC = opti_dataset_creator(df_attempts , df_todoset)
    flag , opti_dataset = optimizer(df_ODC)
    
    leads_that_day=df_all_leads[df_all_leads.date==date].leads.values[0]

    if flag==1:
        df_optimal = optimized_results(df_attempts , opti_dataset , leads_that_day)
        df_temp=df_optimal[(df_optimal['call type']=='OUTBOUND') & (df_optimal['attempts actual']!=df_optimal['optimized attempts'])][['identifier','attempts actual','optimized attempts']].reset_index(drop=True)
        df_temp['date_call']=date
        df_all_optimized=pd.concat([df_all_optimized,df_temp])
        
        df_temp2=df_optimal[['identifier','attempts actual','optimized attempts']].reset_index(drop=True)
        df_temp2['date_call']=date
        df_all_optimized2=pd.concat([df_all_optimized2,df_temp2])

df_all_optimized = df_all_optimized.reset_index(drop=True)
df_all_optimized2 = df_all_optimized2.reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


Number of unique values in attempt set -> 73

Attempts successfully optimized in 1.000000 milliseconds


-----Summary of results -----

Inbounds initial -> 0.0
Inbounds missed -> 138.0
Inbounds Optimized -> 1
Outbounds initial -> 1.0
Outbounds optimised -> 0
Clients converted as per records -> 0.0
Model error % in estimating conversions-> nan
Lift predicted 0.0 bps




Number of unique values in attempt set -> 436

Attempts successfully optimized in 2.000000 milliseconds


-----Summary of results -----

Inbounds initial -> 796.0
Inbounds missed -> 506.0
Inbounds Optimized -> 831
Outbounds initial -> 14112.0
Outbounds optimised -> 13851
Clients converted as per records -> 134.0
Model error % in estimating conversions-> -26.12
Lift predicted 48.01 bps
Number of unique values in attempt set -> 441

Attempts successfully optimized in 2.000000 milliseconds


-----Summary of results -----

Inbounds initial -> 727.0
Inbounds missed -> 490.0
Inbounds Optimized -> 761
Outbounds initial -> 14740.0
Outbounds optimised -> 14482
Clients converted as per records -> 159.0
Model error % in estimating conversions-> -33.33
Lift predicted 31.61 bps
Number of unique values in attempt set -> 421

Attempts successfully optimized in 2.000000 milliseconds


-----Summary of results -----

Inbounds initial -> 603.0
Inbounds missed -> 423.0
Inbounds Optimized -> 632
Outbounds 

Number of unique values in attempt set -> 480

Attempts successfully optimized in 3.000000 milliseconds


-----Summary of results -----

Inbounds initial -> 893.0
Inbounds missed -> 554.0
Inbounds Optimized -> 931
Outbounds initial -> 13572.0
Outbounds optimised -> 13292
Clients converted as per records -> 131.0
Model error % in estimating conversions-> 3.82
Lift predicted 43.84 bps
Number of unique values in attempt set -> 480

Attempts successfully optimized in 2.000000 milliseconds


-----Summary of results -----

Inbounds initial -> 655.0
Inbounds missed -> 504.0
Inbounds Optimized -> 690
Outbounds initial -> 12090.0
Outbounds optimised -> 11835
Clients converted as per records -> 141.0
Model error % in estimating conversions-> -29.79
Lift predicted 42.13 bps
Number of unique values in attempt set -> 379

Attempts successfully optimized in 2.000000 milliseconds


-----Summary of results -----

Inbounds initial -> 425.0
Inbounds missed -> 353.0
Inbounds Optimized -> 449
Outbounds in

Number of unique values in attempt set -> 460

Attempts successfully optimized in 2.000000 milliseconds


-----Summary of results -----

Inbounds initial -> 701.0
Inbounds missed -> 456.0
Inbounds Optimized -> 732
Outbounds initial -> 11656.0
Outbounds optimised -> 9949
Clients converted as per records -> 139.0
Model error % in estimating conversions-> -24.46
Lift predicted 99.01 bps
Number of unique values in attempt set -> 404

Attempts successfully optimized in 2.000000 milliseconds


-----Summary of results -----

Inbounds initial -> 391.0
Inbounds missed -> 280.0
Inbounds Optimized -> 410
Outbounds initial -> 6967.0
Outbounds optimised -> 5803
Clients converted as per records -> 72.0
Model error % in estimating conversions-> -26.39
Lift predicted 108.8 bps
Number of unique values in attempt set -> 391

Attempts successfully optimized in 2.000000 milliseconds


-----Summary of results -----

Inbounds initial -> 356.0
Inbounds missed -> 308.0
Inbounds Optimized -> 377
Outbounds init

In [26]:
df_all_optimized.dtypes

identifier            object        
attempts actual       float64       
optimized attempts    object        
date_call             datetime64[ns]
dtype: object

In [27]:
df_all_optimized.head()

Unnamed: 0,identifier,attempts actual,optimized attempts,date_call
0,OB-ALL|Phone|EW|ACADEMIC|<24HR|Between 1 and 5|No CC90,1.0,0,2019-01-01
1,OB-ALL|Non-Phone|GW|ACADEMIC|11-14 days old|Between 5 and 10|No CC90,181.0,0,2019-01-02
2,OB-ALL|Non-Phone|GW|TEST PREP|>14 days old|More than 10|No CC90,29.0,0,2019-01-02
3,OB-ALL|Non-Phone|GW|ACADEMIC|>14 days old|More than 10|No CC90,410.0,383,2019-01-02
4,OB-ALL|Non-Phone|GW|TEST PREP|11-14 days old|Between 5 and 10|No CC90,18.0,0,2019-01-02


In [28]:
df_all_optimized['optimized attempts']=df_all_optimized['optimized attempts'].astype('float64')
#df_all_optimized['difference']=df_all_optimized['difference'].astype('float64')
df_all_optimized['difference']=df_all_optimized['optimized attempts'] - df_all_optimized['attempts actual']
df_all_optimized['month']=df_all_optimized.date_call.dt.strftime('%B')

df_all_optimized2['optimized attempts']=df_all_optimized2['optimized attempts'].astype('float64')
#df_all_optimized2['difference']=df_all_optimized2['difference'].astype('float64')
df_all_optimized2['difference']=df_all_optimized2['optimized attempts'] - df_all_optimized2['attempts actual']
df_all_optimized2['month']=df_all_optimized2.date_call.dt.strftime('%B')

In [29]:
df_all_optimized[(df_all_optimized.month=='November') & (df_all_optimized.identifier=='OB-ALL|Non-Phone|EW|ACADEMIC|>14 days old|More than 10|No CC90')]

Unnamed: 0,identifier,attempts actual,optimized attempts,date_call,difference,month


In [30]:
df_analysis=pd.pivot_table(df_all_optimized2
                          ,index='identifier'
                          ,columns=['month']
                          ,values=['attempts actual' , 'optimized attempts']
                          ,aggfunc=np.sum)
df_analysis.fillna(0, inplace=True)

In [None]:
df_analysis2=pd.pivot_table(df_all_optimized
                          ,index='identifier'
                          ,columns=['month']
                          ,values='difference'
                          ,aggfunc=np.mean)
df_analysis2.fillna(0, inplace=True)

In [32]:
df_analysis.to_excel('optimization_results-cum-March07.xlsx')

In [31]:
df_analysis.head()

Unnamed: 0_level_0,attempts actual,attempts actual,optimized attempts,optimized attempts
month,February,January,February,January
identifier,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
IB-ANSWERED|Non-Phone|EW|ACADEMIC|1 day old|Between 1 and 5|3-5 CC90s,3.0,1.0,3.0,1.0
IB-ANSWERED|Non-Phone|EW|ACADEMIC|1 day old|Between 1 and 5|<3 CC90s,263.0,268.0,263.0,268.0
IB-ANSWERED|Non-Phone|EW|ACADEMIC|1 day old|Between 1 and 5|No CC90,764.0,896.0,764.0,896.0
IB-ANSWERED|Non-Phone|EW|ACADEMIC|1 day old|Between 5 and 10|3-5 CC90s,8.0,8.0,8.0,8.0
IB-ANSWERED|Non-Phone|EW|ACADEMIC|1 day old|Between 5 and 10|<3 CC90s,48.0,37.0,48.0,37.0


In [33]:
df_FINAL['identifier'] = df_FINAL["call result"]+ "|"+df_FINAL["lead source"] + "|"+df_FINAL["ew_gw"]+ "|"+df_FINAL["tutoring for"]+ "|"+df_FINAL["lead age"]+ "|"+df_FINAL["ordinal attempt"]+ "|"+ df_FINAL["previous cc90s"]

In [34]:
df_FINAL.to_excel("metrics-March07.xlsx")