In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
import pickle
import sqlalchemy as sa
import datetime as dtt
import calendar

from subject_query import subject_query
from schedule_query import schedule_query
from appointments_today import upcoming_appointments_today

In [2]:
# Subject groups and reps currently in scope for callback assignment, written as
# comma-separated, single-quoted lists. They are substituted with str.format into the
# `sub_list` / `rep_list` placeholders of subject_query and schedule_query when the data
# is loaded. The plan is to open this up to all of core later.
# NOTE(review): both 'April Banadonna' and 'April Bonadonna' are listed - presumably to
# cover two spellings of the same rep in the source data; confirm before removing either.
sub_list_csv = """'HS Test Prep','Grade K-6','Grad TP'"""
rep_list_csv = """'Amanda Newton', 'Bryce Schwanke', 'Tracy Fleischer', 'Emily Cox', 'Brian Kabat', 'Rachel Brown', 
'April Banadonna', 'April Bonadonna', 'Troy Hooper', 'Forrest Blattman'"""

In [3]:
# Redshift connection. The user/password in the URL below are placeholders ('XXX').
# NOTE(review): prefer supplying the real credentials from the environment or a secrets
# store instead of editing this literal, so they never end up in a saved notebook.
connstr = 'redshift+psycopg2://XXX:XXX@tyson.bi.varsitytutors.com:5439/varsitytutors'
engine = sa.create_engine(connstr, connect_args={'sslmode': 'verify-ca'})

# Render the three SQL statements first; the subject and rep lists are injected into the
# query templates through their `sub_list` / `rep_list` placeholders.
subject_sql = sa.text(subject_query.format(sub_list=sub_list_csv))
schedule_sql = sa.text(schedule_query.format(rep_list=rep_list_csv))
appointments_sql = sa.text(upcoming_appointments_today)

# Fetch all three result sets over a single connection inside one transaction.
with engine.connect() as conn, conn.begin():
    df_categories = pd.read_sql(subject_sql, conn)
    df_schedules = pd.read_sql(schedule_sql, conn)
    df_appointments = pd.read_sql(appointments_sql, conn)

In [4]:
# Preview the first rows of the frame loaded from subject_query.
df_categories.head()

Unnamed: 0,contact_id,contact_created_date,first_interaction_type,subject_group,callback_todo_id,adword_subject_type_2,callback_at_time,tags_name
0,4865009,2021-02-01 16:47:25,LFS callback,Grade K-6,38640095,Tutor,2021-02-02 10:00:00,GW main
1,4865657,2021-02-01 18:08:25,LFS callback,HS Test Prep,38642453,Prep,2021-02-02 17:00:00,GW main
2,1817119,2018-10-16 18:48:54,LFS callback,Grade K-6,38638745,Tutor,2021-02-02 15:30:00,EW
3,4861683,2021-02-01 10:33:57,LFS callback,Grad TP,38633133,Tutor,2021-02-02 17:15:00,EW
4,4865206,2021-02-01 17:10:19,LFS callback,HS Test Prep,38640608,Tutor,2021-02-02 14:30:00,EW


In [5]:
# Midnight of the day each shift starts; this is the key used to pair shifts with slots.
df_schedules['start_date'] = pd.to_datetime(df_schedules['schedule_start_time']).dt.normalize()

# Build the 96 fifteen-minute slots that make up "today". The 15-minute step must match
# the granularity of appointments, because scheduled callbacks are booked in 15-minute blocks.
# NOTE(review): NumPy resolves 'today' as the current UTC date - confirm that this matches
# the timezone of the schedule timestamps when the notebook is run late in the day.
dti = pd.DataFrame({'interval': pd.date_range(start=np.datetime64('today'), periods=96, freq="15min")})
dti['start_date'] = dti['interval'].dt.normalize()

# Pair every slot with every shift starting on the same day, then keep only the slots
# that fall inside the shift window [schedule_start_time, schedule_end_time).
slots_by_day = dti.merge(df_schedules, on='start_date', how='left')
on_shift = (
    (slots_by_day['schedule_start_time'] <= slots_by_day['interval'])
    & (slots_by_day['schedule_end_time'] > slots_by_day['interval'])
)
df_schedules_final = (
    slots_by_day[on_shift]
    .sort_values(by=['mgr_name', 'interval'])
    .reset_index(drop=True)
)

In [6]:
# Preview the 15-minute slots that fall inside each rep's scheduled shift.
df_schedules_final.head()

Unnamed: 0,interval,start_date,mgr_name,sub_group,rd,mgr_id,user_id,schedule_start_time,schedule_end_time
0,2021-02-02 07:00:00,2021-02-02,Amanda Newton,Inbound,Samantha Townzen,1945,1044266872,2021-02-02 07:00:00,2021-02-02 15:00:00
1,2021-02-02 07:15:00,2021-02-02,Amanda Newton,Inbound,Samantha Townzen,1945,1044266872,2021-02-02 07:00:00,2021-02-02 15:00:00
2,2021-02-02 07:30:00,2021-02-02,Amanda Newton,Inbound,Samantha Townzen,1945,1044266872,2021-02-02 07:00:00,2021-02-02 15:00:00
3,2021-02-02 07:45:00,2021-02-02,Amanda Newton,Inbound,Samantha Townzen,1945,1044266872,2021-02-02 07:00:00,2021-02-02 15:00:00
4,2021-02-02 08:00:00,2021-02-02,Amanda Newton,Inbound,Samantha Townzen,1945,1044266872,2021-02-02 07:00:00,2021-02-02 15:00:00


In [7]:
# Optional manual override of the queried appointments:
# the Look at https://varsitytutors.looker.com/looks/10803 queries the replica, so its CSV
# export can be loaded here instead when more real-time data is needed. To use it, uncomment:
#df_appointments = pd.read_csv("Open todos - user id - for scheduling 2021-02-01T1248.csv")

# Parse the appointment slot timestamps into datetimes so they can be joined on `interval`.
df_appointments['interval'] = pd.to_datetime(df_appointments['interval'])

In [8]:
# Attach any appointment already booked for the same rep (user_id) in the same slot.
df_available_slots = df_schedules_final.merge(df_appointments, on=['interval', 'user_id'], how='left')

# A slot is available (1) when no preassigned todo matched it, otherwise taken (0).
df_available_slots['is_available'] = df_available_slots['preassigned_todo_id'].isna().astype(int)

In [9]:
# Preview the schedule slots with the is_available flag (1 = no preassigned todo in the slot).
df_available_slots.head()

Unnamed: 0,interval,start_date,mgr_name,sub_group,rd,mgr_id,user_id,schedule_start_time,schedule_end_time,preassigned_todo_id,is_available
0,2021-02-02 07:00:00,2021-02-02,Amanda Newton,Inbound,Samantha Townzen,1945,1044266872,2021-02-02 07:00:00,2021-02-02 15:00:00,,1
1,2021-02-02 07:15:00,2021-02-02,Amanda Newton,Inbound,Samantha Townzen,1945,1044266872,2021-02-02 07:00:00,2021-02-02 15:00:00,,1
2,2021-02-02 07:30:00,2021-02-02,Amanda Newton,Inbound,Samantha Townzen,1945,1044266872,2021-02-02 07:00:00,2021-02-02 15:00:00,,1
3,2021-02-02 07:45:00,2021-02-02,Amanda Newton,Inbound,Samantha Townzen,1945,1044266872,2021-02-02 07:00:00,2021-02-02 15:00:00,,1
4,2021-02-02 08:00:00,2021-02-02,Amanda Newton,Inbound,Samantha Townzen,1945,1044266872,2021-02-02 07:00:00,2021-02-02 15:00:00,,1


In [10]:
# Load the per-rep, per-subject-group counts that feed the tSCVR scorecard.
df_rep_tscvr = pd.read_csv("consultant_sub_tscvr_vals.csv")

# Scorecard value: rep_tscvr = closed_client_count / cc90_count, ordered best-first
# within each subject group.
# NOTE(review): a cc90_count of 0 yields inf/NaN here, and np.percentile propagates NaN to
# the whole subject group - confirm the CSV never contains such rows.
df_rep_tscvr['rep_tscvr'] = df_rep_tscvr['closed_client_count'] / df_rep_tscvr['cc90_count']
df_rep_tscvr = df_rep_tscvr.sort_values(by=['subject_group', 'rep_tscvr'], ascending=[True, False])


def _percentile_of(q):
    """Return an aggregator that computes the q-th percentile with np.percentile."""
    return lambda values: np.percentile(values, q)


# 25th / 75th / 50th percentile of rep_tscvr for every subject group.
df_sub_tscvr = (
    df_rep_tscvr
    .groupby('subject_group')
    .agg(perc_25=('rep_tscvr', _percentile_of(25)),
         perc_75=('rep_tscvr', _percentile_of(75)),
         perc_50=('rep_tscvr', _percentile_of(50)))
    .reset_index()
)

# Put each rep's value next to the percentiles of their subject group.
df_tscvr_range = df_rep_tscvr.merge(df_sub_tscvr, on='subject_group', how='left')

# tscvr_list is the rep's distance from the subject-group median (positive = above median).
df_tscvr_range['tscvr_list'] = df_tscvr_range['rep_tscvr'] - df_tscvr_range['perc_50']

In [11]:
# Match each callback to every rep slot that starts at the requested callback time.
callbacks_with_slots = df_categories.merge(
    df_available_slots,
    left_on='callback_at_time',
    right_on='interval',
    how='left',
)

# Keep only the pairings whose slot is still free; callbacks with no matching slot drop out too.
df_merge1 = callbacks_with_slots[callbacks_with_slots['is_available'] == 1]

# Add the rep's tSCVR figures for the callback's subject group.
df_mega = df_merge1.merge(
    df_tscvr_range,
    on=['subject_group', 'mgr_id', 'mgr_name'],
    how='left',
)

In [12]:
# List the columns of the merged callback / slot / tSCVR frame.
df_mega.columns

Index(['contact_id', 'contact_created_date', 'first_interaction_type',
       'subject_group', 'callback_todo_id', 'adword_subject_type_2',
       'callback_at_time', 'tags_name', 'interval', 'start_date', 'mgr_name',
       'sub_group', 'rd', 'mgr_id', 'user_id', 'schedule_start_time',
       'schedule_end_time', 'preassigned_todo_id', 'is_available',
       'cc90_count', 'closed_client_count', 'rep_tscvr', 'perc_25', 'perc_75',
       'perc_50', 'tscvr_list'],
      dtype='object')

In [13]:
# Keep only the columns needed to rank and assign callback/rep candidate pairs.
df_final = df_mega[['contact_id', 'contact_created_date', 'subject_group', 'callback_todo_id', 'adword_subject_type_2',
       'callback_at_time', 'tags_name', 'interval', 'mgr_name',
        'mgr_id', 'user_id', 'schedule_start_time',
       'schedule_end_time', 'is_available',
       'rep_tscvr', 'perc_50', 'tscvr_list']]

# Candidate order drives assignment priority: subject groups with the highest median
# tSCVR first, then by todo id, and within a todo the rep furthest above the median first.
df_final = df_final.sort_values(by=['perc_50','callback_todo_id','tscvr_list'], ascending=[False,True,False])
# Only reps strictly above their subject group's median are eligible
# (tscvr_list = rep_tscvr - perc_50); rows without a tSCVR value (NaN) drop out here too.
df_final = df_final[df_final.tscvr_list>0]

# Working copy of the slots that records the assignments; -1 means "no todo assigned".
# (The original cell ran this initialisation twice; once is sufficient.)
df_final_assigned = df_available_slots.copy()
df_final_assigned['assigned_todo_id'] = -1

# Plain list of rows for the assignment loop. The loop reads rows by position, so the
# column order matters: index 3 = callback_todo_id, 4 = callback_at_time, 7 = mgr_name.
list_df = df_final[['contact_id', 'contact_created_date', 'subject_group',
       'callback_todo_id','callback_at_time',
       'tags_name', 'interval', 'mgr_name', 'user_id',
       'schedule_start_time',  'is_available', 'rep_tscvr',
       'perc_50', 'tscvr_list']].values.tolist()

In [14]:
# Greedy assignment: walk the ranked candidate rows and give each callback todo to the
# first candidate rep whose slot at the callback time is still free.
#
# Row layout (positional, from list_df): 3 = callback_todo_id, 4 = callback_at_time,
# 7 = mgr_name.
#
# Every todo that has been assigned is remembered in `assigned_todos`, so a todo can never
# be handed out twice even if its candidate rows are not adjacent in list_df. Remembering
# only the most recently assigned todo is not enough in that case.
just_assigned_todo = -1  # most recently assigned todo id; kept for any later cell that reads it
assigned_todos = set()
for row in list_df:
    todo_id, callback_time, rep_name = row[3], row[4], row[7]

    if todo_id in assigned_todos:
        continue

    # The rep's slot at the callback time. The mask depends only on mgr_name/interval,
    # which are never modified below, so it is computed once and reused.
    slot_mask = ((df_final_assigned['mgr_name'] == rep_name) &
                 (df_final_assigned['interval'] == np.datetime64(callback_time)))

    # Skip the candidate when an earlier iteration already took this slot.
    if df_final_assigned.loc[slot_mask, 'is_available'].values[0] == 0:
        continue

    df_final_assigned.loc[slot_mask, 'assigned_todo_id'] = todo_id
    df_final_assigned.loc[slot_mask, 'is_available'] = 0
    assigned_todos.add(todo_id)
    just_assigned_todo = todo_id
    print("Assigned " + str(todo_id) +" to "+ rep_name +" at " + str(np.datetime64(callback_time))+"\n")
    

Assigned 38609370 to Emily Cox at 2021-02-02T17:30:00.000000

Assigned 38621500 to Amanda Newton at 2021-02-02T08:45:00.000000

Assigned 38621669 to Amanda Newton at 2021-02-02T14:00:00.000000

Assigned 38630846 to Amanda Newton at 2021-02-02T11:30:00.000000

Assigned 38630925 to Amanda Newton at 2021-02-02T07:00:00.000000

Assigned 38631443 to Brian Kabat at 2021-02-02T16:45:00.000000

Assigned 38631698 to Brian Kabat at 2021-02-02T16:00:00.000000

Assigned 38632719 to Emily Cox at 2021-02-02T20:30:00.000000

Assigned 38633445 to Amanda Newton at 2021-02-02T11:00:00.000000

Assigned 38633769 to Amanda Newton at 2021-02-02T10:30:00.000000

Assigned 38634033 to Amanda Newton at 2021-02-02T09:00:00.000000

Assigned 38634667 to Rachel Brown at 2021-02-02T11:00:00.000000

Assigned 38635292 to Brian Kabat at 2021-02-02T14:00:00.000000

Assigned 38636416 to Brian Kabat at 2021-02-02T17:00:00.000000

Assigned 38636435 to Amanda Newton at 2021-02-02T12:00:00.000000

Assigned 38636677 to Amanda

In [15]:
# Timestamped output name so repeated runs never overwrite an earlier export.
timestamp_str = dtt.datetime.now().strftime("%Y%m%d%H%M%S")
filename = 'assignment_'+ timestamp_str +'.csv'

# Constant columns written alongside every assignment row.
df_final_assigned['action'] = 'todo reassign'
df_final_assigned['original_id'] = np.nan

# Only slots that actually received a todo are exported (unassigned slots still hold -1).
has_assignment = df_final_assigned['assigned_todo_id'] > 0
export_columns = ['assigned_todo_id', 'action', 'user_id', 'original_id', 'mgr_name']
final_df = df_final_assigned.loc[has_assignment, export_columns]
final_df.to_csv(filename, index=False)

In [16]:
# Write the assignments, the callbacks list and the rep tSCVR table to one workbook,
# each frame on its own worksheet. The file name reuses `timestamp_str` from the CSV export.
#
# The writer is used as a context manager so the workbook is saved and closed when the
# block exits, including when one of the to_excel calls raises. This replaces the explicit
# writer.save() call, which is deprecated in pandas 1.5 and removed in pandas 2.0.
with pd.ExcelWriter('Collated_file' + timestamp_str + '.xlsx', engine='xlsxwriter') as writer:
    final_df.to_excel(writer, sheet_name='Assignment Sheet')
    df_categories.to_excel(writer, sheet_name='Callbacks list')
    df_rep_tscvr.to_excel(writer, sheet_name='Rep tSCVR')