In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import pandasql as ps
from pandasql import sqldf

sns.set(font_scale=3)
sns.set(color_codes=True)
%matplotlib inline
import matplotlib.pyplot as plt


plt.rcParams["figure.figsize"] = [16, 16] 


pysqldf = lambda q: sqldf(q, globals())



In [3]:
def replace_space_in_col_names(df):
    new_cols = [x.replace(" ","_") for x in df.columns]
    df.columns = new_cols

In [4]:
trans_df = pd.read_excel("../data/QuVa-GAIL DataRequest_4_5.xlsx","Daily Orders")
sf_opp_df = pd.read_excel("../data/QuVa-GAIL DataRequest_6_7_8_without_commas.xlsx","SF_OpportunityDetails")
sf_activities_df = pd.read_excel("../data/QuVa-GAIL DataRequest_6_7_8_without_commas.xlsx","SF_ActivityDetails")
sf_case_df = pd.read_excel("../data/QuVa-GAIL DataRequest_6_7_8_without_commas.xlsx","SF_Case")

replace_space_in_col_names(trans_df)
replace_space_in_col_names(sf_opp_df)
replace_space_in_col_names(sf_activities_df)
replace_space_in_col_names(sf_case_df)

trans_df["Customer_ID"] = trans_df["Customer_ID"].astype(str)
sf_opp_df["Customer_ID"] = sf_opp_df["Customer_ID"].astype(str)
sf_activities_df["Customer_ID"] = sf_activities_df["Customer_ID"].astype(str)
sf_case_df["Customer_id"] = sf_case_df["Customer_id"].astype(str)


In [5]:
trans_df.Order_Date = pd.to_datetime(trans_df.Order_Date).dt.to_period('D')
sf_opp_df["opp_date"] = sf_opp_df.Opportunity_Created_Date.dt.to_period('D')
sf_opp_df["Close_Date"] = sf_opp_df.Close_Date.dt.to_period('D')
sf_activities_df["activity_date"] = sf_activities_df.Activity_Last_Modified_Date.dt.to_period('D')
sf_case_df["case_date"] = sf_case_df.CreatedDate.dt.to_period('D')
sf_case_df.rename(columns={"Customer_id":"Customer_ID"},inplace=True)

In [6]:
trans_states_df = trans_df[["Customer_ID","Order_Date","Daily_Revenue"]]
trans_states_df = trans_states_df.groupby(["Customer_ID","Order_Date"]).sum().reset_index()
sf_activity_state_df = sf_activities_df[["Customer_ID","activity_date","Activity_Type"]]
sf_case_state_df = sf_case_df[["Customer_ID","case_date","Type","Reason"]]

In [7]:
#d = pd.merge(trans_states_df,sf_activity_state_df, on='Customer_ID',how='outer')
#d[d["activity_date"].notnull()  d["Daily_Revenue"].notnull()]
trans_states_df.columns = ["Customer_ID","activity_date","Activity_Type"]
d = trans_states_df.append(sf_activity_state_df)
d.sort_values(by=["Customer_ID","activity_date"],inplace=True)
#d[d.Customer_ID == '10002'].to_clipboard(index=False)
#d.to_clipboard(index=False)
#d[d.Customer_ID == '10002']

In [8]:
#opportunity journey

#get the won and lost opportunities only (exclude the open ones)
win_lost_opps_df = sf_opp_df[sf_opp_df.Opportunity_Status.isin(["Closed","Closed Won"])]
win_lost_opps_df = win_lost_opps_df[["Close_Date","Customer_/_Prospect_Type","Opportunity_Status","SF_Account_ID","SF_Opportunity_ID"]]
win_lost_opps_df.columns =["Close_Date","Customer_Type","Opp_Status","SF_Account_ID","SF_Opp_ID"]

#get opportunity stages

opp_stages_df = pd.read_excel("../data/QuVa-GAIL DataRequest_6_7_8_Addition2.xlsx","OpportunityStage")
replace_space_in_col_names(opp_stages_df)
opp_stages_df["CreatedDate"] = pd.to_datetime(opp_stages_df.CreatedDate).dt.to_period('D')
opp_stages_df = opp_stages_df.sort_values(by=["OpportunityId","CreatedDate","SystemModstamp"])
#exclude open opportunities
opp_stages_df = opp_stages_df[opp_stages_df.OpportunityId.isin(win_lost_opps_df.SF_Opp_ID)]

#create a dictionary of opps: key is opp_id, value is a pair of close date and the annual amount
#print(opp_annual_amount_dic["0064100000RUwD4AAL"][1])
opp_close_date_annual_amount_dic = sf_opp_df[["SF_Opportunity_ID","Close_Date","Annual_Amount","Opportunity_Status"]].set_index('SF_Opportunity_ID').T.to_dict('list')



In [11]:
opp_stages_sub_df = opp_stages_df[["CreatedDate","StageName","OpportunityId"]].drop_duplicates()
opp_stages_sub_df[opp_stages_sub_df.OpportunityId == "0064100000C5kEvAAJ"]
#opp_close_date_annual_amount_dic["0064100000C5kEvAAJ"]

opp_stages_grouped = dict(list(opp_stages_sub_df.groupby("OpportunityId")))
#dict(list(gb))
opp_funnel_df = pd.DataFrame()
#print(opp_stages_grouped.get_group("0064100000C5kEvAAJ"))
for opp_id in opp_stages_grouped.keys():
    opp_df = opp_stages_grouped[opp_id]
    stages_set = set()
    for stage in opp_df.StageName:
        stages_set.add(stage)

    if (len(stages_set) < 2):
        continue
    opp_path_string = '/'.join(stages_set)    
    opp_amount = opp_close_date_annual_amount_dic[opp_id][1]
    opp_days_gap = opp_close_date_annual_amount_dic[opp_id][0] - opp_df.CreatedDate.min()
    opp_final_status = opp_close_date_annual_amount_dic[opp_id][2]
    opp_path_string = opp_path_string + ("/" + opp_final_status)
    if "New" not in opp_path_string.split("/")[0] and "Lost" not in opp_path_string.split("/")[0]:
        opp_path_string = "01_New/" + opp_path_string

    opp_summary = pd.DataFrame({"opp_id": opp_id,
                                "opp_path_string" : opp_path_string,
                                "opp_amount":opp_amount,
                                "opp_days_gap": opp_days_gap},index=[0])
    opp_funnel_df = opp_funnel_df.append(opp_summary)


In [12]:
nodifier_df = opp_funnel_df.groupby("opp_path_string").agg({"opp_amount":"sum","opp_id":"count","opp_days_gap":"mean"}).reset_index()
nodifier_df = nodifier_df[["opp_path_string", "opp_id","opp_amount","opp_days_gap"]]
nodifier_df["label"] = nodifier_df.apply(lambda x : "{}\n{}@{}\n{}days".format(x["opp_path_string"].split("/")[-1],x["opp_id"],x["opp_amount"],int(x["opp_days_gap"])),axis=1)
nodifier_df.columns=["pathString","count","revenue","daysToAct","label"]
nodifier_df["label"] = nodifier_df["label"].str.replace(" ","_")
nodifier_df["label"] = nodifier_df["label"].str.replace("-","_")

nodifier_df["pathString"] = nodifier_df["pathString"].str.replace(" ","_")
nodifier_df["pathString"] = nodifier_df["pathString"].str.replace("-","_")


nodifier_df.to_csv("Qu_nodifierv1.csv",index=False)
nodifier_df
#list(nodifier_df["opp_path_string"])


Unnamed: 0,pathString,count,revenue,daysToAct,label
0,01_New/20_Needs_Assessment/40_Negotiation/60_F...,1,17445.00,93.000000,Closed_Won\n1@17445.0\n93days
1,01_New/20_Needs_Assessment/60_Fourth_Cycle_Com...,1,37200.00,49.000000,Closed_Won\n1@37200.0\n49days
2,01_New/20_Needs_Assessment/90_Lost/01_New/Closed,4,5962224.00,91.250000,Closed\n4@5962224.0\n91days
3,01_New/20_Needs_Assessment/90_Lost/10_Qualific...,4,141864.00,111.250000,Closed\n4@141864.0\n111days
4,01_New/20_Needs_Assessment/90_Lost/10_Qualific...,3,13200.00,109.333333,Closed\n3@13200.0\n109days
5,01_New/20_Needs_Assessment/90_Lost/25_Crosswal...,1,21762.00,120.000000,Closed\n1@21762.0\n120days
6,01_New/20_Needs_Assessment/90_Lost/25_Crosswal...,2,387613.20,29.500000,Closed\n2@387613.2\n29days
7,01_New/20_Needs_Assessment/90_Lost/55_Onboardi...,1,0.00,36.000000,Closed\n1@0.0\n36days
8,01_New/30_Received_Presentation/50_Contract_Si...,1,3139089.00,173.000000,Closed\n1@3139089.0\n173days
9,01_New/30_Received_Presentation/60_Fourth_Cycl...,1,5832.00,33.000000,Closed_Won\n1@5832.0\n33days
