In [1]:
# read in and examine data
import pandas as pd

# load the raw table and use its "time" column as the row index
timeserie_df = pd.read_csv("timeseries.csv").set_index("time")

# convert the index labels to datetimes
timeserie_df.index = pd.to_datetime(timeserie_df.index)

# preview the first rows
timeserie_df.head()

Unnamed: 0_level_0,ATT__avg_cust_sent,ATT__cust_sent_improvement,ATT__comp_resp_timehrs,ATT__issue_was_resolved,BoostCare__avg_cust_sent,BoostCare__cust_sent_improvement,BoostCare__comp_resp_timehrs,BoostCare__issue_was_resolved,TMobileHelp__avg_cust_sent,TMobileHelp__cust_sent_improvement,...,VerizonSupport__comp_resp_timehrs,VerizonSupport__issue_was_resolved,sprintcare__avg_cust_sent,sprintcare__cust_sent_improvement,sprintcare__comp_resp_timehrs,sprintcare__issue_was_resolved,indavg__issue_was_resolved,indavg__avg_cust_sent,indavg__cust_sent_improvement,indavg__comp_resp_timehrs
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-10-05 01:00:00,-0.240121,-0.073483,127.366926,0.147465,-0.147979,-0.194545,12.188163,0.124167,-0.180056,-0.145045,...,215.027747,0.144137,-0.218992,-0.134986,118.042006,0.155786,0.180541,-0.171887,-0.074546,111.041849
2017-10-05 02:00:00,-0.240705,-0.073566,127.452215,0.147882,-0.148063,-0.194628,12.198046,0.125417,-0.180083,-0.144584,...,215.02855,0.144762,-0.219508,-0.135284,118.042247,0.156381,0.181268,-0.172144,-0.074439,111.051205
2017-10-05 03:00:00,-0.241517,-0.074128,127.503188,0.14809,-0.147896,-0.194462,12.211114,0.127083,-0.180302,-0.144525,...,215.028822,0.144762,-0.220445,-0.135254,118.047464,0.156679,0.182124,-0.172426,-0.074394,111.058236
2017-10-05 04:00:00,-0.242559,-0.07517,127.519845,0.14809,-0.147479,-0.194045,12.227365,0.129167,-0.180813,-0.144492,...,215.028955,0.145456,-0.221357,-0.135463,118.049308,0.156679,0.18284,-0.172841,-0.074201,111.063357
2017-10-05 05:00:00,-0.243913,-0.074128,127.534111,0.14809,-0.147063,-0.193628,12.241876,0.13125,-0.181163,-0.143803,...,215.029076,0.147123,-0.222607,-0.136713,118.049532,0.156679,0.183766,-0.17328,-0.073748,111.07028


In [2]:
# function to average metric over some period

from scipy.stats import percentileofscore
from ranking import Ranking
import numpy as np

# look-back window for each supported history label
histstr2timedel_d = dict([
    ("2 months", pd.Timedelta(days = 62)),
    ("2 weeks", pd.Timedelta(days = 14)),
])

# function to get ranks
def get_ranks(li):
    """Rank values from largest (rank 1) to smallest.

    Ties share the best rank of their group and the following rank(s) are
    skipped ("competition" ranking), e.g. [100, 80, 80, 70] -> [1, 2, 2, 4].

    Args:
        li: 1-D sequence of numbers (list, NumPy array, pandas Series, ...).

    Returns:
        Integer NumPy array, same length and order as `li`, holding each
        value's 1-based rank. An empty input gives an empty array.
    """
    # negate so that the largest original value sorts first
    neg = -np.asarray(li)

    # an element's competition rank is 1 + the number of strictly larger
    # elements, i.e. its leftmost insertion point in the sorted negated
    # values; this is computed per element, so no un-sorting step is needed
    ranks = np.searchsorted(np.sort(neg), neg, side = "left") + 1

    return ranks

# function for averageness
def get_averageness(li):
    li = np.array(li)
    mean = li.mean()
    std = li.std()
    
    # set all to average
    avstrs = np.array(["average" for _ in li], dtype = "<U20")
    
    # find below / above averages
    avstrs[li < mean - std/2] = "below average"
    avstrs[li > mean + std/2] = "above average"
    
    return avstrs

def get_statmetdf(ts_df, metric, histstr,
    histstr2timedel_d = histstr2timedel_d):
    """Summarise one metric per company over the most recent history window.

    Args:
        ts_df: time-indexed DataFrame with columns named
            "<company>__<metric>". The window ends at the last index label,
            so the index is assumed to be sorted ascending (not checked).
        metric: metric name; every column whose name contains this text is
            used, except columns containing "indavg".
        histstr: key of `histstr2timedel_d` selecting the look-back length,
            e.g. "2 months".
        histstr2timedel_d: mapping from history label to a Timedelta. The
            default is the module-level dict as it existed when this
            function was defined.

    Returns:
        DataFrame indexed by "company" with three columns, each prefixed by
        `histstr` (whitespace replaced by "_") and `metric`: the mean of the
        metric over the window, its rank from `get_ranks`, and its
        averageness label from `get_averageness`.
    """
    # window = everything from (last timestamp - look-back) onwards
    window_start = ts_df.index[-1] - histstr2timedel_d[histstr]
    recent_df = ts_df.loc[window_start:]

    # per-company columns for this metric; the industry average is left out
    metric_cols = [col for col in recent_df.columns
                   if metric in col and "indavg" not in col]

    # plain NumPy column means over the window, ordered by column name
    col_means = recent_df[metric_cols].values.mean(axis = 0)
    avg_by_col = pd.Series(col_means, index = metric_cols).sort_index()

    # "<company>__<metric>" -> "<company>"
    companies = [col[:col.index("__")] for col in avg_by_col.index]

    # raw values are reused for both the ranking and the averageness labels
    metvals = avg_by_col.values

    # build the frame directly with its final, prefixed column names
    colbase = "_".join(histstr.split()) + "_" + metric
    statmetric_df = pd.DataFrame(
        {colbase: metvals},
        index = pd.Index(companies, name = "company"))
    statmetric_df[colbase + "_rank"] = get_ranks(metvals)
    statmetric_df[colbase + "_averageness"] = get_averageness(metvals)

    return statmetric_df

# example: two-month summary of the sentiment-improvement metric
get_statmetdf(timeserie_df,
              metric = "cust_sent_improvement",
              histstr = "2 months")

Unnamed: 0_level_0,2_months_cust_sent_improvement,2_months_cust_sent_improvement_rank,2_months_cust_sent_improvement_averageness
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ATT,-0.129226,4,below average
BoostCare,-0.150508,6,below average
TMobileHelp,-0.142019,5,below average
USCellularCares,0.025701,1,above average
VMUcare,-0.08491,3,average
VerizonSupport,0.020175,2,above average
sprintcare,-0.172914,7,below average


In [3]:
# creating the statmet_df for both metrics and periods

# getting dfs for both metrics and both periods
metrics = ["issue_was_resolved", "cust_sent_improvement"]
histstrs = ["2 months", "2 weeks"]

# one summary frame per (metric, period) pair, metric-major order
statmet_dfs = []
for metric in metrics:
    for hs in histstrs:
        statmet_dfs += [get_statmetdf(timeserie_df, metric, histstr = hs)]

# all frames share the "company" index, so place them side by side
statmet_df = pd.concat(statmet_dfs, axis = 1)

def get_combined_metrics(hspre, statmet_df = statmet_df):
    """Combine the resolution and sentiment-improvement metrics of a period.

    Each company's "<hspre>_issue_was_resolved" and
    "<hspre>_cust_sent_improvement" value is converted to its percentile
    within its own column; the two percentiles are added, with the
    sentiment-improvement one counted twice, and rounded to 2 decimals.
    That weighted score is then ranked and labelled.

    Args:
        hspre: column prefix of the period, e.g. "2_months".
        statmet_df: frame holding the two columns above. The default is the
            `statmet_df` object that existed when this function was
            defined, not whatever the name refers to at call time.

    Returns:
        dict with "ranks" (output of `get_ranks`) and "avstrs" (output of
        `get_averageness`), both in the row order of `statmet_df`.
    """
    def percentiles_of(colname):
        # percentile of every value relative to its own column
        col = statmet_df[colname]
        return col.apply(lambda val: percentileofscore(col, val))

    resolved_pcts = percentiles_of(hspre + "_issue_was_resolved")
    sentrise_pcts = percentiles_of(hspre + "_cust_sent_improvement")

    # weighted sum: sentiment improvement counts double
    weighted = (resolved_pcts + sentrise_pcts*2).round(2)

    return {"ranks": get_ranks(weighted),
            "avstrs": get_averageness(weighted)}

# assigning the overall (combined) rank and averageness for each period
for histstr in histstrs:
    hspre = "_".join(histstr.split())
    combined_metrics = get_combined_metrics(hspre)
    statmet_df[f"{hspre}_rank"] = combined_metrics["ranks"]
    statmet_df[f"{hspre}_averageness"] = combined_metrics["avstrs"]

# saving
statmet_df.to_csv("static_metrics.csv")

# loading the saved file back, with "company" restored as the index
statmet_df = pd.read_csv("static_metrics.csv")
statmet_df = statmet_df.set_index("company")
statmet_df

Unnamed: 0_level_0,2_months_issue_was_resolved,2_months_issue_was_resolved_rank,2_months_issue_was_resolved_averageness,2_weeks_issue_was_resolved,2_weeks_issue_was_resolved_rank,2_weeks_issue_was_resolved_averageness,2_months_cust_sent_improvement,2_months_cust_sent_improvement_rank,2_months_cust_sent_improvement_averageness,2_weeks_cust_sent_improvement,2_weeks_cust_sent_improvement_rank,2_weeks_cust_sent_improvement_averageness,2_months_rank,2_months_averageness,2_weeks_rank,2_weeks_averageness
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
ATT,0.235414,6,below average,0.230792,6,below average,-0.129226,4,below average,-0.159731,7,below average,5,average,7,below average
BoostCare,0.303725,1,above average,0.293857,1,above average,-0.150508,6,below average,-0.126747,3,average,4,average,2,above average
TMobileHelp,0.240491,5,average,0.241808,5,below average,-0.142019,5,below average,-0.144988,5,below average,6,below average,5,below average
USCellularCares,0.265588,3,average,0.274275,3,above average,0.025701,1,above average,-0.014563,2,above average,1,above average,2,above average
VMUcare,0.270162,2,above average,0.285356,2,above average,-0.08491,3,average,-0.147796,6,below average,2,above average,4,average
VerizonSupport,0.2472,4,average,0.266637,4,average,0.020175,2,above average,0.034308,1,above average,2,above average,1,above average
sprintcare,0.19883,7,below average,0.217148,7,below average,-0.172914,7,below average,-0.140022,4,below average,7,below average,5,below average
