In [206]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import matplotlib.ticker as mtick
from matplotlib.ticker import FormatStrFormatter
from matplotlib.lines import Line2D

In [207]:
# Load the buy-offer evaluation extract. The first CSV row supplies the column
# names (header=0) and the first CSV column becomes the row index (index_col=0).
df = pd.read_csv('BuyTitleOfferEval.csv', header=0, index_col=0)

In [208]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 689510 entries, 0 to 689509
Data columns (total 16 columns):
LocationNo                             689510 non-null object
CatalogID                              689510 non-null int64
CatalogBinding                         689510 non-null object
count_ItemsPriced                      689510 non-null int64
count_ItemsSold                        689510 non-null int64
avg_CatalogAccDays_NR                  689510 non-null float64
avg_CatalogAccDays_TrashPenalty_R40    689510 non-null float64
avg_CatalogAccDays_TrashPenalty_R41    689510 non-null float64
actual_AvgSalePrice                    299524 non-null float64
actual_BuyGradeName_R40                689510 non-null object
actual_BuyOfferPct_R40                 689510 non-null float64
actual_BuyOfferAmt_R40                 299524 non-null float64
pred_AvgSalePrice                      689510 non-null float64
pred_BuyOfferPct_R40                   689510 non-null float64
pred_SuggestedOf

In [209]:
# Sum of count_ItemsPriced over every row of the loaded frame.
df['count_ItemsPriced'].sum()

3062505

In [240]:
# Largest Date_Generated value. The displayed result is a str, so this is a
# lexicographic maximum -- NOTE(review): it equals the latest date only if every
# value is formatted as ISO 'YYYY-MM-DD'; confirm, or parse with pd.to_datetime.
df['Date_Generated'].max()

'2019-10-07'

In [210]:
# Split the data into rows whose LocationNo is the literal 'Chain' and all
# remaining rows.
filt_Chain = df['LocationNo'] == 'Chain'
filt_Loc = ~filt_Chain
# reset_index() is called without drop=True, so the previous row index is kept
# as an extra column in each of the two frames.
df_Chain = df[filt_Chain].reset_index()
df_Loc = df[filt_Loc].reset_index()

In [228]:
def create_PredCompDF(df, minSampleNum, binding='', **kwargs):
    """Aggregate priced/sold quantities by (predicted, actual) buy-offer grade.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'CatalogBinding', 'count_ItemsPriced',
        'count_ItemsSold', 'pred_BuyOfferPct_R40' and 'actual_BuyOfferPct_R40'.
    minSampleNum : number
        Rows whose 'count_ItemsPriced' is below this value are dropped before
        aggregating.
    binding : str, optional
        Restrict the rows to this 'CatalogBinding' value. If the value does not
        occur in ``df`` (which includes the default ''), no binding filter is
        applied and every binding is kept.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    pandas.DataFrame
        One row per (pred_BuyOfferPct_R40, actual_BuyOfferPct_R40) pair with the
        summed 'count_ItemsPriced' and 'count_ItemsSold', on a fresh 0..n-1 index.
    """
    grade_cols = ['pred_BuyOfferPct_R40', 'actual_BuyOfferPct_R40']
    qty_cols = ['count_ItemsPriced', 'count_ItemsSold']

    known_bindings = df['CatalogBinding'].unique()
    # An unrecognised binding silently falls back to "all bindings".
    selected_bindings = [binding] if binding in known_bindings else known_bindings

    keep = (df['count_ItemsPriced'] >= minSampleNum) & df['CatalogBinding'].isin(selected_bindings)

    # Select the quantity columns *before* summing: aggregating the whole frame
    # would also try to sum unrelated object columns, which is wasteful and can
    # raise on values that cannot be added together.
    return (
        df.loc[keep, grade_cols + qty_cols]
        .groupby(grade_cols)[qty_cols]
        .sum()
        .reset_index()
    )

def calc_CatAcc(df, gradeThreshold=0.3, **kwargs):
    """Return the quantity-weighted share of exact grade matches.

    Only rows whose 'pred_BuyOfferPct_R40' is at least ``gradeThreshold`` are
    considered. The result is the sum of 'count_ItemsPriced' over those rows
    where the predicted grade equals 'actual_BuyOfferPct_R40', divided by the
    sum of 'count_ItemsPriced' over all considered rows. Extra keyword
    arguments are accepted and ignored.
    """
    at_or_above = df['pred_BuyOfferPct_R40'] >= gradeThreshold
    grades_match = df['pred_BuyOfferPct_R40'] == df['actual_BuyOfferPct_R40']

    qty_considered = df.loc[at_or_above, 'count_ItemsPriced'].sum()
    qty_matched = df.loc[grades_match & at_or_above, 'count_ItemsPriced'].sum()
    return qty_matched / qty_considered

def calc_CatAccByGrade(df):
    """Return per-predicted-grade accuracy and total priced quantity.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'pred_BuyOfferPct_R40', 'actual_BuyOfferPct_R40' and
        'count_ItemsPriced'.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 'pred_BuyOfferPct_R40' value, in ascending order,
        with the columns:

        * 'Suggested Offer Grade' -- the predicted grade,
        * 'Pct Accuracy' -- sum of 'count_ItemsPriced' on rows where predicted
          and actual grade are equal, divided by 'Total Qty',
        * 'Total Qty' -- sum of 'count_ItemsPriced' for that predicted grade.
    """
    qty_by_grade = df.groupby('pred_BuyOfferPct_R40')['count_ItemsPriced'].sum()

    grades_match = df['pred_BuyOfferPct_R40'] == df['actual_BuyOfferPct_R40']
    # A grade without a single exact match is absent from this groupby result;
    # reindexing with fill_value=0 makes its accuracy 0.0 instead of NaN.
    qty_matched = (
        df[grades_match]
        .groupby('pred_BuyOfferPct_R40')['count_ItemsPriced']
        .sum()
        .reindex(qty_by_grade.index, fill_value=0)
    )

    return pd.DataFrame({
        'Suggested Offer Grade': qty_by_grade.index.values,
        'Pct Accuracy': (qty_matched / qty_by_grade).values,
        'Total Qty': qty_by_grade.values,
    })

def calc_CatPredPctsByGrade(df, gradeThreshold=0, **kwargs):
    """Break each predicted grade down by the actual grades it received.

    For every row of ``df`` the 'count_ItemsPriced' value is divided by the
    total 'count_ItemsPriced' of all rows sharing the same
    'pred_BuyOfferPct_R40'. Rows whose predicted grade is below
    ``gradeThreshold`` are then removed. Extra keyword arguments are accepted
    and ignored.

    Returns a DataFrame with the columns 'Suggested Offer Grade',
    'Actual Grade', 'Qty Actual Grades' and 'Pct Actual Grades'.
    """
    # Total priced quantity per predicted grade, given a name that cannot clash
    # with the per-row quantity column when merged back.
    qty_per_pred = (
        df.groupby('pred_BuyOfferPct_R40')['count_ItemsPriced']
        .sum()
        .rename('_qty_pred_total')
    )
    joined = df.merge(qty_per_pred, on='pred_BuyOfferPct_R40')

    breakdown = pd.DataFrame({
        'Suggested Offer Grade': joined['pred_BuyOfferPct_R40'],
        'Actual Grade': joined['actual_BuyOfferPct_R40'],
        'Qty Actual Grades': joined['count_ItemsPriced'],
        'Pct Actual Grades': joined['count_ItemsPriced'] / joined['_qty_pred_total'],
    })
    return breakdown[breakdown['Suggested Offer Grade'] >= gradeThreshold]

In [250]:
# Chain-level evaluation: aggregate all bindings, keeping rows with at least
# one priced item (minSampleNum=1).
df_ChainPredComp = create_PredCompDF(df_Chain, 1)
# Overall exact-match accuracy restricted to predicted grades >= 0.3.
print(calc_CatAcc(df_ChainPredComp, gradeThreshold=0.3))
# Accuracy and total quantity for each predicted grade (displayed as cell output).
calc_CatAccByGrade(df_ChainPredComp)

0.7029134101988702


Unnamed: 0,Suggested Offer Grade,Pct Accuracy,Total Qty
0,0.0,0.007284,701689
1,0.05,0.00879,934715
2,0.1,0.445044,211671
3,0.2,0.519787,874747
4,0.3,0.737418,276447
5,0.4,0.419908,33705


In [251]:
# Distribution of actual grades within each predicted grade, for predicted
# grades >= 0.2 only.
calc_CatPredPctsByGrade(df_ChainPredComp, gradeThreshold=0.2)

Unnamed: 0,Suggested Offer Grade,Actual Grade,Qty Actual Grades,Pct Actual Grades
18,0.2,0.0,825,0.000943
19,0.2,0.05,2904,0.00332
20,0.2,0.1,8221,0.009398
21,0.2,0.2,454682,0.519787
22,0.2,0.3,382022,0.436723
23,0.2,0.4,26093,0.029829
24,0.3,0.0,169,0.000611
25,0.3,0.05,309,0.001118
26,0.3,0.1,3203,0.011586
27,0.3,0.2,45096,0.163127


In [238]:
# Same evaluation on the non-'Chain' rows: aggregate all bindings with
# minSampleNum=1, then show accuracy and total quantity per predicted grade.
df_LocPredComp = create_PredCompDF(df_Loc, 1)
calc_CatAccByGrade(df_LocPredComp)

Unnamed: 0,Suggested Offer Grade,Pct Accuracy,Total Qty
0,0.0,0.038927,4547
1,0.05,0.036329,6854
2,0.1,0.483126,1689
3,0.2,0.479772,9813
4,0.3,0.417155,5293
5,0.4,0.574532,1335


In [239]:
# Display the aggregated (predicted grade, actual grade) quantities for the
# non-'Chain' rows.
df_LocPredComp

Unnamed: 0,pred_BuyOfferPct_R40,actual_BuyOfferPct_R40,count_ItemsPriced,count_ItemsSold
0,0.0,0.0,177,3
1,0.0,0.05,79,7
2,0.0,0.1,754,80
3,0.0,0.2,2495,237
4,0.0,0.3,709,228
5,0.0,0.4,333,75
6,0.05,0.0,276,0
7,0.05,0.05,249,35
8,0.05,0.1,104,11
9,0.05,0.2,3810,421
