In [131]:
import numpy as np
import pickle
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Apply matplotlib's "ggplot" style sheet to figures drawn after this point.
plt.style.use("ggplot")


In [132]:
import json



In [133]:
# Read the "base_billings" sheet of the billing inputs workbook into df.
df = pd.read_excel(
    io="../data/Data_2020_P06/all_billings_inputs_08_20.xlsx",
    sheet_name="base_billings",
)

In [134]:
# Map the verbose export headers onto the short column names used by the
# rest of the notebook.
short_names = {
    "Contrct Duration in Months": "duration",
    "Document Currency": "curr",
    "Enterprise BU Desc": "BU",
    "Invoice Fiscal Year Period Desc": "period",
    "POB Type": "POB_type",
    "Product Config Type": "config",
    "Rev Rec Category": "rev_req_type",
    "Rule For Bill Date": "rebill_rule",
    "Sales Document Type": "sales_doc",
    "Sales Type": "sales_type",
    "Subscription Term": "sub_term",
    "Completed Sales ( DC )": "DC_amount",
    "Completed Sales": "US_amount",
}

# index=str also converts every row label to a string.
# NOTE(review): confirm the string index is intended and not a leftover.
df.rename(index=str, columns=short_names, inplace=True)


In [135]:
# Preview the first three rows to check the renamed columns.
df.head(3)

Unnamed: 0,duration,curr,BU,period,POB_type,config,rev_req_type,rebill_rule,sales_doc,sales_type,sub_term,DC_amount,US_amount
0,0,AUD,Experience Cloud,2017-01,,OCONS,,,ZCC,PRO-SVC-INV,0,375.0,278.01
1,0,AUD,Experience Cloud,2017-01,,ONORE,,,ZCC,RECOGNIZED,0,0.0,45.75
2,0,AUD,Experience Cloud,2017-01,,ONORE,D,YA,ZCC,DEFERRED,0,-684738.0,-512177.81


In [136]:
# Restrict df to currencies with enough rows, and record the excluded ones.
# A currency is kept only when it appears in more than 20 rows of df.
vc = df["curr"].value_counts()
keep_these = vc.values > 20
keep_curr = vc[keep_these]
list_keepers = keep_curr.index
remove_these = vc[~keep_these].index

# model_dict and delete_curr start out holding the same currency codes but
# are separate list objects.
model_dict = {"curr_removed": list(remove_these)}
delete_curr = list(remove_these)

# TRY is excluded regardless of its row count.
if "TRY" not in model_dict["curr_removed"]:
    model_dict["curr_removed"].append("TRY")
    delete_curr.append("TRY")
    # errors="ignore": TRY may be absent from the data altogether (for
    # example when this cell is re-run after df was already filtered), in
    # which case a plain drop() would raise KeyError.
    list_keepers = list_keepers.drop("TRY", errors="ignore")

df = df[df["curr"].isin(list_keepers)]

In [137]:
# Report the row count before and after discarding rows whose
# document-currency amount is exactly zero.
rows_before = len(df)
print(rows_before)
df = df.loc[df["DC_amount"].ne(0)]
rows_after = len(df)
print(rows_after)

293832
286744


In [138]:
# Show the currencies recorded as removed from the data.
model_dict

{'curr_removed': ['BHD',
  'JOD',
  'EGP',
  'OMR',
  'LBP',
  'BMD',
  'AED',
  'MXP',
  'TRY']}

In [139]:
# Row count per POB type; value_counts() leaves out missing values by default.
df['POB_type'].value_counts()

RR       73256
IR-NA     7424
IR        4819
BNDL      3604
CR        2956
RR-NA     2308
CR-NA      102
LFB         56
Name: POB_type, dtype: int64

## Grouping by POB Type

The new 606 has everything grouped by sales type. Many rows have a blank POB type.
We need to separate the rows into groups based on the POB type.


In [140]:
# POB type codes belonging to each billing group.
list_IR = ["IR", "IR-NA", "LFB"]
list_service = ["CR", "CR-NA"]
list_deferred = ["RR", "RR-NA"]
list_hybrid = ["BNDL"]

# Every recognised code, flattened in group order.
list_all = [*list_IR, *list_service, *list_deferred, *list_hybrid]
print(list_all)

['IR', 'IR-NA', 'LFB', 'CR', 'CR-NA', 'RR', 'RR-NA', 'BNDL']


In [141]:
# Split df into one independent frame per POB group. Rows whose POB type is
# in none of the lists (missing values included) end up in `blank`.
pob = df["POB_type"]
rec = df.loc[pob.isin(list_IR)].copy()
svc = df.loc[pob.isin(list_service)].copy()
dfr = df.loc[pob.isin(list_deferred)].copy()
hyb = df.loc[pob.isin(list_hybrid)].copy()
blank = df.loc[~pob.isin(list_all)].copy()

In [142]:
# Print the row count of each POB group, followed by the full frame.
for label, frame in (
    ('rec', rec),
    ('svc', svc),
    ('dfr', dfr),
    ('hyb', hyb),
    ('blank', blank),
    ('ALL', df),
):
    print(label, len(frame))

rec 12299
svc 3058
dfr 75564
hyb 3604
blank 192219
ALL 286744


In [143]:
# The group sizes should add up to len(df) if the split lost no rows.
sum(len(part) for part in (rec, svc, dfr, blank, hyb))

286744

In [144]:
# Total the `rec` rows per currency, business unit and period.
# numeric_only=True is explicit because pandas 2.0 changed the default to
# False, which would try to sum the text columns (POB_type, config, ...)
# instead of leaving them out.
# NOTE(review): groupby drops rows whose key columns are missing
# (dropna=True by default) — confirm that is intended for blank BU values.
gb_rec = rec.groupby(["curr", "BU", "period"], as_index=False).sum(numeric_only=True)

In [145]:
# Discard the summed duration and sub_term columns from gb_rec.
# Rebinding with errors="ignore" keeps this cell safe to re-run: the
# in-place drop raised KeyError once the columns were already gone.
gb_rec = gb_rec.drop(columns=["duration", "sub_term"], errors="ignore")

In [146]:
# Preview the first four aggregated rows of gb_rec.
gb_rec.head(4)

Unnamed: 0,curr,BU,period,DC_amount,US_amount
0,ARS,Creative,2019-07,6786.0,155.12
1,ARS,Creative,2019-08,16472.0,390.02
2,ARS,Creative,2019-09,19205.0,405.51
3,ARS,Creative,2019-10,30382.19,532.13


In [147]:
# Total the `svc` rows per currency, business unit and period.
# numeric_only=True is explicit because pandas 2.0 changed the default to
# False, which would try to sum the text columns instead of leaving them out.
# NOTE(review): duration and sub_term are summed here as well; unlike gb_rec
# they are not dropped afterwards — confirm whether those sums are wanted.
gb_svc = svc.groupby(["curr", "BU", "period"], as_index=False).sum(numeric_only=True)

In [148]:
# Display gb_svc (pandas truncates the middle rows of a long frame).
# NOTE(review): the following cell shows gb_svc.head(4), so this display is largely redundant.
gb_svc

Unnamed: 0,curr,BU,period,duration,sub_term,DC_amount,US_amount
0,AUD,Creative,2017-10,18,0,8544.00,6837.59
1,AUD,Creative,2019-01,12,0,8472.55,6135.45
2,AUD,Creative,2019-02,12,0,3635.13,2587.61
3,AUD,Creative,2019-03,36,24,10437.33,7531.36
4,AUD,Creative,2019-04,24,12,6914.89,4896.03
...,...,...,...,...,...,...,...
576,USD,Print & Publishing,2019-12,22,0,70834.00,70834.00
577,USD,Print & Publishing,2020-01,15,0,35000.00,35000.00
578,USD,Print & Publishing,2020-02,8,0,33333.00,33333.00
579,USD,Print & Publishing,2020-04,8,0,33333.00,33333.00


In [149]:
# Preview the first four aggregated rows of gb_svc.
gb_svc.head(4)

Unnamed: 0,curr,BU,period,duration,sub_term,DC_amount,US_amount
0,AUD,Creative,2017-10,18,0,8544.0,6837.59
1,AUD,Creative,2019-01,12,0,8472.55,6135.45
2,AUD,Creative,2019-02,12,0,3635.13,2587.61
3,AUD,Creative,2019-03,36,24,10437.33,7531.36


# Deferred billings
## Type B: service-based

In [150]:
# Independent copy of the deferred rows whose rev_req_type is "B".
is_type_b = dfr["rev_req_type"].eq("B")
dfr_b = dfr.loc[is_type_b].copy()

In [151]:
# Compare the size of the full deferred set with its type B subset.
for frame in (dfr, dfr_b):
    print(len(frame))

75564
0


In [152]:
# Inspect the first ten deferred rows.
dfr.head(10)

Unnamed: 0,duration,curr,BU,period,POB_type,config,rev_req_type,rebill_rule,sales_doc,sales_type,sub_term,DC_amount,US_amount
15,0,EUR,Experience Cloud,2017-01,RR,ONORE,D,YA,ZCC,DEFERRED,0,117600.0,123853.97
47,1,EUR,Experience Cloud,2017-01,RR,ONORE,D,YA,ZCC,DEFERRED,0,399.0,426.24
48,1,JPY,Experience Cloud,2017-01,RR,ONORE,D,YA,ZCC,DEFERRED,0,72772.0,637.48
55,1,USD,Experience Cloud,2017-01,RR,ONORE,D,Y1,ZCC,DEFERRED,0,1232.5,1232.5
56,1,USD,Experience Cloud,2017-01,RR,ONORE,D,YA,ZCC,DEFERRED,0,18094.74,18094.74
57,1,USD,Experience Cloud,2017-01,RR,ONORE,D,YQ,ZCC,DEFERRED,0,4064.52,4064.52
60,2,EUR,Experience Cloud,2017-01,RR,ONORE,D,YA,ZCC,DEFERRED,0,19485.9,20724.03
65,2,USD,Experience Cloud,2017-01,RR,ONORE,D,YA,ZCC,DEFERRED,0,13850.0,13850.0
69,3,CAD,Experience Cloud,2017-01,RR,ONORE,D,YQ,ZCC,DEFERRED,0,78810.0,58634.64
74,3,EUR,Experience Cloud,2017-01,RR,ONORE,D,YA,ZCC,DEFERRED,0,79251.79,84287.45


In [153]:
# Row count per rev_req_type in the deferred set; missing values are not counted.
dfr.rev_req_type.value_counts()

D    51470
A    23504
Name: rev_req_type, dtype: int64

## Type A Deferred billings

### First sort by config type

### Then use sub term

In [154]:
# Take the type A deferred rows and total their amounts per
# currency / BU / period / config / sub_term combination.
dfr_a = dfr[dfr["rev_req_type"] == "A"].copy()
print("length of dfr_a", len(dfr_a))

# duration is removed before grouping so it is not summed into gb_a.
dfr_a = dfr_a.drop(columns="duration")

# numeric_only=True is explicit because pandas 2.0 changed the default to
# False, which would try to sum the remaining text columns.
# NOTE(review): groupby drops rows whose key columns are missing
# (dropna=True by default) — confirm that is intended for blank config values.
gb_a = dfr_a.groupby(
    ["curr", "BU", "period", "config", "sub_term"], as_index=False
).sum(numeric_only=True)


length of dfr_a 23504


In [121]:
# Number of grouped rows in gb_a.
len(gb_a)

2744

In [122]:
# Preview the first four grouped type A rows.
gb_a.head(4)


Unnamed: 0,curr,BU,period,config,sub_term,DC_amount,US_amount
0,AUD,Creative,2017-01,1Y,12,52763.75,39147.16
1,AUD,Creative,2017-01,2Y,0,562.0,417.19
2,AUD,Creative,2017-01,2Y,12,0.0,0.0
3,AUD,Creative,2017-02,1Y,0,11438.0,8498.43


In [123]:
# Number of grouped rows per config value.
gb_a.config.value_counts()

1Y       1757
2Y        664
OUNIV     112
3Y        102
MTHLY      82
OCONS      14
ONORE      13
Name: config, dtype: int64

In [124]:
# Number of grouped rows per sub_term value.
gb_a.sub_term.value_counts()

0     1524
12     927
1      184
36     102
24       7
Name: sub_term, dtype: int64

In [125]:
# Config values kept for the type A split; rows with any other config are
# set aside in a_blank_config by the following cell.
config_type_keepers = ['MTHLY', '1Y', '2Y', '3Y']


In [126]:
# Partition gb_a by whether its config is one of the kept values.
has_kept_config = gb_a["config"].isin(config_type_keepers)
gb_a_keepers = gb_a.loc[has_kept_config].copy()
a_blank_config = gb_a.loc[~has_kept_config].copy()

In [127]:
# Report how the grouped type A rows were divided.
for label, frame in (
    ('len gb_a', gb_a),
    ('gb_a_keepers', gb_a_keepers),
    ('len a_blank_config', a_blank_config),
):
    print(label, len(frame))


len gb_a 2744
gb_a_keepers 2605
len a_blank_config 139


In [128]:
# Confirm that only the kept config values remain in gb_a_keepers.
gb_a_keepers['config'].value_counts()

1Y       1757
2Y        664
3Y        102
MTHLY      82
Name: config, dtype: int64

In [115]:
# Inspect the first ten kept rows.
gb_a_keepers.head(10)

Unnamed: 0,curr,BU,period,config,sub_term,DC_amount,US_amount
0,AUD,Creative,2017-01,1Y,12,52763.75,39147.16
1,AUD,Creative,2017-01,2Y,0,562.0,417.19
2,AUD,Creative,2017-01,2Y,12,0.0,0.0
3,AUD,Creative,2017-02,1Y,0,11438.0,8498.43
4,AUD,Creative,2017-02,1Y,12,69634.32,51472.25
5,AUD,Creative,2017-02,2Y,12,61992.0,44608.2
6,AUD,Creative,2017-03,1Y,12,81471.66,62593.81
7,AUD,Creative,2017-03,2Y,0,96.0,73.59
8,AUD,Creative,2017-04,1Y,12,67584.71,51860.04
9,AUD,Creative,2017-05,1Y,12,15534.23,11716.53


In [116]:
# Sum of US_amount over the type A rows whose config was not kept.
bad_config_usd = a_blank_config["US_amount"].sum()
print('Total USD Equivalent Billings of Type A with bad configs', bad_config_usd)

Total USD Equivalent Billings of Type A with bad configs 2276183.34


## For the type A billings that we can use, we need to split them apart by sub_term and config
If sub_term == 0, then we use the config period. (Assuming these are PUP?)

Otherwise, sub_term gives the term: 12 = annual, 24 = two years, 36 = three years.

Old code has these as gb_a_1M, gb_a_1Y, gb_a_2Y, gb_a_3Y

Here we will pull out the types from the gb_a_keepers dataframe into a new dataframe and then delete these old records from the gb_a_keepers dataframe

In [117]:
print('start length', len(gb_a_keepers))
df_1M = gb_a_keepers[(gb_a_keepers['config']=='MTHLY') |
                       (gb_a_keepers['sub_term']==1)].copy()

index_1M = df_1M.index


gb_a_keepers.drop(index_1M, inplace=True)

print(df_1M.head(10))
print('len df_1M', len(df_1M))
print(len(gb_a_keepers))


start length 2605
    curr              BU   period config  sub_term  DC_amount  US_amount
69   AUD        Creative  2018-10     1Y         1   -1055.63    -765.06
73   AUD        Creative  2018-11     1Y         1 -101077.16  -72130.89
76   AUD        Creative  2018-11  MTHLY         1  -21963.21  -15688.45
78   AUD        Creative  2018-12     1Y         1  -12507.27   -8897.35
81   AUD        Creative  2018-12  MTHLY         1      32.99      23.88
83   AUD        Creative  2019-01     1Y         1     -89.96     -65.15
193  AUD  Document Cloud  2018-10     1Y         1     -16.99     -12.31
198  AUD  Document Cloud  2018-11     1Y         1    -914.96    -652.99
201  AUD  Document Cloud  2018-11  MTHLY         1    -746.73    -532.24
203  AUD  Document Cloud  2018-12     1Y         1     -16.99     -12.11
len df_1M 185
2420


In [118]:
# Rows left in gb_a_keepers after the monthly rows were removed.
print(len(gb_a_keepers))

2420


In [119]:
# Scratch check: the two counts add up to 2605, the keeper count before the split.
# NOTE(review): 1596 and 1009 are hard-coded, presumably from an earlier run;
# they do not match the 2420 / 185 printed by the extraction cell — confirm or remove.
1596+1009


2605

In [90]:
# Row count per sub_term among the rows pulled into df_1M.
# NOTE(review): the saved output below totals 1009 rows, while the extraction
# cell reports len df_1M = 185; the output looks stale (execution counts are
# out of order) — re-run to confirm.
df_1M['sub_term'].value_counts()

12    927
1      81
0       1
Name: sub_term, dtype: int64

In [100]:
# Row count per config among the rows pulled into df_1M.
# NOTE(review): the saved output below totals 1009 rows, while the extraction
# cell reports len df_1M = 185; the output looks stale (execution counts are
# out of order) — re-run to confirm.
df_1M['config'].value_counts()

1Y       748
2Y       179
MTHLY     82
Name: config, dtype: int64