In [None]:
import numpy as np
import pickle
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Apply the "ggplot" style sheet to every matplotlib figure created below.
plt.style.use("ggplot")

import json

In [None]:
# Load the 'base_billings' sheet of the billings input workbook into `df`.
billings_workbook = '../data/Data_2020_P06/all_billings_inputs_08_20.xlsx'
df = pd.read_excel(billings_workbook, sheet_name='base_billings')

In [None]:
# Number of rows read from the workbook.
print('df length: ', df.shape[0])

In [None]:
# Short, code-friendly aliases for the verbose report column headers.
column_aliases = {
    "Contrct Duration in Months": "duration",
    "Document Currency": "curr",
    "Enterprise BU Desc": "BU",
    "Invoice Fiscal Year Period Desc": "period",
    "POB Type": "POB_type",
    "Product Config Type": "config",
    "Rev Rec Category": "rev_req_type",
    "Rule For Bill Date": "rebill_rule",
    "Sales Document Type": "sales_doc",
    "Sales Type": "sales_type",
    "Subscription Term": "sub_term",
    "Completed Sales ( DC )": "DC_amount",
    "Completed Sales": "US_amount",
}

# index=str also converts every row label to a string. Headers listed above
# that are missing from the frame are skipped, because rename ignores
# unknown keys by default.
df.rename(index=str, columns=column_aliases, inplace=True)


In [None]:
# Peek at the first rows to check the renamed columns.
df.head(3)

In [None]:
# Keep only currencies with enough billing rows, and always exclude TRY.
# The removed currencies are recorded in model_dict["curr_removed"].
MIN_CURR_ROWS = 20  # a currency needs MORE than this many rows to be kept

vc = df["curr"].value_counts()
keep_these = vc.values > MIN_CURR_ROWS
keep_curr = vc[keep_these]
list_keepers = keep_curr.index
remove_these = vc[~keep_these].index
model_dict = {"curr_removed": list(remove_these)}
delete_curr = list(remove_these)

# TRY is excluded even when it has enough rows to pass the threshold.
if "TRY" not in model_dict["curr_removed"]:
    model_dict["curr_removed"].append("TRY")
    delete_curr.append("TRY")
    # Index.drop raises KeyError for a label that is not present, which
    # crashed this cell whenever TRY did not occur in the data at all.
    list_keepers = list_keepers.drop("TRY", errors="ignore")

# Rows with a missing currency are dropped here too: value_counts() skips
# NaN, so NaN is never part of list_keepers.
df = df[df["curr"].isin(list_keepers)]

In [None]:
# Discard rows whose DC_amount is exactly zero, printing the row count
# before and after the filter.
print(len(df))
nonzero_dc = df["DC_amount"].ne(0)
df = df[nonzero_dc]
print(len(df))

In [None]:
# Show what has been recorded so far (the list of removed currencies).
model_dict

In [None]:
# Frequency of each POB type, with missing values counted as their own row.
df.POB_type.value_counts(dropna=False)

## Grouping by POB Type

The new 606 has everything grouped by sales type, and there are blanks.
We need to separate the rows into groups based on the POB type; rows with a blank or unrecognized POB type go into their own group.


In [None]:
# POB type codes that make up each billing group.
list_IR = ["IR", "IR-NA", "LFB"]
list_service = ["CR", "CR-NA"]
list_deferred = ["RR", "RR-NA"]
list_hybrid = ["BNDL"]

# Every recognized code, concatenated in group order.
list_all = [*list_IR, *list_service, *list_deferred, *list_hybrid]
print(list_all)

In [None]:
# One independent frame per POB group. Rows whose POB type is in none of
# the lists (missing values included) end up in `blank`.
pob_type = df["POB_type"]
rec = df.loc[pob_type.isin(list_IR)].copy()
svc = df.loc[pob_type.isin(list_service)].copy()
dfr = df.loc[pob_type.isin(list_deferred)].copy()
hyb = df.loc[pob_type.isin(list_hybrid)].copy()
blank = df.loc[~pob_type.isin(list_all)].copy()

In [None]:
# Row count of every POB group, followed by the size of the full frame.
for label, frame in (
    ('rec', rec),
    ('svc', svc),
    ('dfr', dfr),
    ('hyb', hyb),
    ('blank', blank),
    ('ALL', df),
):
    print(label, len(frame))

In [None]:
# Combined size of the five groups, to compare against the size of df.
sum(len(part) for part in (rec, svc, dfr, blank, hyb))

In [None]:
# Total the numeric columns of `rec` per currency / BU / period.
# numeric_only=True is spelled out because the implicit default changed:
# from pandas 2.0 on, a bare .sum() also tries to add up the text columns
# (concatenating strings or raising TypeError).
gb_rec = rec.groupby(["curr", "BU", "period"], as_index=False).sum(numeric_only=True)

In [None]:
# Remove the summed duration and sub_term columns from the aggregate.
# Reassigning with errors="ignore" (instead of inplace=True) keeps the cell
# re-runnable: the in-place version raised KeyError on a second execution
# because the columns were already gone.
gb_rec = gb_rec.drop(columns=["duration", "sub_term"], errors="ignore")

In [None]:
# First rows of the aggregated `rec` billings.
gb_rec.head(4)

In [None]:
# Total the numeric columns of `svc` per currency / BU / period.
# numeric_only=True is spelled out because the implicit default changed:
# from pandas 2.0 on, a bare .sum() also tries to add up the text columns
# (concatenating strings or raising TypeError).
gb_svc = svc.groupby(["curr", "BU", "period"], as_index=False).sum(numeric_only=True)

In [None]:
# Display the aggregated `svc` billings (the whole frame, as rendered by the notebook).
gb_svc

In [None]:
# First rows of the aggregated `svc` billings.
gb_svc.head(4)

# Deferred billings
## Type B: service-based

In [None]:
# Deferred billings whose rev_req_type is "B", as an independent copy.
dfr_b = dfr.loc[dfr["rev_req_type"].eq("B")].copy()

In [None]:
# Size of all deferred billings, then of the type-B subset, one per line.
print(len(dfr), len(dfr_b), sep="\n")

In [None]:
# First rows of the deferred billings.
dfr.head(10)

In [None]:
# Frequency of each rev_req_type among the deferred billings, NaN included.
dfr["rev_req_type"].value_counts(dropna=False)

## Type A Deferred billings

### First sort by config type

### Then use sub term

In [None]:
# Deferred billings whose rev_req_type is "A", without the duration column.
dfr_a = dfr[dfr["rev_req_type"] == "A"].copy()
print("length of dfr_a", len(dfr_a))
dfr_a = dfr_a.drop(columns="duration")

# Total the numeric columns per currency / BU / period / config / sub_term.
# numeric_only=True is spelled out because from pandas 2.0 on a bare .sum()
# also tries to add up text columns (concatenating strings or raising).
# NOTE(review): groupby discards rows with a missing value in any key column
# by default, so type-A rows with a blank config or sub_term never reach
# gb_a - confirm that this is intended.
gb_a = dfr_a.groupby(
    ["curr", "BU", "period", "config", "sub_term"], as_index=False
).sum(numeric_only=True)


In [None]:
# Number of aggregated type-A groups.
gb_a.shape[0]

In [None]:
# First rows of the aggregated type-A billings.
gb_a.head(4)


In [None]:
# How many aggregated rows carry each config value (NaN included).
gb_a["config"].value_counts(dropna=False)

In [None]:
# How many aggregated rows carry each sub_term value (NaN included).
gb_a["sub_term"].value_counts(dropna=False)

In [None]:
# Config values that are kept; rows with any other config are set aside
# as "bad config" rows further down.
config_type_keepers = ['MTHLY', '1Y', '2Y', '3Y']


In [None]:
# Partition the type-A aggregate by whether its config is one we keep.
has_kept_config = gb_a["config"].isin(config_type_keepers)
gb_a_keepers = gb_a.loc[has_kept_config].copy()
a_bad_config = gb_a.loc[~has_kept_config].copy()

In [None]:
# Sizes of the full type-A aggregate and of its two partitions.
for label, frame in (
    ('len gb_a', gb_a),
    ('gb_a_keepers', gb_a_keepers),
    ('len a_blank_config', a_bad_config),
):
    print(label, len(frame))


In [None]:
# Config mix of the rows that were kept (NaN included).
gb_a_keepers.config.value_counts(dropna=False)

In [None]:
# sub_term mix of the rows that were kept (NaN included).
gb_a_keepers.sub_term.value_counts(dropna=False)

In [None]:
# First rows of the kept type-A billings.
gb_a_keepers.head(10)

In [None]:
# Sum of US_amount over the rows whose config is not in config_type_keepers.
bad_config_usd_total = a_bad_config["US_amount"].sum()
print('Total USD Equivalent Billings of Type A with bad configs', bad_config_usd_total)

## For the type A billings that we can use, we need to split them apart by sub_term and config
If `sub_term == 0`, we use the config period (assuming these are PUP?).

Otherwise `sub_term` is the term in months: 12 = annual, 24 = two years, 36 = three years.

Old code has these as gb_a_1M, gb_a_1Y, gb_a_2Y, gb_a_3Y

Here we pull each type out of the `gb_a_keepers` dataframe into its own dataframe and then delete those records from `gb_a_keepers`.

In [None]:
# Monthly bucket: rows with a MTHLY config or a sub_term of 1.
print('start length', len(gb_a_keepers))
is_1M = (gb_a_keepers['config'] == 'MTHLY') | (gb_a_keepers['sub_term'] == 1)
df_1M = gb_a_keepers.loc[is_1M].copy()
index_1M = df_1M.index

# Remove the extracted rows in place so the later buckets cannot pick them up.
# NOTE(review): because gb_a_keepers is mutated here, running this cell a
# second time produces an empty df_1M.
gb_a_keepers.drop(index=index_1M, inplace=True)

print(df_1M.head(10))
print('len df_1M', len(df_1M))
print(len(gb_a_keepers))


In [None]:
# Rows still left in gb_a_keepers.
print(gb_a_keepers.shape[0])

In [None]:
# Config mix of the monthly bucket; the commented-out line is the same
# check on sub_term.
#df_1M['sub_term'].value_counts(dropna=False)
df_1M['config'].value_counts(dropna=False)

In [None]:
# First rows of what is left in gb_a_keepers at this point.
gb_a_keepers.head(10)

##### Dealing with the 1 year rebillings

In [None]:
# One-year bucket: sub_term of 12, or sub_term of 0 combined with a 1Y config.
print('start length', len(gb_a_keepers))
term_is_12 = gb_a_keepers['sub_term'] == 12
zero_term_1Y_config = (gb_a_keepers['sub_term'] == 0) & (gb_a_keepers['config'] == '1Y')
df_1Y = gb_a_keepers.loc[term_is_12 | zero_term_1Y_config].copy()
index_1Y = df_1Y.index

# Remove the extracted rows in place so the later buckets cannot pick them up.
# NOTE(review): because gb_a_keepers is mutated here, running this cell a
# second time produces an empty df_1Y.
gb_a_keepers.drop(index=index_1Y, inplace=True)

print(df_1Y.head(10))
print('len df_1Y', len(df_1Y))
print(len(gb_a_keepers))

In [None]:
# Two- and three-year buckets, selected from the remaining rows by config.
# NOTE(review): unlike the one-year bucket, sub_term (24 / 36) is not
# consulted here, and the selected rows are not removed from gb_a_keepers -
# confirm that both are intended.
remaining_config = gb_a_keepers['config']
df_2Y = gb_a_keepers.loc[remaining_config.eq('2Y')].copy()
df_3Y = gb_a_keepers.loc[remaining_config.eq('3Y')].copy()

print('length gb_a_keepers', gb_a_keepers.shape[0])
print('length df_2Y', df_2Y.shape[0])
print('df_3Y', df_3Y.shape[0])

In [None]:
# Which sub_term values occur among the remaining rows with a 3Y config?
is_3Y_config = gb_a_keepers['config'].eq('3Y')
test2 = gb_a_keepers.loc[is_3Y_config]
test2['sub_term'].value_counts()

In [None]:
# Config mix of the one-year bucket (NaN included).
df_1Y["config"].value_counts(dropna=False)

In [None]:
# Spot-check up to 50 of the rows still left in gb_a_keepers.
# min(...) avoids the ValueError that sample() raises when fewer than 50 rows
# remain; random_state makes the displayed sample the same on every run.
gb_a_keepers.sample(n=min(50, len(gb_a_keepers)), random_state=0)