# Testing Deferred Revenue in Python
Will this be easier for everyone to use than Matlab?

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Step 1: Processing Base Billings Data

In [2]:
# Read the raw base billings from the 'bill_DC' worksheet of the workbook
billings_path = '../data/old/base_billings.xlsx'
df = pd.read_excel(billings_path, sheet_name='bill_DC')

In [3]:
# Eyeball ten randomly chosen rows of the raw billings
df.sample(n=10)

Unnamed: 0,Document Currency,Enterprise Bu,Frequency,Invoicing Fiscal Year-Period Desc,Product Configtype ID,Revenue Recognition Category New,Rule For Bill Date,Sales Type,Subscription Term,Completed Sales Doc Currency
8379,EUR,Creative,,2016-10,IDRT,,,RECOGNIZED,0,98391.0
27881,USD,Experience Cloud,,2016-08,,D,YA,DEFERRED,0,6480.0
6663,DKK,Creative,,2016-11,MTHLY,A,,DEFERRED,1,-9116.29
15672,GBP,Experience Cloud,ACTL,2017-04,,D,,DEFERRED,0,3068.0
5759,CHF,Experience Cloud,CERT,2015-06,1Y,D,YA,DEFERRED,12,90000.0
1573,AUD,Document Cloud,,2018-12,MTHLY,D,Y3,DEFERRED,1,48432.62
25282,USD,Creative,ONGO,2018-09,,D,,DEFERRED,12,2000711.0
22664,SEK,Document Cloud,,2016-01,MTHLY,D,Y3,DEFERRED,1,38375.22
27822,USD,Experience Cloud,1TME,2016-07,OCONS,A,,DEFERRED,0,15000.0
18479,JPY,Document Cloud,,2016-05,,,,RECOGNIZED,0,229991000.0


## Filter that removes any currency that has 10 or fewer transactions.


In [4]:
# Number of transactions per document currency (index = currency code, value = row count)
vc = df.loc[:, 'Document Currency'].value_counts()

In [5]:
# Boolean array aligned with vc: True where a currency has more than 10 transactions
keep_these = (vc > 10).values
# Restrict the counts to those currencies and take their codes
keep_curr = vc.loc[keep_these]
a = keep_curr.index
# Keep only the billing rows whose currency is in that list
df = df.loc[df['Document Currency'].isin(a)]

## Just keeping track of the currencies we removed in our model_dict data structure

In [6]:
# Boolean array aligned with vc: True where a currency has 10 or fewer transactions
remove_these = (vc <= 10).values
# Record the codes of the currencies that were filtered out
model_dict = {'curr_removed': vc.index[remove_these].tolist()}
model_dict

{'curr_removed': ['CLP', 'COP', 'ARS', 'PEN', 'BMD', 'MXP', 'INR']}

# Removing any of the values that are zero

In [7]:
# Row count is captured first so both lengths can be reported around the filter
rows_before_zero_filter = len(df)
print('This is the length of the dataframe before removing zeros: ', rows_before_zero_filter)
# Keep only the rows whose billed amount is non-zero
df = df.loc[df['Completed Sales Doc Currency'].ne(0)]
print('This is the length of the dataframe after removing zeros: ', len(df))

This is the length of the dataframe before removing zeros:  31418
This is the length of the dataframe after removing zeros:  28896


In [8]:
# Spot-check twenty random rows after the currency and zero-value filters
df.sample(n=20)

Unnamed: 0,Document Currency,Enterprise Bu,Frequency,Invoicing Fiscal Year-Period Desc,Product Configtype ID,Revenue Recognition Category New,Rule For Bill Date,Sales Type,Subscription Term,Completed Sales Doc Currency
24450,USD,Creative,1TME,2016-11,ONORE,,,RECOGNIZED,0,602.3
15982,GBP,Experience Cloud,1TME,2018-02,ONORE,,,RECOGNIZED,0,52254.21
28115,USD,Experience Cloud,1TME,2016-11,OENSV,D,,DEFERRED,0,1237.5
30710,USD,Print & Publishing,,2016-06,,,,PRO-SVC-INV,0,108000.0
17165,HKD,Creative,,2018-02,1Y,D,Y3,DEFERRED,1,159020.4
23405,SEK,Experience Cloud,ONGO,2019-03,ONORE,D,,DEFERRED,0,6766757.73
21305,NOK,Document Cloud,,2017-09,,A,,DEFERRED,12,-11858.05
28794,USD,Experience Cloud,ONGO,2017-08,1Y,D,Y1,DEFERRED,0,52468.58
11143,EUR,Experience Cloud,1TME,2017-06,,,,DEFERRED,0,920.0
16730,GBP,Print & Publishing,,2016-08,1Y,D,Y3,DEFERRED,1,26876.6


## Clearing out the Non-Revenue billings from the file
 - No Idea what these are

In [9]:
# Count how many rows fall under each sales type
df.loc[:, 'Sales Type'].value_counts()

DEFERRED       22919
RECOGNIZED      5041
PRO-SVC-INV      844
NON-REV           92
Name: Sales Type, dtype: int64

In [10]:
# Row count is captured first so both lengths can be reported around the filter
rows_before_non_rev_filter = len(df)
print('Length of the dataframe before removing non-revenue billings: ', rows_before_non_rev_filter)
# Discard every row whose sales type is 'NON-REV'
df = df.loc[df['Sales Type'].ne('NON-REV')]
print('Length of the dataframe after removing non-revenue billings:  ', len(df))


Length of the dataframe before removing non-revenue billings:  28896
Length of the dataframe after removing non-revenue billings:   28804


## Starting to group the revenue by period, industry, etc

Attempting to group by the following categories
 - currency
 - period
 - sale type
 
May need to process the data differently with the deferred billings so we will start with the recognized and then the service billings

# DOING THIS ALL IN PANDAS WITH SPLIT APPLY COMBINE on Sales Type 


In [11]:
# Partition the billings into one dataframe per sales type:
#   rec - recognized billings   (Sales Type == 'RECOGNIZED')
#   svc - service billings      (Sales Type == 'PRO-SVC-INV')
#   dfr - deferred billings     (Sales Type == 'DEFERRED')
rec, svc, dfr = (
    df.loc[df['Sales Type'].eq(sales_type)]
    for sales_type in ('RECOGNIZED', 'PRO-SVC-INV', 'DEFERRED')
)

In [13]:
# Show the column labels of the recognized-billings frame
rec.columns

Index(['Document Currency', 'Enterprise Bu', 'Frequency',
       'Invoicing Fiscal Year-Period Desc', 'Product Configtype ID',
       'Revenue Recognition Category New', 'Rule For Bill Date', 'Sales Type',
       'Subscription Term', 'Completed Sales Doc Currency'],
      dtype='object')

### Recognized Revenue

# NOW WORKING ON THE REVENUE

In [14]:
# Eyeball ten randomly chosen recognized-billing rows
rec.sample(n=10)

Unnamed: 0,Document Currency,Enterprise Bu,Frequency,Invoicing Fiscal Year-Period Desc,Product Configtype ID,Revenue Recognition Category New,Rule For Bill Date,Sales Type,Subscription Term,Completed Sales Doc Currency
16843,GBP,Print & Publishing,,2017-04,2V,,,RECOGNIZED,0,2419.0
16643,GBP,Print & Publishing,,2015-12,IDRT,,,RECOGNIZED,0,2085.0
23485,SEK,Other Solutions,,2018-07,,,,RECOGNIZED,0,-274.62
12492,EUR,Other Solutions,,2018-07,,,,RECOGNIZED,0,-5106.18
11834,EUR,Experience Cloud,ONGO,2018-07,ONORE,,,RECOGNIZED,0,5079.6
23882,USD,Creative,,2015-10,2V,,,RECOGNIZED,0,14247.0
7583,DKK,Experience Cloud,1TME,2017-10,ONORE,,,RECOGNIZED,0,53114.86
16703,GBP,Print & Publishing,,2016-06,1V,,,RECOGNIZED,0,4723.0
17407,JPY,Creative,,2015-06,2V,,,RECOGNIZED,0,234100.0
26845,USD,Experience Cloud,,2015-07,,,,RECOGNIZED,0,537576.42


In [15]:
# Sum the recognized billings per currency / business unit / fiscal period.
# numeric_only=True restricts the aggregation to the numeric columns. The
# text columns (e.g. 'Frequency', 'Sales Type') have no meaningful sum, and
# pandas >= 2.0 no longer drops them automatically from GroupBy.sum().
# 'Subscription Term' is numeric, so it is still present in the result.
gb_rec = rec.groupby(
    ['Document Currency', 'Enterprise Bu', 'Invoicing Fiscal Year-Period Desc'],
    as_index=False,
).sum(numeric_only=True)

In [28]:
# The summed 'Subscription Term' column is left over from the groupby; discard it
gb_rec = gb_rec.drop(columns='Subscription Term')

In [29]:
# Display the recognized billings aggregated by currency, business unit and fiscal period
gb_rec

Unnamed: 0,Document Currency,Enterprise Bu,Invoicing Fiscal Year-Period Desc,Completed Sales Doc Currency
0,AUD,Creative,2015-06,490156.86
1,AUD,Creative,2015-07,235045.62
2,AUD,Creative,2015-08,284334.70
3,AUD,Creative,2015-09,369648.88
4,AUD,Creative,2015-10,287292.48
5,AUD,Creative,2015-11,361723.22
6,AUD,Creative,2015-12,276178.28
7,AUD,Creative,2016-01,270306.21
8,AUD,Creative,2016-02,116996.45
9,AUD,Creative,2016-03,235382.84


In [17]:
# Confirm what kind of object the aggregation produced
print(type(gb_rec))

<class 'pandas.core.frame.DataFrame'>


## Now doing this for the service billings

In [21]:
# Sum the service billings per currency / business unit / fiscal period.
# numeric_only=True restricts the aggregation to the numeric columns. The
# text columns (e.g. 'Frequency', 'Sales Type') have no meaningful sum, and
# pandas >= 2.0 no longer drops them automatically from GroupBy.sum().
# 'Subscription Term' is numeric, so it is still present in the result.
gb_svc = svc.groupby(
    ['Document Currency', 'Enterprise Bu', 'Invoicing Fiscal Year-Period Desc'],
    as_index=False,
).sum(numeric_only=True)

In [30]:
# Discard the leftover 'Subscription Term' column, then display the aggregated service billings
gb_svc = gb_svc.drop(columns='Subscription Term')
gb_svc

Unnamed: 0,Document Currency,Enterprise Bu,Invoicing Fiscal Year-Period Desc,Completed Sales Doc Currency
0,AUD,Experience Cloud,2015-06,589499.45
1,AUD,Experience Cloud,2015-07,656493.50
2,AUD,Experience Cloud,2015-08,370780.74
3,AUD,Experience Cloud,2015-09,1251726.13
4,AUD,Experience Cloud,2015-10,291324.07
5,AUD,Experience Cloud,2015-11,770327.50
6,AUD,Experience Cloud,2015-12,700684.00
7,AUD,Experience Cloud,2016-01,528857.50
8,AUD,Experience Cloud,2016-02,367175.25
9,AUD,Experience Cloud,2016-03,974604.14


# NOW WORKING ON DEFERRED BILLINGS

## Type B billings are service agreements that will have invoices submitted before the billings are reclassified to revenue. If no invoices are assigned to the billings, the billings become revenue in 12 months

In [34]:
# Isolate the deferred billings whose revenue recognition category is 'B';
# these are aggregated separately in the cells that follow
is_type_b = dfr['Revenue Recognition Category New'].eq('B')
dfr_b = dfr.loc[is_type_b]

In [37]:
# Sum the type B deferred billings per currency / business unit / fiscal period.
# numeric_only=True restricts the aggregation to the numeric columns. The
# text columns (e.g. 'Frequency', 'Sales Type') have no meaningful sum, and
# pandas >= 2.0 no longer drops them automatically from GroupBy.sum().
# 'Subscription Term' is numeric, so it is still present in the result.
gb_b = dfr_b.groupby(
    ['Document Currency', 'Enterprise Bu', 'Invoicing Fiscal Year-Period Desc'],
    as_index=False,
).sum(numeric_only=True)

In [39]:
# The summed 'Subscription Term' column is left over from the groupby; discard it
gb_b = gb_b.drop(columns='Subscription Term')

In [31]:
# Show the column labels of the deferred-billings frame
dfr.columns

Index(['Document Currency', 'Enterprise Bu', 'Frequency',
       'Invoicing Fiscal Year-Period Desc', 'Product Configtype ID',
       'Revenue Recognition Category New', 'Rule For Bill Date', 'Sales Type',
       'Subscription Term', 'Completed Sales Doc Currency'],
      dtype='object')

In [32]:
# Number of deferred-billing rows
print(dfr.shape[0])

22919


In [36]:
# Number of type B deferred-billing rows
print(dfr_b.shape[0])

997


In [40]:
# Display the type B deferred billings aggregated by currency, business unit and fiscal period
gb_b

Unnamed: 0,Document Currency,Enterprise Bu,Invoicing Fiscal Year-Period Desc,Completed Sales Doc Currency
0,AUD,Creative,2017-10,8544.00
1,AUD,Document Cloud,2017-12,3283.20
2,AUD,Document Cloud,2018-06,3425.00
3,AUD,Document Cloud,2018-12,3425.00
4,AUD,Document Cloud,2019-03,4375.00
5,AUD,Experience Cloud,2015-06,258231.69
6,AUD,Experience Cloud,2015-07,261159.25
7,AUD,Experience Cloud,2015-08,270139.50
8,AUD,Experience Cloud,2015-09,482725.30
9,AUD,Experience Cloud,2015-10,287254.60
