# Testing Deferred Revenue in Python
Will this be easier for everyone to use than Matlab?

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Step 1: Processing Base Billings Data

In [2]:
df = pd.read_excel('../data/old/base_billings.xlsx', sheet_name='bill_DC')

In [3]:
df.sample(10)

Unnamed: 0,Document Currency,Enterprise Bu,Frequency,Invoicing Fiscal Year-Period Desc,Product Configtype ID,Revenue Recognition Category New,Rule For Bill Date,Sales Type,Subscription Term,Completed Sales Doc Currency
12073,EUR,Experience Cloud,ONGO,2018-12,1Y,D,YQ,DEFERRED,12,360238.19
1445,AUD,Document Cloud,,2018-06,1Y,D,Y3,DEFERRED,1,282174.17
2575,AUD,Experience Cloud,ONGO,2018-03,ONORE,D,,DEFERRED,0,2205143.7
26433,USD,Document Cloud,,2018-05,1Y,A,,DEFERRED,1,-5562.88
27815,USD,Experience Cloud,1TME,2016-07,,A,,DEFERRED,0,2500.0
712,AUD,Creative,,2018-09,1Y,A,,DEFERRED,12,4473476.11
20726,NOK,Creative,,2016-06,1Y,D,Y3,DEFERRED,1,8041527.48
6550,DKK,Creative,,2015-11,1V,,,RECOGNIZED,0,2230.0
19030,JPY,Experience Cloud,,2015-06,1Y,,,RECOGNIZED,0,0.0
17460,JPY,Creative,,2015-09,1Y,,,DEFERRED,12,0.0


## Filter that removes any currency that has  < 10 transactions. 


In [4]:
# creates a list of the currencies and the number of transactions for each currency
vc = df['Document Currency'].value_counts()

In [5]:
# Create variable that is true if the number of transaction is greater than 10, false otherwise
keep_these = vc.values > 10
# filtering only currencies that were greater than 10
keep_curr = vc[keep_these]
a = keep_curr.index
# filtering the dataframe to remove any of teh currencies not in our list
df = df[df['Document Currency'].isin(a)]

## Just keeping track of the currencies we removed in our model_dict data strucutre

In [6]:
remove_these = vc.values <= 10
model_dict = {'curr_removed': list(vc[remove_these].index)}
model_dict

{'curr_removed': ['CLP', 'COP', 'ARS', 'PEN', 'INR', 'BMD', 'MXP']}

# Removing any of the values that are zero

In [7]:
print('This is the length of the dataframe before removing zeros: ', len(df))
df = df[df['Completed Sales Doc Currency']!=0]
print('This is the length of the dataframe after removing zeros: ', len(df))

This is the length of the dataframe before removing zeros:  31418
This is the length of the dataframe after removing zeros:  28896


In [8]:
df.sample(10)

Unnamed: 0,Document Currency,Enterprise Bu,Frequency,Invoicing Fiscal Year-Period Desc,Product Configtype ID,Revenue Recognition Category New,Rule For Bill Date,Sales Type,Subscription Term,Completed Sales Doc Currency
6667,DKK,Creative,,2016-12,1Y,A,,DEFERRED,1,43045.71
16923,GBP,Print & Publishing,,2017-10,2V,,,RECOGNIZED,0,4740.0
9940,EUR,Document Cloud,,2019-01,,D,,DEFERRED,12,-21953.58
29953,USD,Experience Cloud,ONGO,2019-01,1Y,D,YC,DEFERRED,0,1285.2
14551,GBP,Document Cloud,,2017-09,MTHLY,A,,DEFERRED,1,-1543.98
2973,AUD,LiveCycle,,2017-10,,,,RECOGNIZED,0,3537.25
4179,CAD,Experience Cloud,ACTL,2017-01,ONORE,,,RECOGNIZED,0,25578.56
1389,AUD,Document Cloud,,2018-03,1Y,A,,DEFERRED,1,818.29
5964,CHF,Experience Cloud,ONGO,2017-01,,D,YA,DEFERRED,0,27677.63
15797,GBP,Experience Cloud,1TME,2017-08,OCONS,,,PRO-SVC-INV,0,982578.27


## Clearing out the Non-Revenue billings from the file
 - No Idea what these are

In [9]:
df['Sales Type'].value_counts()

DEFERRED       22919
RECOGNIZED      5041
PRO-SVC-INV      844
NON-REV           92
Name: Sales Type, dtype: int64

In [10]:
print('Length of the dataframe before removing non-revenue billings: ', len(df))
df = df[df['Sales Type']!='NON-REV']
print('Length of the dataframe after removing non-revenue billings:  ', len(df))


Length of the dataframe before removing non-revenue billings:  28896
Length of the dataframe after removing non-revenue billings:   28804


## Starting to group the revenue by period, industry, etc

Attempting to group by the following categories
 - currency
 - period
 - sale type
 
May need to process the data differently with the deferred billings so we will start with the recognized and then the service billings

# DOING THIS ALL IN PANDAS WITH SPLIT APPLY COMBINE on Sales Type 


In [11]:
# First split the data into three dataframes
# Recognized billings
rec = df[df['Sales Type']=='RECOGNIZED']
svc = df[df['Sales Type']=='PRO-SVC-INV']
dfr = df[df['Sales Type']=='DEFERRED']

In [12]:
rec.columns

Index(['Document Currency', 'Enterprise Bu', 'Frequency',
       'Invoicing Fiscal Year-Period Desc', 'Product Configtype ID',
       'Revenue Recognition Category New', 'Rule For Bill Date', 'Sales Type',
       'Subscription Term', 'Completed Sales Doc Currency'],
      dtype='object')

# NOW WORKING ON THE BILLINGS

### Recognized Revenue

In [13]:
rec.sample(10)

Unnamed: 0,Document Currency,Enterprise Bu,Frequency,Invoicing Fiscal Year-Period Desc,Product Configtype ID,Revenue Recognition Category New,Rule For Bill Date,Sales Type,Subscription Term,Completed Sales Doc Currency
5351,CHF,Document Cloud,,2016-03,,,,RECOGNIZED,0,38761.0
21929,NZD,Experience Cloud,ACTL,2016-07,ONORE,,,RECOGNIZED,0,442456.09
4033,CAD,Experience Cloud,1TME,2016-01,ONORE,,,RECOGNIZED,0,2426.19
23479,SEK,Other Solutions,,2018-02,,,,RECOGNIZED,0,-1028.75
25375,USD,Creative,,2018-12,1Y,,,RECOGNIZED,0,37.5
12215,EUR,Experience Cloud,ONGO,2019-03,,,,RECOGNIZED,0,2686460.79
6211,CHF,Experience Cloud,ONGO,2019-01,,,,RECOGNIZED,0,14653.8
16102,GBP,Experience Cloud,1TME,2018-06,,,,RECOGNIZED,0,12473.9
3827,CAD,Creative,ONGO,2019-03,,,,RECOGNIZED,0,406.72
13094,EUR,Print & Publishing,,2018-07,IDRT,,,RECOGNIZED,0,4050.0


In [14]:
# testing groupby object
gb_rec = rec.groupby(['Document Currency', 'Enterprise Bu', 'Invoicing Fiscal Year-Period Desc'], as_index=False).sum()

In [15]:
# the Subscription term hangs around. We are dropping that here
gb_rec.drop(labels='Subscription Term', axis=1,inplace =True)

In [16]:
gb_rec.sample(10)

Unnamed: 0,Document Currency,Enterprise Bu,Invoicing Fiscal Year-Period Desc,Completed Sales Doc Currency
1577,NOK,Creative,2016-10,173385.0
853,EUR,Experience Cloud,2016-07,5221595.93
1124,GBP,Experience Cloud,2016-03,2368962.32
812,EUR,Document Cloud,2016-12,5793089.98
1055,GBP,Creative,2018-02,270417.31
50,AUD,Document Cloud,2015-10,739769.0
1238,GBP,Other Solutions,2018-09,-1170.56
1611,NOK,Document Cloud,2015-10,99360.0
84,AUD,Document Cloud,2018-08,374287.0
1987,SEK,Print & Publishing,2015-09,24560.0


In [17]:
print(type(gb_rec))

<class 'pandas.core.frame.DataFrame'>


## Now doing this for the service billings

In [18]:
# testing groupby object
gb_svc = svc.groupby(['Document Currency', 'Enterprise Bu', 'Invoicing Fiscal Year-Period Desc'], as_index=False).sum()

In [19]:
gb_svc.drop(labels='Subscription Term', axis=1,inplace =True)
gb_svc.head(5)

Unnamed: 0,Document Currency,Enterprise Bu,Invoicing Fiscal Year-Period Desc,Completed Sales Doc Currency
0,AUD,Experience Cloud,2015-06,589499.45
1,AUD,Experience Cloud,2015-07,656493.5
2,AUD,Experience Cloud,2015-08,370780.74
3,AUD,Experience Cloud,2015-09,1251726.13
4,AUD,Experience Cloud,2015-10,291324.07


# NOW WORKING ON DEFERRED BILLINGS

## Type B billings are service agreements that will have invoices submitted before the billings are reclassified to revenue. If no invoices are assigned to the billings, the billings become revenue in 12 months

In [20]:
# filter out the type B first then do a group_by
dfr_b = dfr[dfr['Revenue Recognition Category New']=='B']

In [21]:
gb_b = dfr_b.groupby(['Document Currency', 'Enterprise Bu', 'Invoicing Fiscal Year-Period Desc'], as_index=False).sum()

In [22]:
gb_b.drop(labels='Subscription Term', axis=1, inplace=True)

In [23]:
gb_b.head(10)

Unnamed: 0,Document Currency,Enterprise Bu,Invoicing Fiscal Year-Period Desc,Completed Sales Doc Currency
0,AUD,Creative,2017-10,8544.0
1,AUD,Document Cloud,2017-12,3283.2
2,AUD,Document Cloud,2018-06,3425.0
3,AUD,Document Cloud,2018-12,3425.0
4,AUD,Document Cloud,2019-03,4375.0
5,AUD,Experience Cloud,2015-06,258231.69
6,AUD,Experience Cloud,2015-07,261159.25
7,AUD,Experience Cloud,2015-08,270139.5
8,AUD,Experience Cloud,2015-09,482725.3
9,AUD,Experience Cloud,2015-10,287254.6


In [24]:
print('length of deferred billings : ', len(dfr))
print('length of the type B billings: ', len(dfr_b))

length of deferred billings :  22919
length of the type B billings:  997


## Now Type A Billings
These billings are on a billing plan. The product config tells us how long before they renew

 - '2Y' = 24 months
 - '1Y' = 12 months
 - 'MTHLY' = 1 month
 
NOTE: There are also other fields in the 'Product Configtype ID' field that do not map well to a rebill period.
To fix this, we need to load up a different file and determine the length of the sales contract (type A no config)
 

In [25]:
# filtering just the type A billings
dfr_a = dfr[dfr['Revenue Recognition Category New']=='A']

In [26]:
gb_a = dfr_a.groupby(['Document Currency', 'Enterprise Bu', 'Invoicing Fiscal Year-Period Desc',
                     'Product Configtype ID'], as_index=False).sum()
gb_a.drop(labels='Subscription Term', axis=1, inplace = True)

In [27]:
gb_a.sample(10)

Unnamed: 0,Document Currency,Enterprise Bu,Invoicing Fiscal Year-Period Desc,Product Configtype ID,Completed Sales Doc Currency
1646,GBP,Creative,2015-10,MTHLY,-3143.25
2290,JPY,Document Cloud,2016-03,MTHLY,-32671.0
37,AUD,Creative,2016-06,2Y,5198.0
388,AUD,Print & Publishing,2016-07,2Y,5823.0
1310,EUR,Document Cloud,2017-06,1Y,6304611.38
650,CHF,Creative,2016-07,MTHLY,-1597.52
645,CHF,Creative,2016-05,1Y,-15939.28
178,AUD,Document Cloud,2016-09,1Y,1070717.01
1580,EUR,Print & Publishing,2017-01,1Y,351934.7
3375,USD,Experience Cloud,2016-11,MTHLY,15195.6


In [28]:
gb_a['Product Configtype ID'].value_counts()

1Y       1856
MTHLY     823
2Y        657
OUNIV     171
OCONS      62
3Y         46
ONORE      28
Name: Product Configtype ID, dtype: int64

### Below is just a check to see how large the billing types are across all periods

In [29]:
gb_a_config = gb_a.groupby(['Product Configtype ID'], as_index=False).sum()
gb_a_config

Unnamed: 0,Product Configtype ID,Completed Sales Doc Currency
0,1Y,64716120000.0
1,2Y,197366000.0
2,3Y,87875110.0
3,MTHLY,-19824270.0
4,OCONS,28837430.0
5,ONORE,2258881.0
6,OUNIV,3328696.0


### These 'OCONS', 'ONORE' and 'OUNIV' data types are not actual product config IDs so we have to get them from a different data file. We are excluding these types below.

In [30]:
config_list = ['1Y', '2Y', '3Y', 'MTHLY']
test1 = gb_a['Product Configtype ID'].isin(config_list)
sum(test1)

3382

In [31]:
test1 = gb_a[gb_a['Product Configtype ID'].isin(config_list)]

## For now, lets just split this into gb_a_1Y, gb_a_2Y, gb_a_3y, gb_a_1M


In [32]:
gb_a_1Y = test1[test1['Product Configtype ID']=='1Y']
gb_a_2Y = test1[test1['Product Configtype ID']=='2Y']
gb_a_3Y = test1[test1['Product Configtype ID']=='3Y']
gb_a_1M = test1[test1['Product Configtype ID']=='MTHLY']

In [33]:
print('this is the lenght of type A 1M billings: ', len(gb_a_1M))
print('this is the lenght of type A 1Y billings: ', len(gb_a_1Y))
print('this is the lenght of type A 2Y billings: ', len(gb_a_2Y))
print('this is the lenght of type A 3Y billings: ', len(gb_a_3Y))

this is the lenght of type A 1M billings:  823
this is the lenght of type A 1Y billings:  1856
this is the lenght of type A 2Y billings:  657
this is the lenght of type A 3Y billings:  46


# TYPE D billings
These billings have a field 'Rule For Bill Date' that determines when new billings will occur
 - Monthly [Y1, Y2, Y3, Y5]
 - Quarterly [YQ]
 - Every 4 months [YT]
 - Semi-annual [YH]
 - Annual [YA, YC]
 - Every 2 years - [Y4]
 
 We also need to track the type D billings that do not have a 'Rule for Bill Date'

In [34]:
# for now just do a groupby on the type
# filtering just the type A billings
dfr_d = dfr[dfr['Revenue Recognition Category New']=='D']

In [35]:
gb_d = dfr_d.groupby(['Document Currency', 'Enterprise Bu', 'Invoicing Fiscal Year-Period Desc',
                     'Rule For Bill Date'], as_index=False).sum()
gb_d.drop(labels='Subscription Term', axis=1, inplace = True)

In [36]:
gb_d_mthly = gb_d[gb_d['Rule For Bill Date'].isin(['Y1', 'Y2', 'Y3', 'Y5'])]
gb_d_qtrly = gb_d[gb_d['Rule For Bill Date']=='YQ']
gb_d_four_mths = gb_d[gb_d['Rule For Bill Date']=='YT']
gb_d_semi_ann = gb_d[gb_d['Rule For Bill Date']=='YH']
gb_d_annual = gb_d[gb_d['Rule For Bill Date'].isin(['YA', 'YC'])]
gb_d_two_yrs = gb_d[gb_d['Rule For Bill Date']=='Y4']

In [37]:
gb_d['Rule For Bill Date'].value_counts()

YA    1630
Y3    1438
YQ     530
Y1     256
YC     246
YH     243
Y2      95
Y4      23
Y7      18
YT       8
BT       1
Name: Rule For Bill Date, dtype: int64

### Note: There is one type D billings that is listed as 'BT' I don't know what this means, but it was a $180 EUR payment from January 2017, so we will ignore

In [38]:
print('Length of monthly', len(gb_d_mthly))
print('Length of quarterly', len(gb_d_qtrly))
print('Length of four months', len(gb_d_four_mths))
print('Length of semi ann', len(gb_d_semi_ann))
print('Length of annual', len(gb_d_annual))
print('Length of two years', len(gb_d_two_yrs))

Length of monthly 1789
Length of quarterly 530
Length of four months 8
Length of semi ann 243
Length of annual 1876
Length of two years 23


In [39]:
what_is_BT = gb_d[gb_d['Rule For Bill Date']=='YT']
what_is_BT.head(10)

Unnamed: 0,Document Currency,Enterprise Bu,Invoicing Fiscal Year-Period Desc,Rule For Bill Date,Completed Sales Doc Currency
4054,USD,Experience Cloud,2015-06,YT,11666.6
4068,USD,Experience Cloud,2015-08,YT,549861.11
4081,USD,Experience Cloud,2015-10,YT,11666.6
4101,USD,Experience Cloud,2016-01,YT,612916.6
4106,USD,Experience Cloud,2016-02,YT,-601250.0
4166,USD,Experience Cloud,2016-12,YT,13440.0
4190,USD,Experience Cloud,2017-04,YT,13440.0
4214,USD,Experience Cloud,2017-08,YT,13440.0


## Note: There are only 8 payments (back from 2017) that paid on a 4 month basis. 
We will ignore these as well.

## NOW WE NEED TO BUILD A DATAFRAME THAT INTEGRATES THIS DATA

- We will have the following descriptive fields
   - Invoicing Fiscal Year-Period
   - Document Currency
   - Enterprise BU

- We will have the following fields based on rebilling rule
   - Recognized
   - Service
   - Monthly
   - Quarterly
   - Annual
   - Two Years
   - Three Years

In [40]:
# We need to do it this way when we get to a .py file!
list_df = [gb_rec, gb_svc, gb_b,
        gb_a_1M,    gb_a_1Y,    gb_a_2Y,       gb_a_3Y, 
        gb_d_mthly, gb_d_qtrly, gb_d_semi_ann, gb_d_annual, gb_d_two_yrs]

list_columns = ['recognized', 'service', 'deferred_B', 
    'deferred_1M_a', 'deferred_1Y_a', 'deferred_2Y_a', 'deferred_3Y_a',
    'deferred_1M_d', 'deferred_3M_d', 'deferred_6M_d', 'deferred_1Y_d', 'deferred_2Y_d']


In [41]:
def merge_new_dataframe(old_df, new_df, new_column):
    df_merged = pd.merge(old_df, new_df, how='outer', 
                     left_on=['Document Currency', 'Enterprise Bu', 'Invoicing Fiscal Year-Period Desc'],
                    right_on=['Document Currency', 'Enterprise Bu', 'Invoicing Fiscal Year-Period Desc'])
    df_merged.rename(index=str, columns={'Completed Sales Doc Currency': new_column}, inplace=True)
    
    #need to drop the product configtype id for merges where the new_df is of type A
    config_str = 'Product Configtype ID'
    rule_str = 'Rule For Bill Date'
    if config_str in df_merged.columns:
        df_merged.drop(columns=['Product Configtype ID'], inplace=True)
    
    if rule_str in df_merged.columns:
        df_merged.drop(columns=['Rule For Bill Date'], inplace=True)
        
    return df_merged

In [42]:
def merge_all_dataframes(list_df, list_columns):
    for i, df in enumerate(list_df):
        print('This is i:', i)
        #print("This is the df: ", df.head())
        print('referencing the column: ', list_columns[i])

        if i==0:
            df_merged = list_df[0]
            df_merged.rename(index=str, columns={'Completed Sales Doc Currency': list_columns[i]}, inplace=True)
        else:
            df_merged = merge_new_dataframe(df_merged, df, list_columns[i])

    return df_merged
    

In [43]:
df = merge_all_dataframes(list_df, list_columns)

This is i: 0
referencing the column:  recognized
This is i: 1
referencing the column:  service
This is i: 2
referencing the column:  deferred_B
This is i: 3
referencing the column:  deferred_1M_a
This is i: 4
referencing the column:  deferred_1Y_a
This is i: 5
referencing the column:  deferred_2Y_a
This is i: 6
referencing the column:  deferred_3Y_a
This is i: 7
referencing the column:  deferred_1M_d
This is i: 8
referencing the column:  deferred_3M_d
This is i: 9
referencing the column:  deferred_6M_d
This is i: 10
referencing the column:  deferred_1Y_d
This is i: 11
referencing the column:  deferred_2Y_d


In [44]:
df.sample(10)

Unnamed: 0,Document Currency,Enterprise Bu,Invoicing Fiscal Year-Period Desc,recognized,service,deferred_B,deferred_1M_a,deferred_1Y_a,deferred_2Y_a,deferred_3Y_a,deferred_1M_d,deferred_3M_d,deferred_6M_d,deferred_1Y_d,deferred_2Y_d
2632,USD,Experience Cloud,2017-06,11702632.45,12994334.68,4917375.84,,9690894.32,4288.0,,38670.0,10996079.73,2189860.93,-8278.83,
2053,RUB,Creative,2018-10,39611.0,,,-123173.52,-494015.37,,,29252186.16,,,3203173.81,
797,EUR,Creative,2016-06,2776380.25,,,-22536.11,25823658.82,349576.2,,20243677.67,20684.71,,293.36,
1103,EUR,Other Solutions,2017-11,-3445.3,,,,,,,,,,,
429,CHF,Document Cloud,2017-12,2090.0,,,-281.67,-368.93,,,39144.49,,,80026.32,
1179,GBP,Creative,2016-06,358940.39,,,-15003.05,4061860.37,26989.18,,8279669.61,,,2322822.16,
3092,CAD,Creative,2016-02,,,,,,,,,,,2469.44,
2498,USD,Document Cloud,2016-12,23227319.43,,30593.2,-7816.41,24092110.55,487470.0,,3827904.61,192191.4,,29944098.34,
1939,NOK,Experience Cloud,2016-09,260199.34,218078.5,45000.0,,,,,,491640.0,,800202.0,
711,DKK,LiveCycle,2016-04,3560.0,,,,,,,,,,,


In [45]:
def clean_df_columns(df):
    
    # clean up NaNs before adding 
    df = df.fillna(value=0)
    
    # Monthly
    df['deferred_1M'] = df['deferred_1M_a']+df['deferred_1M_d']
    df.drop(labels=['deferred_1M_a', 'deferred_1M_d'], axis=1, inplace=True)
    
    # Annual
    df['deferred_1Y'] = df['deferred_1Y_a']+df['deferred_1Y_d']
    df.drop(labels=['deferred_1Y_a', 'deferred_1Y_d'], axis=1, inplace=True)
    
    # Two-Year
    df['deferred_2Y'] = df['deferred_2Y_a']+df['deferred_2Y_d']
    df.drop(labels=['deferred_2Y_a', 'deferred_2Y_d'], axis=1, inplace=True)
    
    # renaming 3Y, 3M and 6M
    df.rename(index=str, columns = {'deferred_3Y_a':'deferred_3Y', 'deferred_3M_d':'deferred_3M', 
                               'deferred_6M_d': 'deferred_6M'}, inplace=True)

    #cleaning up the longer column names
    df.rename(index=str, columns = {'Document Currency': 'curr',
                               'Enterprise Bu':'BU',
                               'Invoicing Fiscal Year-Period Desc':'period'}, inplace=True)
    
    return df

In [46]:
df = clean_df_columns(df)

In [47]:
df.sample(10)

Unnamed: 0,curr,BU,period,recognized,service,deferred_B,deferred_3Y,deferred_3M,deferred_6M,deferred_1M,deferred_1Y,deferred_2Y
2372,USD,Creative,2016-08,8875797.92,6000.0,1500.0,0.0,-51430.56,13125.0,70285480.0,50338780.0,123325.24
1217,GBP,Document Cloud,2015-09,904810.54,0.0,0.0,0.0,0.0,0.0,216788.6,884514.7,937.2
1215,GBP,Document Cloud,2015-07,781227.06,0.0,0.0,0.0,0.0,0.0,181649.7,406925.5,188.26
3026,EUR,LiveCycle,2019-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,112199.3,0.0
2282,SEK,Print & Publishing,2017-05,9300.0,0.0,0.0,0.0,0.0,0.0,31479.12,0.0,0.0
2494,USD,Document Cloud,2016-10,13208450.28,0.0,0.0,0.0,16375.0,20237.7,3114133.0,29773310.0,455454.0
1527,JPY,Creative,2016-02,79072590.0,0.0,0.0,0.0,0.0,0.0,733231800.0,598947200.0,2185260.0
499,CHF,LiveCycle,2016-04,1300.0,0.0,0.0,0.0,0.0,0.0,0.0,3148.75,0.0
1939,NOK,Experience Cloud,2016-09,260199.34,218078.5,45000.0,0.0,491640.0,0.0,0.0,800202.0,0.0
695,DKK,Experience Cloud,2018-04,43214.94,64725.0,48319.0,0.0,0.0,0.0,0.0,641135.0,0.0


In [48]:
# Make this a function to be cleaned up somehow
del dfr
del dfr_a
del dfr_b
del dfr_d
del gb_a
del gb_a_1M
del gb_a_1Y
del gb_a_2Y
del gb_a_3Y
del gb_b, 
del gb_d
del gb_svc, gb_rec, gb_d_two_yrs
del gb_d_four_mths, gb_d_qtrly, gb_d_semi_ann


# Now working on the ZCC billings

These billings are type D billings that did not populate the rebill_rule field of the database.

They have a 'sales document type' = 'ZCC"

The billings themselves are being created from a tableau report that looks for additions to the deferred revenue waterfall based on billings of type D and have a sales document type of ZCC

In [49]:
whos

Variable               Type         Data/Info
---------------------------------------------
a                      Index        Index(['USD', 'EUR', 'GBP<...>],\n      dtype='object')
clean_df_columns       function     <function clean_df_columns at 0x1091cf510>
config_list            list         n=4
df                     DataFrame         curr                <...>n[3116 rows x 12 columns]
gb_a_config            DataFrame      Product Configtype ID  <...>             3.328696e+06
gb_d_annual            DataFrame         Document Currency   <...>\n[1876 rows x 5 columns]
gb_d_mthly             DataFrame         Document Currency   <...>\n[1789 rows x 5 columns]
keep_curr              Series       USD    7791\nEUR    5291\<...>nt Currency, dtype: int64
keep_these             ndarray      23: 23 elems, type `bool`, 23 bytes
list_columns           list         n=12
list_df                list         n=12
merge_all_dataframes   function     <function merge_all_dataframes at 0x10772ec80>


In [51]:
df.sample(10)

Unnamed: 0,curr,BU,period,recognized,service,deferred_B,deferred_3Y,deferred_3M,deferred_6M,deferred_1M,deferred_1Y,deferred_2Y
1302,GBP,Experience Cloud,2016-08,1571506.0,886677.19,35394.0,0.0,307345.8,0.0,-8375.0,1118074.06,0.0
1745,JPY,LiveCycle,2018-04,25000.0,0.0,0.0,0.0,0.0,0.0,0.0,1124881.0,0.0
2720,USD,LiveCycle,2015-12,988034.1,0.0,0.0,0.0,0.0,0.0,0.0,2069136.37,0.0
335,CAD,LiveCycle,2017-09,640.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2202,SEK,LiveCycle,2016-04,8880.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2051,RUB,Creative,2018-08,110934.0,0.0,0.0,0.0,0.0,0.0,21843284.93,3031576.47,0.0
2645,USD,Experience Cloud,2017-10,8630287.0,7760830.25,3527341.85,0.0,5170365.72,816184.25,1760.0,16302855.38,2046.0
1648,JPY,Document Cloud,2018-01,363134800.0,0.0,0.0,0.0,0.0,0.0,13630929.0,54830666.0,0.0
1169,GBP,Creative,2015-08,232312.4,0.0,0.0,0.0,0.0,0.0,4592605.8,3896257.13,80904.98
3042,NOK,Print & Publishing,2017-01,0.0,0.0,0.0,0.0,0.0,0.0,13508.71,689.5,0.0


# TO BE DONE:

1. Clean up the type F billings (at least check to see if they are necessary)
2. Make a function to delete all intermediate dataframes
3. Add type A no config function
4. Add type D ZCC billings

5. Work on the forecast part of this

6. Load up FX rates

In [58]:
# Adobe financial calendar
df_cal = pd.read_excel('../data/old/ADOBE_FINANCIAL_CALENDAR.xlsx', 'ADBE_cal')

In [59]:
df_cal.head()

Unnamed: 0,Year,Quarter,Period,Qtr_Ticker,Qtr_Start,Qtr_End,Per_Ticker,Per_Start,Per_End
0,2010,1,1,Q1_10,2009-11-28,2010-03-05,P1_10,2009-11-28,2010-01-01
1,2010,1,2,Q1_10,2009-11-28,2010-03-05,P2_10,2010-01-02,2010-01-29
2,2010,1,3,Q1_10,2009-11-28,2010-03-05,P3_10,2010-01-30,2010-03-05
3,2010,2,4,Q2_10,2010-03-06,2010-06-04,P4_10,2010-03-06,2010-04-02
4,2010,2,5,Q2_10,2010-03-06,2010-06-04,P5_10,2010-04-03,2010-04-30


## ZCC Billings

In [54]:
df_ZCC = pd.read_excel('../data/old/type_D_ZCC_billings.xlsx', sheet_name='DC')

In [57]:
df_ZCC.sample(10)

Unnamed: 0,Document Currency,Enterprise BU Description,Rev Rec Category,Rule for Bill Date Code,Week of FICA Posting Date (YYYYMMDD) (copy),DF Additions - Doc Curr
3638,USD,Document Cloud,D,YQ,2019-01-27,16695.0
470,AUD,Experience Cloud,D,YQ,2016-12-25,4000.0
836,CAD,Experience Cloud,D,YA,2018-09-02,832667.06
4014,USD,Experience Cloud,D,YA,2016-03-13,1659373.05
1016,CHF,Experience Cloud,D,YA,2018-09-02,427350.0
1106,DKK,Experience Cloud,D,YA,2015-09-27,2183264.65
3923,USD,Experience Cloud,D,Y3,2015-09-13,46126.0
2377,GBP,Experience Cloud,D,YA,2018-12-30,784219.72
918,CAD,Print & Publishing,D,YA,2018-03-18,958.0
945,CHF,Experience Cloud,D,YA,2016-01-10,10200.0


### Checking that we do not have any currencies that need to be removed


In [60]:
df_ZCC['Document Currency'].value_counts()

USD    1295
EUR     844
AUD     649
GBP     616
JPY     503
CAD     283
DKK     166
SEK     165
CHF     104
NOK      71
SGD       1
Name: Document Currency, dtype: int64

In [61]:
model_dict

{'curr_removed': ['CLP', 'COP', 'ARS', 'PEN', 'INR', 'BMD', 'MXP']}

In [62]:
ZCC_curr = df_ZCC['Document Currency'].unique()

In [65]:
for item in ZCC_curr:
    if item in model_dict['curr_removed']:
        print('This currency needs to be removed: ', item)

There are no currencies that need to be removed

# Now we are clearing out nans

In [79]:
print(sum(df_ZCC['DF Additions - Doc Curr'].isna()))
ZCC_isna = df_ZCC['DF Additions - Doc Curr'].isna()
print("This is the length of the ZCC records before clearing NAs: ", len(df_ZCC))
df_ZCC = df_ZCC[~ZCC_isna]
print("This is the length of the ZCC records before clearing NAs: ", len(df_ZCC))

34
This is the length of the ZCC records before clearing NAs:  4697
This is the length of the ZCC records before clearing NAs:  4663


In [81]:
df_cal.sample(5)

Unnamed: 0,Year,Quarter,Period,Qtr_Ticker,Qtr_Start,Qtr_End,Per_Ticker,Per_Start,Per_End
180,2025,1,1,Q1_25,2024-11-30,2025-02-28,P1_25,2024-11-30,2024-12-27
243,2030,2,4,Q2_30,2030-03-02,2030-05-31,P4_30,2030-03-02,2030-03-29
93,2017,4,10,Q4_17,2017-09-02,2017-12-01,P10_17,2017-09-02,2017-09-29
97,2018,1,2,Q1_18,2017-12-02,2018-03-02,P2_18,2017-12-30,2018-01-26
173,2024,2,6,Q2_24,2024-03-02,2024-05-31,P6_24,2024-04-27,2024-05-31


In [82]:
df_ZCC.sample(10)

Unnamed: 0,Document Currency,Enterprise BU Description,Rev Rec Category,Rule for Bill Date Code,Week of FICA Posting Date (YYYYMMDD) (copy),DF Additions - Doc Curr
1154,DKK,Experience Cloud,D,YQ,2016-04-03,39920.0
640,AUD,Print & Publishing,D,YA,2018-08-12,11176.89
308,AUD,Experience Cloud,D,YA,2017-11-19,135408.06
4535,USD,Experience Cloud,D,YQ,2015-04-05,592752.13
3317,SEK,Experience Cloud,D,YH,2015-08-02,-68490.0
444,AUD,Experience Cloud,D,YH,2017-02-05,24999.9
2051,GBP,Creative,D,YA,2017-08-06,1200.0
3968,USD,Experience Cloud,D,YA,2015-09-06,607282.79
1335,EUR,Document Cloud,D,YA,2018-06-17,80578.93
2664,JPY,Creative,D,YA,2018-01-14,122200.0


In [107]:
xx = df_ZCC['Week of FICA Posting Date (YYYYMMDD) (copy)'].iloc[0]
xx

Timestamp('2017-07-16 00:00:00')

In [105]:
df_ZCC.head()

Unnamed: 0,Document Currency,Enterprise BU Description,Rev Rec Category,Rule for Bill Date Code,Week of FICA Posting Date (YYYYMMDD) (copy),DF Additions - Doc Curr
1,AUD,Creative,D,YA,2017-07-16,2070.0
2,AUD,Creative,D,YA,2017-05-28,4560.0
3,AUD,Creative,D,YA,2018-01-07,6657.92
4,AUD,Creative,D,YA,2017-12-31,8274.87
5,AUD,Creative,D,YA,2018-04-01,12164.31


In [132]:
def add_ZCC_periods(df_ZCC, df_cal):
    ''' 
    Takes each ZCC period billing and maps it to the fiscal calendar then assignes the
    ZCC billing to the dataframe df
    '''
    period_list = []
    
    for i in range(len(df_ZCC)):
        # this_date is the date we need mapped to a period
        this_date = df_ZCC['Week of FICA Posting Date (YYYYMMDD) (copy)'].iloc[i]
        
        #max index will be the index to the period in our financial calendar
        this_index = df_cal['Per_End']<=this_date
        max_index = sum(this_index)

        this_year = df_cal['Year'].iloc[max_index].astype(str)
        this_period= df_cal['Period'].iloc[max_index].astype(str)

        # formatting the period string from the calendar
        if len(this_period)== 1:
            this_period = '0'+this_period

        period_ticker = this_year + '-' + this_period

        period_list.append(period_ticker)

    df_ZCC['period'] = period_list
    return df_ZCC


In [133]:
df_ZCC.head()

Unnamed: 0,Document Currency,Enterprise BU Description,Rev Rec Category,Rule for Bill Date Code,Week of FICA Posting Date (YYYYMMDD) (copy),DF Additions - Doc Curr
1,AUD,Creative,D,YA,2017-07-16,2070.0
2,AUD,Creative,D,YA,2017-05-28,4560.0
3,AUD,Creative,D,YA,2018-01-07,6657.92
4,AUD,Creative,D,YA,2017-12-31,8274.87
5,AUD,Creative,D,YA,2018-04-01,12164.31


In [134]:
df_ZCC = add_ZCC_periods(df_ZCC, df_cal)

In [135]:
df_ZCC.tail()

Unnamed: 0,Document Currency,Enterprise BU Description,Rev Rec Category,Rule for Bill Date Code,Week of FICA Posting Date (YYYYMMDD) (copy),DF Additions - Doc Curr,period
4692,USD,Print & Publishing,D,YC,2019-02-10,4410.0,2019-03
4693,USD,Print & Publishing,D,YC,2018-12-16,8000.0,2019-01
4694,USD,Print & Publishing,D,YC,2018-12-30,14745.0,2019-02
4695,USD,Print & Publishing,D,YC,2019-02-24,17218.6,2019-03
4696,USD,Print & Publishing,D,YH,2018-12-30,3256.5,2019-02


## Now we need to determine the length of the ZCC billings to and add this to our dataframe

In [139]:
df.columns

Index(['curr', 'BU', 'period', 'recognized', 'service', 'deferred_B',
       'deferred_3Y', 'deferred_3M', 'deferred_6M', 'deferred_1M',
       'deferred_1Y', 'deferred_2Y'],
      dtype='object')

In [140]:
df_ZCC.columns

Index(['Document Currency', 'Enterprise BU Description', 'Rev Rec Category',
       'Rule for Bill Date Code',
       'Week of FICA Posting Date (YYYYMMDD) (copy)',
       'DF Additions - Doc Curr', 'period'],
      dtype='object')

In [145]:
gb_ZCC = df_ZCC.groupby(['Document Currency', 'Enterprise BU Description', 'period',
                     'Rule for Bill Date Code'], as_index=False).sum()
#gb_d.drop(labels='Subscription Term', axis=1, inplace = True)

In [147]:
gb_ZCC.sample(20)

Unnamed: 0,Document Currency,Enterprise BU Description,period,Rule for Bill Date Code,DF Additions - Doc Curr
614,EUR,Experience Cloud,2015-04,Y1,32993.8
1805,USD,Experience Cloud,2019-03,YH,1513101.73
1438,USD,Creative,2017-09,YA,665194.38
1789,USD,Experience Cloud,2019-01,Y3,3886.0
1587,USD,Experience Cloud,2016-04,Y1,3360430.99
1090,JPY,Creative,2018-10,YA,60891830.0
1741,USD,Experience Cloud,2018-05,Y1,2432045.71
646,EUR,Experience Cloud,2015-12,YA,418800.53
1796,USD,Experience Cloud,2019-02,Y3,13886.0
439,DKK,Experience Cloud,2015-06,YQ,45879.51


In [149]:
gb_ZCC['Rule for Bill Date Code'].value_counts()

YA    703
YQ    405
YH    230
Y1    222
YC    184
Y3     67
Y4     11
YX      2
Name: Rule for Bill Date Code, dtype: int64

In [153]:
gb_ZCC_mthly = gb_ZCC[gb_ZCC['Rule for Bill Date Code'].isin(['Y1', 'Y2', 'Y3', 'Y5'])]
gb_ZCC_qtrly = gb_ZCC[gb_ZCC['Rule for Bill Date Code']=='YQ']
gb_ZCC_four_mths = gb_ZCC[gb_ZCC['Rule for Bill Date Code']=='YT']
gb_ZCC_semi_ann = gb_ZCC[gb_ZCC['Rule for Bill Date Code']=='YH']
gb_ZCC_annual = gb_ZCC[gb_ZCC['Rule for Bill Date Code'].isin(['YA', 'YC'])]
gb_ZCC_two_yrs = gb_ZCC[gb_ZCC['Rule for Bill Date Code']=='Y4']

In [156]:
len(gb_ZCC_four_mths)

0

In [152]:
what_is_YX = gb_ZCC[gb_ZCC['Rule for Bill Date Code']=='YX']
what_is_YX.head(10)

Unnamed: 0,Document Currency,Enterprise BU Description,period,Rule for Bill Date Code,DF Additions - Doc Curr
590,EUR,Document Cloud,2017-12,YX,0.0
1118,JPY,Document Cloud,2018-11,YX,1522800.0


In [93]:
df_cal.sample(5)

Unnamed: 0,Year,Quarter,Period,Qtr_Ticker,Qtr_Start,Qtr_End,Per_Ticker,Per_Start,Per_End
205,2027,1,2,Q1_27,2026-11-28,2027-03-05,P2_27,2027-01-02,2027-01-29
177,2024,4,10,Q4_24,2024-08-31,2024-11-29,P10_24,2024-08-31,2024-09-27
203,2026,4,12,Q4_26,2026-08-29,2026-11-27,P12_26,2026-10-24,2026-11-27
29,2012,2,6,Q2_12,2012-03-03,2012-06-01,P6_12,2012-04-28,2012-06-01
117,2019,4,10,Q4_19,2019-08-31,2019-11-29,P10_19,2019-08-31,2019-09-27


In [95]:
whos

Variable               Type         Data/Info
---------------------------------------------
ZCC_curr               ndarray      11: 11 elems, type `object`, 88 bytes
ZCC_isna               Series       0        True\n1       Fa<...>Length: 4697, dtype: bool
a                      DataFrame         Document Currency En<...>\n[4663 rows x 6 columns]
clean_df_columns       function     <function clean_df_columns at 0x1091cf510>
config_list            list         n=4
df                     DataFrame         curr                <...>n[3116 rows x 12 columns]
df_ZCC                 DataFrame         Document Currency En<...>\n[4663 rows x 6 columns]
df_cal                 DataFrame         Year  Quarter  Perio<...>n\n[252 rows x 9 columns]
gb_a_config            DataFrame      Product Configtype ID  <...>             3.328696e+06
gb_d_annual            DataFrame         Document Currency   <...>\n[1876 rows x 5 columns]
gb_d_mthly             DataFrame         Document Currency   <...>\n[17