# Testing Deferred Revenue in Python
Will this be easier for everyone to use than Matlab?

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Step 1: Processing Base Billings Data

In [2]:
# Load the 'bill_DC' sheet of the base billings workbook into a dataframe
df = pd.read_excel(
    io='../data/old/base_billings.xlsx',
    sheet_name='bill_DC',
)

In [3]:
# Peek at 10 randomly chosen rows; no random_state is passed, so the rows shown differ on every run
df.sample(10)

Unnamed: 0,Document Currency,Enterprise Bu,Frequency,Invoicing Fiscal Year-Period Desc,Product Configtype ID,Revenue Recognition Category New,Rule For Bill Date,Sales Type,Subscription Term,Completed Sales Doc Currency
14809,GBP,Document Cloud,,2018-11,,A,,DEFERRED,1,-1073.97
6473,CHF,Print & Publishing,,2019-03,1Y,D,,DEFERRED,1,49.5
14223,GBP,Document Cloud,,2016-02,1Y,A,,DEFERRED,1,53.7
23521,SEK,Print & Publishing,,2015-12,1Y,D,Y3,DEFERRED,1,26883.02
15095,GBP,Experience Cloud,1TME,2015-11,OCONS,,,PRO-SVC-INV,0,711479.8
8012,EUR,Creative,,2015-08,1Y,A,,DEFERRED,1,-40204.36
17486,JPY,Creative,,2015-10,1Y,A,,DEFERRED,1,-845721.0
1711,AUD,Experience Cloud,,2015-09,,,,RECOGNIZED,0,15189.0
19002,JPY,Document Cloud,,2019-02,,D,Y3,DEFERRED,1,1084263.0
21929,NZD,Experience Cloud,ACTL,2016-07,ONORE,,,RECOGNIZED,0,442456.09


## Filter that removes any currency that has 10 or fewer transactions


In [4]:
# Count the number of rows (transactions) per currency; the result is a
# Series indexed by currency code.
vc = df.loc[:, 'Document Currency'].value_counts()

In [5]:
# Boolean mask over the counts: True where a currency has more than 10 transactions
keep_these = vc.values > 10
# Counts for the currencies that pass the threshold
keep_curr = vc.loc[keep_these]
a = keep_curr.index
# Keep only the rows whose currency is in the retained list
df = df.loc[df['Document Currency'].isin(a)]

## Just keeping track of the currencies we removed in our model_dict data structure

In [6]:
# Mask of currencies with 10 or fewer transactions (the ones filtered out of df)
remove_these = vc.values <= 10
# Record the removed currency codes under 'curr_removed'
removed_codes = vc.index[remove_these]
model_dict = {'curr_removed': list(removed_codes)}
model_dict

{'curr_removed': ['CLP', 'COP', 'ARS', 'PEN', 'INR', 'BMD', 'MXP']}

# Removing any of the values that are zero

In [7]:
# Drop rows whose billing amount is exactly zero, reporting the row count on either side
print('This is the length of the dataframe before removing zeros: ', len(df))
df = df.loc[df['Completed Sales Doc Currency'] != 0]
print('This is the length of the dataframe after removing zeros: ', len(df))

This is the length of the dataframe before removing zeros:  31418
This is the length of the dataframe after removing zeros:  28896


In [8]:
# Spot-check 20 random rows after filtering; unseeded, so the selection changes between runs
df.sample(20)

Unnamed: 0,Document Currency,Enterprise Bu,Frequency,Invoicing Fiscal Year-Period Desc,Product Configtype ID,Revenue Recognition Category New,Rule For Bill Date,Sales Type,Subscription Term,Completed Sales Doc Currency
13538,GBP,Creative,,2016-09,1Y,D,Y3,DEFERRED,1,8352551.0
16627,GBP,Print & Publishing,,2015-11,1Y,D,YA,DEFERRED,12,59.16
10220,EUR,Experience Cloud,ACTL,2015-10,ONORE,,,RECOGNIZED,0,666357.2
6233,CHF,Experience Cloud,CERT,2019-03,1Y,D,YA,DEFERRED,12,16554.44
30938,USD,Print & Publishing,,2017-05,,A,,DEFERRED,0,4572865.0
15328,GBP,Experience Cloud,1TME,2016-06,,,,NON-REV,0,5915.0
25970,USD,Document Cloud,,2016-11,1Y,A,,DEFERRED,12,6608494.0
18655,JPY,Document Cloud,,2017-05,,,,RECOGNIZED,0,270841500.0
25942,USD,Document Cloud,,2016-10,,D,Y3,DEFERRED,1,459918.8
201,AUD,Creative,,2016-04,,,,RECOGNIZED,0,249326.4


## Clearing out the Non-Revenue billings from the file
 - **TODO:** confirm what the `NON-REV` billings represent; they are excluded for now because their meaning is unknown.

In [9]:
# Row count for each distinct 'Sales Type' value
df['Sales Type'].value_counts()

DEFERRED       22919
RECOGNIZED      5041
PRO-SVC-INV      844
NON-REV           92
Name: Sales Type, dtype: int64

In [10]:
# Exclude the 'NON-REV' sales type, printing the row count before and after
print('Length of the dataframe before removing non-revenue billings: ', len(df))
df = df.loc[df['Sales Type'] != 'NON-REV']
print('Length of the dataframe after removing non-revenue billings:  ', len(df))


Length of the dataframe before removing non-revenue billings:  28896
Length of the dataframe after removing non-revenue billings:   28804


## Starting to group the revenue by period, industry, etc

Attempting to group by the following categories
 - currency
 - period
 - sale type
 
May need to process the data differently with the deferred billings so we will start with the recognized and then the service billings

# Hierarchy of the data dictionary
 - BU
 - Rev type
 - currency
  -- then vectors for currency
  
bill_hist['BU']['Deferred']['EUR']

In [12]:
# Split the billings into three dataframes, one per sales type:
# recognized (rec), service billings (svc) and deferred (dfr).
rec, svc, dfr = (
    df[df['Sales Type'] == sales_type]
    for sales_type in ('RECOGNIZED', 'PRO-SVC-INV', 'DEFERRED')
)

In [11]:
# List the column labels available for grouping
df.columns

Index(['Document Currency', 'Enterprise Bu', 'Frequency',
       'Invoicing Fiscal Year-Period Desc', 'Product Configtype ID',
       'Revenue Recognition Category New', 'Rule For Bill Date', 'Sales Type',
       'Subscription Term', 'Completed Sales Doc Currency'],
      dtype='object')

### Recognized Revenue

In [14]:
# testing a nested dictionary


In [17]:
# Two dummy 30-element integer vectors used to try out the nested-dictionary layout
this_EUR = np.arange(30)        # 0 .. 29
this_GBP = np.arange(31, 61)    # 31 .. 60


array([31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
       48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60])

In [18]:
# Inner level of the test structure: currency code -> vector
this_rev_dict = dict(EUR=this_EUR, GBP=this_GBP)

In [19]:
# Display the currency -> vector mapping
this_rev_dict

{'EUR': array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]),
 'GBP': array([31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
        48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60])}

In [20]:
# Outer level of the test structure: revenue type -> (currency -> vector)
this_BU_dict = dict(recognized=this_rev_dict)

In [21]:
# Display the full two-level nested dictionary
this_BU_dict

{'recognized': {'EUR': array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
         17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]),
  'GBP': array([31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
         48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60])}}

In [25]:
# Two-level lookup: revenue type first, then currency
this_BU_dict['recognized']['GBP']

array([31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
       48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60])

# NOW WORKING ON THE REVENUE

In [27]:
# Spot-check 10 random rows of the recognized billings; unseeded, so the rows differ per run
rec.sample(10)

Unnamed: 0,Document Currency,Enterprise Bu,Frequency,Invoicing Fiscal Year-Period Desc,Product Configtype ID,Revenue Recognition Category New,Rule For Bill Date,Sales Type,Subscription Term,Completed Sales Doc Currency
31122,USD,Print & Publishing,,2018-02,,,,RECOGNIZED,0,2609817.89
13678,GBP,Creative,,2017-06,,,,RECOGNIZED,0,326766.87
30439,USD,Other Solutions,,2018-09,,,,RECOGNIZED,0,-14669.87
19965,JPY,Experience Cloud,ACTL,2019-01,ONORE,,,RECOGNIZED,0,14733753.0
23049,SEK,Document Cloud,,2019-02,,,,RECOGNIZED,0,27466.0
8449,EUR,Creative,,2017-01,2V,,,RECOGNIZED,0,14595.0
16358,GBP,Experience Cloud,ACTL,2019-02,ONORE,,,RECOGNIZED,0,33652.01
16998,GBP,Print & Publishing,,2018-05,,,,RECOGNIZED,0,68129.02
12670,EUR,Print & Publishing,,2016-04,IDRT,,,RECOGNIZED,0,3200.0
26594,USD,Document Cloud,,2018-11,,,,RECOGNIZED,0,12514522.46


In [30]:
# Sum the recognized billings per currency / business unit / fiscal period.
# numeric_only=True restricts the sum to the numeric columns explicitly, instead
# of relying on pandas silently dropping the text columns (newer pandas versions
# no longer drop them and try to sum them).
gb_rec = rec.groupby(
    ['Document Currency', 'Enterprise Bu', 'Invoicing Fiscal Year-Period Desc'],
    as_index=False,
).sum(numeric_only=True)

In [32]:
# Display the grouped frame (one row per currency / BU / fiscal period)
gb_rec

Unnamed: 0,Document Currency,Enterprise Bu,Invoicing Fiscal Year-Period Desc,Subscription Term,Completed Sales Doc Currency
0,AUD,Creative,2015-06,0,490156.86
1,AUD,Creative,2015-07,0,235045.62
2,AUD,Creative,2015-08,0,284334.70
3,AUD,Creative,2015-09,0,369648.88
4,AUD,Creative,2015-10,0,287292.48
5,AUD,Creative,2015-11,0,361723.22
6,AUD,Creative,2015-12,0,276178.28
7,AUD,Creative,2016-01,0,270306.21
8,AUD,Creative,2016-02,0,116996.45
9,AUD,Creative,2016-03,0,235382.84


In [33]:
# Distinct currency codes present in the grouped recognized billings
curr_list = pd.unique(gb_rec['Document Currency'])
curr_list

array(['AUD', 'BRL', 'CAD', 'CHF', 'DKK', 'EUR', 'GBP', 'HKD', 'JPY',
       'NOK', 'NZD', 'RUB', 'SEK', 'SGD', 'USD'], dtype=object)

In [35]:
# Distinct business units present in the grouped recognized billings
bu_list = pd.unique(gb_rec['Enterprise Bu'])
bu_list

array(['Creative', 'Document Cloud', 'Experience Cloud', 'LiveCycle',
       'Other Solutions', 'Print & Publishing'], dtype=object)

In [45]:
# Select the billing amounts for one currency / business-unit pair.
# NOTE(review): `curr` and `bu` are not assigned in any cell visible here, so this
# relies on values left over in the kernel — TODO assign them explicitly before this line.
a = gb_rec[(gb_rec['Document Currency']==curr)&(gb_rec['Enterprise Bu']==bu)]['Completed Sales Doc Currency']
# NOTE(review): `bill_hist` has not been created at this point, so this line raises
# NameError (see the captured output) — TODO build the nested dictionary before indexing it.
bill_hist[bu]['rec'][curr].values

NameError: name 'bill_hist' is not defined

In [44]:
# Show the selected billing amounts as a plain NumPy array
a.values

array([18579453.89,  2950549.57,  3176253.61, 13576918.15,  3699382.57,
        3812063.18, 12312241.49,  3659986.17,  3153950.82, 10286857.24,
        2808624.21,  4170901.12,  9115649.14,  4888457.11,  2838795.62,
       10723658.59,  3086688.47,  2735258.97,  8972742.09,  1864827.5 ,
        2837831.27,  8991970.65,  2310718.62,  2148166.88,  7399186.83,
        2490961.22,  3132712.19, 16542433.43,  1628221.84,  2707930.7 ,
       14141201.72,  1369961.41,  2663330.14, 13834373.14,  2098586.73,
        1969150.34, 11511114.38,   882846.23,  1933801.71, 15691543.23,
        3213718.08,  8812386.51,  5894484.54,  1837388.61,  3201493.47,
       15516091.68])

In [75]:
def build_recognized_dict(gb_rec, bu_list=None, curr_list=None):
    '''
    Build a nested dictionary of billing amounts keyed by business unit and currency.

    Parameters
    ----------
    gb_rec : pandas.DataFrame
        Grouped billings containing the columns 'Enterprise Bu',
        'Document Currency' and 'Completed Sales Doc Currency'.
    bu_list : iterable, optional
        Business units to include. Defaults to the distinct values of
        gb_rec['Enterprise Bu'].
    curr_list : iterable, optional
        Currencies to include. Defaults to the distinct values of
        gb_rec['Document Currency'].

    Returns
    -------
    dict
        rec_dict[bu][curr] is a NumPy array holding the
        'Completed Sales Doc Currency' values of the matching rows, in the
        row order of gb_rec. A (bu, curr) pair with no rows maps to an
        empty array.

    Example
    -------
    To access the billings history for one of the BUs in a currency:
        my_variable = rec_dict['Creative']['EUR']
    '''
    # Derive the keys from the data itself so the function does not depend on
    # notebook-level variables having been defined by an earlier cell.
    if bu_list is None:
        bu_list = gb_rec['Enterprise Bu'].unique()
    if curr_list is None:
        curr_list = gb_rec['Document Currency'].unique()

    rec_dict = {}

    for bu in bu_list:
        # Filter on the business unit once, outside the currency loop
        bu_rows = gb_rec[gb_rec['Enterprise Bu'] == bu]

        curr_dict = {}
        for curr in curr_list:
            these_billings = bu_rows.loc[
                bu_rows['Document Currency'] == curr,
                'Completed Sales Doc Currency',
            ]
            curr_dict[curr] = these_billings.values

        rec_dict[bu] = curr_dict

    # Return only after every business unit has been processed; returning inside
    # the loop would hand back the first business unit alone.
    return rec_dict
        


In [76]:
# Build the nested BU -> currency -> billings dictionary from the grouped recognized billings
rec_dict = build_recognized_dict(gb_rec)

In [78]:
# Display the whole revenue dictionary.
# To access one part of it, index by BU and then currency, e.g. rec_dict['Creative']['EUR']
rec_dict


{'Creative': {'AUD': array([490156.86, 235045.62, 284334.7 , 369648.88, 287292.48, 361723.22,
         276178.28, 270306.21, 116996.45, 235382.84, 249819.19, 213088.53,
         246758.37, 202289.36, 151958.31, 343043.28, 234844.56, 285379.58,
         284149.11, 152712.79, 279610.83, 255250.91, 207815.06, 119076.6 ,
         372395.91, 219784.74, 136292.71, 391741.71, 233924.24, 243945.01,
         269951.02, 123361.66, 307901.48, 257843.68, 203222.5 , 194062.07,
         207143.04, 195203.02, 203324.92, 207983.07, 176830.19, 108316.18,
         278448.03, 110846.98, 216462.6 , 164031.92]),
  'BRL': array([12905., 25407., 22226., 24020., 12711., 36690.,  2494., 13270.,
         32219.]),
  'CAD': array([310.4 , 508.4 , 406.72]),
  'CHF': array([188241.07, 114278.44,  80119.09, 114220.5 ,  87940.5 , 116953.3 ,
          85089.4 , 124785.93,  64209.52, 100903.8 ,  55254.56,  46202.  ,
          62433.  ,  35538.  ,  39605.  ,  51104.  ,  44254.  ,  55952.  ,
          87814.  ,  61753. 

In [38]:
# NOTE(review): `bu` is not assigned at notebook level in any cell visible here;
# the value shown comes from kernel state — TODO confirm where it was set.
bu

'Print & Publishing'

In [53]:
# NOTE(review): no visible cell assigns `bill_hist`; the array shown comes from
# kernel state — TODO confirm which statement created it.
bill_hist

array([18579453.89,  2950549.57,  3176253.61, 13576918.15,  3699382.57,
        3812063.18, 12312241.49,  3659986.17,  3153950.82, 10286857.24,
        2808624.21,  4170901.12,  9115649.14,  4888457.11,  2838795.62,
       10723658.59,  3086688.47,  2735258.97,  8972742.09,  1864827.5 ,
        2837831.27,  8991970.65,  2310718.62,  2148166.88,  7399186.83,
        2490961.22,  3132712.19, 16542433.43,  1628221.84,  2707930.7 ,
       14141201.72,  1369961.41,  2663330.14, 13834373.14,  2098586.73,
        1969150.34, 11511114.38,   882846.23,  1933801.71, 15691543.23,
        3213718.08,  8812386.51,  5894484.54,  1837388.61,  3201493.47,
       15516091.68])

In [52]:
# Show the billing amounts held in `a` as a NumPy array, for comparison with bill_hist
a.values

array([18579453.89,  2950549.57,  3176253.61, 13576918.15,  3699382.57,
        3812063.18, 12312241.49,  3659986.17,  3153950.82, 10286857.24,
        2808624.21,  4170901.12,  9115649.14,  4888457.11,  2838795.62,
       10723658.59,  3086688.47,  2735258.97,  8972742.09,  1864827.5 ,
        2837831.27,  8991970.65,  2310718.62,  2148166.88,  7399186.83,
        2490961.22,  3132712.19, 16542433.43,  1628221.84,  2707930.7 ,
       14141201.72,  1369961.41,  2663330.14, 13834373.14,  2098586.73,
        1969150.34, 11511114.38,   882846.23,  1933801.71, 15691543.23,
        3213718.08,  8812386.51,  5894484.54,  1837388.61,  3201493.47,
       15516091.68])

In [58]:
list_a = ['a', 'b', 'c']
list_b = ['XXX', 'YYY', 'ZZZ']

# Experiment: what ends up in a dictionary when it is filled from two nested loops?
# The loop variables use descriptive names so they do not overwrite the
# notebook-level variable `a` that other cells rely on.
test_dict = {}
for key in list_a:
    for value in list_b:
        print(key, value)
        # Plain assignment replaces any earlier value stored under `key`, so once
        # the inner loop finishes each key holds only the last element of list_b.
        test_dict[key] = value

a XXX
a YYY
a ZZZ
b XXX
b YYY
b ZZZ
c XXX
c YYY
c ZZZ


In [59]:
# Inspect the result of the nested-loop experiment
test_dict

{'a': 'ZZZ', 'b': 'ZZZ', 'c': 'ZZZ'}

In [57]:
# Confirm the container type of test_dict
print(type(test_dict))

<class 'dict'>
