# Loading up the Accounting Deferred Workbook

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

import pickle

### Loading the historical deferred balances by BU and Major Product Config

##### NOTE: The historical data includes Marketo and Magento (whereas these are not included on the other sheets)

Need to make the following changes
 - 'External Reporting BU' needs to be filled forward
 - Totals need to be eliminated
 - delete unnecessary rows
 
 ###### Note: There are excess rows in the dataframe for digital media subscriptions that are hidden. Need to remove
  - There are leading zeros in the BUs and MPCs
  

# FOR TODAY 6/8/20 Skip the whole deferred trend. Just get to the forecast

In [2]:
#df = pd.read_excel("../data/Data_2020_P06/Q2'20 Rev Acctg Mgmt Workbook (06-04-20).xlsx", 
#                   sheet_name='Deferred Revenue Trend', skiprows=5)

In [3]:
def clean_DR_history(df):
    """Tidy the raw 'Deferred Revenue Trend' sheet into a (BU, MPC)-indexed frame.

    Parameters
    ----------
    df : pandas.DataFrame
        The sheet as read from the workbook. Must contain the columns
        'External Reporting BU' and 'Major Product Config'.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is left untouched) indexed by ['BU', 'MPC'].
        Rows without an MPC are dropped, only the first row of each
        (BU, MPC) pair is kept, and apostrophes in the remaining column
        labels are replaced by underscores (e.g. "Q1'20" -> "Q1_20").
    """
    df = df.rename(columns={'External Reporting BU': 'BU',
                            'Major Product Config': 'MPC',
                            'Q/Q change': 'q_over_q',
                            'Y/Y change': 'y_over_y'})
    # Tolerate sheets that do not carry these two spill-over columns.
    df = df.drop(columns=['Unnamed: 13', 'Unnamed: 14'], errors='ignore')

    # The BU label is forward-filled onto the rows below it before trimming.
    # Series.ffill() replaces the deprecated fillna(method='ffill').
    df['BU'] = df['BU'].ffill().str.strip()
    df['MPC'] = df['MPC'].str.strip()

    # Rows without an MPC carry no product detail; duplicates keep the first hit.
    df = df.dropna(subset=['MPC'])
    df = df.drop_duplicates(subset=['BU', 'MPC'], keep='first')
    df = df.set_index(['BU', 'MPC'])

    # Apostrophes make the labels awkward to use as identifiers; non-string
    # labels are passed through unchanged.
    df = df.rename(
        columns=lambda col: col.replace("'", '_') if isinstance(col, str) else col
    )

    return df

In [None]:
#df_hist = clean_DR_history(df)

In [None]:
#df_hist.head(40)

### Loading the Deferred Revenue Forecast Sheet

#### Note: For today, simply take the totals by each BU by quarter and move on!

To be done
 - clear out rows below the Grand Total inclusive of Magento/Marketo
 - forward fill the External Reporting BU
 - Move Marketo and Magento BU to Digital Experience
 - Aggregate this by External reporting BU
 - rename columns without that '
 - create interpolated periods here for the amortization (assume amortization to revenue is linear within the periods of a quarter)
 - Save and move on
 
##### Note: There are a lot of rows at the bottom that need to be cleaned up

In [4]:
# Read the 'Deferred Revenue Forecast' sheet of the P06 workbook, skipping the
# first five rows of the sheet.
workbook_path = "../data/Data_2020_P06/Q2'20 Rev Acctg Mgmt Workbook (06-04-20).xlsx"
df = pd.read_excel(workbook_path,
                   sheet_name='Deferred Revenue Forecast',
                   skiprows=5)

In [5]:
df.head(50)

Unnamed: 0,External Reporting BU,Major Product Config,Historical,Q3'20,Q4'20,Q1'21,Q2'21,Q3'21,Q4'21,Q1'22,...,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48,Unnamed: 49,Unnamed: 50
0,Digital Media,Consulting,0,1022.47,748.575,29.777,3.149,0,0,0,...,,,,,,,,,,
1,,Educational,0,0,0,0,0,0,0,0,...,,,,,,,,,,
2,,Full,0,-0.278,0,0,0,0,0,0,...,,,,,,,,,,
3,,Licensing,0,0,0,0,0,0,0,0,...,,,,,,,,,,
4,,Licensing M&S,0,283.978,266.207,47.156,21.977,5.002,2.623,0,...,,,,,,,,,,
5,,Other,0,-26634.5,0,0,0,0,0,0,...,,,,,,,,,,
6,,Royalty,0,418.614,233.746,454.925,57.057,35.761,34.685,3.114,...,,,,,,,,,,
7,,Royalty M&S,0,1028.3,827.391,690.155,475.079,29.679,0,0,...,,,,,,,,,,
8,,Software as Service,0,44835,31698.9,20254.3,6908.51,185.827,41.15,31.458,...,,,,,,,,,,
9,,Subscription,0,1.0296e+06,622760,361533,112857,11448,8117.2,5325.55,...,,,,,,,,,,


In [7]:
df.columns

Index(['External Reporting BU', 'Major Product Config', ' Historical', 'Q3'20',
       'Q4'20', 'Q1'21', 'Q2'21', 'Q3'21', 'Q4'21', 'Q1'22', 'Q2'22', 'Q3'22+',
       'As Performed / Upon Acceptance', 'Total', 'Unnamed: 14', 'Unnamed: 15',
       'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19',
       'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23',
       'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26', 'Unnamed: 27',
       'Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30', 'Unnamed: 31',
       'Unnamed: 32', 'Unnamed: 33', 'Unnamed: 34', 'Unnamed: 35',
       'Unnamed: 36', 'Unnamed: 37', 'Unnamed: 38', 'Unnamed: 39',
       'Unnamed: 40', 'Unnamed: 41', 'Unnamed: 42', 'Unnamed: 43',
       'Unnamed: 44', 'Unnamed: 45', 'Unnamed: 46', 'Unnamed: 47',
       'Unnamed: 48', 'Unnamed: 49', 'Unnamed: 50'],
      dtype='object')

##### Stripping spaces in the External Reporting BU columns

In [8]:
# Trim surrounding whitespace so the exact-match label comparisons in the
# following cells (==, isin) work.
df['External Reporting BU'] = df['External Reporting BU'].str.strip()

##### Clearing out the rows below that we do not need

In [9]:
# Find the 'Grand Total inclusive of Magento/Marketo' row and keep only the
# rows up to and including it; everything below is discarded.
is_grand_total = df['External Reporting BU'].eq('Grand Total inclusive of Magento/Marketo')
end_loc = df.loc[is_grand_total]
end_index = end_loc.index[0]  # first (expected: only) match

df = df.loc[df.index <= end_index]

In [10]:
df['External Reporting BU'].value_counts()

Digital Media                               1
Grand Total inclusive of Magento/Marketo    1
Marketo Deferred                            1
Publishing Total                            1
Magento Deferred                            1
Publishing                                  1
Digital Media Total                         1
Digital Experience Total                    1
Grand Total                                 1
Digital Experience                          1
Name: External Reporting BU, dtype: int64

### We are just taking the following rows
- Digital Media Total
- Publishing Total
- Digital Experience Total
- Marketo Deferred
- Magento Deferred
- Grand Total inclusive of Magento/Marketo

Then we need to add the Marketo and Magento deferred to the Digital Experience total


In [11]:
# Summary rows carried forward; every other row of the sheet is dropped.
keeper_rows = [
    'Digital Media Total',
    'Publishing Total',
    'Digital Experience Total',
    'Marketo Deferred',
    'Magento Deferred',
    'Grand Total inclusive of Magento/Marketo',
]

is_keeper = df['External Reporting BU'].isin(keeper_rows)
df_test = df.loc[is_keeper]
df_test.head(10)

Unnamed: 0,External Reporting BU,Major Product Config,Historical,Q3'20,Q4'20,Q1'21,Q2'21,Q3'21,Q4'21,Q1'22,...,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48,Unnamed: 49,Unnamed: 50
13,Digital Media Total,,0.0,1051900.0,657534.0,383776.0,120701.0,11924.1,8320.0,5423.13,...,,,,,,,,,,
23,Digital Experience Total,,-120.0,389900.0,240669.0,138374.0,38697.4,15424.2,10513.4,3010.43,...,,,,,,,,,,
36,Publishing Total,,0.0,30997.3,19928.3,11714.3,5337.42,3280.49,807.988,347.025,...,,,,,,,,,,
38,Magento Deferred,,,32808.4,21486.5,12543.9,5166.5,1310.1,926.829,476.411,...,,,,,,,,,,
39,Marketo Deferred,,,76774.6,43148.6,22904.5,7524.18,0.0,0.0,0.0,...,,,,,,,,,,
40,Grand Total inclusive of Magento/Marketo,,-120.0,1582380.0,982766.0,569312.0,177427.0,31938.9,20568.2,9256.99,...,,,,,,,,,,


# Cleaning out bad columns

In [12]:
df_test.columns

Index(['External Reporting BU', 'Major Product Config', ' Historical', 'Q3'20',
       'Q4'20', 'Q1'21', 'Q2'21', 'Q3'21', 'Q4'21', 'Q1'22', 'Q2'22', 'Q3'22+',
       'As Performed / Upon Acceptance', 'Total', 'Unnamed: 14', 'Unnamed: 15',
       'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19',
       'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23',
       'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26', 'Unnamed: 27',
       'Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30', 'Unnamed: 31',
       'Unnamed: 32', 'Unnamed: 33', 'Unnamed: 34', 'Unnamed: 35',
       'Unnamed: 36', 'Unnamed: 37', 'Unnamed: 38', 'Unnamed: 39',
       'Unnamed: 40', 'Unnamed: 41', 'Unnamed: 42', 'Unnamed: 43',
       'Unnamed: 44', 'Unnamed: 45', 'Unnamed: 46', 'Unnamed: 47',
       'Unnamed: 48', 'Unnamed: 49', 'Unnamed: 50'],
      dtype='object')

In [13]:
# Discard the empty 'Unnamed: N' columns, then the two descriptive columns that
# are not needed. Note the leading space in ' Historical' - that is how the
# label comes out of the workbook.
is_unnamed = df_test.columns.str.contains('^Unnamed')
df_test = (
    df_test
    .loc[:, ~is_unnamed]
    .drop(columns=['Major Product Config', ' Historical'])
)

In [14]:
df_test.head(10)

Unnamed: 0,External Reporting BU,Q3'20,Q4'20,Q1'21,Q2'21,Q3'21,Q4'21,Q1'22,Q2'22,Q3'22+,As Performed / Upon Acceptance,Total
13,Digital Media Total,1051900.0,657534.0,383776.0,120701.0,11924.1,8320.0,5423.13,3864.81,64924.4,5601.75,2313970.0
23,Digital Experience Total,389900.0,240669.0,138374.0,38697.4,15424.2,10513.4,3010.43,1871.82,4073.25,730.422,843264.0
36,Publishing Total,30997.3,19928.3,11714.3,5337.42,3280.49,807.988,347.025,133.832,137.54,0.0,72684.2
38,Magento Deferred,32808.4,21486.5,12543.9,5166.5,1310.1,926.829,476.411,596.116,943.308,183.437,76441.5
39,Marketo Deferred,76774.6,43148.6,22904.5,7524.18,0.0,0.0,0.0,0.0,0.0,4158.31,154510.0
40,Grand Total inclusive of Magento/Marketo,1582380.0,982766.0,569312.0,177427.0,31938.9,20568.2,9256.99,6466.58,70078.5,10673.9,3460870.0


## Add Magento and Marketo to Digital Experience

##### NOTE: The External Reporting BU is different from the BU we have in deferred.
We will have to combine Creative and Document Cloud to get to the External Reporting BU, since both show up as Digital Media in this accounting workbook.


In [15]:
# Give the Magento row the Digital Experience label so that it is summed
# together with Digital Experience when the rows are grouped by BU.
magento_relabelled = df_test['External Reporting BU'].str.replace(
    'Magento Deferred', 'Digital Experience Total'
)
df_test['External Reporting BU'] = magento_relabelled

In [16]:
# Give the Marketo row the Digital Experience label so that it is summed
# together with Digital Experience when the rows are grouped by BU.
bu_labels = df_test['External Reporting BU'].str.replace('Marketo Deferred',
                                                         'Digital Experience Total',
                                                         regex=False)

# Drop the ' Total' suffix so the labels are plain BU names. The recorded
# outputs further down ('Digital Media', 'Digital Experience', 'Publishing',
# 'Grand inclusive of Magento/Marketo') show this step was executed, but the
# cell that did it is no longer in the notebook; without it a fresh
# Restart & Run All yields '... Total' labels in the saved waterfall instead.
df_test['External Reporting BU'] = bu_labels.str.replace(' Total', '', regex=False)

In [1]:
# Was this repeated as a mistake?
#df_test['External Reporting BU'] = df_test['External Reporting BU'].str.replace('Marketo Deferred', 
#                                                                               'Digital Experience Total')

In [18]:
# Make the column labels identifier-friendly: "Q3'20" -> "Q3_20" and
# "Q3'22+" -> "Q3_22".
# regex=False is explicit because '+' is a regex quantifier and the default of
# `regex` differs between pandas versions; both patterns are meant literally.
changed_columns = df_test.columns.str.replace("'", '_', regex=False)
changed_columns = changed_columns.str.replace('+', '', regex=False)

In [19]:
# Apply the cleaned labels; this replaces df_test's column index in place.
df_test.columns = changed_columns

In [20]:
# start by renaming the columns

In [21]:
df_test.columns

Index(['External Reporting BU', 'Q3_20', 'Q4_20', 'Q1_21', 'Q2_21', 'Q3_21',
       'Q4_21', 'Q1_22', 'Q2_22', 'Q3_22', 'As Performed / Upon Acceptance',
       'Total'],
      dtype='object')

In [22]:
df_test

Unnamed: 0,External Reporting BU,Q3_20,Q4_20,Q1_21,Q2_21,Q3_21,Q4_21,Q1_22,Q2_22,Q3_22,As Performed / Upon Acceptance,Total
13,Digital Media,1051900.0,657534.0,383776.0,120701.0,11924.1,8320.0,5423.13,3864.81,64924.4,5601.75,2313970.0
23,Digital Experience,389900.0,240669.0,138374.0,38697.4,15424.2,10513.4,3010.43,1871.82,4073.25,730.422,843264.0
36,Publishing,30997.3,19928.3,11714.3,5337.42,3280.49,807.988,347.025,133.832,137.54,0.0,72684.2
38,Digital Experience,32808.4,21486.5,12543.9,5166.5,1310.1,926.829,476.411,596.116,943.308,183.437,76441.5
39,Digital Experience,76774.6,43148.6,22904.5,7524.18,0.0,0.0,0.0,0.0,0.0,4158.31,154510.0
40,Grand inclusive of Magento/Marketo,1582380.0,982766.0,569312.0,177427.0,31938.9,20568.2,9256.99,6466.58,70078.5,10673.9,3460870.0


In [23]:
# Sum the rows per BU label. Rows that were relabelled to the same BU above
# (Magento, Marketo) are combined into that BU's single line here.
df_test_gb = df_test.groupby('External Reporting BU').sum()

In [24]:
df_test_gb

Unnamed: 0_level_0,Q3_20,Q4_20,Q1_21,Q2_21,Q3_21,Q4_21,Q1_22,Q2_22,Q3_22,As Performed / Upon Acceptance,Total
External Reporting BU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Digital Experience,499483.4,305303.73931,173822.55227,51388.09531,16734.29407,11440.21483,3486.83955,2467.93488,5016.553322,5072.17166,1074216.0
Digital Media,1051898.0,657534.011,383775.583,120701.449,11924.113,8319.999,5423.127,3864.809,64924.39,5601.745,2313967.0
Grand inclusive of Magento/Marketo,1582378.0,982766.07931,569312.46127,177426.96231,31938.89607,20568.20183,9256.99155,6466.57588,70078.483322,10673.91666,3460867.0
Publishing,30997.28,19928.329,11714.326,5337.418,3280.489,807.988,347.025,133.832,137.54,0.0,72684.23


### Now that we have the data that is all numeric, we need to adjust for the reporting in thousands (FP&A report)

In [25]:
# The workbook reports in thousands; scale every column to whole units.
# NOTE: this cell is not idempotent - re-running it multiplies by 1000 again.
df_test_gb = df_test_gb * 1000

In [26]:
df_test_gb

Unnamed: 0_level_0,Q3_20,Q4_20,Q1_21,Q2_21,Q3_21,Q4_21,Q1_22,Q2_22,Q3_22,As Performed / Upon Acceptance,Total
External Reporting BU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Digital Experience,499483400.0,305303700.0,173822600.0,51388100.0,16734294.07,11440214.83,3486839.55,2467934.88,5016553.0,5072172.0,1074216000.0
Digital Media,1051898000.0,657534000.0,383775600.0,120701400.0,11924113.0,8319999.0,5423127.0,3864809.0,64924390.0,5601745.0,2313967000.0
Grand inclusive of Magento/Marketo,1582378000.0,982766100.0,569312500.0,177427000.0,31938896.07,20568201.83,9256991.55,6466575.88,70078480.0,10673920.0,3460867000.0
Publishing,30997280.0,19928330.0,11714330.0,5337418.0,3280489.0,807988.0,347025.0,133832.0,137540.0,0.0,72684230.0


### Creating the columns that have this amortization by period

# Below here is the first mistake - need to create a P00 column and then delete it! Such that the amortization does not include the actual period that is in there

In [29]:
# Zero-padded period labels 'P00' .. 'P35' (12 * 3 = 36 monthly periods).
new_columns = ['P' + str(period).zfill(2) for period in range(12 * 3)]

In [30]:
new_columns

['P00',
 'P01',
 'P02',
 'P03',
 'P04',
 'P05',
 'P06',
 'P07',
 'P08',
 'P09',
 'P10',
 'P11',
 'P12',
 'P13',
 'P14',
 'P15',
 'P16',
 'P17',
 'P18',
 'P19',
 'P20',
 'P21',
 'P22',
 'P23',
 'P24',
 'P25',
 'P26',
 'P27',
 'P28',
 'P29',
 'P30',
 'P31',
 'P32',
 'P33',
 'P34',
 'P35']

In [31]:
# Quarterly columns are the ones whose label contains a 'Q' (Q3_20 ... Q3_22).
qtrly_list = list(filter(lambda label: 'Q' in label, df_test_gb.columns))
qtrly_list

['Q3_20',
 'Q4_20',
 'Q1_21',
 'Q2_21',
 'Q3_21',
 'Q4_21',
 'Q1_22',
 'Q2_22',
 'Q3_22']

In [32]:
# Spread each quarter evenly over three consecutive period columns
# (straight-line within the quarter): Q3_20 -> P00..P02, Q4_20 -> P03..P05, ...
# NOTE(review): 'Q3_22' was originally labelled "Q3'22+", so that bucket is
# also spread over just three periods - confirm this is intended.
period_index = 0
for qtr in qtrly_list:
    monthly_amount = df_test_gb[qtr] / 3
    for _ in range(3):
        df_test_gb[new_columns[period_index]] = monthly_amount
        period_index += 1


In [33]:
df_test_gb

Unnamed: 0_level_0,Q3_20,Q4_20,Q1_21,Q2_21,Q3_21,Q4_21,Q1_22,Q2_22,Q3_22,As Performed / Upon Acceptance,...,P17,P18,P19,P20,P21,P22,P23,P24,P25,P26
External Reporting BU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Digital Experience,499483400.0,305303700.0,173822600.0,51388100.0,16734294.07,11440214.83,3486839.55,2467934.88,5016553.0,5072172.0,...,3813405.0,1162279.85,1162279.85,1162279.85,822645.0,822645.0,822645.0,1672184.0,1672184.0,1672184.0
Digital Media,1051898000.0,657534000.0,383775600.0,120701400.0,11924113.0,8319999.0,5423127.0,3864809.0,64924390.0,5601745.0,...,2773333.0,1807709.0,1807709.0,1807709.0,1288270.0,1288270.0,1288270.0,21641460.0,21641460.0,21641460.0
Grand inclusive of Magento/Marketo,1582378000.0,982766100.0,569312500.0,177427000.0,31938896.07,20568201.83,9256991.55,6466575.88,70078480.0,10673920.0,...,6856067.0,3085663.85,3085663.85,3085663.85,2155525.0,2155525.0,2155525.0,23359490.0,23359490.0,23359490.0
Publishing,30997280.0,19928330.0,11714330.0,5337418.0,3280489.0,807988.0,347025.0,133832.0,137540.0,0.0,...,269329.3,115675.0,115675.0,115675.0,44610.67,44610.67,44610.67,45846.67,45846.67,45846.67


## I don't need everything in this. I can now remove some of the details

First check that my periods match the quarterly deferred numbers.


In [34]:
# Split the frame into the columns whose label contains 'P' and the rest, so
# the monthly spread can be checked against the quarterly figures.
# NOTE: the substring test 'P' also matches 'As Performed / Upon Acceptance',
# so that column lands on the period side and is included in 'total' below.
is_period_col = df_test_gb.columns.str.contains('P')

# Copy *after* slicing so each result is an independent frame; adding 'total'
# to a bare .loc slice triggers pandas' SettingWithCopyWarning.
df_period_only = df_test_gb.loc[:, is_period_col].copy()
df_qtrly_only = df_test_gb.loc[:, ~is_period_col].copy()

# Row-wise sum of the period-side columns, to compare with the 'Total' column.
df_period_only['total'] = df_period_only.sum(axis=1)
                            

In [35]:
df_period_only

Unnamed: 0_level_0,As Performed / Upon Acceptance,P00,P01,P02,P03,P04,P05,P06,P07,P08,...,P18,P19,P20,P21,P22,P23,P24,P25,P26,total
External Reporting BU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Digital Experience,5072172.0,166494500.0,166494500.0,166494500.0,101767900.0,101767900.0,101767900.0,57940850.0,57940850.0,57940850.0,...,1162279.85,1162279.85,1162279.85,822645.0,822645.0,822645.0,1672184.0,1672184.0,1672184.0,1074216000.0
Digital Media,5601745.0,350632500.0,350632500.0,350632500.0,219178000.0,219178000.0,219178000.0,127925200.0,127925200.0,127925200.0,...,1807709.0,1807709.0,1807709.0,1288270.0,1288270.0,1288270.0,21641460.0,21641460.0,21641460.0,2313967000.0
Grand inclusive of Magento/Marketo,10673920.0,527459400.0,527459400.0,527459400.0,327588700.0,327588700.0,327588700.0,189770800.0,189770800.0,189770800.0,...,3085663.85,3085663.85,3085663.85,2155525.0,2155525.0,2155525.0,23359490.0,23359490.0,23359490.0,3460867000.0
Publishing,0.0,10332430.0,10332430.0,10332430.0,6642776.0,6642776.0,6642776.0,3904775.0,3904775.0,3904775.0,...,115675.0,115675.0,115675.0,44610.67,44610.67,44610.67,45846.67,45846.67,45846.67,72684230.0


In [36]:
df_qtrly_only

Unnamed: 0_level_0,Q3_20,Q4_20,Q1_21,Q2_21,Q3_21,Q4_21,Q1_22,Q2_22,Q3_22,Total
External Reporting BU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Digital Experience,499483400.0,305303700.0,173822600.0,51388100.0,16734294.07,11440214.83,3486839.55,2467934.88,5016553.0,1074216000.0
Digital Media,1051898000.0,657534000.0,383775600.0,120701400.0,11924113.0,8319999.0,5423127.0,3864809.0,64924390.0,2313967000.0
Grand inclusive of Magento/Marketo,1582378000.0,982766100.0,569312500.0,177427000.0,31938896.07,20568201.83,9256991.55,6466575.88,70078480.0,3460867000.0
Publishing,30997280.0,19928330.0,11714330.0,5337418.0,3280489.0,807988.0,347025.0,133832.0,137540.0,72684230.0


##### OK, my periods work fine. Now I can move on to saving this and finishing the deferred waterfall

In [37]:
df_test_gb.columns

Index(['Q3_20', 'Q4_20', 'Q1_21', 'Q2_21', 'Q3_21', 'Q4_21', 'Q1_22', 'Q2_22',
       'Q3_22', 'As Performed / Upon Acceptance', 'Total', 'P00', 'P01', 'P02',
       'P03', 'P04', 'P05', 'P06', 'P07', 'P08', 'P09', 'P10', 'P11', 'P12',
       'P13', 'P14', 'P15', 'P16', 'P17', 'P18', 'P19', 'P20', 'P21', 'P22',
       'P23', 'P24', 'P25', 'P26'],
      dtype='object')

In [38]:
# Keep the columns whose label contains 'P'. Besides the P00..P26 period
# columns this also matches 'As Performed / Upon Acceptance' (visible in the
# output displayed below).
# NOTE(review): confirm that column is meant to be part of the saved waterfall.
df_waterfall = df_test_gb.loc[:, df_test_gb.columns.str.contains('P')]

In [39]:
df_waterfall

Unnamed: 0_level_0,As Performed / Upon Acceptance,P00,P01,P02,P03,P04,P05,P06,P07,P08,...,P17,P18,P19,P20,P21,P22,P23,P24,P25,P26
External Reporting BU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Digital Experience,5072172.0,166494500.0,166494500.0,166494500.0,101767900.0,101767900.0,101767900.0,57940850.0,57940850.0,57940850.0,...,3813405.0,1162279.85,1162279.85,1162279.85,822645.0,822645.0,822645.0,1672184.0,1672184.0,1672184.0
Digital Media,5601745.0,350632500.0,350632500.0,350632500.0,219178000.0,219178000.0,219178000.0,127925200.0,127925200.0,127925200.0,...,2773333.0,1807709.0,1807709.0,1807709.0,1288270.0,1288270.0,1288270.0,21641460.0,21641460.0,21641460.0
Grand inclusive of Magento/Marketo,10673920.0,527459400.0,527459400.0,527459400.0,327588700.0,327588700.0,327588700.0,189770800.0,189770800.0,189770800.0,...,6856067.0,3085663.85,3085663.85,3085663.85,2155525.0,2155525.0,2155525.0,23359490.0,23359490.0,23359490.0
Publishing,0.0,10332430.0,10332430.0,10332430.0,6642776.0,6642776.0,6642776.0,3904775.0,3904775.0,3904775.0,...,269329.3,115675.0,115675.0,115675.0,44610.67,44610.67,44610.67,45846.67,45846.67,45846.67


### Now Drop P00 from the waterfall

In [40]:
# Remove the P00 period from the waterfall (see the note above the period
# columns: P00 is created only so that it can be dropped here).
df_waterfall = df_waterfall.drop(columns=['P00'])

In [41]:
df_waterfall

Unnamed: 0_level_0,As Performed / Upon Acceptance,P01,P02,P03,P04,P05,P06,P07,P08,P09,...,P17,P18,P19,P20,P21,P22,P23,P24,P25,P26
External Reporting BU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Digital Experience,5072172.0,166494500.0,166494500.0,101767900.0,101767900.0,101767900.0,57940850.0,57940850.0,57940850.0,17129370.0,...,3813405.0,1162279.85,1162279.85,1162279.85,822645.0,822645.0,822645.0,1672184.0,1672184.0,1672184.0
Digital Media,5601745.0,350632500.0,350632500.0,219178000.0,219178000.0,219178000.0,127925200.0,127925200.0,127925200.0,40233820.0,...,2773333.0,1807709.0,1807709.0,1807709.0,1288270.0,1288270.0,1288270.0,21641460.0,21641460.0,21641460.0
Grand inclusive of Magento/Marketo,10673920.0,527459400.0,527459400.0,327588700.0,327588700.0,327588700.0,189770800.0,189770800.0,189770800.0,59142320.0,...,6856067.0,3085663.85,3085663.85,3085663.85,2155525.0,2155525.0,2155525.0,23359490.0,23359490.0,23359490.0
Publishing,0.0,10332430.0,10332430.0,6642776.0,6642776.0,6642776.0,3904775.0,3904775.0,3904775.0,1779139.0,...,269329.3,115675.0,115675.0,115675.0,44610.67,44610.67,44610.67,45846.67,45846.67,45846.67


### Saving the waterfall as Q2_waterfall

In [42]:
# Persist the waterfall for the deferred revenue forecast notebook. The
# context manager closes the file handle, which the bare open() call never did.
with open('../data/processed/Q2_waterfall.p', 'wb') as waterfall_file:
    pickle.dump(df_waterfall, waterfall_file)

### This will get loaded into the deferred revenue forecast