In [2]:
import numpy as np
import pandas as pd

In [233]:
# Load the 'conversions' sheet of the raw workbook; create_dict_key turns it
# into the nested conversion lookup.
key = pd.read_excel(
    '../data/raw data.xlsx',
    sheet_name='conversions',
)

def create_dict_key(dataframe):
    '''
    Build a nested lookup of conversion values keyed by year.

    Inputs:
        dataframe: pandas dataframe with a 'categories' column plus one
            column per year. The first column is skipped when collecting
            the year columns (presumably it is 'categories' itself --
            confirm against the sheet layout).

    Outputs:
        conversion_key: dict of the form {year: {category: value}}, where
            year is the column label and every inner dict maps 'USD' to 1.
    '''
    # The category labels are identical for every year, so read them once.
    categories = list(dataframe['categories'])

    conversion_key = dict()
    for year in dataframe.columns[1:]:
        factors = dict(zip(categories, dataframe[year]))
        # 'USD' is always forced to 1, overriding any 'USD' row in the sheet.
        factors['USD'] = 1
        conversion_key[year] = factors

    return conversion_key

# Build the {year: {category: value}} lookup from the conversions sheet.
conversion_key = create_dict_key(key)

In [234]:
# Load the 'raw data' sheet of the raw workbook; this frame is the input to
# adjustments().
df = pd.read_excel(
    '../data/raw data.xlsx',
    sheet_name='raw data',
)

def _classify_region(type_label):
    '''Map a raw 'Type' label to 'US', 'WW' or 'ExUS' (first match wins).'''
    # NOTE(review): these are substring tests, so a label such as 'Ex-US'
    # would be classified as 'US' -- confirm against the real 'Type' values.
    if 'US' in type_label:
        return 'US'
    if 'Total' in type_label:
        return 'WW'
    return 'ExUS'


def adjustments(raw_data, conversion_key, fy_starts=13):
    '''
    Takes inputs of raw_data and conversion key and
    converts to real money terms.

    Every year column is divided by the row currency's value for that year
    and multiplied by that year's 'Inflation Adjustment' value. A 'Region'
    column ('US', 'WW' or 'ExUS') derived from 'Type' is appended.

    Inputs:
        raw_data: pandas dataframe of raw data Excel. Must contain
            'Currency' and 'Type' columns; it is not modified.
        conversion_key: dict of the form {year: {category: value}} with
            integer years, holding an 'Inflation Adjustment' entry and one
            entry per currency code for each year.
        fy_starts: position of the first year column; every column from
            there to the end is treated as a year column whose label
            carries the year after its first three characters
            (e.g. 'FY-2007').

    Outputs:
        adjusted_data: new pandas dataframe of adjusted data

    Raises:
        KeyError: if a year or a currency code is missing from
            conversion_key.
    '''
    # Capture the year columns before 'Region' is appended at the end.
    year_columns = list(raw_data.columns[fy_starts:])

    # Work on a copy so the caller's frame is left untouched.
    adjusted = raw_data.copy()
    currencies = adjusted['Currency']

    for year in year_columns:
        rates = conversion_key[int(year[3:])]
        inflation = rates['Inflation Adjustment']
        # Plain dict indexing (not Series.map(dict)) so that an unknown
        # currency raises KeyError instead of silently producing NaN.
        fx = currencies.map(lambda code, rates=rates: rates[code])
        adjusted[year] = adjusted[year].astype(float) / fx * inflation

    adjusted['Region'] = adjusted['Type'].map(_classify_region)

    return adjusted

# Convert the raw figures with the conversion lookup and tag each row with a Region.
clean = adjustments(df, conversion_key)

def summary(clean_data, fy_starts=13):
    '''
    Sum the year columns per ('Proper Name', 'Region') pair and make sure
    every name has a row for each of 'US', 'WW' and 'ExUS'.

    Inputs:
        clean_data: pandas dataframe with 'Proper Name' and 'Region'
            columns and the year columns starting at position fy_starts.
        fy_starts: position of the first year column.

    Outputs:
        pandas dataframe with columns 'Proper Name', 'Region' and the year
        columns. Rows are ordered by first appearance of each name, then
        US, WW, ExUS; combinations absent from clean_data hold NaN.
    '''
    group_keys = ['Proper Name', 'Region']
    regions = ['US', 'WW', 'ExUS']

    # First simulate the dataframe: one empty row per (name, region) pair.
    names = list(clean_data['Proper Name'].unique())
    scaffold = pd.DataFrame(
        [[name, region] for name in names for region in regions],
        columns=group_keys,
    )

    # 'Region' may sit after the year columns (it is appended at the end of
    # the frame), so drop the grouping keys from the value columns;
    # otherwise groupby would aggregate the key itself.
    years = [col for col in clean_data.columns[fy_starts:]
             if col not in group_keys]
    totals = clean_data.groupby(group_keys, as_index=False)[years].sum()

    # The right merge keeps the scaffold's row order and adds the empty rows.
    totals = totals.merge(scaffold, how='right', on=group_keys)

    # Place holder for summary helper

    return totals


# Show the per-(Proper Name, Region) totals as this cell's output.
summary(clean)

Unnamed: 0,Proper Name,Region,FY-2007,FY-2008,FY-2009,FY-2010,FY-2011,FY-2012,FY-2013,FY-2014,...,FY-2029,FY-2030,FY-2031,FY-2032,FY-2033,FY-2034,FY-2035,FY-2036,FY-2037,FY-2038
0,Actemra,US,0.0,0.0,0.0,75.879615,208.489888,331.554468,431.294194,556.352391,...,335.725889,315.654785,335.012030,343.125495,362.802547,358.359781,363.288261,0.0,0.0,0.0
1,Actemra,WW,0.0,0.0,0.0,519.382885,913.806742,1158.377021,1424.369677,1677.279130,...,1026.982709,950.614498,983.526357,1026.718156,1067.725867,1061.260590,1244.462648,0.0,0.0,0.0
2,Actemra,ExUS,,,,,,,,,...,,,,,,,,,,
3,Adempas,US,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,388.825103,334.938968,300.437226,296.269494,296.007724,88.168056,61.717639,0.0,0.0,0.0
4,Adempas,WW,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,84.362632,...,238.420843,234.891855,204.027394,86.399891,71.240784,59.505770,26.347257,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277,Xtandi,WW,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.159675,0.650976,...,2.004867,1.371162,0.980480,0.760915,0.347654,0.278064,0.000000,0.0,0.0,0.0
278,Xtandi,ExUS,,,,,,,,,...,,,,,,,,,,
279,Yervoy,US,0.0,0.0,0.0,0.000000,425.068000,650.479600,730.814733,893.836300,...,608.303033,475.140206,287.319594,242.144549,159.085817,182.393814,145.915051,0.0,0.0,0.0
280,Yervoy,WW,0.0,0.0,0.0,0.000000,473.760000,912.999200,1226.304000,1648.995600,...,1052.208894,859.211704,529.341355,451.136010,362.668629,615.523927,600.701670,0.0,0.0,0.0
