In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
from datetime import datetime
import time
import os
import warnings
warnings.filterwarnings("ignore")

######################
# METADATA
######################
# Input/output folders. The defaults are the original OneDrive locations; set
# SALES_TABLE_INPUT_DIR / SALES_TABLE_OUTPUT_DIR in the environment to run the
# notebook on another machine without editing this cell.
input_folder = os.environ.get(
    'SALES_TABLE_INPUT_DIR',
    r'C:\Users\A4023862\OneDrive - Astellas Pharma Inc\FPA\Sales Forecasting\sales_table_dev\inputs',
)
output_folder = os.environ.get(
    'SALES_TABLE_OUTPUT_DIR',
    r'C:\Users\A4023862\OneDrive - Astellas Pharma Inc\FPA\Sales Forecasting\sales_table_dev\outputs',
)

# Number of leading spreadsheet rows skipped before the header row in the two
# sales extracts (BPC and Maypole).
REPORT_HEADER_ROWS = 9


def read_input_workbook(file_name, **read_excel_kwargs):
    """Read one Excel workbook from ``input_folder`` with the openpyxl engine.

    Parameters
    ----------
    file_name : str
        Workbook file name, relative to ``input_folder``.
    **read_excel_kwargs
        Extra keyword arguments forwarded to ``pd.read_excel``
        (for example ``skiprows`` or ``sheet_name``).

    Returns
    -------
    Whatever ``pd.read_excel`` returns for the given arguments
    (a DataFrame for a single sheet).
    """
    workbook_path = os.path.join(input_folder, file_name)
    return pd.read_excel(workbook_path, engine='openpyxl', **read_excel_kwargs)


######################
# READ IN DATA
######################
bpc_df = read_input_workbook('BPC PAM Sales Input.xlsx', skiprows=REPORT_HEADER_ROWS)

maypole_df = read_input_workbook('net_sales_02132023.xlsx', skiprows=REPORT_HEADER_ROWS)

mapping = read_input_workbook('Maypole to BPC Mapping (US and ESTC).xlsx', sheet_name='tags')

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
######################
# MAYPOLE DATA
######################
# Reshape the wide Maypole extract (one column per period) into a long table
# with columns currency / cost_object / product / ds / value, where `ds` is a
# calendar date derived from the period label in the original column header.

# Name the first three columns by position. set_axis returns a new frame, so
# the raw `maypole_df` loaded earlier keeps its original headers.
columns = list(maypole_df.columns)
columns[:3] = ['currency', 'cost_object', 'product']
maypole = maypole_df.set_axis(columns, axis=1)

# Wide -> long: every remaining column header becomes a value of `ds`.
maypole = maypole.melt(
    id_vars=['currency', 'cost_object', 'product'],
    var_name='ds',
    value_name='value',
)

# Lookup table: one row per distinct period label.
maypole_ds = pd.DataFrame({'fy_ds': maypole['ds'].unique()})

# The text after the first '- FY' is parsed as '<2-digit year> <month name>'.
# Done column-wise: the previous element-wise writes (frame['ds'][i] = ...) are
# chained assignments, which do not update the frame under pandas Copy-on-Write.
fy_text = maypole_ds['fy_ds'].str.split('- FY', n=1).str[1]
labels_without_marker = maypole_ds.loc[fy_text.isna(), 'fy_ds'].tolist()
if labels_without_marker:
    raise ValueError(f"Period labels without a '- FY' marker: {labels_without_marker}")
fy_month = pd.to_datetime(fy_text, format='%y %B')

# Address fiscal year dates: months before April are moved to the following
# calendar year (presumably an April-March fiscal year - confirm with the data owner).
calendar_year = fy_month.dt.year + (fy_month.dt.month < 4).astype(int)

# Create new date
maypole_ds['ds'] = pd.to_datetime(
    pd.DataFrame({
        'year': calendar_year,
        'month': fy_month.dt.month,
        'day': fy_month.dt.day,
    })
)

# Replace each period label in the long table with its calendar date.
maypole['ds'] = maypole['ds'].map(maypole_ds.set_index('fy_ds')['ds'])
maypole = maypole[['currency', 'cost_object', 'product', 'ds', 'value']]

maypole

Unnamed: 0,currency,cost_object,product,ds,value
0,JPY - Japan Yen,D_US - US,P_XTD_TOT - Xtandi Total,2014-04-01,5.030184e+09
1,JPY - Japan Yen,D_US - US,P_BE_TOT - Myrbetriq/BET Total,2014-04-01,1.784778e+09
2,JPY - Japan Yen,D_US - US,P_VC_TOT - Vesicare Total,2014-04-01,4.322020e+09
3,JPY - Japan Yen,D_US - US,P_PRG_TOT - Prograf Total,2014-04-01,1.834998e+09
4,JPY - Japan Yen,D_US - US,P_ADG_TOT - Advagraf/GRA Total,2014-04-01,2.111930e+07
...,...,...,...,...,...
234539,LC - LC,D_ASG - ASG (Singapore),P_ELG - Eligard,2023-02-01,
234540,LC - LC,D_ASG - ASG (Singapore),P_XOS_TOT - Xospata total,2023-02-01,-7.713050e+05
234541,LC - LC,D_ASG - ASG (Singapore),P_PADCEV_TOT - PADCEV total,2023-02-01,-4.824427e+05
234542,LC - LC,D_ASG - ASG (Singapore),P_TMX - Feburic,2023-02-01,-1.053433e+06


In [3]:
######################
# MAP MAYPOLE DATA TO BPC
######################
# Keep only rows with currency 'LC - LC' and cost object 'D_US - US'. The
# explicit .copy() makes the subset an independent frame, so adding a column
# to it below is well defined (no SettingWithCopy ambiguity).
is_us_local = (maypole['currency'] == 'LC - LC') & (maypole['cost_object'] == 'D_US - US')
maypole1 = maypole.loc[is_us_local].copy()

# Create tag: cost object and product concatenated, used as the join key.
maypole1['tag_maypole'] = maypole1['cost_object'] + maypole1['product']

# Merge with mapping (left join: Maypole rows without a mapping are kept).
maypole1 = maypole1.merge(
    mapping[['tag_maypole', 'tag_bpc', 'bpc_co', 'bpc_prd', 'rule']],
    how='left',
    on='tag_maypole',
)

# Remove rows with no values
maypole1 = maypole1[maypole1['value'].notna()].copy()

# Identify which maypole tags are unmapped. fillna is the direct way to fill
# missing cells; the earlier replace(np.nan, ..., regex=True) combined a NaN
# target with a regex flag that is only meaningful for string patterns.
for mapped_col in ['tag_bpc', 'bpc_co', 'bpc_prd']:
    maypole1[mapped_col] = maypole1[mapped_col].fillna('UNMAPPED')

# Unmapped rows get a neutral multiplier so their value is left unchanged.
maypole1['rule'] = maypole1['rule'].fillna(1)

# Apply rule to value
maypole1['value'] = maypole1['value'] * maypole1['rule']

# Subset and rename the Maypole-side columns so they sit next to their BPC counterparts.
maypole1 = maypole1[
    ['tag_maypole', 'cost_object', 'product', 'tag_bpc', 'bpc_co', 'bpc_prd', 'ds', 'value', 'rule']
].rename(columns={'cost_object': 'maypole_co', 'product': 'maypole_prd'})

# Save maypole data
output_file = r'sales_table_dev.csv'
path = os.path.join(output_folder, output_file)
maypole1.to_csv(path, index=False)

In [8]:
######################
# BPC DATA - TO BE ADDED TO CASH MODEL
######################
# Aggregate the BPC extract over sales type and convert its period labels to
# calendar dates, producing currency / cost_object / product / ds / value.

# Replace column names. set_axis returns a new frame, so the raw `bpc_df`
# loaded earlier keeps its original headers.
bpc = bpc_df.set_axis(
    ['category', 'plan_year', 'region_grp', 'region', 'sales_type',
     'product', 'currency', 'ds', 'value'],
    axis=1,
)

# Sum by sales type: group on every other dimension so `sales_type` collapses.
bpc = (
    bpc.groupby(['category', 'plan_year', 'region_grp', 'region', 'product', 'currency', 'ds'])['value']
    .sum()
    .reset_index()
)

# Subset
bpc = bpc[['currency', 'region', 'product', 'ds', 'value']]

# Lookup table: one row per distinct period label.
bpc_ds = pd.DataFrame({'fy_ds': bpc['ds'].unique()})

# The text after the first '- FY' is parsed as '<2-digit year> <month name>'.
# Done column-wise: the previous element-wise writes (frame['ds'][i] = ...) are
# chained assignments, which do not update the frame under pandas Copy-on-Write.
period_text = bpc_ds['fy_ds'].str.split('- FY', n=1).str[1]
labels_without_marker = bpc_ds.loc[period_text.isna(), 'fy_ds'].tolist()
if labels_without_marker:
    raise ValueError(f"Period labels without a '- FY' marker: {labels_without_marker}")
period_start = pd.to_datetime(period_text, format='%y %B')

# Address fiscal year dates: months before April are moved to the following
# calendar year (presumably an April-March fiscal year - confirm with the data owner).
calendar_year = period_start.dt.year + (period_start.dt.month < 4).astype(int)

# Create new date
bpc_ds['ds'] = pd.to_datetime(
    pd.DataFrame({
        'year': calendar_year,
        'month': period_start.dt.month,
        'day': period_start.dt.day,
    })
)

# Replace each period label with its calendar date and expose `region` under
# the name `cost_object`, matching the Maypole table's column names.
bpc = bpc.assign(ds=bpc['ds'].map(bpc_ds.set_index('fy_ds')['ds']))
bpc = bpc[['currency', 'region', 'product', 'ds', 'value']].rename(columns={'region': 'cost_object'})

bpc

Unnamed: 0,currency,cost_object,product,ds,value
0,LC - Local Currency,D_E_AFLIC - Licensing,BENDAMUSTN - Bendamustine,2021-04-01,531208.80
1,LC - Local Currency,D_E_AFLIC - Licensing,BENDAMUSTN - Bendamustine,2021-08-01,1064690.50
2,LC - Local Currency,D_E_AFLIC - Licensing,BENDAMUSTN - Bendamustine,2021-12-01,2279682.88
3,LC - Local Currency,D_E_AFLIC - Licensing,BENDAMUSTN - Bendamustine,2022-02-01,116033.25
4,LC - Local Currency,D_E_AFLIC - Licensing,BENDAMUSTN - Bendamustine,2022-01-01,-174927.82
...,...,...,...,...,...
7389,LC - Local Currency,"US21 - Agensys, Inc.",ENFORTUMAB - Enforumab Vedotin,2022-06-01,21372736.00
7390,LC - Local Currency,"US21 - Agensys, Inc.",ENFORTUMAB - Enforumab Vedotin,2022-05-01,16008233.00
7391,LC - Local Currency,"US21 - Agensys, Inc.",ENFORTUMAB - Enforumab Vedotin,2022-11-01,21489639.00
7392,LC - Local Currency,"US21 - Agensys, Inc.",ENFORTUMAB - Enforumab Vedotin,2022-10-01,16673462.00


In [9]:
######################
# INPUT DEV FOR CASH MODEL - TO BE ADDED TO CASH MODEL
######################
# For every cost object listed on the 'dates' sheet, take BPC rows dated on or
# after its `date_start` and Maypole rows dated before it, then stack both
# into one table and write it out.

# Read in data
input_file = 'Maypole to BPC Mapping (US and ESTC).xlsx'
path = os.path.join(input_folder, input_file)
bpc_dates = pd.read_excel(path, engine='openpyxl', sheet_name='dates')

# Create tag
bpc['tag_bpc'] = bpc['cost_object'] + bpc['product']

# Filter data for cost objects with dates
bpc1 = bpc[bpc['cost_object'].isin(bpc_dates['bpc_co'])]

# One (cost object, cut-over date) pair per row of the dates sheet, in sheet order.
cutovers = list(zip(bpc_dates['bpc_co'], bpc_dates['date_start']))

# Filter BPC data for the dates supplied. Pieces are collected and concatenated
# once, instead of growing a frame inside the loop from an empty DataFrame.
bpc_parts = [
    bpc1[(bpc1['cost_object'] == cost_object) & (bpc1['ds'] >= date_start)]
    for cost_object, date_start in cutovers
]
# With an empty dates sheet there is nothing to concatenate; fall back to an
# empty frame that still carries the expected columns.
bpc_final = pd.concat(bpc_parts, ignore_index=True) if bpc_parts else bpc1.iloc[0:0]

# Filter Maypole data for the dates supplied (strictly before the cut-over).
maypole_parts = [
    maypole1[(maypole1['bpc_co'] == cost_object) & (maypole1['ds'] < date_start)]
    for cost_object, date_start in cutovers
]
maypole_final = pd.concat(maypole_parts, ignore_index=True) if maypole_parts else maypole1.iloc[0:0]

# Combine Maypole and BPC data: bring both sides to the same five columns.
bpc_final = bpc_final[['tag_bpc', 'cost_object', 'product', 'ds', 'value']].rename(
    columns={'tag_bpc': 'tag'}
)

maypole_final = maypole_final[['tag_bpc', 'bpc_co', 'bpc_prd', 'ds', 'value']].rename(
    columns={'tag_bpc': 'tag', 'bpc_co': 'cost_object', 'bpc_prd': 'product'}
)

# Concat: Maypole rows first, then BPC rows.
final = pd.concat([maypole_final, bpc_final], ignore_index=True)

# Save combined model input
output_file = r'model_input.csv'
path = os.path.join(output_folder, output_file)
final.to_csv(path, index=False)