In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
from datetime import datetime
import time
import os
import warnings
warnings.filterwarnings("ignore")

# Root folder that holds the input workbooks; results are written to its "outputs" subfolder.
input_folder = r'C:\Users\A4023862\OneDrive - Astellas Pharma Inc\FPA\SKUtoEquivalized'
output_folder = os.path.join(input_folder, 'outputs')

######################
# READ IN DATA
######################
# Every relative path below is a raw string: "\s" and "\e" are not valid escape
# sequences, so ordinary literals containing them are deprecated and will
# eventually fail to compile.

# SKU conversion factors (the first row of the sheet is skipped)
input_file = r'conversion_factors\PAM SKU conversion factors vDRAFT 11 NOv+old factors.xlsx'
path = os.path.join(input_folder, input_file)
mapping = pd.read_excel(path, sheet_name='PAM SKU values', engine='openpyxl', skiprows=1)

# Product and region tag mappings: two sheets of the same workbook, so the path is built once
input_file = r'inputs\tags_mapping.xlsx'
path = os.path.join(input_folder, input_file)
tags_map_prod = pd.read_excel(path, engine='openpyxl', sheet_name='prod')
tags_map_reg = pd.read_excel(path, engine='openpyxl', sheet_name='reg')

# SKU-level units, split over an EMEA and a non-EMEA workbook
input_file = r'inputs\sku_units_emea_v2.xlsx'
path = os.path.join(input_folder, input_file)
sku1 = pd.read_excel(path, engine='openpyxl')

input_file = r'inputs\sku_units_nonemea_v2.xlsx'
path = os.path.join(input_folder, input_file)
sku2 = pd.read_excel(path, engine='openpyxl')

# Equivalised units (the first nine rows of the sheet are skipped)
input_file = r'inputs\equiv_units.xlsx'
path = os.path.join(input_folder, input_file)
equiv_units = pd.read_excel(path, engine='openpyxl', skiprows=9)

In [9]:
######################
# MANIPULATE SKU DATA INTO FORMAT
######################
# Columns that identify a row; every other column is unpivoted into (ds, value) pairs
sku_id_columns = [
    "Product Group",
    "SC Category ID",
    "Location ID",
    "Product ID",
    "Product Desc",
    "Customer Country",
    "Dosage",
    "Key Figure",
]

# Stack both SKU extracts, keep only the sales-history key figure and go from wide to long
sku_combined = pd.concat([sku1, sku2], ignore_index=True)
is_sales_history = sku_combined['Key Figure'] == 'Actual / Ad hoc Sales History'
sku = sku_combined[is_sales_history].melt(
    id_vars=sku_id_columns,
    var_name="ds",
    value_name="value",
)

# Former column headers such as "Jan-21" become timestamps
sku['ds'] = pd.to_datetime(sku['ds'], format='%b-%y')

# Product IDs are handled as strings from here on
sku['Product ID'] = sku['Product ID'].astype(str)

In [11]:
######################
# MANIPULATE EQUIV DATA INTO FORMAT
######################
# Work on a copy so that renaming columns does not alter the raw frame read from Excel
equnits = equiv_units.copy()

# Replace the names of the first three columns; all remaining columns keep their headers
columns = list(equnits.columns)
columns[:3] = ['currency', 'cost_object', 'product']
equnits.columns = columns

# Wide to long: one row per (currency, cost_object, product, original column header)
equnits = equnits.melt(id_vars=["currency", "cost_object", "product"],
                       var_name="ds",
                       value_name="value")

# Keep only the text after the first '- FY' of each header. This is done on the
# whole column at once: element-wise chained assignment (df['ds'][i] = ...) is
# slow and does not update the frame under pandas Copy-on-Write.
fy_label = equnits['ds'].str.split('- FY', n=1).str[1]
if fy_label.isna().any():
    bad_headers = equnits.loc[fy_label.isna(), 'ds'].unique().tolist()
    raise ValueError(f"Column headers without a '- FY' marker: {bad_headers}")

# Parse '<two-digit year> <month name>' and split it into date components
fy_date = pd.to_datetime(fy_label, format='%y %B')
equnits['year'] = fy_date.dt.year
equnits['month'] = fy_date.dt.month
equnits['day'] = fy_date.dt.day

# Address fiscal year dates: months before April are moved to the following year
# (presumably the header carries an April-to-March fiscal year -- TODO confirm)
equnits.loc[equnits['month'] < 4, 'year'] += 1

# Create new date from the adjusted components
equnits['ds'] = pd.to_datetime(equnits[['year', 'month', 'day']])

In [12]:
######################
# CALCULATE OLD EQUIV UNITS
######################
sku_keep_columns = ['Product Group', 'Customer Country', 'Product Desc', 'Product ID', 'ds', 'value']
factor_columns = ['Mat #', 'Conversion', 'Old conversion']

# Restrict to SKUs whose Product ID appears in the mapping, then attach both factors
in_mapping = sku['Product ID'].isin(mapping['Mat #'])
sku_new = sku.loc[in_mapping, sku_keep_columns].merge(
    mapping[factor_columns],
    how='left',
    left_on='Product ID',
    right_on='Mat #',
)

# Converted values under the old and the new conversion factor
sku_new['ibp_old'] = sku_new['value'] * sku_new['Old conversion']
sku_new['ibp_new'] = sku_new['value'] * sku_new['Conversion']

# Write the converted SKU table to the outputs folder
output_folder = r'C:\Users\A4023862\OneDrive - Astellas Pharma Inc\FPA\SKUtoEquivalized\outputs'
output_file = 'sku_new.csv'
path = os.path.join(output_folder, output_file)
sku_new.to_csv(path)

In [5]:
######################
# COMPARE CALCULATED EQUIV UNITS TO BPC EQUIV UNITS
######################
# Keep only mapping rows that have a BPC tag to join on
tags_map_reg = tags_map_reg[tags_map_reg['bpc_reg'].notna()]
tags_map_prod = tags_map_prod[tags_map_prod['bpc_prod'].notna()]

# BPC rows whose cost object and product are both mapped, translated to IBP tags
is_mapped = (
    equnits['cost_object'].isin(tags_map_reg['bpc_reg'])
    & equnits['product'].isin(tags_map_prod['bpc_prod'])
)
equnits_comp = equnits.loc[is_mapped, ['cost_object', 'product', 'ds', 'value']]
equnits_comp = equnits_comp.merge(tags_map_reg, how='left', left_on='cost_object', right_on='bpc_reg')
equnits_comp = equnits_comp.merge(tags_map_prod, how='left', left_on='product', right_on='bpc_prod')

# Rename BPC columns (by name rather than by position)
equnits_comp = equnits_comp[['ibp_prod', 'ibp_reg', 'ds', 'value']].rename(columns={'value': 'bpc'})

# Group by product group, customer country, ds.
# The value columns are selected with a list: selecting them with a bare tuple
# (['ibp_old', 'ibp_new'] without the inner brackets) is rejected by pandas >= 2.0.
sku_equiv = (
    sku_new
    .groupby(['Product Group', 'Customer Country', 'ds'])[['ibp_old', 'ibp_new']]
    .sum()
    .reset_index()
)

# Rename IBP columns so they line up with the BPC frame
sku_equiv = sku_equiv.rename(columns={'Product Group': 'ibp_prod', 'Customer Country': 'ibp_reg'})

# Create comp df: only product / region / date combinations present on both sides
comp_df = equnits_comp.merge(sku_equiv, how='inner', on=['ibp_prod', 'ibp_reg', 'ds'])

# Remove last BPC date as it is negative (per the original note; not checked here).
# Series.max() is used instead of the builtin max() so an empty frame does not raise.
comp_df = comp_df[comp_df['ds'] < comp_df['ds'].max()]

output_file = 'comp_df.csv'
path = os.path.join(output_folder, output_file)
comp_df.to_csv(path)

In [6]:
######################
# METRICS: PERCENTAGE DIFFERENCE
######################
# Totals per product / region over all compared dates.
# The value columns are selected with a list: tuple-style selection on a groupby
# (['ibp_old', 'ibp_new', 'bpc'] without the inner brackets) fails on pandas >= 2.0.
comp_df2 = (
    comp_df
    .groupby(['ibp_prod', 'ibp_reg'])[['ibp_old', 'ibp_new', 'bpc']]
    .sum()
    .reset_index()
)

# Percentage difference of each calculated total relative to the BPC total.
# NOTE(review): a BPC total of 0 gives inf/NaN here rather than an error.
for ibp_column in ('ibp_old', 'ibp_new'):
    comp_df2[f'{ibp_column}Vbpc'] = ((comp_df2[ibp_column] - comp_df2['bpc']) / comp_df2['bpc']) * 100

output_file = 'comp_df2.csv'
path = os.path.join(output_folder, output_file)
comp_df2.to_csv(path)

comp_df2

Unnamed: 0,ibp_prod,ibp_reg,ibp_old,ibp_new,bpc,ibp_oldVbpc,ibp_newVbpc
0,ENFORTUMAB VEDOTIN,DE,5168.0,393.7524,5168.0,0.0,-92.380952
1,ENFORTUMAB VEDOTIN,JP,51631.5,3933.829,50953.5,1.330625,-92.279571
2,ENFORTUMAB VEDOTIN,US,0.0,0.0,15096.0,-100.0,-100.0
3,ENZALUTAMIDE,CN,8494192.0,151682.0,15217590.0,-44.181764,-99.003246
4,ENZALUTAMIDE,DE,534119.0,535619.0,534017.0,0.019101,0.29999
5,ENZALUTAMIDE,ES,118554.0,148896.0,147940.0,-19.863458,0.646208
6,ENZALUTAMIDE,FR,290215.0,406285.0,405377.0,-28.408617,0.223989
7,ENZALUTAMIDE,GB,258780.0,370259.0,368664.0,-29.806002,0.432643
8,ENZALUTAMIDE,IT,102303.0,213175.0,212268.0,-51.804794,0.42729
9,ENZALUTAMIDE,JP,37438970.0,668553.0,43576710.0,-14.08491,-98.465802
