In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
from datetime import datetime
import time
import os
import warnings
warnings.filterwarnings("ignore")

######################
# METADATA
######################
sim_start = datetime.now()
sims=10
input_folder = r'C:\Users\A4023862\OneDrive - Astellas Pharma Inc\LRF\PS22\inputs'
output_folder = r'C:\Users\A4023862\OneDrive - Astellas Pharma Inc\LRF\PS22\outputs'

######################
# READ IN DATA
######################
input_file = 'inputs_v2.xlsx'
path = os.path.join(input_folder, input_file)

sales_ci=pd.read_excel(path, sheet_name='sales', engine='openpyxl')
unc_pr=pd.read_excel(path, sheet_name='unc_pr', engine='openpyxl')
unc_pr_desc=pd.read_excel(path, sheet_name='unc_pr_desc', engine='openpyxl')
unc_pri=pd.read_excel(path, sheet_name='unc_pri', engine='openpyxl')
unc_pri_desc=pd.read_excel(path, sheet_name='unc_pri_desc', engine='openpyxl')

######################
# PREP BASELINE DATA
######################
sales_ci=sales_ci.drop_duplicates()
sales_ci=sales_ci.melt(id_vars=['product', 'region', 'units', 'indication'], 
        var_name="year", 
        value_name="sales")
sales_ci=sales_ci[(sales_ci['units'] =='¥')]

# Exclude indications that are not needed
exclude_ind = ['Adjustments', 'Central Adjustments', 'Total', '0', 'mPC 1L']
sales_ci=sales_ci[(~sales_ci['indication'].isin(exclude_ind))]

# Partition WW sales into 9 segments -> US, JP, CN, DE, FR, ES, IT, GB, WWex8
sales_ci_WW=sales_ci[(sales_ci['region']=='WW')]
ast8 = ['US','JP','CN', 'DE','FR','ES','IT','GB']
sales_ci_ast8=sales_ci[(sales_ci['region'].isin(ast8))]

sales_ci_WWex8=sales_ci_ast8.groupby(['product','units','indication','year']).sum().reset_index()
sales_ci_WWex8=sales_ci_WW.merge(sales_ci_WWex8, how='right', on=['product', 'units', 'indication', 'year'])
sales_ci_WWex8['sales']=sales_ci_WWex8['sales_x']-sales_ci_WWex8['sales_y']
sales_ci_WWex8['region']='WWex8'
sales_ci_WWex8 = sales_ci_WWex8[['product','region', 'units', 'indication', 'year', 'sales']]

# Generate new sales table
sales_ci_clean=pd.concat([sales_ci_ast8, sales_ci_WWex8])

# Add back non-strategic products
sales_ci_nsp=sales_ci[(sales_ci['indication']=='Non-Strategic')]
sales_ci_clean=pd.concat([sales_ci_clean, sales_ci_nsp])

# Create tag
sales_ci_clean['tag'] = sales_ci_clean['product'] + sales_ci_clean['region'] + sales_ci_clean['units'] + sales_ci_clean['indication'].astype(str)+ sales_ci_clean['year'].astype(str)

# Validation
print(sum(sales_ci_clean[(sales_ci_clean['year']==2025)]['sales']))  #18176
print(sum(sales_ci_clean[(sales_ci_clean['product']=='xtandi') & (sales_ci_clean['year']==2025)]['sales']))  #6963
print(sum(sales_ci_clean[(sales_ci_clean['product']=='mirabegron') & (sales_ci_clean['year']==2025)]['sales']))  #1223
print(sales_ci_clean['tag'].nunique()) # 2390

# Remove interim dfs from memory
del sales_ci_WW
del sales_ci_ast8
del sales_ci_WWex8
del sales_ci_nsp
del sales_ci

# Save new sales data
output_file = 'sales_clean.csv'
path = os.path.join(output_folder, output_file)
sales_ci_clean.to_csv(path)

18176.63205186673
6963.730575326161
1223.22
2300


In [2]:
%whos DataFrame

Variable         Type         Data/Info
---------------------------------------
sales_ci_clean   DataFrame                          pro<...>\n[2310 rows x 7 columns]
unc_pr           DataFrame         product region  year<...>n[1210 rows x 38 columns]
unc_pr_desc      DataFrame       uncertainties         <...>        US         0.15  
unc_pri          DataFrame         product region units<...>n[1170 rows x 24 columns]
unc_pri_desc     DataFrame      uncertainties          <...>aint.  \n2  HSCT-Maint.  


# Phase 1: PTRS

## Simulation

In [3]:
#############
# CREATE OUTPUT DFS FOR PHASE 1
#############
output_p1 = pd.DataFrame(columns=['tag','product', 'region', 'units', 'indication', 'year', 'sales', 'sales_RA_10', 'sales_RA_25', 'sales_RA_50', 'sales_RA_75', 'sales_RA_90']) # Output of Phase 1
scenarios_p1 = pd.DataFrame(columns=['scenario', 'tag', 'product', 'region', 'units', 'indication', 'year', 'sales', 'sim_sales']) # Scenarios of Phase 1

#############
# GENERATE SCENARIOS
#############
# Add PTRS values to sales data
sales_ci_unc = sales_ci_clean.merge(unc_pri, how='right', on=['product','region', 'units', 'indication', 'year'])

scenario_sdf=[]
product_sdf=[]
region_sdf=[]
units_sdf=[]
ind_sdf=[]
year_sdf=[]
sales_sdf=[]
tag_sdf=[]
sim_sales_sdf=[]
    
# Loop through rows
for index, row in sales_ci_unc.iterrows():
# for index, row in sales_ci_unc.iloc[29:30].iterrows():
    n=sims
    sales_ra = []
    unc_ptrs_ra = []
    
    for i in range(n):
        scenario_sdf.append(i)
        product_sdf.append(row.values[0])
        region_sdf.append(row.values[1])
        units_sdf.append(row.values[2])
        ind_sdf.append(row.values[3])
        year_sdf.append(row.values[4])
        sales_sdf.append(row.values[5])
        tag_sdf.append(row.values[6])

        # Get sales (value)
        sales = row.values[5]

        # Get PTRS Uncertainty Probability (value)
        unc_ptrs_prob=np.random.binomial(size=1, n=1, p=row.values[7])
        
        # Get Uncertainty Value (value) for others
        unc1=row.values[8]
        unc2=row.values[9]
        
        if ((unc_ptrs_prob == 1) and ((~np.isnan(unc1)) or (~np.isnan(unc2)) )):
            m=sims
            for j in range(m):

                # Get Uncertainty Probability (value) for others
                unc1_prob=np.random.randint(1,4)
                
                if ((unc1_prob == 1)):
                    sales=unc1
                
                if ((unc1_prob == 2)):
                    sales=unc2
                    
        
        # Generate RA sales
        unc_all=float(sales)*float(unc_ptrs_prob)
        sales_ra.append(unc_all)
        sim_sales_sdf.append(unc_all)
        
    # Get Product Worldwide sales by year (series)
    prod_ww_sales = sales_ci_unc.iloc[index]
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
    prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
    prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
    prod_ww_sales['sales_P1_RA'] = np.mean(sales_ra)
    
    # Append to sales df
    output_p1 = output_p1.append(prod_ww_sales, ignore_index=True)
    
    # Develop scenario_df
    scenarios_p1= pd.DataFrame({'scenario': scenario_sdf,'tag': tag_sdf,'product': product_sdf,'region': region_sdf, 'units': units_sdf, 
                                'indication': ind_sdf, 'year': year_sdf, 'sales': sales_sdf, 'sim_sales': sim_sales_sdf})

# Save outputs
output_p1 = output_p1[['tag','product','region', 'units', 'indication', 'year', 'sales', 'sales_RA_10', 'sales_RA_25', 'sales_RA_50','sales_RA_75', 'sales_RA_90', 'sales_P1_RA']]

# Add non-simulated tags to output and scenario dfs
sales_exP1=sales_ci_clean[~(sales_ci_clean['tag'].isin(output_p1['tag']))]
sales_exP1['sales_RA_10']=sales_exP1['sales']
sales_exP1['sales_RA_25']=sales_exP1['sales']
sales_exP1['sales_RA_50']=sales_exP1['sales']
sales_exP1['sales_RA_75']=sales_exP1['sales']
sales_exP1['sales_RA_90']=sales_exP1['sales']
sales_exP1['sales_P1_RA']=sales_exP1['sales']
output_p1=pd.concat([output_p1, sales_exP1])

# Validation
print(sum(output_p1[(output_p1['year']==2025)]['sales']))  #18176
print(sum(output_p1[(output_p1['product']=='xtandi') & (output_p1['year']==2025)]['sales'])) #6963
print(sum(output_p1[(output_p1['product']=='mirabegron') & (output_p1['year']==2025)]['sales']))  #1223
print(output_p1['tag'].nunique()) # 2300

18176.63205186673
6963.730575326161
1223.22
2300


## Prep Results for Analysis

In [4]:
# Convert PTRS results to sales df for next phase (Commercial Uncertainty)
output_p1=output_p1[['tag','product','region', 'units', 'indication', 'year', 'sales', 'sales_P1_RA']]

# Sum sales at the WW level
output_p1_ww=output_p1.groupby(['product','units','indication','year']).sum().reset_index()
output_p1_ww['region']='Total'
output_p1=pd.concat([output_p1_ww, output_p1])

# Sum sales and scenarios at the Product level, as commercial uncertainty is applied at a higher granularity
output_p1_prod=output_p1.groupby(['product','units','region','year']).sum().reset_index()
output_p1_prod['indication']='Total'
output_p1=pd.concat([output_p1_prod, output_p1])

scenarios_p1_prod=scenarios_p1[scenarios_p1['product']=='fezo'].groupby(['product','units','region','year', 'scenario']).sum().reset_index()
scenarios_p1_prod['indication']='Total'
scenarios_p1=pd.concat([scenarios_p1_prod, scenarios_p1])

# Validation
print(sum(output_p1[(output_p1['year']==2025)& (output_p1['region']!='Total') & (output_p1['indication']!='Total')]['sales']))  #18176
print(sum(output_p1[(output_p1['product']=='xtandi') & (output_p1['year']==2025) & (output_p1['region']!='Total') & (output_p1['indication']!='Total')]['sales']))  #6963
print(sum(output_p1[(output_p1['product']=='mirabegron') & (output_p1['year']==2025) & (output_p1['region']!='Total') & (output_p1['indication']!='Total')]['sales']))  #1223
print(output_p1[(output_p1['region']!='Total') & (output_p1['indication']!='Total')]['tag'].nunique()) # 2300

# Remove interim dfs from memory
del sales_exP1
del output_p1_ww
del output_p1_prod
del scenarios_p1_prod
del sales_ci_unc

output_file = 'output_p1.csv'
path = os.path.join(output_folder, output_file)
output_p1.to_csv(path)

output_file = 'scenarios_p1.csv'
path = os.path.join(output_folder, output_file)
scenarios_p1.to_csv(path)

18176.63205186673
6963.730575326161
1223.22
2300


# Phase 2: Market Events / Commercial Uncertainty

## Prep for Phase 2

In [5]:
#############
# CREATE OUTPUT DFS FOR PHASE 2
#############
output_p2 = pd.DataFrame(columns=['tag','product', 'region', 'units', 'indication', 'year', 'sales', 'sales_RA_10', 'sales_RA_25','sales_RA_50','sales_RA_75', 'sales_RA_90']) # Outputs of Phase 2
scenarios_p2 = pd.DataFrame(columns=['scenario', 'tag', 'product', 'region', 'units', 'indication', 'year', 'sales', 'sim_sales']) # Scenarios of Phase 2
output_unc_p2 = pd.DataFrame(columns=['tag','product', 'region', 'units', 'indication', 'year', 'sales']) # Uncertainties of Phase 2

#############
# PREP INPUTS DFS FOR PHASE 2
#############
# Create input df
input_p2a=output_p1[(output_p1['region']=='Total') & (output_p1['indication']=='Total')] # These do not go thru phase2
input_p2b=output_p1[(output_p1['region']!='Total') & (output_p1['indication'].isin(['Total']))] # These go thru phase2

# Validation
print(sum(input_p2a[(input_p2a['year']==2025)]['sales']))  #18176
print(sum(input_p2a[(input_p2a['product']=='xtandi') & (input_p2a['year']==2025)]['sales']))  #6963
print(sum(input_p2a[(input_p2a['product']=='mirabegron') & (input_p2a['year']==2025)]['sales']))  #1223

print(sum(input_p2b[(input_p2b['year']==2025)]['sales']))  #18176
print(sum(input_p2b[(input_p2b['product']=='xtandi') & (input_p2b['year']==2025)]['sales']))  #6963
print(sum(input_p2b[(input_p2b['product']=='mirabegron') & (input_p2b['year']==2025)]['sales']))  #1223

18176.632051866727
6963.730575326162
1223.22
18176.632051866723
6963.730575326161
1223.2200000000003


## Simulation

In [6]:
#############
# XTANDI
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'xtandi'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
output_unc_p2=output_unc_p2[(output_unc_p2['product']!=product)]

for year in years:
    print(year)
    
    for region in regions:
        n=sims
        sales_ra = []
        unc1_ra = []
        unc2_ra = []
        unc3_ra = []
        unc26_ra = []
        unc29_ra = []
        
        for i in range(n):
            # Get Uncertainty Probability (value)
            unc1_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc1')]['probability'], 1, 1)
            unc2_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc2')]['probability'], 1, 1)
            unc3_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc3')]['probability'], 1, 1)
            unc26_prob=np.random.binomial(size=1, n=1, p=unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc26')]['probability'])
            unc29_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc29')]['probability'], 1, 1)
            
            # Get Base Uncertainty
            unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
            unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)
            
            # Get relevant sales values for uncertainties
            unc26_sales=input_p2b[(input_p2b['product']==product) & (input_p2b['year'] ==year) & (input_p2b['region']==region)]['sales']
            unc29_sales=input_p2b[(input_p2b['product']==product) & (input_p2b['year'] ==year) & (input_p2b['region']==region)]['sales']

            # Get Uncertainty Quant by year (series)
            unc1=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc1']
            unc2=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc2'])*float(unc26_sales)
            unc3=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc3'])*float(unc26_sales)
            unc26=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc26'])*float(unc26_sales)
            unc29=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc29'])*float(unc29_sales)

            # Generate RA sales
            sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            unc_all=float(sales['sales_P1_RA']) + float(sales['sales_P1_RA']*unc_base_prob) + float(unc1*unc1_prob) + float(unc2*unc2_prob) + float(unc3*unc3_prob) + float(unc26*unc26_prob) + float(unc29*unc29_prob)
            sales_ra.append(unc_all)
            
            # Generate uncertainty arrays
            unc1_ra.append(float(unc1*unc1_prob))
            unc2_ra.append(float(unc2*unc2_prob))
            unc3_ra.append(float(unc3*unc3_prob))
            unc26_ra.append(float(unc26*unc26_prob))
            unc29_ra.append(float(unc29*unc29_prob))
            
        # Get Product Region sales by year (series)
        prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)
        
        # Add uncertainty arrays to output_unc
        output_unc=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
        output_unc['unc1_ra'] = np.mean(unc1_ra)
        output_unc['unc2_ra'] = np.mean(unc2_ra)
        output_unc['unc3_ra'] = np.mean(unc3_ra)
        output_unc['unc26_ra'] = np.mean(unc26_ra)
        output_unc['unc29_ra'] = np.mean(unc29_ra)
        
        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
        output_unc_p2 = output_unc_p2.append(output_unc, ignore_index=True)
    
output_p2=output_p2.fillna(0)
output_p2

2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


Unnamed: 0,tag,product,region,units,indication,year,sales,sales_RA_10,sales_RA_25,sales_RA_50,sales_RA_75,sales_RA_90,sales_P1_RA,sales_P2_RA
0,0,xtandi,US,¥,Total,2022,3186.729835,2194.328112,3030.573980,3095.410926,3152.287723,3192.241492,3098.782281,2937.512240
1,0,xtandi,JP,¥,Total,2022,438.632820,424.482479,428.057944,435.348147,442.624259,446.334343,438.632820,435.350471
2,0,xtandi,CN,¥,Total,2022,60.320040,58.341628,58.920730,59.708042,60.788543,61.377361,60.320040,59.832633
3,0,xtandi,DE,¥,Total,2022,518.114617,501.548575,506.071711,513.489405,520.870109,526.387443,518.114617,513.706296
4,0,xtandi,FR,¥,Total,2022,260.226710,252.247382,254.176965,257.951518,261.733370,264.795894,260.226710,258.215148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,0,xtandi,FR,¥,Total,2031,61.432580,57.807029,58.567193,59.314824,60.307611,61.020842,61.432580,59.391241
86,0,xtandi,IT,¥,Total,2031,19.942742,18.783669,18.994453,19.327046,19.618204,19.851350,19.942742,19.312956
87,0,xtandi,ES,¥,Total,2031,16.842266,15.816876,16.036349,16.302782,16.552847,16.729734,16.842266,16.288062
88,0,xtandi,GB,¥,Total,2031,9.228120,8.685854,8.787916,8.921555,9.073331,9.189117,9.228120,8.930472


In [7]:
print(sum(output_p2[(output_p2['product'] =='xtandi')]['sales']))
print(sum(output_p2[(output_p2['product'] =='xtandi')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='xtandi')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='xtandi')]['sales_RA_75']))

42655.95573277718
39824.09568185688
41806.648764673824
42726.892341010585


In [8]:
#############
# XOSPATA
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'xospata'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
output_unc_p2=output_unc_p2[(output_unc_p2['product']!=product)]


for year in years:
    print(year)
    
    for region in regions:
        n=sims
        sales_ra = []
        unc4_ra = []
        unc5_ra = []
        unc6_ra = []
        unc28_ra = []
        
        for i in range(n):
            # Get Uncertainty Probability (value)
            unc4_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc4')]['probability'], 1, 1)
            unc5_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc5')]['probability'], 1, 1)
            unc6_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc6')]['probability'], 1, 1)
            unc28_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc28')]['probability'], 1, 1)

            # Get Base Uncertainty
            unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
            unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

            # Get relevant sales values for uncertainties
            unc4_sales=input_p2b[(input_p2b['product']==product) & (input_p2b['year'] ==year) & (input_p2b['region']==region)]['sales']
            unc5_sales=input_p2b[(input_p2b['product']==product) & (input_p2b['year'] ==year) & (input_p2b['region']==region)]['sales']
            unc6_sales=input_p2b[(input_p2b['product']==product) & (input_p2b['year'] ==year) & (input_p2b['region']==region)]['sales']
            unc28_sales=input_p2b[(input_p2b['product']==product) & (input_p2b['year'] ==year) & (input_p2b['region']==region)]['sales']
            
            # Get Uncertainty Quant by year (series)
            unc4=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc4'])*float(unc4_sales)
            unc5=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc5'])*float(unc5_sales)
            unc6=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc6'])*float(unc6_sales)
            unc28=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc28'])*float(unc28_sales)

            # Generate RA sales
            sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            unc_all=float(sales['sales_P1_RA']) + float(sales['sales_P1_RA']*unc_base_prob) + float(unc4*unc4_prob) + float(unc5*unc5_prob) + float(unc6*unc6_prob) + float(unc28*unc28_prob)
            sales_ra.append(unc_all)
            
            # Generate uncertainty arrays
            unc4_ra.append(float(unc4*unc4_prob))
            unc5_ra.append(float(unc5*unc5_prob))
            unc6_ra.append(float(unc6*unc6_prob))
            unc28_ra.append(float(unc28*unc28_prob))
            
        # Get Product Region sales by year (series)
        prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)
        
        # Add uncertainty arrays to output_unc
        output_unc=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
        output_unc['unc4_ra'] = np.mean(unc4_ra)
        output_unc['unc5_ra'] = np.mean(unc5_ra)
        output_unc['unc6_ra'] = np.mean(unc6_ra)
        output_unc['unc28_ra'] = np.mean(unc28_ra)
        
        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
        output_unc_p2 = output_unc_p2.append(output_unc, ignore_index=True)
    
output_p2=output_p2.fillna(0)
output_p2

2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


Unnamed: 0,tag,product,region,units,indication,year,sales,sales_RA_10,sales_RA_25,sales_RA_50,sales_RA_75,sales_RA_90,sales_P1_RA,sales_P2_RA
0,0,xtandi,US,¥,Total,2022,3186.729835,2194.328112,3030.573980,3095.410926,3152.287723,3192.241492,3098.782281,2937.512240
1,0,xtandi,JP,¥,Total,2022,438.632820,424.482479,428.057944,435.348147,442.624259,446.334343,438.632820,435.350471
2,0,xtandi,CN,¥,Total,2022,60.320040,58.341628,58.920730,59.708042,60.788543,61.377361,60.320040,59.832633
3,0,xtandi,DE,¥,Total,2022,518.114617,501.548575,506.071711,513.489405,520.870109,526.387443,518.114617,513.706296
4,0,xtandi,FR,¥,Total,2022,260.226710,252.247382,254.176965,257.951518,261.733370,264.795894,260.226710,258.215148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,0,xospata,FR,¥,Total,2031,32.290125,16.738834,18.800080,21.112265,23.869929,26.052807,29.126416,21.268125
176,0,xospata,IT,¥,Total,2031,100.264447,51.281415,59.472737,66.866508,75.363429,80.560026,90.822516,66.555159
177,0,xospata,ES,¥,Total,2031,23.282524,12.322166,14.094934,15.449459,16.959259,18.660955,21.363488,15.550170
178,0,xospata,GB,¥,Total,2031,43.577112,23.761924,26.278909,29.354338,32.581123,35.737178,40.268896,29.470809


In [9]:
print(sum(output_p2[(output_p2['product'] =='xospata') & (output_p2['year'] ==2026) & (output_p2['region']!='ALL')]['sales']))
print(sum(output_p2[(output_p2['product'] =='xospata') & (output_p2['year'] ==2026) & (output_p2['region']!='ALL')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='xospata') & (output_p2['year'] ==2026) & (output_p2['region']!='ALL')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='xospata') & (output_p2['year'] ==2026) & (output_p2['region']!='ALL')]['sales_RA_75']))

1088.5645599123377
939.3781392963408
969.6495799208164
1004.0643840764457


In [10]:
#############
# PADCEV
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'padcev'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
output_unc_p2=output_unc_p2[(output_unc_p2['product']!=product)]

for year in years:
    print(year)
    
    for region in regions:
        n=sims
        sales_ra = []
        unc7_ra = []
        unc8_ra = []
        unc9_ra = []
        unc10_ra = []
        unc11_ra = []
        unc12_ra = []
        unc13_ra = []
        unc14_ra = []
        
        for i in range(n):
            # Get Uncertainty Probability (value)
            unc7_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc7')]['probability'], 1, 1)
            unc8_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc8')]['probability'], 1, 1)
            unc9_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc9')]['probability'], 1, 1)
            unc10_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc10')]['probability'], 1, 1)
            unc11_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc11')]['probability'], 1, 1)
            unc12_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc12')]['probability'], 1, 1)
            unc13_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc13')]['probability'], 1, 1)
            unc14_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc14')]['probability'], 1, 1)
        
            # Get Base Uncertainty
            unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
            unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

            # Get Uncertainty Quant by year (series)
            unc7=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc7']
            unc8=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc8']
            unc9=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc9']
            unc10=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc10']
            unc11=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc11']
            unc12=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc12']
            unc13=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc13']
            unc14=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc14']

            # Generate RA sales
            sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            unc_all=float(sales['sales_P1_RA']) + float(sales['sales_P1_RA']*unc_base_prob) + float(unc7*unc7_prob) + float(unc8*unc8_prob) + float(unc9*unc9_prob) + float(unc10*unc10_prob) + float(unc11*unc11_prob) + float(unc12*unc12_prob) + float(unc13*unc13_prob) + float(unc14*unc14_prob)
            sales_ra.append(unc_all)
            
            # Generate uncertainty arrays
            unc7_ra.append(float(unc7*unc7_prob))
            unc8_ra.append(float(unc8*unc8_prob))
            unc9_ra.append(float(unc9*unc9_prob))
            unc10_ra.append(float(unc10*unc10_prob))
            unc11_ra.append(float(unc11*unc11_prob))
            unc12_ra.append(float(unc12*unc12_prob))
            unc13_ra.append(float(unc13*unc13_prob))
            unc14_ra.append(float(unc14*unc14_prob))

        # Get Product Worldwide sales by year (series)
        prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)
        
        # Add uncertainty arrays to output_unc
        output_unc=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
        output_unc['unc7_ra'] = np.mean(unc7_ra)
        output_unc['unc8_ra'] = np.mean(unc8_ra)
        output_unc['unc9_ra'] = np.mean(unc9_ra)
        output_unc['unc10_ra'] = np.mean(unc10_ra)
        output_unc['unc11_ra'] = np.mean(unc11_ra)
        output_unc['unc12_ra'] = np.mean(unc12_ra)
        output_unc['unc13_ra'] = np.mean(unc13_ra)
        output_unc['unc14_ra'] = np.mean(unc14_ra)
    
        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
        output_unc_p2 = output_unc_p2.append(output_unc, ignore_index=True)
    
output_p2=output_p2.fillna(0)
output_p2

2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


Unnamed: 0,tag,product,region,units,indication,year,sales,sales_RA_10,sales_RA_25,sales_RA_50,sales_RA_75,sales_RA_90,sales_P1_RA,sales_P2_RA
0,0,xtandi,US,¥,Total,2022,3186.729835,2194.328112,3030.573980,3095.410926,3152.287723,3192.241492,3098.782281,2937.512240
1,0,xtandi,JP,¥,Total,2022,438.632820,424.482479,428.057944,435.348147,442.624259,446.334343,438.632820,435.350471
2,0,xtandi,CN,¥,Total,2022,60.320040,58.341628,58.920730,59.708042,60.788543,61.377361,60.320040,59.832633
3,0,xtandi,DE,¥,Total,2022,518.114617,501.548575,506.071711,513.489405,520.870109,526.387443,518.114617,513.706296
4,0,xtandi,FR,¥,Total,2022,260.226710,252.247382,254.176965,257.951518,261.733370,264.795894,260.226710,258.215148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,0,padcev,FR,¥,Total,2031,67.683969,56.097274,58.576647,64.623853,68.523727,70.954085,63.740468,63.810735
266,0,padcev,IT,¥,Total,2031,137.397042,114.890530,120.926751,127.662905,137.337434,143.915324,128.891881,128.516536
267,0,padcev,ES,¥,Total,2031,74.871373,61.860396,64.665952,70.135842,76.428774,78.835634,70.731236,70.385111
268,0,padcev,GB,¥,Total,2031,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [11]:
print(sum(output_p2[(output_p2['product'] =='padcev')]['sales']))
print(sum(output_p2[(output_p2['product'] =='padcev')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='padcev')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='padcev')]['sales_RA_75']))

22941.477383758553
20214.662482728374
22021.373213260384
23780.71374643413


In [12]:
#############
# EVRENZO
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'evrenzo'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
output_unc_p2=output_unc_p2[(output_unc_p2['product']!=product)]

for year in years:
    print(year)
    
    for region in regions:
        n=sims
        sales_ra = []
        unc15_ra = []
        unc16_ra = []
        unc17_ra = []
        unc18_ra = []
        
        for i in range(n):
            # Get Uncertainty Probability (value)
            unc15_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc15')]['probability'], 1, 1)
            unc16_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc16')]['probability'], 1, 1)
            unc17_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc17')]['probability'], 1, 1)
            unc18_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc18')]['probability'], 1, 1)
        
            # Get Base Uncertainty
            unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
            unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

            # Get Uncertainty Quant by year (series)
            unc15=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc15']
            unc16=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc16']
            unc17=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc17']
            unc18=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc18']

            # Generate RA sales
            sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            unc_all=float(sales['sales_P1_RA']) + float(sales['sales_P1_RA']*unc_base_prob) + float(unc15*unc15_prob) + float(unc16*unc16_prob) + float(unc17*unc17_prob) + float(unc18*unc18_prob)
            sales_ra.append(unc_all)
            
            # Generate uncertainty arrays
            unc15_ra.append(float(unc15*unc15_prob))
            unc16_ra.append(float(unc16*unc16_prob))
            unc17_ra.append(float(unc17*unc17_prob))
            unc18_ra.append(float(unc18*unc18_prob))

        # Get Product Worldwide sales by year (series)
        prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)
        
        # Add uncertainty arrays to output_unc
        output_unc=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
        output_unc['unc15_ra'] = np.mean(unc15_ra)
        output_unc['unc16_ra'] = np.mean(unc16_ra)
        output_unc['unc17_ra'] = np.mean(unc17_ra)
        output_unc['unc18_ra'] = np.mean(unc18_ra)
        
        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
        output_unc_p2 = output_unc_p2.append(output_unc, ignore_index=True)
    
output_p2=output_p2.fillna(0)
output_p2

2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


Unnamed: 0,tag,product,region,units,indication,year,sales,sales_RA_10,sales_RA_25,sales_RA_50,sales_RA_75,sales_RA_90,sales_P1_RA,sales_P2_RA
0,0,xtandi,US,¥,Total,2022,3186.729835,2194.328112,3030.573980,3095.410926,3152.287723,3192.241492,3098.782281,2937.512240
1,0,xtandi,JP,¥,Total,2022,438.632820,424.482479,428.057944,435.348147,442.624259,446.334343,438.632820,435.350471
2,0,xtandi,CN,¥,Total,2022,60.320040,58.341628,58.920730,59.708042,60.788543,61.377361,60.320040,59.832633
3,0,xtandi,DE,¥,Total,2022,518.114617,501.548575,506.071711,513.489405,520.870109,526.387443,518.114617,513.706296
4,0,xtandi,FR,¥,Total,2022,260.226710,252.247382,254.176965,257.951518,261.733370,264.795894,260.226710,258.215148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,0,evrenzo,FR,¥,Total,2031,66.060153,58.019808,60.585115,65.970579,70.833466,73.245082,66.060153,65.718555
356,0,evrenzo,IT,¥,Total,2031,101.517209,89.653255,94.553024,102.200803,111.112218,114.218484,101.517209,102.502082
357,0,evrenzo,ES,¥,Total,2031,24.867392,22.022713,23.364681,25.143317,26.987421,27.947685,24.867392,25.104644
358,0,evrenzo,GB,¥,Total,2031,58.225255,51.391354,54.030026,58.292770,63.149855,65.341200,58.225255,58.351042


In [13]:
print(sum(output_p2[(output_p2['product'] =='evrenzo')]['sales']))
print(sum(output_p2[(output_p2['product'] =='evrenzo')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='evrenzo')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='evrenzo')]['sales_RA_75']))

4451.264751048861
4102.613105453689
4434.104613195332
4776.767104194852


In [14]:
#############
# ZOLBE
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'zolbe'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
output_unc_p2=output_unc_p2[(output_unc_p2['product']!=product)]

for year in years:
    print(year)
    
    for region in regions:
        n=sims
        sales_ra = []
        unc27_ra = []
        
        for i in range(n):
            # Get Uncertainty Probability (value)
            unc27_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc27')]['probability'], 1, 1)
            
            # Get Base Uncertainty
            unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
            unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)
            
            # Get relevant sales values for uncertainties
            unc27_sales=input_p2b[(input_p2b['product']==product) & (input_p2b['year'] ==year) & (input_p2b['region']==region)]['sales']
            
            # Get Uncertainty Quant by year (series)
            unc27=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc27'])*float(unc4_sales)

            # Generate RA sales
            sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            unc_all=float(sales['sales_P1_RA']) + float(sales['sales_P1_RA']*unc_base_prob) + float(unc27*unc27_prob) 
            sales_ra.append(unc_all)
            
            # Generate uncertainty arrays
            unc27_ra.append(float(unc27*unc27_prob))

        # Get Product Worldwide sales by year (series)
        prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)
        
        # Add uncertainty arrays to output_unc
        output_unc=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
        output_unc['unc27_ra'] = np.mean(unc27_ra)

        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
        output_unc_p2 = output_unc_p2.append(output_unc, ignore_index=True)
    
output_p2=output_p2.fillna(0)
output_p2

2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


Unnamed: 0,tag,product,region,units,indication,year,sales,sales_RA_10,sales_RA_25,sales_RA_50,sales_RA_75,sales_RA_90,sales_P1_RA,sales_P2_RA
0,0,xtandi,US,¥,Total,2022,3186.729835,2194.328112,3030.573980,3095.410926,3152.287723,3192.241492,3098.782281,2937.512240
1,0,xtandi,JP,¥,Total,2022,438.632820,424.482479,428.057944,435.348147,442.624259,446.334343,438.632820,435.350471
2,0,xtandi,CN,¥,Total,2022,60.320040,58.341628,58.920730,59.708042,60.788543,61.377361,60.320040,59.832633
3,0,xtandi,DE,¥,Total,2022,518.114617,501.548575,506.071711,513.489405,520.870109,526.387443,518.114617,513.706296
4,0,xtandi,FR,¥,Total,2022,260.226710,252.247382,254.176965,257.951518,261.733370,264.795894,260.226710,258.215148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
445,0,zolbe,FR,¥,Total,2031,14.478137,-53.545916,-39.845390,-25.279227,-12.604777,-5.476168,12.161635,-26.759977
446,0,zolbe,IT,¥,Total,2031,19.395337,-44.812591,-29.432891,-15.904496,-5.361077,3.590426,16.583013,-18.406038
447,0,zolbe,ES,¥,Total,2031,10.821708,-56.474027,-42.511800,-26.909721,-14.745047,-6.020144,9.739537,-29.609575
448,0,zolbe,GB,¥,Total,2031,11.565093,-56.686546,-43.743746,-26.832720,-12.312898,-5.054584,9.656853,-28.761988


In [15]:
print(sum(output_p2[(output_p2['product'] =='zolbe')]['sales']))
print(sum(output_p2[(output_p2['product'] =='zolbe')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='zolbe')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='zolbe')]['sales_RA_75']))  

5046.271924194486
1779.4222384381692
2671.3122907952074
3411.157374096777


In [16]:
#############
# FEZO
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'fezo'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
output_unc_p2=output_unc_p2[(output_unc_p2['product']!=product)]

for year in years:
    print(year)
    
    # Get Fezo US values, to create proportion
    fezo_base=output_p1[(output_p1['product']==product) & (output_p1['region']=='US') & (output_p1['year']==year)& (output_p1['indication']=='Total')]['sales_P1_RA']
    fezo_10 = float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region']=='US')]['unc19'])
    fezo_25 = float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region']=='US')]['unc20'])
    fezo_50 = float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region']=='US')]['unc21'])
    fezo_75 = float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region']=='US')]['unc22'])
    fezo_90 = float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region']=='US')]['unc23'])
    
    for region in regions:
        # Get Product Worldwide sales by year (series) 
        prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year)]
        prod_ww_sales['sales_RA_10'] = float(fezo_10/fezo_base)*float(prod_ww_sales['sales_P1_RA'])
        prod_ww_sales['sales_RA_25'] = float(fezo_25/fezo_base)*float(prod_ww_sales['sales_P1_RA'])
        prod_ww_sales['sales_RA_50'] = float(fezo_50/fezo_base)*float(prod_ww_sales['sales_P1_RA'])
        prod_ww_sales['sales_RA_75'] = float(fezo_75/fezo_base)*float(prod_ww_sales['sales_P1_RA'])
        prod_ww_sales['sales_RA_90'] = float(fezo_90/fezo_base)*float(prod_ww_sales['sales_P1_RA'])
    
        prod_ww_sales['sales_P2_RA'] = prod_ww_sales['sales_RA_50']

        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

output_p2=output_p2.fillna(0)
output_p2

2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


Unnamed: 0,tag,product,region,units,indication,year,sales,sales_RA_10,sales_RA_25,sales_RA_50,sales_RA_75,sales_RA_90,sales_P1_RA,sales_P2_RA
0,0,xtandi,US,¥,Total,2022,3186.729835,2194.328112,3030.573980,3095.410926,3152.287723,3192.241492,3098.782281,2937.512240
1,0,xtandi,JP,¥,Total,2022,438.632820,424.482479,428.057944,435.348147,442.624259,446.334343,438.632820,435.350471
2,0,xtandi,CN,¥,Total,2022,60.320040,58.341628,58.920730,59.708042,60.788543,61.377361,60.320040,59.832633
3,0,xtandi,DE,¥,Total,2022,518.114617,501.548575,506.071711,513.489405,520.870109,526.387443,518.114617,513.706296
4,0,xtandi,FR,¥,Total,2022,260.226710,252.247382,254.176965,257.951518,261.733370,264.795894,260.226710,258.215148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535,0,fezo,FR,¥,Total,2031,56.674236,29.490610,36.931151,47.485495,61.986402,75.962309,54.974009,47.485495
536,0,fezo,IT,¥,Total,2031,47.452308,24.946508,31.240563,40.168626,52.435141,64.257550,46.503262,40.168626
537,0,fezo,ES,¥,Total,2031,21.534861,11.379015,14.249963,18.322379,23.917586,29.310219,21.211838,18.322379
538,0,fezo,GB,¥,Total,2031,68.250000,36.246344,45.391370,58.363511,76.186299,93.363817,67.567500,58.363511


In [17]:
print(sum(output_p2[(output_p2['product'] =='fezo')]['sales']))
print(sum(output_p2[(output_p2['product'] =='fezo')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='fezo')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='fezo')]['sales_RA_75']))   

26586.692485035557
17580.636101684573
22604.857100110406
29509.866953694313


In [18]:
#############
# MIRABEGRON
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'mirabegron'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]

for region in regions:
    n=sims
    sales_ra22 = []
    sales_ra23 = []
    sales_ra24 = []
    sales_ra25 = []
    
    for i in range(n):
        # Get Base Uncertainty
        unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
        unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

        # Get High,Base,Low Uncertainty
        unc_hbl_prob=np.random.randint(1,4)
        # unc_hbl_prob=1

        # Generate RA sales - High
        if (unc_hbl_prob==1):
            sales22=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2022) & (unc_pr['region'] ==region)]['unc24'])*float(1+unc_base_prob)
            sales23=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2023) & (unc_pr['region'] ==region)]['unc24'])*float(1+unc_base_prob)
            sales24=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2024) & (unc_pr['region'] ==region)]['unc24'])*float(1+unc_base_prob)
            sales25=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2025) & (unc_pr['region'] ==region)]['unc24'])*float(1+unc_base_prob)

        # Generate RA sales - Med
        if (unc_hbl_prob==2):
            sales22=float(input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2022) ]['sales'])*float(1+unc_base_prob)
            sales23=float(input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2023) ]['sales'])*float(1+unc_base_prob)
            sales24=float(input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2024) ]['sales'])*float(1+unc_base_prob)
            sales25=float(input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2025) ]['sales'])*float(1+unc_base_prob)
        
        # Generate RA sales - Low
        if (unc_hbl_prob==3):
            sales22=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2022) & (unc_pr['region'] ==region)]['unc25'])*float(1+unc_base_prob)
            sales23=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2023) & (unc_pr['region'] ==region)]['unc25'])*float(1+unc_base_prob)
            sales24=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2024) & (unc_pr['region'] ==region)]['unc25'])*float(1+unc_base_prob)
            sales25=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2025) & (unc_pr['region'] ==region)]['unc25'])*float(1+unc_base_prob)
        
        sales_ra22.append(sales22)
        sales_ra23.append(sales23)
        sales_ra24.append(sales24)
        sales_ra25.append(sales25)
 
     # Get Product, Region sales by year (series)
    prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2022) ]
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra22, 10)
    prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra22, 25)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra22, 50)
    prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra22, 75)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra22, 90)
    prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra22)

    # Append to sales df
    output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

    # Get Product Worldwide sales by year (series)
    prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2023) ]
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra23, 10)
    prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra23, 25)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra23, 50)
    prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra23, 75)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra23, 90)
    prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra23)

    # Append to sales df
    output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

    # Get Product Worldwide sales by year (series)
    prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2024) ]
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra24, 10)
    prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra24, 25)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra24, 50)
    prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra24, 75)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra24, 90)
    prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra24)

    # Append to sales df
    output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

    # Get Product Worldwide sales by year (series)
    prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2025) ]
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
    prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
    prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
    prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

    # Append to sales df
    output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
    
        # Get Product Worldwide sales by year (series)
    prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2026) ]
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
    prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
    prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
    prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

    # Append to sales df
    output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
    
        # Get Product Worldwide sales by year (series)
    prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2027) ]
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
    prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
    prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
    prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

    # Append to sales df
    output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
    
        # Get Product Worldwide sales by year (series)
    prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2028) ]
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
    prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
    prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
    prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

    # Append to sales df
    output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
    
        # Get Product Worldwide sales by year (series)
    prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2029) ]
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
    prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
    prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
    prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

    # Append to sales df
    output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
    
        # Get Product Worldwide sales by year (series)
    prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2030) ]
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
    prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
    prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
    prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

    # Append to sales df
    output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
    
        # Get Product Worldwide sales by year (series)
    prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==2031) ]
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
    prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
    prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
    prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

    # Append to sales df
    output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
    
output_p2=output_p2.fillna(0)
output_p2

Unnamed: 0,tag,product,region,units,indication,year,sales,sales_RA_10,sales_RA_25,sales_RA_50,sales_RA_75,sales_RA_90,sales_P1_RA,sales_P2_RA
0,0,xtandi,US,¥,Total,2022,3186.729835,2194.328112,3030.573980,3095.410926,3152.287723,3192.241492,3098.782281,2937.512240
1,0,xtandi,JP,¥,Total,2022,438.632820,424.482479,428.057944,435.348147,442.624259,446.334343,438.632820,435.350471
2,0,xtandi,CN,¥,Total,2022,60.320040,58.341628,58.920730,59.708042,60.788543,61.377361,60.320040,59.832633
3,0,xtandi,DE,¥,Total,2022,518.114617,501.548575,506.071711,513.489405,520.870109,526.387443,518.114617,513.706296
4,0,xtandi,FR,¥,Total,2022,260.226710,252.247382,254.176965,257.951518,261.733370,264.795894,260.226710,258.215148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
625,0,mirabegron,WWex8,¥,Total,2027,208.616854,269.277340,272.986585,280.683157,287.628358,292.371622,208.616854,280.671709
626,0,mirabegron,WWex8,¥,Total,2028,136.442589,269.277340,272.986585,280.683157,287.628358,292.371622,136.442589,280.671709
627,0,mirabegron,WWex8,¥,Total,2029,128.092440,269.277340,272.986585,280.683157,287.628358,292.371622,128.092440,280.671709
628,0,mirabegron,WWex8,¥,Total,2030,120.469347,269.277340,272.986585,280.683157,287.628358,292.371622,120.469347,280.671709


In [19]:
print(sum(output_p2[(output_p2['product'] =='mirabegron')]['sales']))
print(sum(output_p2[(output_p2['product'] =='mirabegron')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='mirabegron')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='mirabegron')]['sales_RA_75']))  

8804.690517700881
9464.331299582316
13784.061467520522
17412.011457593082


In [20]:
#############
# TACROLIMUS
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'tacrolimus'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]

for year in years:
    print(year)
    
    for region in regions:
        n=sims
        sales_ra = []

        for i in range(n):
            # Get Base Uncertainty
            unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
            unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)
            
            # Generate RA sales
            sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            unc_all=float(sales['sales_P1_RA']) + float(sales['sales_P1_RA']*unc_base_prob)
            sales_ra.append(unc_all)
            
        # Get Product Worldwide sales by year (series)
        prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)
    
        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
    
output_p2=output_p2.fillna(0)
output_p2

2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


Unnamed: 0,tag,product,region,units,indication,year,sales,sales_RA_10,sales_RA_25,sales_RA_50,sales_RA_75,sales_RA_90,sales_P1_RA,sales_P2_RA
0,0,xtandi,US,¥,Total,2022,3186.729835,2194.328112,3030.573980,3095.410926,3152.287723,3192.241492,3098.782281,2937.512240
1,0,xtandi,JP,¥,Total,2022,438.632820,424.482479,428.057944,435.348147,442.624259,446.334343,438.632820,435.350471
2,0,xtandi,CN,¥,Total,2022,60.320040,58.341628,58.920730,59.708042,60.788543,61.377361,60.320040,59.832633
3,0,xtandi,DE,¥,Total,2022,518.114617,501.548575,506.071711,513.489405,520.870109,526.387443,518.114617,513.706296
4,0,xtandi,FR,¥,Total,2022,260.226710,252.247382,254.176965,257.951518,261.733370,264.795894,260.226710,258.215148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,0,tacrolimus,FR,¥,Total,2031,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
716,0,tacrolimus,IT,¥,Total,2031,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
717,0,tacrolimus,ES,¥,Total,2031,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
718,0,tacrolimus,GB,¥,Total,2031,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [21]:
print(sum(output_p2[(output_p2['product'] =='tacrolimus')]['sales']))
print(sum(output_p2[(output_p2['product'] =='tacrolimus')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='tacrolimus')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='tacrolimus')]['sales_RA_75']))  

14101.779121712712
13753.782817109692
14107.257217646646
14450.73910662566


In [22]:
#############
# LEXISCAN
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
product = 'lexiscan'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]

for year in years:
    print(year)
    n=sims
    sales_ra = []
    
    for i in range(n):
        # Get Base Uncertainty
        unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year)& (unc_pr['region'] =='US')]['unc_base']
        unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

        # Generate RA sales
        sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['year']==year) ]
        unc_all=float(sales['sales_P1_RA']) + float(sales['sales_P1_RA']*unc_base_prob)
        sales_ra.append(unc_all)

    # Get Product Worldwide sales by year (series)
    prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['year']==year) ]
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
    prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
    prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
    prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)

    
    # Append to sales df
    output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
    
output_p2=output_p2.fillna(0)
output_p2

2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


Unnamed: 0,tag,product,region,units,indication,year,sales,sales_RA_10,sales_RA_25,sales_RA_50,sales_RA_75,sales_RA_90,sales_P1_RA,sales_P2_RA
0,0,xtandi,US,¥,Total,2022,3186.729835,2194.328112,3030.573980,3095.410926,3152.287723,3192.241492,3098.782281,2937.512240
1,0,xtandi,JP,¥,Total,2022,438.632820,424.482479,428.057944,435.348147,442.624259,446.334343,438.632820,435.350471
2,0,xtandi,CN,¥,Total,2022,60.320040,58.341628,58.920730,59.708042,60.788543,61.377361,60.320040,59.832633
3,0,xtandi,DE,¥,Total,2022,518.114617,501.548575,506.071711,513.489405,520.870109,526.387443,518.114617,513.706296
4,0,xtandi,FR,¥,Total,2022,260.226710,252.247382,254.176965,257.951518,261.733370,264.795894,260.226710,258.215148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,0,lexiscan,WW,¥,Total,2027,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
726,0,lexiscan,WW,¥,Total,2028,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
727,0,lexiscan,WW,¥,Total,2029,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
728,0,lexiscan,WW,¥,Total,2030,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [23]:
print(sum(output_p2[(output_p2['product'] =='lexiscan')]['sales']))
print(sum(output_p2[(output_p2['product'] =='lexiscan')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='lexiscan')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='lexiscan')]['sales_RA_75']))

1174.7385985710891
1154.2034115023137
1176.7344285493848
1204.9529332919653


In [24]:
#############
# OTHERS
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
region = 'WW'
other_products = ['evenity','cresemba','tamsulosin','suglat','ambisome','vesicare','mycamine','sujanu','symraf','cimzia','blincyto','repatha','linzess',
'vesomni','myslee/stilnox','irribow','gonax','feburic','asamax','josamycin','non prod','istodax','other merchandise products','allelock','tr','acr','blz','bonoteo','seroquel','geninax','col','inf-v','kiklin',
'ofa','regnite','srs','st','dificlir','p_aco - acofide','other astellas products']

for product in other_products:
    # Clear output for rerun
    output_p2=output_p2[(output_p2['product']!=product)]
    print(product)
    for year in years:
        n=sims
        sales_ra = []
    
        for i in range(n):
            # Get Base Uncertainty
            unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year)]['unc_base']
            unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

            # Generate RA sales
            sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            unc_all=float(sales['sales_P1_RA']) + float(sales['sales_P1_RA']*unc_base_prob)
            sales_ra.append(unc_all)

        # Get Product Worldwide sales by year (series)
        prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)
    

        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
    
output_p2=output_p2.fillna(0)
output_p2

evenity
cresemba
tamsulosin
suglat
ambisome
vesicare
mycamine
sujanu
symraf
cimzia
blincyto
repatha
linzess
vesomni
myslee/stilnox
irribow
gonax
feburic
asamax
josamycin
non prod
istodax
other merchandise products
allelock
tr
acr
blz
bonoteo
seroquel
geninax
col
inf-v
kiklin
ofa
regnite
srs
st
dificlir
p_aco - acofide
other astellas products


Unnamed: 0,tag,product,region,units,indication,year,sales,sales_RA_10,sales_RA_25,sales_RA_50,sales_RA_75,sales_RA_90,sales_P1_RA,sales_P2_RA
0,0,xtandi,US,¥,Total,2022,3186.729835,2194.328112,3030.573980,3095.410926,3152.287723,3192.241492,3098.782281,2937.512240
1,0,xtandi,JP,¥,Total,2022,438.632820,424.482479,428.057944,435.348147,442.624259,446.334343,438.632820,435.350471
2,0,xtandi,CN,¥,Total,2022,60.320040,58.341628,58.920730,59.708042,60.788543,61.377361,60.320040,59.832633
3,0,xtandi,DE,¥,Total,2022,518.114617,501.548575,506.071711,513.489405,520.870109,526.387443,518.114617,513.706296
4,0,xtandi,FR,¥,Total,2022,260.226710,252.247382,254.176965,257.951518,261.733370,264.795894,260.226710,258.215148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1125,0,other astellas products,WW,¥,Total,2027,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1126,0,other astellas products,WW,¥,Total,2028,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1127,0,other astellas products,WW,¥,Total,2029,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1128,0,other astellas products,WW,¥,Total,2030,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [25]:
print(sum(output_p2[(output_p2['product'].isin(other_products))]['sales']))
print(sum(output_p2[(output_p2['product'].isin(other_products))]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'].isin(other_products))]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'].isin(other_products))]['sales_RA_75']))

9797.389256678245
9322.677269487933
9794.204793438947
10280.429823178367


## Prep Results for Analysis

In [26]:
# Add WW sales to output
output_p2a=output_p2
output_p2a=output_p2a.groupby(['product','units','indication','year']).sum().reset_index()
output_p2a['region']='ALL'
output_p2 = output_p2.append(output_p2a, ignore_index=True)

# Consolidate uncertainty output
output_unc_p2=output_unc_p2.groupby(['product','units','indication','year', 'region']).max().reset_index()

# Remove interim dfs from memory
del input_p2a 
del input_p2b 
del sales
del prod_ww_sales
del output_unc
del output_p2a

output_file = 'output_p2.csv'
path = os.path.join(output_folder, output_file)
output_p2.to_csv(path)

output_file = 'output_unc_p2.csv'
path = os.path.join(output_folder, output_file)
output_unc_p2.to_csv(path)

output_file = 'scenarios_p2.csv'
path = os.path.join(output_folder, output_file)
scenarios_p2.to_csv(path)

In [27]:
%whos DataFrame

Variable         Type         Data/Info
---------------------------------------
output_p1        DataFrame                          pro<...>\n[4550 rows x 8 columns]
output_p2        DataFrame          tag product region <...>n[1620 rows x 14 columns]
output_unc_p2    DataFrame         product units indica<...>\n[450 rows x 30 columns]
sales_ci_clean   DataFrame                          pro<...>\n[2310 rows x 7 columns]
scenarios_p1     DataFrame           product units regi<...>[254000 rows x 9 columns]
scenarios_p2     DataFrame    Empty DataFrame\nColumns:<...>es, sim_sales]\nIndex: []
unc_pr           DataFrame         product region  year<...>n[1210 rows x 38 columns]
unc_pr_desc      DataFrame       uncertainties         <...>        US         0.15  
unc_pri          DataFrame         product region units<...>n[1170 rows x 24 columns]
unc_pri_desc     DataFrame      uncertainties          <...>aint.  \n2  HSCT-Maint.  


# Phase 3: Above Brand

## Prep for Phase 3 and Simulation

In [28]:
# Create output dfs
output_p3 = pd.DataFrame(columns=['tag','product', 'region', 'units', 'indication', 'year', 'sales', 'sales_RA_10', 'sales_RA_25','sales_RA_50','sales_RA_75', 'sales_RA_90', 
                                  'sales_P1_RA', 'sales_P2_RA', 'sales_P3_RA']) # Outputs of Phase 3

scenarios_p3 = pd.DataFrame(columns=['scenario', 'tag', 'product', 'region', 'units', 'indication', 'year', 'sales', 'sim_sales']) # Scenarios of Phase 3
output_unc_p3 = pd.DataFrame(columns=['tag','product', 'region', 'units', 'indication', 'year', 'sales', 'variable', 'value']) # Uncertainties of Phase 3

# Inputs
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB']
products = ['evrenzo', 'fezo', 'mirabegron', 'padcev', 'tacrolimus', 'xospata', 'xtandi', 'zolbe']
input_p3a=output_p2[~(output_p2['region'].isin(regions))] # These do not go thru phase3
input_p3b=output_p2[(output_p2['region'].isin(regions))] # These go thru phase3

for product in products:
    print(product)

    for year in years:

        for region in regions:
            n=sims
            sales_ra = []
            unc_ab1_ra = []
            unc_ab2_ra = []
            unc_ab3_ra = []
            unc_ab4_ra = []
            unc_ab5_ra = []

            for i in range(n):
                # Get Above Brand Uncertainty Probability (value)
                unc_ab1_prob=np.random.binomial(size=1, n=1, p=unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc_ab1')]['probability'])
                unc_ab2_prob=np.random.binomial(size=1, n=1, p=unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc_ab2')]['probability'])
                unc_ab3_prob=np.random.binomial(size=1, n=1, p=unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc_ab3')]['probability'])
                unc_ab4_prob=np.random.binomial(size=1, n=1, p=unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc_ab4')]['probability'])
                unc_ab5_prob=np.random.binomial(size=1, n=1, p=unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc_ab5')]['probability'])
                
                # Get sales from phase 2
                sales=input_p3b[(input_p3b['product'] ==product) & (input_p3b['region']==region) & (input_p3b['year']==year) ]

                # Get Uncertainty Quant by year (series)
                unc_ab1=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_ab1']*float(sales['sales_P2_RA'])*unc_ab1_prob
                unc_ab2=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_ab2']*float(sales['sales_P2_RA'])*unc_ab2_prob
                unc_ab3=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_ab3']*float(sales['sales_P2_RA'])*unc_ab3_prob
                unc_ab4=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_ab4']*float(sales['sales_P2_RA'])*unc_ab4_prob
                unc_ab5=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_ab5']*float(sales['sales_P2_RA'])*unc_ab5_prob
                
                # Generate RA sales
                unc_all=float(sales['sales_P2_RA']) + unc_ab1 + unc_ab2 + unc_ab3 + unc_ab4 + unc_ab5
                sales_ra.append(unc_all)
                
                # Generate uncertainty arrays
                unc_ab1_ra.append(float(unc_ab1))
                unc_ab2_ra.append(float(unc_ab2))
                unc_ab3_ra.append(float(unc_ab3))
                unc_ab4_ra.append(float(unc_ab4))
                unc_ab5_ra.append(float(unc_ab5))
            
            # Get Product Worldwide sales by year (series)
            prod_ww_sales=input_p3b[(input_p3b['product'] ==product) & (input_p3b['region']==region) & (input_p3b['year']==year) ]
            prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
            prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
            prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
            prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
            prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
            prod_ww_sales['sales_P3_RA'] = np.mean(sales_ra)
            
            # Add uncertainty arrays to output_unc
            output_unc=input_p3b[(input_p3b['product'] ==product) & (input_p3b['region']==region) & (input_p3b['year']==year) ][['tag', 'product', 'region', 'units', 'indication', 'year', 'sales','sales_P1_RA', 'sales_P2_RA']]
            output_unc['unc_ab1_ra'] = np.mean(unc_ab1_ra)
            output_unc['unc_ab2_ra'] = np.mean(unc_ab2_ra)
            output_unc['unc_ab3_ra'] = np.mean(unc_ab3_ra)
            output_unc['unc_ab4_ra'] = np.mean(unc_ab4_ra)
            output_unc['unc_ab5_ra'] = np.mean(unc_ab5_ra)

            # Append to sales df
            output_p3 = output_p3.append(prod_ww_sales, ignore_index=True)
            output_unc_p2 = output_unc_p2.append(output_unc, ignore_index=True)
            
output_p3=output_p3.fillna(0)
output_p3

evrenzo
fezo
mirabegron
padcev
tacrolimus
xospata
xtandi
zolbe


Unnamed: 0,tag,product,region,units,indication,year,sales,sales_RA_10,sales_RA_25,sales_RA_50,sales_RA_75,sales_RA_90,sales_P1_RA,sales_P2_RA,sales_P3_RA
0,0,evrenzo,US,¥,Total,2022,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0,evrenzo,JP,¥,Total,2022,55.767542,50.195508,50.195508,55.772787,55.772787,55.772787,55.767542,55.772787,53.012034
2,0,evrenzo,CN,¥,Total,2022,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0,evrenzo,DE,¥,Total,2022,16.820283,14.214146,14.214146,14.214146,16.722525,16.722525,16.820283,16.722525,15.154788
4,0,evrenzo,FR,¥,Total,2022,0.866087,0.721654,0.721654,0.721654,0.849005,0.849005,0.866087,0.849005,0.763680
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
635,0,zolbe,DE,¥,Total,2031,36.749689,-3.776007,-3.776007,-3.209606,-3.209606,-3.209606,32.523475,-3.776007,-3.407846
636,0,zolbe,FR,¥,Total,2031,14.478137,-26.759977,-26.759977,-22.745981,-22.745981,-22.745981,12.161635,-26.759977,-24.170949
637,0,zolbe,IT,¥,Total,2031,19.395337,-18.406038,-18.406038,-15.645132,-15.645132,-15.645132,16.583013,-18.406038,-16.514818
638,0,zolbe,ES,¥,Total,2031,10.821708,-29.609575,-29.609575,-25.168138,-25.168138,-25.168138,9.739537,-29.609575,-26.966920


## Prep Results for Analysis

In [29]:
# Update output uncertainty df for PTRS
output_unc_p2=output_unc_p2.groupby(['product','units','indication','year', 'region']).max().reset_index()
output_unc_p2['unc_ptrs'] = output_unc_p2['sales_P1_RA'] - output_unc_p2['sales']

# Update output uncertainty df for PTRS
output_unc_p2=output_unc_p2.fillna(0)

# Select columns needed
output_unc_p3=output_unc_p2[['tag', 'product', 'region', 'units', 'indication', 'year', 'sales',
                             'unc_ptrs','unc1_ra', 'unc2_ra', 'unc3_ra', 'unc4_ra',
                             'unc5_ra', 'unc6_ra', 'unc7_ra', 'unc8_ra', 'unc9_ra', 'unc10_ra',
                             'unc11_ra', 'unc12_ra', 'unc13_ra', 'unc14_ra', 'unc15_ra', 'unc16_ra',
                             'unc17_ra', 'unc18_ra', 'unc26_ra', 'unc27_ra', 'unc28_ra', 'unc29_ra',
                             'unc_ab1_ra', 'unc_ab2_ra','unc_ab3_ra', 'unc_ab4_ra', 'unc_ab5_ra']]

# Pivot table
id_vars=['tag', 'product', 'region', 'units', 'indication', 'year', 'sales']
value_vars=['unc_ptrs','unc1_ra', 'unc2_ra', 'unc3_ra', 'unc4_ra',
            'unc5_ra', 'unc6_ra', 'unc7_ra', 'unc8_ra', 'unc9_ra', 'unc10_ra',
            'unc11_ra', 'unc12_ra', 'unc13_ra', 'unc14_ra', 'unc15_ra', 'unc16_ra',
            'unc17_ra', 'unc18_ra', 'unc26_ra', 'unc27_ra', 'unc28_ra', 'unc29_ra', 
            'unc_ab1_ra', 'unc_ab2_ra','unc_ab3_ra', 'unc_ab4_ra', 'unc_ab5_ra']

output_unc_p3=pd.melt(output_unc_p3, id_vars=id_vars, value_vars=value_vars)

# # Merge uncertainty name to output_unc_p22
# output_unc_p22=output_unc_p22.merge(unc_pr_desc, how='left', left_on='lkey', right_on='rkey')

# Add back tags that did not go through phase 3
input_p3a['sales_P3_RA']=input_p3a['sales_P2_RA']
output_p3=pd.concat([output_p3, input_p3a[input_p3a['region']!='ALL']], ignore_index=True, axis=0)

# Add WW sales to output
output_phase3_agg=output_p3
output_phase3_agg=output_phase3_agg.groupby(['product','units','indication','year']).sum().reset_index()
output_phase3_agg['region']='ALL'
output_p3 = output_p3.append(output_phase3_agg, ignore_index=True)

# Remove interim dfs from memory
del prod_ww_sales
del sales
del input_p3a 
del input_p3b 
del output_phase3_agg

output_file = 'output_p3.csv'
path = os.path.join(output_folder, output_file)
output_p3.to_csv(path)

output_file = 'output_unc_p3.csv'
path = os.path.join(output_folder, output_file)
output_unc_p3.to_csv(path)

output_file = 'scenarios_p3.csv'
path = os.path.join(output_folder, output_file)
scenarios_p3.to_csv(path)

In [30]:
%whos DataFrame

Variable         Type         Data/Info
---------------------------------------
output_p1        DataFrame                          pro<...>\n[4550 rows x 8 columns]
output_p2        DataFrame          tag product region <...>n[1620 rows x 14 columns]
output_p3        DataFrame          tag  product region<...>n[1620 rows x 15 columns]
output_unc       DataFrame         tag product region u<...>        0.0         0.0  
output_unc_p2    DataFrame         product units indica<...>\n[690 rows x 37 columns]
output_unc_p3    DataFrame           tag  product regio<...>n[19320 rows x 9 columns]
sales_ci_clean   DataFrame                          pro<...>\n[2310 rows x 7 columns]
scenarios_p1     DataFrame           product units regi<...>[254000 rows x 9 columns]
scenarios_p2     DataFrame    Empty DataFrame\nColumns:<...>es, sim_sales]\nIndex: []
scenarios_p3     DataFrame    Empty DataFrame\nColumns:<...>es, sim_sales]\nIndex: []
unc_pr           DataFrame         product region  year<...>

# Metadata

In [31]:
sim_end = datetime.now()
td =  sim_end - sim_start
td_mins = int(round(td.total_seconds() / 60))
metatdata = {'sim_start': sim_start, 'sim_end': sim_end, 'sims_run': sims, 'sim_time': td_mins}
metatdata_df=pd.DataFrame.from_dict(metatdata, orient='index')

output_file = 'metatdata.csv'
path = os.path.join(output_folder, output_file)
metatdata_df.to_csv(path)