In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
from datetime import datetime
import time
import os
import warnings
warnings.filterwarnings("ignore")

######################
# METADATA
######################
sim_start = datetime.now()
sims=3
input_folder = r'C:\Users\A4023862\OneDrive - Astellas Pharma Inc\LRF\PS22\inputs'
output_folder = r'C:\Users\A4023862\OneDrive - Astellas Pharma Inc\LRF\PS22\outputs\dev'

######################
# READ IN DATA
######################
input_file = 'inputs_v2.xlsx'
path = os.path.join(input_folder, input_file)

sales_ci=pd.read_excel(path, sheet_name='sales', engine='openpyxl')
unc_pr=pd.read_excel(path, sheet_name='unc_pr', engine='openpyxl')
unc_pr_desc=pd.read_excel(path, sheet_name='unc_pr_desc', engine='openpyxl')
unc_pri=pd.read_excel(path, sheet_name='unc_pri', engine='openpyxl')
unc_pri_desc=pd.read_excel(path, sheet_name='unc_pri_desc', engine='openpyxl')

######################
# PREP BASELINE DATA
######################
sales_ci=sales_ci.drop_duplicates()
sales_ci=sales_ci.melt(id_vars=['product', 'region', 'units', 'indication'], 
        var_name="year", 
        value_name="sales")
sales_ci=sales_ci[(sales_ci['units'] =='¥')]

# Exclude indications that are not needed
exclude_ind = ['Adjustments', 'Central Adjustments', 'Total', '0', 'mPC 1L']
sales_ci=sales_ci[(~sales_ci['indication'].isin(exclude_ind))]

# Partition WW sales into 9 segments -> US, JP, CN, DE, FR, ES, IT, GB, WWex8
sales_ci_WW=sales_ci[(sales_ci['region']=='WW')]
ast8 = ['US','JP','CN', 'DE','FR','ES','IT','GB']
sales_ci_ast8=sales_ci[(sales_ci['region'].isin(ast8))]

sales_ci_WWex8=sales_ci_ast8.groupby(['product','units','indication','year']).sum().reset_index()
sales_ci_WWex8=sales_ci_WW.merge(sales_ci_WWex8, how='right', on=['product', 'units', 'indication', 'year'])
sales_ci_WWex8['sales']=sales_ci_WWex8['sales_x']-sales_ci_WWex8['sales_y']
sales_ci_WWex8['region']='WWex8'
sales_ci_WWex8 = sales_ci_WWex8[['product','region', 'units', 'indication', 'year', 'sales']]

# Generate new sales table
sales_ci_clean=pd.concat([sales_ci_ast8, sales_ci_WWex8])

# Add back non-strategic products
sales_ci_nsp=sales_ci[(sales_ci['indication']=='Non-Strategic')]
sales_ci_clean=pd.concat([sales_ci_clean, sales_ci_nsp])

# Create tag
sales_ci_clean['tag'] = sales_ci_clean['product'] + sales_ci_clean['region'] + sales_ci_clean['units'] + sales_ci_clean['indication'].astype(str)+ sales_ci_clean['year'].astype(str)

# Validation
print(sum(sales_ci_clean[(sales_ci_clean['year']==2025)]['sales']))  #18393
print(sum(sales_ci_clean[(sales_ci_clean['product']=='xtandi') & (sales_ci_clean['year']==2025)]['sales']))  #6963
print(sum(sales_ci_clean[(sales_ci_clean['product']=='mirabegron') & (sales_ci_clean['year']==2025)]['sales']))  #1223
print(sales_ci_clean['tag'].nunique()) # 2390

# Remove interim dfs from memory
del sales_ci_WW
del sales_ci_ast8
del sales_ci_WWex8
del sales_ci_nsp
del sales_ci

# Save new sales data
output_file = 'sales_clean.csv'
path = os.path.join(output_folder, output_file)
sales_ci_clean.to_csv(path)

18176.63205186673
6963.730575326161
1223.22
2300


In [34]:
%whos DataFrame

Variable         Type         Data/Info
---------------------------------------
output_p1        DataFrame                          pro<...>\n[4550 rows x 8 columns]
output_p2        DataFrame          tag product region <...>n[9580 rows x 15 columns]
output_unc_p2    DataFrame         product units indica<...>\n[360 rows x 28 columns]
sales_ci_clean   DataFrame                          pro<...>\n[2310 rows x 7 columns]
sales_exp1       DataFrame                             <...>\n[1130 rows x 7 columns]
scenarios_exp1   DataFrame          scenario           <...>\n[3390 rows x 9 columns]
scenarios_p1     DataFrame                          pro<...>n[10320 rows x 9 columns]
scenarios_p2     DataFrame         scenario_p1  scenari<...>n[8631 rows x 11 columns]
scenarios_p2a    DataFrame         product units region<...>\n[1130 rows x 9 columns]
scenarios_p2b    DataFrame       scenario_p1  scenario_<...>      0.0           0.0  
unc_pr           DataFrame         product region  year<...>

# Phase 1: PTRS

## Simulation

In [35]:
#############
# CREATE OUTPUT DFS FOR PHASE 1
#############
output_p1 = pd.DataFrame(columns=['tag','product', 'region', 'units', 'indication', 'year', 'sales', 'sales_RA_10', 'sales_RA_25', 'sales_RA_50', 'sales_RA_75', 'sales_RA_90']) # Output of Phase 1
scenarios_p1 = pd.DataFrame(columns=['scenario', 'tag', 'product', 'region', 'units', 'indication', 'year', 'sales', 'sim_sales']) # Scenarios of Phase 1

#############
# GENERATE SCENARIOS FOR TAGS IN P1
#############
# Add PTRS values to sales data
sales_ci_unc = sales_ci_clean.merge(unc_pri, how='right', on=['product','region', 'units', 'indication', 'year'])

scenario_sdf=[]
product_sdf=[]
region_sdf=[]
units_sdf=[]
ind_sdf=[]
year_sdf=[]
sales_sdf=[]
tag_sdf=[]
sim_sales_sdf=[]
    
# Loop through rows
for index, row in sales_ci_unc.iterrows():
# for index, row in sales_ci_unc.iloc[29:30].iterrows():
    n=sims
    sales_ra = []
    unc_ptrs_ra = []
    
    for i in range(n):
        scenario_sdf.append((i+1))
        product_sdf.append(row.values[0])
        region_sdf.append(row.values[1])
        units_sdf.append(row.values[2])
        ind_sdf.append(row.values[3])
        year_sdf.append(row.values[4])
        sales_sdf.append(row.values[5])
        tag_sdf.append(row.values[6])

        # Get sales (value)
        sales = row.values[5]

        # Get PTRS Uncertainty Probability (value)
        unc_ptrs_prob=np.random.binomial(size=1, n=1, p=row.values[7])
        
        # Get Uncertainty Value (value) for others
        unc1=row.values[8]
        unc2=row.values[9]
        
        if ((unc_ptrs_prob == 1) and ((~np.isnan(unc1)) or (~np.isnan(unc2)) )):
            m=sims
            for j in range(m):

                # Get Uncertainty Probability (value) for others
                unc1_prob=np.random.randint(1,4)
                
                if ((unc1_prob == 1)):
                    sales=unc1
                
                if ((unc1_prob == 2)):
                    sales=unc2
                    
        
        # Generate RA sales
        unc_all=float(sales)*float(unc_ptrs_prob)
        sales_ra.append(unc_all)
        sim_sales_sdf.append(unc_all)
        
    # Get Product Worldwide sales by year (series)
    prod_ww_sales = sales_ci_unc.iloc[index]
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
    prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
    prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
    prod_ww_sales['sales_P1_RA'] = np.mean(sales_ra)
    
    # Append to sales df
    output_p1 = output_p1.append(prod_ww_sales, ignore_index=True)
    
    # Develop scenario_df
    scenarios_p1= pd.DataFrame({'scenario': scenario_sdf,'tag': tag_sdf,'product': product_sdf,'region': region_sdf, 'units': units_sdf, 
                                'indication': ind_sdf, 'year': year_sdf, 'sales': sales_sdf, 'sim_sales': sim_sales_sdf})
    
    
#############
# GENERATE SCENARIOS FOR TAGS NOT IN P1
#############
# Create df of tags without ptrs uncertainty
sales_exp1=sales_ci_clean[~(sales_ci_clean['tag'].isin(sales_ci_unc['tag']))]
sales_exp1 = sales_exp1[['tag', 'product', 'region', 'units', 'indication', 'year', 'sales']]

scenario_sdf=[]
product_sdf=[]
region_sdf=[]
units_sdf=[]
ind_sdf=[]
year_sdf=[]
sales_sdf=[]
tag_sdf=[]
sim_sales_sdf=[]
    
# Loop through rows
for index, row in sales_exp1.iterrows():
    n=sims
    
    for i in range(n):
        scenario_sdf.append((i+1))
        tag_sdf.append(row.values[0])
        product_sdf.append(row.values[1])
        region_sdf.append(row.values[2])
        units_sdf.append(row.values[3])
        ind_sdf.append(row.values[4])
        year_sdf.append(row.values[5])
        sales_sdf.append(row.values[6])
        sim_sales_sdf.append(row.values[6])
        
            
    # Develop scenario_df
    scenarios_exp1= pd.DataFrame({'scenario': scenario_sdf,'tag': tag_sdf,'product': product_sdf,'region': region_sdf, 'units': units_sdf, 
                                'indication': ind_sdf, 'year': year_sdf, 'sales': sales_sdf, 'sim_sales': sim_sales_sdf})

# Save outputs
output_p1 = output_p1[['tag','product','region', 'units', 'indication', 'year', 'sales', 'sales_RA_10', 'sales_RA_25', 'sales_RA_50','sales_RA_75', 'sales_RA_90', 'sales_P1_RA']]

# Add non-simulated tags to output df
sales_exP1=sales_ci_clean[~(sales_ci_clean['tag'].isin(output_p1['tag']))]
sales_exP1['sales_RA_10']=sales_exP1['sales']
sales_exP1['sales_RA_25']=sales_exP1['sales']
sales_exP1['sales_RA_50']=sales_exP1['sales']
sales_exP1['sales_RA_75']=sales_exP1['sales']
sales_exP1['sales_RA_90']=sales_exP1['sales']
sales_exP1['sales_P1_RA']=sales_exP1['sales']
output_p1=pd.concat([output_p1, sales_exP1])

# Add non-simulated tags to scenario df
scenarios_p1=pd.concat([scenarios_p1, scenarios_exp1])

# Validation
print(sum(output_p1[(output_p1['year']==2025)]['sales']))  #18393
print(sum(output_p1[(output_p1['product']=='xtandi') & (output_p1['year']==2025)]['sales'])) #6963
print(sum(scenarios_p1[(scenarios_p1['product']=='xtandi') & (scenarios_p1['year']==2025) & (scenarios_p1['indication']!='Total') & (scenarios_p1['scenario']==1)]['sales']))  #6963
print(sum(output_p1[(output_p1['product']=='mirabegron') & (output_p1['year']==2025)]['sales']))  #1223
print(output_p1['tag'].nunique()) # 2300

18176.63205186673
6963.730575326161
6963.730575326161
1223.22
2300


## Prep Results for Analysis

In [5]:
# Convert PTRS results to sales df for next phase (Commercial Uncertainty)
output_p1=output_p1[['tag','product','region', 'units', 'indication', 'year', 'sales', 'sales_P1_RA']]

# Sum sales at the WW level
output_p1_ww=output_p1.groupby(['product','units','indication','year']).sum().reset_index()
output_p1_ww['region']='Total'
output_p1=pd.concat([output_p1_ww, output_p1])

# Sum sales and scenarios at the Product level, as commercial uncertainty is applied at a higher granularity
output_p1_prod=output_p1.groupby(['product','units','region','year']).sum().reset_index()
output_p1_prod['indication']='Total'
output_p1=pd.concat([output_p1_prod, output_p1])

scenarios_p1_prod=scenarios_p1.groupby(['product','units','region','year', 'scenario']).sum().reset_index()
scenarios_p1_prod['indication']='Total'
scenarios_p1=pd.concat([scenarios_p1_prod, scenarios_p1])

# Validation
print(sum(output_p1[(output_p1['year']==2025)& (output_p1['region']!='Total') & (output_p1['indication']!='Total')]['sales']))  #18393
print(sum(output_p1[(output_p1['product']=='xtandi') & (output_p1['year']==2025) & (output_p1['region']!='Total') & (output_p1['indication']!='Total')]['sales']))  #6963
print(sum(output_p1[(output_p1['product']=='mirabegron') & (output_p1['year']==2025) & (output_p1['region']!='Total') & (output_p1['indication']!='Total')]['sales']))  #1223
print(output_p1[(output_p1['region']!='Total') & (output_p1['indication']!='Total')]['tag'].nunique()) # 2390

# Remove interim dfs from memory
del sales_exP1
del output_p1_ww
del output_p1_prod
del scenarios_p1_prod
del sales_ci_unc

output_file = 'output_p1.csv'
path = os.path.join(output_folder, output_file)
output_p1.to_csv(path)

output_file = 'scenarios_p1.csv'
path = os.path.join(output_folder, output_file)
scenarios_p1.to_csv(path)

18176.63205186673
6963.730575326161
1223.22
2300


# Phase 2: Market Events / Commercial Uncertainty

## Prep for Phase 2

In [6]:
#############
# CREATE OUTPUT DFS FOR PHASE 2
#############
output_p2 = pd.DataFrame(columns=['tag','product', 'region', 'units', 'indication', 'year', 'sales', 'sales_RA_10', 'sales_RA_25','sales_RA_50','sales_RA_75', 'sales_RA_90']) # Outputs of Phase 2
scenarios_p2 = pd.DataFrame(columns=['scenario_p1', 'scenario_p2', 'tag', 'product', 'region', 'units', 'indication', 'year', 'sales', 'sim_sales_p1', 'sim_sales_p2']) # Scenarios of Phase 2
output_unc_p2 = pd.DataFrame(columns=['tag','product', 'region', 'units', 'indication', 'year', 'sales']) # Uncertainties of Phase 2

#############
# PREP INPUTS DFS FOR PHASE 2
#############
# Create input df
input_p2a=output_p1[(output_p1['region']=='Total') & (output_p1['indication']=='Total')] # These do not go thru phase2
# input_p2b=output_p1[(output_p1['region']!='Total') & (output_p1['indication'].isin(['Total']))] # These go thru phase2

# input_p2a=scenarios_p1[(scenarios_p1['region']=='Total') & (scenarios_p1['indication']=='Total')] # These do not go thru phase2
input_p2b=scenarios_p1[(scenarios_p1['region']!='Total') & (scenarios_p1['indication'].isin(['Total']))] # These go thru phase2

# Validation
print(sum(input_p2a[(input_p2a['year']==2025)]['sales']))  #18393
print(sum(input_p2a[(input_p2a['product']=='xtandi') & (input_p2a['year']==2025)]['sales']))  #6963
print(sum(input_p2a[(input_p2a['product']=='mirabegron') & (input_p2a['year']==2025)]['sales']))  #1223

print(sum(input_p2b[(input_p2b['year']==2025) & (input_p2b['scenario']==1)]['sales']))  #18393
print(sum(input_p2b[(input_p2b['product']=='xtandi') & (input_p2b['year']==2025) & (input_p2b['scenario']==1)]['sales']))  #6963
print(sum(input_p2b[(input_p2b['product']=='mirabegron') & (input_p2b['year']==2025) & (input_p2b['scenario']==1)]['sales']))  #1223

18176.632051866727
6963.730575326162
1223.22
18176.632051866723
6963.730575326161
1223.2200000000003


## Simulation

In [9]:
#############
# XTANDI
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'xtandi'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
output_unc_p2=output_unc_p2[(output_unc_p2['product']!=product)]
scenarios_p2=scenarios_p2[(scenarios_p2['product']!=product)]

# Filter scenarios df for scenario
for sim in range(sims):
    nsim_p1=(sim+1)
    scenarios_p2a=input_p2b[(input_p2b['scenario']==nsim_p1)]
    print(nsim_p1)
    for year in years:

        for region in regions:
            n=sims
            scenario_sdf1=[]
            scenario_sdf2=[]
            product_sdf=[]
            region_sdf=[]
            units_sdf=[]
            ind_sdf=[]
            year_sdf=[]
            sales_sdf=[]
            sim_sales_sdf1=[]
            tag_sdf=[]

            sales_ra = []
            unc1_ra = []
            unc2_ra = []
            unc3_ra = []
            unc26_ra = []

            for i in range(n):
                # Get Uncertainty Probability (value)
                unc1_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc1')]['probability'], 1, 1)
                unc2_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc2')]['probability'], 1, 1)
                unc3_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc3')]['probability'], 1, 1)
                unc26_prob=np.random.binomial(size=1, n=1, p=unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc26')]['probability'])

                # Get Base Uncertainty
                unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
                unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)
                
                # Get relevant sales values for uncertainties
                # unc26_sales=input_p2b[(input_p2b['product']==product) & (input_p2b['year'] ==year) & (input_p2b['region']==region)]
                unc26_sales=scenarios_p2a[(scenarios_p2a['product']==product) & (scenarios_p2a['year'] ==year) & (scenarios_p2a['region']==region)]['sales']
                
                # Get Uncertainty Quant by year (series)
                unc1=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc1']
                unc2=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc2'])*float(unc26_sales)
                unc3=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc3'])*float(unc26_sales)
                unc26=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc26'])*float(unc26_sales)

                # Generate RA sales
                # sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
                sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==year) ]
                
                unc_all=float(sales['sim_sales']) + float(sales['sim_sales']*unc_base_prob) + float(unc1*unc1_prob) + float(unc2*unc2_prob) + float(unc3*unc3_prob) + float(unc26*unc26_prob)
                sales_ra.append(unc_all)

                # Generate uncertainty arrays
                unc1_ra.append(float(unc1*unc1_prob))
                unc2_ra.append(float(unc2*unc2_prob))
                unc3_ra.append(float(unc3*unc3_prob))
                unc26_ra.append(float(unc26*unc26_prob))

                # Create scenario array
                if nsim_p1==1: 
                    nsim_p2=0
                else:
                    nsim_p2=((nsim_p1*nsim_p1)+nsim_p1)/2
                    
                scenario_sdf1.append(nsim_p1)
                scenario_sdf2.append(nsim_p2+(i+1))
                product_sdf.append(product)
                region_sdf.append(region)
                units_sdf.append(sales['units'].values[0])
                ind_sdf.append(sales['indication'].values[0])
                year_sdf.append(year)
                sales_sdf.append(float(sales['sales']))
                sim_sales_sdf1.append(float(sales['sim_sales']))
                tag_sdf.append(sales['tag'].values[0])

            # Develop interim scenario_df
            scenarios_p2b= pd.DataFrame({'scenario_p1': scenario_sdf1,'scenario_p2': scenario_sdf2,'tag': tag_sdf,'product': product_sdf,'region': region_sdf, 'units': units_sdf, 
                                    'indication': ind_sdf, 'year': year_sdf, 'sales': sales_sdf, 'sim_sales_p1': sim_sales_sdf1, 'sim_sales_p2': sales_ra})

            # Append interim scenario_df to final scenario_df
            scenarios_p2=scenarios_p2.append(scenarios_p2b, ignore_index=True)

            # Get Product Region sales by year (series)
            prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
            prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
            prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
            prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
            prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
            prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)

            # Add uncertainty arrays to output_unc
            output_unc=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            output_unc['unc1_ra'] = np.mean(unc1_ra)
            output_unc['unc2_ra'] = np.mean(unc2_ra)
            output_unc['unc3_ra'] = np.mean(unc3_ra)
            output_unc['unc26_ra'] = np.mean(unc26_ra)

            # Append to sales df
            output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
            output_unc_p2 = output_unc_p2.append(output_unc, ignore_index=True)

    output_p2=output_p2.fillna(0)
    output_p2

2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


In [11]:
print(sum(output_p2[(output_p2['product'] =='xtandi')]['sales']))
print(sum(output_p2[(output_p2['product'] =='xtandi')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='xtandi')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='xtandi')]['sales_RA_75']))

383903.60159499437
357177.0074864362
370791.4548815642
377202.9553070945


In [12]:
#############
# XOSPATA
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'xospata'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
output_unc_p2=output_unc_p2[(output_unc_p2['product']!=product)]
scenarios_p2=scenarios_p2[(scenarios_p2['product']!=product)]

# Filter scenarios df for scenario
for sim in range(sims):
    nsim_p1=(sim+1)
    scenarios_p2a=input_p2b[(input_p2b['scenario']==nsim_p1)]
    print(nsim_p1)

    for year in years:
        print(year)

        for region in regions:
            n=sims
            scenario_sdf1=[]
            scenario_sdf2=[]
            product_sdf=[]
            region_sdf=[]
            units_sdf=[]
            ind_sdf=[]
            year_sdf=[]
            sales_sdf=[]
            sim_sales_sdf1=[]
            tag_sdf=[]
            
            sales_ra = []
            unc4_ra = []
            unc5_ra = []
            unc6_ra = []

            for i in range(n):
                # Get Uncertainty Probability (value)
                unc4_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc4')]['probability'], 1, 1)
                unc5_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc5')]['probability'], 1, 1)
                unc6_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc6')]['probability'], 1, 1)

                # Get Base Uncertainty
                unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
                unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

                # Get relevant sales values for uncertainties
                unc4_sales=scenarios_p2a[(scenarios_p2a['product']==product) & (scenarios_p2a['year'] ==year) & (scenarios_p2a['region']==region)]['sales']
                unc5_sales=scenarios_p2a[(scenarios_p2a['product']==product) & (scenarios_p2a['year'] ==year) & (scenarios_p2a['region']==region)]['sales']
                unc6_sales=scenarios_p2a[(scenarios_p2a['product']==product) & (scenarios_p2a['year'] ==year) & (scenarios_p2a['region']==region)]['sales']

                # Get Uncertainty Quant by year (series)
                unc4=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc4'])*float(unc4_sales)
                unc5=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc5'])*float(unc5_sales)
                unc6=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc6'])*float(unc6_sales)

                # Generate RA sales
                sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==year) ]
                unc_all=float(sales['sim_sales']) + float(sales['sim_sales']*unc_base_prob) + float(unc4*unc4_prob) + float(unc5*unc5_prob) + float(unc6*unc6_prob)
                sales_ra.append(unc_all)

                # Generate uncertainty arrays
                unc4_ra.append(float(unc4*unc4_prob))
                unc5_ra.append(float(unc5*unc5_prob))
                unc6_ra.append(float(unc6*unc6_prob))
                
                # Create scenario array
                if nsim_p1==1: 
                    nsim_p2=0
                else:
                    nsim_p2=((nsim_p1*nsim_p1)+nsim_p1)/2
                    
                scenario_sdf1.append(nsim_p1)
                scenario_sdf2.append(nsim_p2+(i+1))
                product_sdf.append(product)
                region_sdf.append(region)
                units_sdf.append(sales['units'].values[0])
                ind_sdf.append(sales['indication'].values[0])
                year_sdf.append(year)
                sales_sdf.append(float(sales['sales']))
                sim_sales_sdf1.append(float(sales['sim_sales']))
                tag_sdf.append(sales['tag'].values[0])
                
            # Develop interim scenario_df
            scenarios_p2b= pd.DataFrame({'scenario_p1': scenario_sdf1,'scenario_p2': scenario_sdf2,'tag': tag_sdf,'product': product_sdf,'region': region_sdf, 'units': units_sdf, 
                                    'indication': ind_sdf, 'year': year_sdf, 'sales': sales_sdf, 'sim_sales_p1': sim_sales_sdf1, 'sim_sales_p2': sales_ra})

            # Append interim scenario_df to final scenario_df
            scenarios_p2=scenarios_p2.append(scenarios_p2b, ignore_index=True)

            # Get Product Region sales by year (series)
            prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
            prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
            prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
            prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
            prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
            prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)

            # Add uncertainty arrays to output_unc
            output_unc=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            output_unc['unc4_ra'] = np.mean(unc4_ra)
            output_unc['unc5_ra'] = np.mean(unc5_ra)
            output_unc['unc6_ra'] = np.mean(unc6_ra)

            # Append to sales df
            output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
            output_unc_p2 = output_unc_p2.append(output_unc, ignore_index=True)

    output_p2=output_p2.fillna(0)
    output_p2

1
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
3
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


In [14]:
print(sum(output_p2[(output_p2['product'] =='xospata') & (output_p2['year'] ==2025) & (output_p2['region']!='ALL')]['sales']))
print(sum(output_p2[(output_p2['product'] =='xospata') & (output_p2['year'] ==2025) & (output_p2['region']!='ALL')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='xospata') & (output_p2['year'] ==2025) & (output_p2['region']!='ALL')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='xospata') & (output_p2['year'] ==2025) & (output_p2['region']!='ALL')]['sales_RA_75']))

6547.066058707576
6473.010083769217
6588.332482122582
6690.289813605658


In [15]:
#############
# PADCEV
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'padcev'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
output_unc_p2=output_unc_p2[(output_unc_p2['product']!=product)]
scenarios_p2=scenarios_p2[(scenarios_p2['product']!=product)]

# Filter scenarios df for scenario
for sim in range(sims):
    nsim_p1=(sim+1)
    scenarios_p2a=input_p2b[(input_p2b['scenario']==nsim_p1)]
    print(nsim_p1)

    for year in years:
        print(year)

        for region in regions:
            n=sims
            scenario_sdf1=[]
            scenario_sdf2=[]
            product_sdf=[]
            region_sdf=[]
            units_sdf=[]
            ind_sdf=[]
            year_sdf=[]
            sales_sdf=[]
            sim_sales_sdf1=[]
            tag_sdf=[]
            
            sales_ra = []
            unc7_ra = []
            unc8_ra = []
            unc9_ra = []
            unc10_ra = []
            unc11_ra = []
            unc12_ra = []
            unc13_ra = []
            unc14_ra = []

            for i in range(n):
                # Get Uncertainty Probability (value)
                unc7_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc7')]['probability'], 1, 1)
                unc8_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc8')]['probability'], 1, 1)
                unc9_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc9')]['probability'], 1, 1)
                unc10_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc10')]['probability'], 1, 1)
                unc11_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc11')]['probability'], 1, 1)
                unc12_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc12')]['probability'], 1, 1)
                unc13_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc13')]['probability'], 1, 1)
                unc14_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc14')]['probability'], 1, 1)

                # Get Base Uncertainty
                unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
                unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

                # Get Uncertainty Quant by year (series)
                unc7=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc7']
                unc8=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc8']
                unc9=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc9']
                unc10=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc10']
                unc11=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc11']
                unc12=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc12']
                unc13=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc13']
                unc14=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc14']

                # Generate RA sales
                sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==year) ]
                unc_all=float(sales['sim_sales']) + float(sales['sim_sales']*unc_base_prob) + float(unc7*unc7_prob) + float(unc8*unc8_prob) + float(unc9*unc9_prob) + float(unc10*unc10_prob) + float(unc11*unc11_prob) + float(unc12*unc12_prob) + float(unc13*unc13_prob) + float(unc14*unc14_prob)
                sales_ra.append(unc_all)

                # Generate uncertainty arrays
                unc7_ra.append(float(unc7*unc7_prob))
                unc8_ra.append(float(unc8*unc8_prob))
                unc9_ra.append(float(unc9*unc9_prob))
                unc10_ra.append(float(unc10*unc10_prob))
                unc11_ra.append(float(unc11*unc11_prob))
                unc12_ra.append(float(unc12*unc12_prob))
                unc13_ra.append(float(unc13*unc13_prob))
                unc14_ra.append(float(unc14*unc14_prob))
                
                # Create scenario array
                if nsim_p1==1: 
                    nsim_p2=0
                else:
                    nsim_p2=((nsim_p1*nsim_p1)+nsim_p1)/2
                    
                scenario_sdf1.append(nsim_p1)
                scenario_sdf2.append(nsim_p2+(i+1))
                product_sdf.append(product)
                region_sdf.append(region)
                units_sdf.append(sales['units'].values[0])
                ind_sdf.append(sales['indication'].values[0])
                year_sdf.append(year)
                sales_sdf.append(float(sales['sales']))
                sim_sales_sdf1.append(float(sales['sim_sales']))
                tag_sdf.append(sales['tag'].values[0])

            # Develop interim scenario_df
            scenarios_p2b= pd.DataFrame({'scenario_p1': scenario_sdf1,'scenario_p2': scenario_sdf2,'tag': tag_sdf,'product': product_sdf,'region': region_sdf, 'units': units_sdf, 
                                    'indication': ind_sdf, 'year': year_sdf, 'sales': sales_sdf, 'sim_sales_p1': sim_sales_sdf1, 'sim_sales_p2': sales_ra})

            # Append interim scenario_df to final scenario_df
            scenarios_p2=scenarios_p2.append(scenarios_p2b, ignore_index=True)

            # Get Product Worldwide sales by year (series)
            prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
            prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
            prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
            prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
            prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
            prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)

            # Add uncertainty arrays to output_unc
            output_unc=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            output_unc['unc7_ra'] = np.mean(unc7_ra)
            output_unc['unc8_ra'] = np.mean(unc8_ra)
            output_unc['unc9_ra'] = np.mean(unc9_ra)
            output_unc['unc10_ra'] = np.mean(unc10_ra)
            output_unc['unc11_ra'] = np.mean(unc11_ra)
            output_unc['unc12_ra'] = np.mean(unc12_ra)
            output_unc['unc13_ra'] = np.mean(unc13_ra)
            output_unc['unc14_ra'] = np.mean(unc14_ra)

            # Append to sales df
            output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
            output_unc_p2 = output_unc_p2.append(output_unc, ignore_index=True)

    output_p2=output_p2.fillna(0)
    output_p2

1
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
3
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


In [None]:
print(sum(output_p2[(output_p2['product'] =='padcev')]['sales']))
print(sum(output_p2[(output_p2['product'] =='padcev')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='padcev')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='padcev')]['sales_RA_75']))

In [17]:
#############
# EVRENZO
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'evrenzo'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
output_unc_p2=output_unc_p2[(output_unc_p2['product']!=product)]
scenarios_p2=scenarios_p2[(scenarios_p2['product']!=product)]

# Filter scenarios df for scenario
for sim in range(sims):
    nsim_p1=(sim+1)
    scenarios_p2a=input_p2b[(input_p2b['scenario']==nsim_p1)]
    print(nsim_p1)

    for year in years:
        print(year)

        for region in regions:
            n=sims
            scenario_sdf1=[]
            scenario_sdf2=[]
            product_sdf=[]
            region_sdf=[]
            units_sdf=[]
            ind_sdf=[]
            year_sdf=[]
            sales_sdf=[]
            sim_sales_sdf1=[]
            tag_sdf=[]
            
            sales_ra = []
            unc15_ra = []
            unc16_ra = []
            unc17_ra = []
            unc18_ra = []

            for i in range(n):
                # Get Uncertainty Probability (value)
                unc15_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc15')]['probability'], 1, 1)
                unc16_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc16')]['probability'], 1, 1)
                unc17_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc17')]['probability'], 1, 1)
                unc18_prob=np.random.triangular(0, unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc18')]['probability'], 1, 1)

                # Get Base Uncertainty
                unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
                unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

                # Get Uncertainty Quant by year (series)
                unc15=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc15']
                unc16=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc16']
                unc17=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc17']
                unc18=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc18']

                # Generate RA sales
                sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==year) ]
                unc_all=float(sales['sim_sales']) + float(sales['sim_sales']*unc_base_prob) + float(unc15*unc15_prob) + float(unc16*unc16_prob) + float(unc17*unc17_prob) + float(unc18*unc18_prob)
                sales_ra.append(unc_all)

                # Generate uncertainty arrays
                unc15_ra.append(float(unc15*unc15_prob))
                unc16_ra.append(float(unc16*unc16_prob))
                unc17_ra.append(float(unc17*unc17_prob))
                unc18_ra.append(float(unc18*unc18_prob))
                
                # Create scenario array
                if nsim_p1==1: 
                    nsim_p2=0
                else:
                    nsim_p2=((nsim_p1*nsim_p1)+nsim_p1)/2
                    
                scenario_sdf1.append(nsim_p1)
                scenario_sdf2.append(nsim_p2+(i+1))
                product_sdf.append(product)
                region_sdf.append(region)
                units_sdf.append(sales['units'].values[0])
                ind_sdf.append(sales['indication'].values[0])
                year_sdf.append(year)
                sales_sdf.append(float(sales['sales']))
                sim_sales_sdf1.append(float(sales['sim_sales']))
                tag_sdf.append(sales['tag'].values[0])

            # Develop interim scenario_df
            scenarios_p2b= pd.DataFrame({'scenario_p1': scenario_sdf1,'scenario_p2': scenario_sdf2,'tag': tag_sdf,'product': product_sdf,'region': region_sdf, 'units': units_sdf, 
                                    'indication': ind_sdf, 'year': year_sdf, 'sales': sales_sdf, 'sim_sales_p1': sim_sales_sdf1, 'sim_sales_p2': sales_ra})

            # Append interim scenario_df to final scenario_df
            scenarios_p2=scenarios_p2.append(scenarios_p2b, ignore_index=True)

            # Get Product Worldwide sales by year (series)
            prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
            prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
            prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
            prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
            prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
            prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)

            # Add uncertainty arrays to output_unc
            output_unc=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            output_unc['unc15_ra'] = np.mean(unc15_ra)
            output_unc['unc16_ra'] = np.mean(unc16_ra)
            output_unc['unc17_ra'] = np.mean(unc17_ra)
            output_unc['unc18_ra'] = np.mean(unc18_ra)

            # Append to sales df
            output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)
            output_unc_p2 = output_unc_p2.append(output_unc, ignore_index=True)

    output_p2=output_p2.fillna(0)
    output_p2

1
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
3
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


In [18]:
print(sum(output_p2[(output_p2['product'] =='evrenzo')]['sales']))
print(sum(output_p2[(output_p2['product'] =='evrenzo')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='evrenzo')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='evrenzo')]['sales_RA_75']))

40061.382759439744
38288.10901317918
39744.72898782903
41367.87381850048


In [19]:
#############
# ZOLBE
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'zolbe'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
output_unc_p2=output_unc_p2[(output_unc_p2['product']!=product)]
scenarios_p2=scenarios_p2[(scenarios_p2['product']!=product)]

# Filter scenarios df for scenario
for sim in range(sims):
    nsim_p1=(sim+1)
    scenarios_p2a=input_p2b[(input_p2b['scenario']==nsim_p1)]
    print(nsim_p1)

    for year in years:
        print(year)

        for region in regions:
            n=sims
            scenario_sdf1=[]
            scenario_sdf2=[]
            product_sdf=[]
            region_sdf=[]
            units_sdf=[]
            ind_sdf=[]
            year_sdf=[]
            sales_sdf=[]
            sim_sales_sdf1=[]
            tag_sdf=[]
            
            sales_ra = []

            for i in range(n):
                # Get Base Uncertainty
                unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
                unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

                # Generate RA sales
                sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==year) ]
                unc_all=float(sales['sim_sales']) + float(sales['sim_sales']*unc_base_prob)
                sales_ra.append(unc_all)
                
                # Create scenario array
                if nsim_p1==1: 
                    nsim_p2=0
                else:
                    nsim_p2=((nsim_p1*nsim_p1)+nsim_p1)/2
                    
                scenario_sdf1.append(nsim_p1)
                scenario_sdf2.append(nsim_p2+(i+1))
                product_sdf.append(product)
                region_sdf.append(region)
                units_sdf.append(sales['units'].values[0])
                ind_sdf.append(sales['indication'].values[0])
                year_sdf.append(year)
                sales_sdf.append(float(sales['sales']))
                sim_sales_sdf1.append(float(sales['sim_sales']))
                tag_sdf.append(sales['tag'].values[0])

            # Develop interim scenario_df
            scenarios_p2b= pd.DataFrame({'scenario_p1': scenario_sdf1,'scenario_p2': scenario_sdf2,'tag': tag_sdf,'product': product_sdf,'region': region_sdf, 'units': units_sdf, 
                                    'indication': ind_sdf, 'year': year_sdf, 'sales': sales_sdf, 'sim_sales_p1': sim_sales_sdf1, 'sim_sales_p2': sales_ra})

            # Append interim scenario_df to final scenario_df
            scenarios_p2=scenarios_p2.append(scenarios_p2b, ignore_index=True)

            # Get Product Worldwide sales by year (series)
            prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
            prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
            prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
            prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
            prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
            prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)

            # Append to sales df
            output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

    output_p2=output_p2.fillna(0)
    output_p2

1
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
3
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


In [20]:
print(sum(output_p2[(output_p2['product'] =='zolbe')]['sales']))
print(sum(output_p2[(output_p2['product'] =='zolbe')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='zolbe')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='zolbe')]['sales_RA_75']))  

45416.447317750295
40023.23947535404
41674.14852216146
43182.705413401185


In [21]:
output_file = 'trash.csv'
path = os.path.join(output_folder, output_file)
scenarios_p2.to_csv(path)

In [23]:
#############
# FEZO
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'fezo'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
output_unc_p2=output_unc_p2[(output_unc_p2['product']!=product)]
scenarios_p2=scenarios_p2[(scenarios_p2['product']!=product)]

# Filter scenarios df for scenario
for sim in range(sims):
    nsim_p1=(sim+1)
    scenarios_p2a=input_p2b[(input_p2b['scenario']==nsim_p1)]
    print(nsim_p1)

    for year in years:
        print(year)

        # Get Fezo US values, to create proportion
        fezo_base=scenarios_p2a[(scenarios_p2a['product']==product) & (scenarios_p2a['region']=='US') & (scenarios_p2a['year']==year)& (scenarios_p2a['indication']=='Total')]['sim_sales']
        fezo_10 = float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region']=='US')]['unc19'])
        fezo_25 = float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region']=='US')]['unc20'])
        fezo_50 = float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region']=='US')]['unc21'])
        fezo_75 = float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region']=='US')]['unc22'])
        fezo_90 = float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region']=='US')]['unc23'])

        for region in regions:
            # Get Product Worldwide sales by year (series) 
            prod_ww_sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==year)]
            
            prod_ww_sales['sales_RA_10'] = float(fezo_10/fezo_base)*float(prod_ww_sales['sim_sales'])
            prod_ww_sales['sales_RA_25'] = float(fezo_25/fezo_base)*float(prod_ww_sales['sim_sales'])
            prod_ww_sales['sales_RA_50'] = float(fezo_50/fezo_base)*float(prod_ww_sales['sim_sales'])
            prod_ww_sales['sales_RA_75'] = float(fezo_75/fezo_base)*float(prod_ww_sales['sim_sales'])
            prod_ww_sales['sales_RA_90'] = float(fezo_90/fezo_base)*float(prod_ww_sales['sim_sales'])

            prod_ww_sales['sales_P2_RA'] = prod_ww_sales['sales_RA_50']

            # Append to sales df
            output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

    output_p2=output_p2.fillna(0)
    output_p2

1
2022
870    0.0
Name: sim_sales, dtype: float64
2023
873    718.292302
Name: sim_sales, dtype: float64
2024
876    1941.483198
Name: sim_sales, dtype: float64
2025
879    2537.400264
Name: sim_sales, dtype: float64
2026
882    2597.745765
Name: sim_sales, dtype: float64
2027
885    2858.062566
Name: sim_sales, dtype: float64
2028
888    2949.399033
Name: sim_sales, dtype: float64
2029
891    2986.437575
Name: sim_sales, dtype: float64
2030
894    3031.188746
Name: sim_sales, dtype: float64
2031
897    3082.893351
Name: sim_sales, dtype: float64
2
2022
871    0.0
Name: sim_sales, dtype: float64
2023
874    718.292302
Name: sim_sales, dtype: float64
2024
877    1941.483198
Name: sim_sales, dtype: float64
2025
880    2537.400264
Name: sim_sales, dtype: float64
2026
883    2597.745765
Name: sim_sales, dtype: float64
2027
886    2858.062566
Name: sim_sales, dtype: float64
2028
889    2949.399033
Name: sim_sales, dtype: float64
2029
892    2986.437575
Name: sim_sales, dtype: float64
2030
8

In [None]:
print(sum(output_p2[(output_p2['product'] =='fezo')]['sales']))
print(sum(output_p2[(output_p2['product'] =='fezo')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='fezo')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='fezo')]['sales_RA_75']))   

In [24]:
#############
# MIRABEGRON
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'mirabegron'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
scenarios_p2=scenarios_p2[(scenarios_p2['product']!=product)]

# Filter scenarios df for scenario
for sim in range(sims):
    nsim_p1=(sim+1)
    scenarios_p2a=input_p2b[(input_p2b['scenario']==nsim_p1)]
    print(nsim_p1)

    for region in regions:
        n=sims
        scenario_sdf1=[]
        scenario_sdf2=[]
        product_sdf=[]
        region_sdf=[]
        units_sdf=[]
        ind_sdf=[]
        year_sdf=[]
        sales_sdf=[]
        sim_sales_sdf1=[]
        tag_sdf=[]
            
        sales_ra22 = []
        sales_ra23 = []
        sales_ra24 = []
        sales_ra25 = []

        for i in range(n):
            # Get Base Uncertainty
            unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
            unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

            # Get High,Base,Low Uncertainty
            unc_hbl_prob=np.random.randint(1,4)
            # unc_hbl_prob=1

            # Generate RA sales - High
            if (unc_hbl_prob==1):
                sales22=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2022) & (unc_pr['region'] ==region)]['unc24'])*float(1+unc_base_prob)
                sales23=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2023) & (unc_pr['region'] ==region)]['unc24'])*float(1+unc_base_prob)
                sales24=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2024) & (unc_pr['region'] ==region)]['unc24'])*float(1+unc_base_prob)
                sales25=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2025) & (unc_pr['region'] ==region)]['unc24'])*float(1+unc_base_prob)

            # Generate RA sales - Med
            if (unc_hbl_prob==2):
                sales22=float(scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2022) ]['sales'])*float(1+unc_base_prob)
                sales23=float(scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2023) ]['sales'])*float(1+unc_base_prob)
                sales24=float(scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2024) ]['sales'])*float(1+unc_base_prob)
                sales25=float(scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2025) ]['sales'])*float(1+unc_base_prob)

            # Generate RA sales - Low
            if (unc_hbl_prob==3):
                sales22=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2022) & (unc_pr['region'] ==region)]['unc25'])*float(1+unc_base_prob)
                sales23=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2023) & (unc_pr['region'] ==region)]['unc25'])*float(1+unc_base_prob)
                sales24=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2024) & (unc_pr['region'] ==region)]['unc25'])*float(1+unc_base_prob)
                sales25=float(unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==2025) & (unc_pr['region'] ==region)]['unc25'])*float(1+unc_base_prob)

            sales_ra22.append(sales22)
            sales_ra23.append(sales23)
            sales_ra24.append(sales24)
            sales_ra25.append(sales25)
            
            # Create scenario array
            if nsim_p1==1: 
                nsim_p2=0
            else:
                nsim_p2=((nsim_p1*nsim_p1)+nsim_p1)/2

            scenario_sdf1.append(nsim_p1)
            scenario_sdf2.append(nsim_p2+(i+1))
            product_sdf.append(product)
            region_sdf.append(region)
            units_sdf.append(sales['units'].values[0])
            ind_sdf.append(sales['indication'].values[0])
            year_sdf.append(year)
            sales_sdf.append(float(sales['sales']))
            sim_sales_sdf1.append(float(sales['sim_sales']))
            tag_sdf.append(sales['tag'].values[0])

        # Develop interim scenario_df
        scenarios_p2b= pd.DataFrame({'scenario_p1': scenario_sdf1,'scenario_p2': scenario_sdf2,'tag': tag_sdf,'product': product_sdf,'region': region_sdf, 'units': units_sdf, 
                                'indication': ind_sdf, 'year': year_sdf, 'sales': sales_sdf, 'sim_sales_p1': sim_sales_sdf1, 'sim_sales_p2': sales_ra})

        # Append interim scenario_df to final scenario_df
        scenarios_p2=scenarios_p2.append(scenarios_p2b, ignore_index=True)


         # Get Product, Region sales by year (series)
        prod_ww_sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2022) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra22, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra22, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra22, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra22, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra22, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra22)

        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

        # Get Product Worldwide sales by year (series)
        prod_ww_sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2023) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra23, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra23, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra23, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra23, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra23, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra23)

        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

        # Get Product Worldwide sales by year (series)
        prod_ww_sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2024) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra24, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra24, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra24, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra24, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra24, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra24)

        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

        # Get Product Worldwide sales by year (series)
        prod_ww_sales=scenarios_p2a[(input_p2b['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2025) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

            # Get Product Worldwide sales by year (series)
        prod_ww_sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2026) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

            # Get Product Worldwide sales by year (series)
        prod_ww_sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2027) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

            # Get Product Worldwide sales by year (series)
        prod_ww_sales=scenarios_p2a[(input_p2b['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2028) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

            # Get Product Worldwide sales by year (series)
        prod_ww_sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2029) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

            # Get Product Worldwide sales by year (series)
        prod_ww_sales=scenarios_p2a[(input_p2b['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2030) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

            # Get Product Worldwide sales by year (series)
        prod_ww_sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==2031) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra25, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra25, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra25, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra25, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra25, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra25)

        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

    output_p2=output_p2.fillna(0)
    output_p2

1
2
3


In [25]:
print(sum(output_p2[(output_p2['product'] =='mirabegron')]['sales']))
print(sum(output_p2[(output_p2['product'] =='mirabegron')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='mirabegron')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='mirabegron')]['sales_RA_75']))  

26414.071553102636
32978.164965658405
37241.47405023195
44764.3823539889


In [26]:
#############
# TACROLIMUS
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB', 'WWex8']
product = 'tacrolimus'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
scenarios_p2=scenarios_p2[(scenarios_p2['product']!=product)]

# Filter scenarios df for scenario
for sim in range(sims):
    nsim_p1=(sim+1)
    scenarios_p2a=input_p2b[(input_p2b['scenario']==nsim_p1)]
    print(nsim_p1)

    for year in years:
        print(year)

        for region in regions:
            n=sims
            scenario_sdf1=[]
            scenario_sdf2=[]
            product_sdf=[]
            region_sdf=[]
            units_sdf=[]
            ind_sdf=[]
            year_sdf=[]
            sales_sdf=[]
            sim_sales_sdf1=[]
            tag_sdf=[]
            
            sales_ra = []

            for i in range(n):
                # Get Base Uncertainty
                unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_base']
                unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

                # Generate RA sales
                sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==year) ]
                unc_all=float(sales['sim_sales']) + float(sales['sim_sales']*unc_base_prob)
                sales_ra.append(unc_all)
                
                # Create scenario array
                if nsim_p1==1: 
                    nsim_p2=0
                else:
                    nsim_p2=((nsim_p1*nsim_p1)+nsim_p1)/2
                    
                scenario_sdf1.append(nsim_p1)
                scenario_sdf2.append(nsim_p2+(i+1))
                product_sdf.append(product)
                region_sdf.append(region)
                units_sdf.append(sales['units'].values[0])
                ind_sdf.append(sales['indication'].values[0])
                year_sdf.append(year)
                sales_sdf.append(float(sales['sales']))
                sim_sales_sdf1.append(float(sales['sim_sales']))
                tag_sdf.append(sales['tag'].values[0])

            # Develop interim scenario_df
            scenarios_p2b= pd.DataFrame({'scenario_p1': scenario_sdf1,'scenario_p2': scenario_sdf2,'tag': tag_sdf,'product': product_sdf,'region': region_sdf, 'units': units_sdf, 
                                    'indication': ind_sdf, 'year': year_sdf, 'sales': sales_sdf, 'sim_sales_p1': sim_sales_sdf1, 'sim_sales_p2': sales_ra})

            # Append interim scenario_df to final scenario_df
            scenarios_p2=scenarios_p2.append(scenarios_p2b, ignore_index=True)

            # Get Product Worldwide sales by year (series)
            prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
            prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
            prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
            prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
            prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
            prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)

            # Append to sales df
            output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

    output_p2=output_p2.fillna(0)
    output_p2

1
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
3
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


In [27]:
print(sum(output_p2[(output_p2['product'] =='tacrolimus')]['sales']))
print(sum(output_p2[(output_p2['product'] =='tacrolimus')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='tacrolimus')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='tacrolimus')]['sales_RA_75']))  

126916.01209541476
125430.20137286757
127023.1009150868
128672.5087068835


In [28]:
#############
# LEXISCAN
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
product = 'lexiscan'

# Clear output for rerun
output_p2=output_p2[(output_p2['product']!=product)]
scenarios_p2=scenarios_p2[(scenarios_p2['product']!=product)]

# Filter scenarios df for scenario
for sim in range(sims):
    nsim_p1=(sim+1)
    scenarios_p2a=input_p2b[(input_p2b['scenario']==nsim_p1)]
    print(nsim_p1)

    for year in years:
        print(year)
        n=sims
        scenario_sdf1=[]
        scenario_sdf2=[]
        product_sdf=[]
        region_sdf=[]
        units_sdf=[]
        ind_sdf=[]
        year_sdf=[]
        sales_sdf=[]
        sim_sales_sdf1=[]
        tag_sdf=[]
            
        sales_ra = []

        for i in range(n):
            # Get Base Uncertainty
            unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year)& (unc_pr['region'] =='US')]['unc_base']
            unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

            # Generate RA sales
            sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['year']==year) ]
            unc_all=float(sales['sim_sales']) + float(sales['sim_sales']*unc_base_prob)
            sales_ra.append(unc_all)
            
            # Create scenario array
            if nsim_p1==1: 
                nsim_p2=0
            else:
                nsim_p2=((nsim_p1*nsim_p1)+nsim_p1)/2

            scenario_sdf1.append(nsim_p1)
            scenario_sdf2.append(nsim_p2+(i+1))
            product_sdf.append(product)
            region_sdf.append(region)
            units_sdf.append(sales['units'].values[0])
            ind_sdf.append(sales['indication'].values[0])
            year_sdf.append(year)
            sales_sdf.append(float(sales['sales']))
            sim_sales_sdf1.append(float(sales['sim_sales']))
            tag_sdf.append(sales['tag'].values[0])

        # Develop interim scenario_df
        scenarios_p2b= pd.DataFrame({'scenario_p1': scenario_sdf1,'scenario_p2': scenario_sdf2,'tag': tag_sdf,'product': product_sdf,'region': region_sdf, 'units': units_sdf, 
                                'indication': ind_sdf, 'year': year_sdf, 'sales': sales_sdf, 'sim_sales_p1': sim_sales_sdf1, 'sim_sales_p2': sales_ra})

        # Append interim scenario_df to final scenario_df
        scenarios_p2=scenarios_p2.append(scenarios_p2b, ignore_index=True)

        # Get Product Worldwide sales by year (series)
        prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['year']==year) ]
        prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
        prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
        prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
        prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
        prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
        prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)


        # Append to sales df
        output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

    output_p2=output_p2.fillna(0)
    output_p2

1
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
3
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031


In [29]:
print(sum(output_p2[(output_p2['product'] =='lexiscan')]['sales']))
print(sum(output_p2[(output_p2['product'] =='lexiscan')]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'] =='lexiscan')]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'] =='lexiscan')]['sales_RA_75']))

10572.647387139801
10354.238268239284
10477.577013703425
10634.978132859344


In [30]:
#############
# OTHERS
#############
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
region = 'WW'
other_products = ['evenity','cresemba','tamsulosin','suglat','ambisome','vesicare','mycamine','sujanu','symraf','cimzia','blincyto','repatha','linzess',
'vesomni','myslee/stilnox','irribow','gonax','feburic','asamax','josamycin','non prod','istodax','other merchandise products','allelock','tr','acr','blz','bonoteo','seroquel','geninax','col','inf-v','kiklin',
'ofa','regnite','srs','st','dificlir','p_aco - acofide','other astellas products']

for product in other_products:
    # Clear output for rerun
    output_p2=output_p2[(output_p2['product']!=product)]
    scenarios_p2=scenarios_p2[(scenarios_p2['product']!=product)]
    print(product)
    
    # Filter scenarios df for scenario
    for sim in range(sims):
        nsim_p1=(sim+1)
        scenarios_p2a=input_p2b[(input_p2b['scenario']==nsim_p1)]
        print(nsim_p1)

        for year in years:
            n=sims
            scenario_sdf1=[]
            scenario_sdf2=[]
            product_sdf=[]
            region_sdf=[]
            units_sdf=[]
            ind_sdf=[]
            year_sdf=[]
            sales_sdf=[]
            sim_sales_sdf1=[]
            tag_sdf=[]
            
            sales_ra = []

            for i in range(n):
                # Get Base Uncertainty
                unc_base=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year)]['unc_base']
                unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

                # Generate RA sales
                sales=scenarios_p2a[(scenarios_p2a['product'] ==product) & (scenarios_p2a['region']==region) & (scenarios_p2a['year']==year) ]
                unc_all=float(sales['sim_sales']) + float(sales['sim_sales']*unc_base_prob)
                sales_ra.append(unc_all)
                
                # Create scenario array
                if nsim_p1==1: 
                    nsim_p2=0
                else:
                    nsim_p2=((nsim_p1*nsim_p1)+nsim_p1)/2
                    
                scenario_sdf1.append(nsim_p1)
                scenario_sdf2.append(nsim_p2+(i+1))
                product_sdf.append(product)
                region_sdf.append(region)
                units_sdf.append(sales['units'].values[0])
                ind_sdf.append(sales['indication'].values[0])
                year_sdf.append(year)
                sales_sdf.append(float(sales['sales']))
                sim_sales_sdf1.append(float(sales['sim_sales']))
                tag_sdf.append(sales['tag'].values[0])

            # Develop interim scenario_df
            scenarios_p2b= pd.DataFrame({'scenario_p1': scenario_sdf1,'scenario_p2': scenario_sdf2,'tag': tag_sdf,'product': product_sdf,'region': region_sdf, 'units': units_sdf, 
                                    'indication': ind_sdf, 'year': year_sdf, 'sales': sales_sdf, 'sim_sales_p1': sim_sales_sdf1, 'sim_sales_p2': sales_ra})

            # Append interim scenario_df to final scenario_df
            scenarios_p2=scenarios_p2.append(scenarios_p2b, ignore_index=True)

            # Get Product Worldwide sales by year (series)
            prod_ww_sales=input_p2b[(input_p2b['product'] ==product) & (input_p2b['region']==region) & (input_p2b['year']==year) ]
            prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
            prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
            prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
            prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
            prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
            prod_ww_sales['sales_P2_RA'] = np.mean(sales_ra)


            # Append to sales df
            output_p2 = output_p2.append(prod_ww_sales, ignore_index=True)

    output_p2=output_p2.fillna(0)
    output_p2

evenity
1
2
3
cresemba
1
2
3
tamsulosin
1
2
3
suglat
1
2
3
ambisome
1
2
3
vesicare
1
2
3
mycamine
1
2
3
sujanu
1
2
3
symraf
1
2
3
cimzia
1
2
3
blincyto
1
2
3
repatha
1
2
3
linzess
1
2
3
vesomni
1
2
3
myslee/stilnox
1
2
3
irribow
1
2
3
gonax
1
2
3
feburic
1
2
3
asamax
1
2
3
josamycin
1
2
3
non prod
1
2
3
istodax
1
2
3
other merchandise products
1
2
3
allelock
1
2
3
tr
1
2
3
acr
1
2
3
blz
1
2
3
bonoteo
1
2
3
seroquel
1
2
3
geninax
1
2
3
col
1
2
3
inf-v
1
2
3
kiklin
1
2
3
ofa
1
2
3
regnite
1
2
3
srs
1
2
3
st
1
2
3
dificlir
1
2
3
p_aco - acofide
1
2
3
other astellas products
1
2
3


In [31]:
print(sum(output_p2[(output_p2['product'].isin(other_products))]['sales']))
print(sum(output_p2[(output_p2['product'].isin(other_products))]['sales_RA_25']))
print(sum(output_p2[(output_p2['product'].isin(other_products))]['sales_RA_50']))
print(sum(output_p2[(output_p2['product'].isin(other_products))]['sales_RA_75']))

88176.5033101042
86025.5114753956
88320.49609295983
90752.38229806589


## Prep Results for Analysis

In [32]:
# Add WW sales to output
output_p2a=output_p2
output_p2a=output_p2a.groupby(['product','units','indication','year']).sum().reset_index()
output_p2a['region']='ALL'
output_p2 = output_p2.append(output_p2a, ignore_index=True)

# Consolidate uncertainty output
output_unc_p2=output_unc_p2.groupby(['product','units','indication','year', 'region']).max().reset_index()

# Remove interim dfs from memory
del input_p2a 
del input_p2b 
del sales
del prod_ww_sales
del output_unc
del output_p2a

output_file = 'output_p2.csv'
path = os.path.join(output_folder, output_file)
output_p2.to_csv(path)

output_file = 'output_unc_p2.csv'
path = os.path.join(output_folder, output_file)
output_unc_p2.to_csv(path)

output_file = 'scenarios_p2.csv'
path = os.path.join(output_folder, output_file)
scenarios_p2.to_csv(path)

In [None]:
%whos DataFrame

# Phase 3: Above Brand

## Prep for Phase 3 and Simulation

In [None]:
# Create output dfs
output_p3 = pd.DataFrame(columns=['tag','product', 'region', 'units', 'indication', 'year', 'sales', 'sales_RA_10', 'sales_RA_25','sales_RA_50','sales_RA_75', 'sales_RA_90', 
                                  'sales_P1_RA', 'sales_P2_RA', 'sales_P3_RA']) # Outputs of Phase 3

scenarios_p3 = pd.DataFrame(columns=['scenario', 'tag', 'product', 'region', 'units', 'indication', 'year', 'sales', 'sim_sales']) # Scenarios of Phase 3
output_unc_p3 = pd.DataFrame(columns=['tag','product', 'region', 'units', 'indication', 'year', 'sales', 'variable', 'value']) # Uncertainties of Phase 3

# Inputs
years = [2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031]
regions = ['US', 'JP', 'CN', 'DE', 'FR', 'IT', 'ES', 'GB']
products = ['evrenzo', 'fezo', 'mirabegron', 'padcev', 'tacrolimus', 'xospata', 'xtandi', 'zolbe']
input_p3a=output_p2[~(output_p2['region'].isin(regions))] # These do not go thru phase3
input_p3b=output_p2[(output_p2['region'].isin(regions))] # These go thru phase3

for product in products:
    print(product)

    for year in years:

        for region in regions:
            n=sims
            sales_ra = []
            unc_ab1_ra = []
            unc_ab2_ra = []
            unc_ab3_ra = []
            unc_ab4_ra = []

            for i in range(n):
                # Get Above Brand Uncertainty Probability (value)
                unc_ab1_prob=np.random.binomial(size=1, n=1, p=unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc_ab1')]['probability'])
                unc_ab2_prob=np.random.binomial(size=1, n=1, p=unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc_ab2')]['probability'])
                unc_ab3_prob=np.random.binomial(size=1, n=1, p=unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc_ab3')]['probability'])
                unc_ab4_prob=np.random.binomial(size=1, n=1, p=unc_pr_desc[(unc_pr_desc['uncertainties'] == 'unc_ab4')]['probability'])
                
                # Get sales from phase 2
                sales=input_p3b[(input_p3b['product'] ==product) & (input_p3b['region']==region) & (input_p3b['year']==year) ]

                # Get Uncertainty Quant by year (series)
                unc_ab1=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_ab1']*float(sales['sales_P2_RA'])*unc_ab1_prob
                unc_ab2=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_ab2']*float(sales['sales_P2_RA'])*unc_ab2_prob
                unc_ab3=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_ab3']*float(sales['sales_P2_RA'])*unc_ab3_prob
                unc_ab4=unc_pr[(unc_pr['product'] ==product) & (unc_pr['year'] ==year) & (unc_pr['region'] ==region)]['unc_ab4']*float(sales['sales_P2_RA'])*unc_ab4_prob
                
                # Generate RA sales
                unc_all=float(sales['sales_P2_RA']) + unc_ab1 + unc_ab2 + unc_ab3 + unc_ab4
                sales_ra.append(unc_all)
                
                # Generate uncertainty arrays
                unc_ab1_ra.append(float(unc_ab1))
                unc_ab2_ra.append(float(unc_ab2))
                unc_ab3_ra.append(float(unc_ab3))
                unc_ab4_ra.append(float(unc_ab4))
            
            # Get Product Worldwide sales by year (series)
            prod_ww_sales=input_p3b[(input_p3b['product'] ==product) & (input_p3b['region']==region) & (input_p3b['year']==year) ]
            prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
            prod_ww_sales['sales_RA_25'] = np.percentile(sales_ra, 25)
            prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
            prod_ww_sales['sales_RA_75'] = np.percentile(sales_ra, 75)
            prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
            prod_ww_sales['sales_P3_RA'] = np.mean(sales_ra)
            
            # Add uncertainty arrays to output_unc
            output_unc=input_p3b[(input_p3b['product'] ==product) & (input_p3b['region']==region) & (input_p3b['year']==year) ][['tag', 'product', 'region', 'units', 'indication', 'year', 'sales','sales_P1_RA', 'sales_P2_RA']]
            output_unc['unc_ab1_ra'] = np.mean(unc_ab1_ra)
            output_unc['unc_ab2_ra'] = np.mean(unc_ab2_ra)
            output_unc['unc_ab3_ra'] = np.mean(unc_ab3_ra)
            output_unc['unc_ab4_ra'] = np.mean(unc_ab4_ra)

            # Append to sales df
            output_p3 = output_p3.append(prod_ww_sales, ignore_index=True)
            output_unc_p2 = output_unc_p2.append(output_unc, ignore_index=True)
            
output_p3=output_p3.fillna(0)
output_p3

## Prep Results for Analysis

In [None]:
# Update output uncertainty df for PTRS
output_unc_p2=output_unc_p2.groupby(['product','units','indication','year', 'region']).max().reset_index()
output_unc_p2['unc_ptrs'] = output_unc_p2['sales_P1_RA'] - output_unc_p2['sales']

# Update output uncertainty df for PTRS
output_unc_p2=output_unc_p2.fillna(0)

# Select columns needed
output_unc_p3=output_unc_p2[['tag', 'product', 'region', 'units', 'indication', 'year', 'sales',
                             'unc_ptrs','unc1_ra', 'unc2_ra', 'unc3_ra', 'unc4_ra',
                             'unc5_ra', 'unc6_ra', 'unc7_ra', 'unc8_ra', 'unc9_ra', 'unc10_ra',
                             'unc11_ra', 'unc12_ra', 'unc13_ra', 'unc14_ra', 'unc15_ra', 'unc16_ra',
                             'unc17_ra', 'unc18_ra', 'unc26_ra', 
                             'unc_ab1_ra', 'unc_ab2_ra','unc_ab3_ra', 'unc_ab4_ra']]

# Pivot table
id_vars=['tag', 'product', 'region', 'units', 'indication', 'year', 'sales']
value_vars=['unc_ptrs','unc1_ra', 'unc2_ra', 'unc3_ra', 'unc4_ra',
            'unc5_ra', 'unc6_ra', 'unc7_ra', 'unc8_ra', 'unc9_ra', 'unc10_ra',
            'unc11_ra', 'unc12_ra', 'unc13_ra', 'unc14_ra', 'unc15_ra', 'unc16_ra',
            'unc17_ra', 'unc18_ra', 'unc26_ra', 
            'unc_ab1_ra', 'unc_ab2_ra','unc_ab3_ra', 'unc_ab4_ra']

output_unc_p3=pd.melt(output_unc_p3, id_vars=id_vars, value_vars=value_vars)

# # Merge uncertainty name to output_unc_p22
# output_unc_p22=output_unc_p22.merge(unc_pr_desc, how='left', left_on='lkey', right_on='rkey')

# Add back tags that did not go through phase 3
input_p3a['sales_P3_RA']=input_p3a['sales_P2_RA']
output_p3=pd.concat([output_p3, input_p3a[input_p3a['region']!='ALL']], ignore_index=True, axis=0)

# Add WW sales to output
output_phase3_agg=output_p3
output_phase3_agg=output_phase3_agg.groupby(['product','units','indication','year']).sum().reset_index()
output_phase3_agg['region']='ALL'
output_p3 = output_p3.append(output_phase3_agg, ignore_index=True)

# Remove interim dfs from memory
del prod_ww_sales
del sales
del input_p3a 
del input_p3b 
del output_phase3_agg

output_file = 'output_p3.csv'
path = os.path.join(output_folder, output_file)
output_p3.to_csv(path)

output_file = 'output_unc_p3.csv'
path = os.path.join(output_folder, output_file)
output_unc_p3.to_csv(path)

output_file = 'scenarios_p3.csv'
path = os.path.join(output_folder, output_file)
scenarios_p3.to_csv(path)

In [None]:
%whos DataFrame

# Metadata

In [None]:
sim_end = datetime.now()
td =  sim_end - sim_start
td_mins = int(round(td.total_seconds() / 60))
metatdata = {'sim_start': sim_start, 'sim_end': sim_end, 'sims_run': sims, 'sim_time': td_mins}
metatdata_df=pd.DataFrame.from_dict(metatdata, orient='index')

output_file = 'metatdata.csv'
path = os.path.join(output_folder, output_file)
metatdata_df.to_csv(path)