In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
import warnings
warnings.filterwarnings("ignore")

######################
# READ IN DATA
######################
# All five input sheets live in the same workbook, so the file is opened and
# parsed once instead of once per sheet.
INPUT_XLSX = r"C:\Users\A4023862\OneDrive - Astellas Pharma Inc\LRF\Synchronization CSP & AP\inputs.xlsx"
INPUT_SHEETS = ['sales_ci', 'unc_p', 'unc_p_desc', 'unc_pri', 'unc_pri_desc']

# Passing a list to sheet_name returns a dict {sheet name: DataFrame}
_sheets = pd.read_excel(INPUT_XLSX, sheet_name=INPUT_SHEETS)

sales_ci = _sheets['sales_ci']
unc_p = _sheets['unc_p']
unc_p_desc = _sheets['unc_p_desc']
unc_pri = _sheets['unc_pri']
unc_pri_desc = _sheets['unc_pri_desc']

######################
# PREP DATA
######################
# Reshape the wide sales sheet (one column per year) into long format:
# one row per product / region / units / indication / year.
sales_ci = (
    sales_ci
    .drop_duplicates()
    .melt(id_vars=['product', 'region', 'units', 'indication'],
          var_name="year",
          value_name="sales")
)

# Keep only the rows whose units are '¥'
sales_ci = sales_ci[sales_ci['units'] == '¥']

# Exclude indications that are not needed
exclude_ind = ['Adjustments', 'Central Adjustments', 'Total']
sales_ci = sales_ci[~sales_ci['indication'].isin(exclude_ind)]

# Partition WW sales into 9 segments -> US, JP, CN, DE, FR, ES, IT, GB, WWex8
sales_ci_WW = sales_ci[sales_ci['region'] == 'WW']
ast8 = ['US', 'JP', 'CN', 'DE', 'FR', 'ES', 'IT', 'GB']
sales_ci_ast8 = sales_ci[sales_ci['region'].isin(ast8)]

# Sum of the 8 named regions per product / units / indication / year.
# Only the numeric 'sales' column is aggregated; summing the whole frame would
# also "sum" (concatenate) the string 'region' column.
group_keys = ['product', 'units', 'indication', 'year']
sales_ast8_total = sales_ci_ast8.groupby(group_keys, as_index=False)['sales'].sum()

# WWex8 = WW minus the 8 named regions. how='right' keeps every group that has
# regional sales; a group with no WW row ends up with NaN sales.
sales_ci_WWex8 = sales_ci_WW.merge(sales_ast8_total, how='right', on=group_keys,
                                   suffixes=('_ww', '_ast8'))
sales_ci_WWex8['sales'] = sales_ci_WWex8['sales_ww'] - sales_ci_WWex8['sales_ast8']
sales_ci_WWex8['region'] = 'WWex8'
sales_ci_WWex8 = sales_ci_WWex8[['product', 'region', 'units', 'indication', 'year', 'sales']]

# Generate new sales table
sales_ci_clean = pd.concat([sales_ci_ast8, sales_ci_WWex8])

# Add back non-strategic products
# NOTE(review): this selects 'Non-Strategic' rows for every region. Any of them
# that sit in one of the `ast8` regions are already in sales_ci_clean and would
# be counted twice - confirm against the input sheet.
sales_ci_nsp = sales_ci[sales_ci['indication'] == 'Non-Strategic']
sales_ci_clean = pd.concat([sales_ci_clean, sales_ci_nsp])

# Create tag: concatenation of all key fields, used downstream to match rows
sales_ci_clean['tag'] = (
    sales_ci_clean['product']
    + sales_ci_clean['region']
    + sales_ci_clean['units']
    + sales_ci_clean['indication'].astype(str)
    + sales_ci_clean['year'].astype(str)
)

# Validation
# NOTE(review): the expected 2025 total noted here is 18426, but the last
# recorded run printed 18491.32 - TODO reconcile.
print(sum(sales_ci_clean[(sales_ci_clean['year'] == 2025)]['sales']))  #18426
print(sum(sales_ci_clean[(sales_ci_clean['product'] == 'xtandi') & (sales_ci_clean['year'] == 2025)]['sales']))  #7309
print(sales_ci_clean['tag'].nunique()) # 700

# Save new sales data
sales_ci_clean.to_csv(r"C:\Users\A4023862\OneDrive - Astellas Pharma Inc\LRF\Synchronization CSP & AP\output\sales_ci_clean.csv")

18491.32047982937
7309.662391952652
700


In [2]:
#############
# PHASE 1: PTRS
#############
# For every row of `unc_pri`, simulate N_SIM risk-adjusted sales outcomes:
#   * a Bernoulli(p) draw decides whether the row yields any sales at all;
#   * on success, one of three equally likely scenarios is drawn:
#       1 -> first alternative sales value, 2 -> second alternative sales value,
#       3 -> the base sales figure.
#     A scenario whose alternative value is missing (NaN) falls back to base.
# The 10th/50th/90th percentiles and the mean of the draws are stored per row.
#
# Changes versus the previous implementation:
#   * the scenario is drawn once per simulation. The old inner loop redrew it
#     500 times and kept the last non-3 draw, so the base scenario was
#     effectively never selected and each simulation cost 500 extra draws;
#   * a NaN alternative can no longer be selected (it used to turn the
#     percentiles into NaN);
#   * rows are collected in a list and turned into a frame once
#     (DataFrame.append was removed in pandas 2.0 and is quadratic in a loop);
#   * the global NumPy generator is seeded so reruns reproduce the same numbers.
N_SIM = 500
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

OUTPUT_COLS = ['tag', 'product', 'region', 'units', 'indication', 'year', 'sales',
               'sales_RA_10', 'sales_RA_50', 'sales_RA_90', 'sales_RA_mean']

# Add PTRS values to sales data
sales_ci_unc = sales_ci_clean.merge(unc_pri, how='right', on=['product', 'region', 'units', 'indication', 'year'])

# Positions, in the merged row, of the three value columns contributed by
# `unc_pri` (they follow the 7 columns of sales_ci_clean): success probability
# and the two alternative sales values.
PTRS_POS, ALT1_POS, ALT2_POS = 7, 8, 9

records = []
for _, row in sales_ci_unc.iterrows():
    base_sales = float(row['sales'])
    ptrs_prob = row.iloc[PTRS_POS]
    alt1 = row.iloc[ALT1_POS]
    alt2 = row.iloc[ALT2_POS]

    # 1 = success, 0 = failure (no sales)
    success = np.random.binomial(n=1, p=ptrs_prob, size=N_SIM)
    # 1, 2 or 3 with equal probability
    scenario = np.random.randint(1, 4, size=N_SIM)

    draws = np.full(N_SIM, base_sales, dtype=float)
    if pd.notna(alt1):
        draws[(success == 1) & (scenario == 1)] = float(alt1)
    if pd.notna(alt2):
        draws[(success == 1) & (scenario == 2)] = float(alt2)

    # Generate RA sales: failed draws contribute zero
    sales_ra = draws * success

    record = row.to_dict()
    record['sales_RA_10'] = np.percentile(sales_ra, 10)
    record['sales_RA_50'] = np.percentile(sales_ra, 50)
    record['sales_RA_90'] = np.percentile(sales_ra, 90)
    record['sales_RA_mean'] = np.mean(sales_ra)
    records.append(record)

# Save outputs
output_df = pd.DataFrame(records, columns=OUTPUT_COLS)

# Rows without a PTRS entry carry no phase-1 uncertainty: every risk-adjusted
# figure equals the plain sales value.
sales_exPTRS = sales_ci_clean[~(sales_ci_clean['tag'].isin(output_df['tag']))].copy()
for ra_col in ['sales_RA_10', 'sales_RA_50', 'sales_RA_90', 'sales_RA_mean']:
    sales_exPTRS[ra_col] = sales_exPTRS['sales']
output_df = pd.concat([output_df, sales_exPTRS])

# Validation
print(sum(output_df[(output_df['year'] == 2025)]['sales']))  #18426
print(sum(output_df[(output_df['product'] == 'xtandi') & (output_df['year'] == 2025)]['sales'])) #7309
print(output_df['tag'].nunique()) # 700

output_df.to_csv(r"C:\Users\A4023862\OneDrive - Astellas Pharma Inc\LRF\Synchronization CSP & AP\output\output_phase1.csv")


18491.320479829377
7309.662391952652
700


In [3]:
#############
# PHASE 2: COMMERCIAL UNCERTAINTY
#############
# Convert PTRS results to sales df for next phase (Commercial Uncertainty).
# The phase-1 mean becomes 'sales_p1_RA'. The two value columns are cast to
# float so the aggregations below are purely numeric.
VALUE_COLS = ['sales', 'sales_p1_RA']
sales_ptrs = (
    output_df[['tag', 'product', 'region', 'units', 'indication', 'year', 'sales', 'sales_RA_mean']]
    .rename(columns={'sales_RA_mean': 'sales_p1_RA'})
    .astype({'sales': float, 'sales_p1_RA': float})
)

# Sum sales at the WW level.
# Only VALUE_COLS are aggregated: summing the whole frame would also try to
# "sum" the string columns (tag, region), which concatenates text or raises on
# mixed NaN/str values.
sales_ptrs_ww = sales_ptrs.groupby(['product', 'units', 'indication', 'year'], as_index=False)[VALUE_COLS].sum()
sales_ptrs_ww['region'] = 'Total'
sales_ptrs = pd.concat([sales_ptrs_ww, sales_ptrs])

# Sum sales at the Product level, as commercial uncertainty is applied at a higher granularity
sales_ptrs_prod = sales_ptrs.groupby(['product', 'units', 'region', 'year'], as_index=False)[VALUE_COLS].sum()
sales_ptrs_prod['indication'] = 'Total'
sales_ptrs = pd.concat([sales_ptrs_prod, sales_ptrs])

# sales_ptrs now mixes three levels: detail rows, region == 'Total' rows and
# indication == 'Total' rows. Any sum over it must filter to a single level,
# otherwise the same sales are counted more than once (as the checks below do).

# Validation
print(sum(sales_ptrs[(sales_ptrs['year'] == 2025) & (sales_ptrs['region'] != 'Total') & (sales_ptrs['indication'] != 'Total')]['sales']))  #18426
print(sum(sales_ptrs[(sales_ptrs['product'] == 'xtandi') & (sales_ptrs['year'] == 2025) & (sales_ptrs['region'] != 'Total') & (sales_ptrs['indication'] != 'Total')]['sales']))  #7309
print(sales_ptrs[(sales_ptrs['region'] != 'Total') & (sales_ptrs['indication'] != 'Total')]['tag'].nunique()) # 700
    

18491.320479829377
7309.662391952652
700


In [5]:
######################
# CREATE INPUT & OUTPUT DF
######################
# Empty phase-2 result table; the per-product cells below add their rows to it
phase2_columns = ['tag', 'product', 'region', 'units', 'indication', 'year',
                  'sales', 'sales_RA_10', 'sales_RA_50', 'sales_RA_90']
output_df2 = pd.DataFrame(columns=phase2_columns)

# Phase-2 input: only the rows aggregated over both region and indication
is_region_total = sales_ptrs['region'].eq('Total')
is_indication_total = sales_ptrs['indication'].eq('Total')
sales_p2 = sales_ptrs[is_region_total & is_indication_total]

# Validation: 2025 totals, overall and for xtandi
rows_2025 = sales_p2[sales_p2['year'].eq(2025)]
print(sum(rows_2025['sales']))  #18426
print(sum(rows_2025[rows_2025['product'].eq('xtandi')]['sales']))  #7309

18491.320479829374
7309.662391952652


In [6]:
#############
# XTANDI
#############
# Monte Carlo per year:
#   RA sales = base * (1 + U(-unc_base, +unc_base)) + sum_k impact_k * w_k
# where base is the phase-1 risk-adjusted WW sales, impact_k is the value in
# the uncertainty column of `unc_p`, and w_k ~ Triangular(0, mode_k, 1) with
# mode_k taken from the 'probability' column of `unc_p_desc`.
# All n draws of a year are generated in one vectorised call, and rows are
# concatenated once at the end (DataFrame.append was removed in pandas 2.0).
years = [2022, 2023, 2024, 2025]
product = 'xtandi'
n = 500
event_uncs = ['unc1', 'unc2', 'unc3']

# Clear output for rerun
output_df2 = output_df2[(output_df2['product'] != product)]

# Triangular modes do not depend on the year; .item() fails loudly unless
# exactly one row matches.
event_modes = np.array([
    float(unc_p_desc[unc_p_desc['uncertainties'] == name]['probability'].item())
    for name in event_uncs
])

new_rows = []
for year in years:
    print(year)

    # Year-specific inputs (constant across the n draws)
    unc_row = unc_p[(unc_p['product'] == product) & (unc_p['year'] == year)]
    unc_base = float(unc_row['unc_base'].item())
    event_impacts = unc_row[event_uncs].to_numpy(dtype=float).ravel()

    # Get Product Worldwide sales by year
    prod_ww_sales = sales_p2[(sales_p2['product'] == product) & (sales_p2['region'] == 'Total') & (sales_p2['year'] == year)].copy()
    base_sales = float(prod_ww_sales['sales_p1_RA'].item())

    # Draws: one base shock per simulation, one weight per simulation and event
    base_shock = np.random.uniform(low=-unc_base, high=unc_base, size=n)
    event_weights = np.random.triangular(0, event_modes, 1, size=(n, len(event_uncs)))

    # Generate RA sales
    sales_ra = base_sales * (1 + base_shock) + event_weights @ event_impacts

    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
    prod_ww_sales['sales_p2_RA'] = np.mean(sales_ra)
    new_rows.append(prod_ww_sales)

# Append to sales df
output_df2 = pd.concat([output_df2] + new_rows, ignore_index=True)

output_df2 = output_df2.fillna(0)
output_df2

2022
2023
2024
2025


Unnamed: 0,tag,product,region,units,indication,year,sales,sales_RA_10,sales_RA_50,sales_RA_90,sales_p1_RA,sales_p2_RA
0,0,xtandi,Total,¥,Total,2022,6205.689687,5971.577664,6121.992532,6258.990215,6122.596922,6118.755404
1,0,xtandi,Total,¥,Total,2023,6573.134521,6289.597046,6479.094374,6620.729822,6440.834647,6467.527601
2,0,xtandi,Total,¥,Total,2024,6901.520169,6596.739487,6749.200812,6934.860753,6721.991031,6759.488861
3,0,xtandi,Total,¥,Total,2025,7309.662392,6976.458287,7161.761481,7345.427952,7067.75735,7163.654841


In [None]:
# Totals over all years for the xtandi rows: sales, then P10 / P50 / P90
xtandi_rows = output_df2[output_df2['product'] == 'xtandi']
for total_col in ['sales', 'sales_RA_10', 'sales_RA_50', 'sales_RA_90']:
    print(sum(xtandi_rows[total_col]))

In [None]:
#############
# XOSPATA
#############
# Monte Carlo per year:
#   RA sales = base * (1 + U(-unc_base, +unc_base)) + sum_k impact_k * w_k
# where base is the phase-1 risk-adjusted WW sales, w_k ~ Triangular(0, mode_k, 1)
# with mode_k from `unc_p_desc`, and impact_k is the `unc_p` factor multiplied
# by the product's sales in the regions that event applies to.
#
# Fix: the regional sales are now read from the indication == 'Total' rows
# only. `sales_ptrs` contains both the indication-level rows and their
# per-region totals, so the previous unfiltered sum counted every region twice.
# Rows are concatenated once at the end (DataFrame.append was removed in
# pandas 2.0) and the n draws of a year are generated in one vectorised call.
years = [2022, 2023, 2024, 2025]
product = 'xospata'
n = 500

# Regions whose sales each uncertainty factor is applied to
event_regions = {
    'unc4': ['US', 'JP', 'DE', 'FR', 'IT', 'ES', 'GB'],
    'unc5': ['US', 'DE', 'FR', 'IT', 'ES', 'GB'],
    'unc6': ['US', 'DE', 'FR', 'IT', 'ES', 'GB'],
}
event_uncs = list(event_regions)

# Clear output for rerun
output_df2 = output_df2[(output_df2['product'] != product)]

# Triangular modes do not depend on the year; .item() fails loudly unless
# exactly one row matches.
event_modes = np.array([
    float(unc_p_desc[unc_p_desc['uncertainties'] == name]['probability'].item())
    for name in event_uncs
])

new_rows = []
for year in years:
    print(year)

    # Year-specific inputs (constant across the n draws)
    unc_row = unc_p[(unc_p['product'] == product) & (unc_p['year'] == year)]
    unc_base = float(unc_row['unc_base'].item())

    # Get relevant sales values for uncertainties (one total row per region)
    region_totals = sales_ptrs[(sales_ptrs['product'] == product)
                               & (sales_ptrs['year'] == year)
                               & (sales_ptrs['indication'] == 'Total')]
    exposed_sales = np.array([
        region_totals[region_totals['region'].isin(event_regions[name])]['sales'].sum()
        for name in event_uncs
    ], dtype=float)

    # Get Uncertainty Quant by year: factor * exposed sales
    event_impacts = unc_row[event_uncs].to_numpy(dtype=float).ravel() * exposed_sales

    # Get Product Worldwide sales by year
    prod_ww_sales = sales_p2[(sales_p2['product'] == product) & (sales_p2['region'] == 'Total') & (sales_p2['year'] == year)].copy()
    base_sales = float(prod_ww_sales['sales_p1_RA'].item())

    # Draws: one base shock per simulation, one weight per simulation and event
    base_shock = np.random.uniform(low=-unc_base, high=unc_base, size=n)
    event_weights = np.random.triangular(0, event_modes, 1, size=(n, len(event_uncs)))

    # Generate RA sales
    sales_ra = base_sales * (1 + base_shock) + event_weights @ event_impacts

    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
    prod_ww_sales['sales_p2_RA'] = np.mean(sales_ra)
    new_rows.append(prod_ww_sales)

# Append to sales df
output_df2 = pd.concat([output_df2] + new_rows, ignore_index=True)

output_df2 = output_df2.fillna(0)
output_df2

In [None]:
# Totals over all years for the xospata rows: sales, then P10 / P50 / P90
xospata_rows = output_df2[output_df2['product'] == 'xospata']
for total_col in ['sales', 'sales_RA_10', 'sales_RA_50', 'sales_RA_90']:
    print(sum(xospata_rows[total_col]))

In [None]:
#############
# PADCEV
#############
# Monte Carlo per year:
#   RA sales = base * (1 + U(-unc_base, +unc_base)) + sum_k impact_k * w_k
# where base is the phase-1 risk-adjusted WW sales, impact_k is the value in
# the uncertainty column of `unc_p` (unc7 .. unc14), and
# w_k ~ Triangular(0, mode_k, 1) with mode_k taken from `unc_p_desc`.
# All n draws of a year are generated in one vectorised call, and rows are
# concatenated once at the end (DataFrame.append was removed in pandas 2.0).
years = [2022, 2023, 2024, 2025]
product = 'padcev'
n = 500
event_uncs = [f'unc{k}' for k in range(7, 15)]  # unc7 .. unc14

# Clear output for rerun
output_df2 = output_df2[(output_df2['product'] != product)]

# Triangular modes do not depend on the year; .item() fails loudly unless
# exactly one row matches.
event_modes = np.array([
    float(unc_p_desc[unc_p_desc['uncertainties'] == name]['probability'].item())
    for name in event_uncs
])

new_rows = []
for year in years:
    print(year)

    # Year-specific inputs (constant across the n draws)
    unc_row = unc_p[(unc_p['product'] == product) & (unc_p['year'] == year)]
    unc_base = float(unc_row['unc_base'].item())
    event_impacts = unc_row[event_uncs].to_numpy(dtype=float).ravel()

    # Get Product Worldwide sales by year
    prod_ww_sales = sales_p2[(sales_p2['product'] == product) & (sales_p2['region'] == 'Total') & (sales_p2['year'] == year)].copy()
    base_sales = float(prod_ww_sales['sales_p1_RA'].item())

    # Draws: one base shock per simulation, one weight per simulation and event
    base_shock = np.random.uniform(low=-unc_base, high=unc_base, size=n)
    event_weights = np.random.triangular(0, event_modes, 1, size=(n, len(event_uncs)))

    # Generate RA sales
    sales_ra = base_sales * (1 + base_shock) + event_weights @ event_impacts

    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
    prod_ww_sales['sales_p2_RA'] = np.mean(sales_ra)
    new_rows.append(prod_ww_sales)

# Append to sales df
output_df2 = pd.concat([output_df2] + new_rows, ignore_index=True)

output_df2 = output_df2.fillna(0)
output_df2

In [None]:
# Totals over all years for the padcev rows: sales, then P10 / P50 / P90
padcev_rows = output_df2[output_df2['product'] == 'padcev']
for total_col in ['sales', 'sales_RA_10', 'sales_RA_50', 'sales_RA_90']:
    print(sum(padcev_rows[total_col]))

In [None]:
#############
# EVRENZO
#############
# Monte Carlo per year:
#   RA sales = base * (1 + U(-unc_base, +unc_base)) + sum_k impact_k * w_k
# where base is the phase-1 risk-adjusted WW sales, impact_k is the value in
# the uncertainty column of `unc_p` (unc15 .. unc18), and
# w_k ~ Triangular(0, mode_k, 1) with mode_k taken from `unc_p_desc`.
# All n draws of a year are generated in one vectorised call, and rows are
# concatenated once at the end (DataFrame.append was removed in pandas 2.0).
years = [2022, 2023, 2024, 2025]
product = 'evrenzo'
n = 500
event_uncs = ['unc15', 'unc16', 'unc17', 'unc18']

# Clear output for rerun
output_df2 = output_df2[(output_df2['product'] != product)]

# Triangular modes do not depend on the year; .item() fails loudly unless
# exactly one row matches.
event_modes = np.array([
    float(unc_p_desc[unc_p_desc['uncertainties'] == name]['probability'].item())
    for name in event_uncs
])

new_rows = []
for year in years:
    print(year)

    # Year-specific inputs (constant across the n draws)
    unc_row = unc_p[(unc_p['product'] == product) & (unc_p['year'] == year)]
    unc_base = float(unc_row['unc_base'].item())
    event_impacts = unc_row[event_uncs].to_numpy(dtype=float).ravel()

    # Get Product Worldwide sales by year
    prod_ww_sales = sales_p2[(sales_p2['product'] == product) & (sales_p2['region'] == 'Total') & (sales_p2['year'] == year)].copy()
    base_sales = float(prod_ww_sales['sales_p1_RA'].item())

    # Draws: one base shock per simulation, one weight per simulation and event
    base_shock = np.random.uniform(low=-unc_base, high=unc_base, size=n)
    event_weights = np.random.triangular(0, event_modes, 1, size=(n, len(event_uncs)))

    # Generate RA sales
    sales_ra = base_sales * (1 + base_shock) + event_weights @ event_impacts

    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
    prod_ww_sales['sales_p2_RA'] = np.mean(sales_ra)
    new_rows.append(prod_ww_sales)

# Append to sales df
output_df2 = pd.concat([output_df2] + new_rows, ignore_index=True)

output_df2 = output_df2.fillna(0)
output_df2

In [None]:
# Totals over all years for the evrenzo rows: sales, then P10 / P50 / P90
evrenzo_rows = output_df2[output_df2['product'] == 'evrenzo']
for total_col in ['sales', 'sales_RA_10', 'sales_RA_50', 'sales_RA_90']:
    print(sum(evrenzo_rows[total_col]))

In [None]:
#############
# ZOLBE
#############
# Monte Carlo per year with base uncertainty only:
#   RA sales = base * (1 + U(-unc_base, +unc_base))
# where base is the phase-1 risk-adjusted WW sales and unc_base comes from
# `unc_p`. The n draws of a year are generated in one vectorised call, and rows
# are concatenated once at the end (DataFrame.append was removed in pandas 2.0).
years = [2022, 2023, 2024, 2025]
product = 'zolbe'
n = 500

# Clear output for rerun
output_df2 = output_df2[(output_df2['product'] != product)]

new_rows = []
for year in years:
    print(year)

    # Get Base Uncertainty (.item() fails loudly unless exactly one row matches)
    unc_base = float(unc_p[(unc_p['product'] == product) & (unc_p['year'] == year)]['unc_base'].item())

    # Get Product Worldwide sales by year
    prod_ww_sales = sales_p2[(sales_p2['product'] == product) & (sales_p2['region'] == 'Total') & (sales_p2['year'] == year)].copy()
    base_sales = float(prod_ww_sales['sales_p1_RA'].item())

    # Generate RA sales
    base_shock = np.random.uniform(low=-unc_base, high=unc_base, size=n)
    sales_ra = base_sales * (1 + base_shock)

    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
    prod_ww_sales['sales_p2_RA'] = np.mean(sales_ra)
    new_rows.append(prod_ww_sales)

# Append to sales df
output_df2 = pd.concat([output_df2] + new_rows, ignore_index=True)

output_df2 = output_df2.fillna(0)
output_df2

In [None]:
# Totals over all years for the zolbe rows: sales, then P10 / P50 / P90
zolbe_rows = output_df2[output_df2['product'] == 'zolbe']
for total_col in ['sales', 'sales_RA_10', 'sales_RA_50', 'sales_RA_90']:
    print(sum(zolbe_rows[total_col]))

In [None]:
#############
# FEZO
#############
# No simulation here: the P10/P50/P90 values in `unc_p` (columns unc19, unc20,
# unc21) are expressed relative to the product's US total (phase-1
# risk-adjusted) and that ratio is applied to the WW total. The P50 result is
# also used as the phase-2 risk-adjusted value.
# Rows are concatenated once at the end (DataFrame.append was removed in
# pandas 2.0).
years = [2022, 2023, 2024, 2025]
product = 'fezo'

# Clear output for rerun
output_df2 = output_df2[(output_df2['product'] != product)]

new_rows = []
for year in years:
    print(year)

    # .item() fails loudly unless exactly one row matches
    fezo_base = float(sales_ptrs[(sales_ptrs['product'] == product) & (sales_ptrs['region'] == 'US') & (sales_ptrs['year'] == year) & (sales_ptrs['indication'] == 'Total')]['sales_p1_RA'].item())
    unc_row = unc_p[(unc_p['product'] == product) & (unc_p['year'] == year)]
    fezo_10 = float(unc_row['unc19'].item())
    fezo_50 = float(unc_row['unc20'].item())
    fezo_90 = float(unc_row['unc21'].item())

    # Get Product Worldwide sales by year
    prod_ww_sales = sales_p2[(sales_p2['product'] == product) & (sales_p2['region'] == 'Total') & (sales_p2['year'] == year)].copy()
    ww_sales_ra = float(prod_ww_sales['sales_p1_RA'].item())

    # np.divide keeps the non-raising behaviour for a zero US base: the ratio
    # becomes inf/NaN, and NaN is turned into 0 by the fillna below.
    with np.errstate(divide='ignore', invalid='ignore'):
        prod_ww_sales['sales_RA_10'] = float(np.divide(fezo_10, fezo_base)) * ww_sales_ra
        prod_ww_sales['sales_RA_50'] = float(np.divide(fezo_50, fezo_base)) * ww_sales_ra
        prod_ww_sales['sales_RA_90'] = float(np.divide(fezo_90, fezo_base)) * ww_sales_ra

    prod_ww_sales['sales_p2_RA'] = prod_ww_sales['sales_RA_50']
    new_rows.append(prod_ww_sales)

# Append to sales df
output_df2 = pd.concat([output_df2] + new_rows, ignore_index=True)

output_df2 = output_df2.fillna(0)
output_df2

In [None]:
# Totals over all years for the fezo rows: sales, then P10 / P50 / P90
fezo_rows = output_df2[output_df2['product'] == 'fezo']
for total_col in ['sales', 'sales_RA_10', 'sales_RA_50', 'sales_RA_90']:
    print(sum(fezo_rows[total_col]))

In [None]:
#############
# MIRABEGRON
#############
# Each simulation picks one of three equally likely sales trajectories
# (High / Med / Low, hard-coded below) and applies one relative shock to all
# four years of that trajectory.
#
# Fix: the base uncertainty used to be looked up with `year`, a variable this
# cell never defines (it leaked from the previous cell's loop), so every year
# used one year's unc_base and the cell failed on a fresh kernel. Each year now
# uses its own unc_base: one standard shock u ~ U(-1, 1) is drawn per
# simulation and scaled by that year's unc_base. When unc_base is the same for
# all years this is the same distribution as before.
# Rows are concatenated once at the end (DataFrame.append was removed in
# pandas 2.0).
years = [2022, 2023, 2024, 2025]
product = 'mirabegron'
n = 500

# Scenario sales per year, columns in the order of `years`
scenario_sales = np.array([
    [1778.7, 1769.2, 1741.45, 1726.98],   # 1 = High
    [1778.7, 1768.97, 1741.21, 1223.22],  # 2 = Med
    [1778.7, 1763.69, 965.55, 742.85],    # 3 = Low
])

# Clear output for rerun
output_df2 = output_df2[(output_df2['product'] != product)]

# Get Base Uncertainty per year (.item() fails loudly unless exactly one row matches)
unc_base_by_year = np.array([
    float(unc_p[(unc_p['product'] == product) & (unc_p['year'] == yr)]['unc_base'].item())
    for yr in years
])

# Get High,Base,Low Uncertainty: 1, 2 or 3 with equal probability
unc_hbl_prob = np.random.randint(1, 4, size=n)
# One standard shock per simulation, shared by all years of that simulation
unit_shock = np.random.uniform(low=-1.0, high=1.0, size=n)

# Generate RA sales: shape (n, len(years))
sales_ra_by_year = scenario_sales[unc_hbl_prob - 1] * (1 + np.outer(unit_shock, unc_base_by_year))

new_rows = []
for col, yr in enumerate(years):
    sales_ra_year = sales_ra_by_year[:, col]

    # Get Product Worldwide sales by year
    prod_ww_sales = sales_p2[(sales_p2['product'] == product) & (sales_p2['region'] == 'Total') & (sales_p2['year'] == yr)].copy()
    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra_year, 10)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra_year, 50)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra_year, 90)
    prod_ww_sales['sales_p2_RA'] = np.mean(sales_ra_year)
    new_rows.append(prod_ww_sales)

# Append to sales df
output_df2 = pd.concat([output_df2] + new_rows, ignore_index=True)

output_df2 = output_df2.fillna(0)
output_df2

In [None]:
# Totals over all years for the mirabegron rows: sales, then P10 / P50 / P90
mirabegron_rows = output_df2[output_df2['product'] == 'mirabegron']
for total_col in ['sales', 'sales_RA_10', 'sales_RA_50', 'sales_RA_90']:
    print(sum(mirabegron_rows[total_col]))

In [None]:
#############
# TACROLIMUS
#############
# Monte Carlo per year with base uncertainty only:
#   RA sales = base * (1 + U(-unc_base, +unc_base))
# where base is the phase-1 risk-adjusted WW sales and unc_base comes from
# `unc_p`. The n draws of a year are generated in one vectorised call, and rows
# are concatenated once at the end (DataFrame.append was removed in pandas 2.0).
years = [2022, 2023, 2024, 2025]
product = 'tacrolimus'
n = 500

# Clear output for rerun
output_df2 = output_df2[(output_df2['product'] != product)]

new_rows = []
for year in years:
    print(year)

    # Get Base Uncertainty (.item() fails loudly unless exactly one row matches)
    unc_base = float(unc_p[(unc_p['product'] == product) & (unc_p['year'] == year)]['unc_base'].item())

    # Get Product Worldwide sales by year
    prod_ww_sales = sales_p2[(sales_p2['product'] == product) & (sales_p2['region'] == 'Total') & (sales_p2['year'] == year)].copy()
    base_sales = float(prod_ww_sales['sales_p1_RA'].item())

    # Generate RA sales
    base_shock = np.random.uniform(low=-unc_base, high=unc_base, size=n)
    sales_ra = base_sales * (1 + base_shock)

    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
    prod_ww_sales['sales_p2_RA'] = np.mean(sales_ra)
    new_rows.append(prod_ww_sales)

# Append to sales df
output_df2 = pd.concat([output_df2] + new_rows, ignore_index=True)

output_df2 = output_df2.fillna(0)
output_df2

In [None]:
# Totals over all years for the tacrolimus rows: sales, then P10 / P50 / P90
tacrolimus_rows = output_df2[output_df2['product'] == 'tacrolimus']
for total_col in ['sales', 'sales_RA_10', 'sales_RA_50', 'sales_RA_90']:
    print(sum(tacrolimus_rows[total_col]))

In [None]:
#############
# LEXISCAN
#############
# Monte Carlo per year with base uncertainty only:
#   RA sales = base * (1 + U(-unc_base, +unc_base))
# where base is the phase-1 risk-adjusted WW sales and unc_base comes from
# `unc_p`. The n draws of a year are generated in one vectorised call, and rows
# are concatenated once at the end (DataFrame.append was removed in pandas 2.0).
years = [2022, 2023, 2024, 2025]
product = 'lexiscan'
n = 500

# Clear output for rerun
output_df2 = output_df2[(output_df2['product'] != product)]

new_rows = []
for year in years:
    print(year)

    # Get Base Uncertainty (.item() fails loudly unless exactly one row matches)
    unc_base = float(unc_p[(unc_p['product'] == product) & (unc_p['year'] == year)]['unc_base'].item())

    # Get Product Worldwide sales by year
    prod_ww_sales = sales_p2[(sales_p2['product'] == product) & (sales_p2['region'] == 'Total') & (sales_p2['year'] == year)].copy()
    base_sales = float(prod_ww_sales['sales_p1_RA'].item())

    # Generate RA sales
    base_shock = np.random.uniform(low=-unc_base, high=unc_base, size=n)
    sales_ra = base_sales * (1 + base_shock)

    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
    prod_ww_sales['sales_p2_RA'] = np.mean(sales_ra)
    new_rows.append(prod_ww_sales)

# Append to sales df
output_df2 = pd.concat([output_df2] + new_rows, ignore_index=True)

output_df2 = output_df2.fillna(0)
output_df2

In [None]:
# Totals over all years for the lexiscan rows: sales, then P10 / P50 / P90
lexiscan_rows = output_df2[output_df2['product'] == 'lexiscan']
for total_col in ['sales', 'sales_RA_10', 'sales_RA_50', 'sales_RA_90']:
    print(sum(lexiscan_rows[total_col]))

In [None]:
#############
# OTHERS
#############
# Monte Carlo per year with base uncertainty only:
#   RA sales = base * (1 + U(-unc_base, +unc_base))
# where base is the phase-1 risk-adjusted WW sales and unc_base comes from
# `unc_p`. The n draws of a year are generated in one vectorised call, and rows
# are concatenated once at the end (DataFrame.append was removed in pandas 2.0).
years = [2022, 2023, 2024, 2025]
product = 'others'
n = 500

# Clear output for rerun
output_df2 = output_df2[(output_df2['product'] != product)]

new_rows = []
for year in years:
    print(year)

    # Get Base Uncertainty (.item() fails loudly unless exactly one row matches)
    unc_base = float(unc_p[(unc_p['product'] == product) & (unc_p['year'] == year)]['unc_base'].item())

    # Get Product Worldwide sales by year
    prod_ww_sales = sales_p2[(sales_p2['product'] == product) & (sales_p2['region'] == 'Total') & (sales_p2['year'] == year)].copy()
    base_sales = float(prod_ww_sales['sales_p1_RA'].item())

    # Generate RA sales
    base_shock = np.random.uniform(low=-unc_base, high=unc_base, size=n)
    sales_ra = base_sales * (1 + base_shock)

    prod_ww_sales['sales_RA_10'] = np.percentile(sales_ra, 10)
    prod_ww_sales['sales_RA_50'] = np.percentile(sales_ra, 50)
    prod_ww_sales['sales_RA_90'] = np.percentile(sales_ra, 90)
    prod_ww_sales['sales_p2_RA'] = np.mean(sales_ra)
    new_rows.append(prod_ww_sales)

# Append to sales df
output_df2 = pd.concat([output_df2] + new_rows, ignore_index=True)

output_df2 = output_df2.fillna(0)
output_df2

In [None]:
# Totals over all years for the 'others' rows: sales, then P10 / P50 / P90
others_rows = output_df2[output_df2['product'] == 'others']
for total_col in ['sales', 'sales_RA_10', 'sales_RA_50', 'sales_RA_90']:
    print(sum(others_rows[total_col]))

In [None]:
# Persist the phase-2 results (all products) as CSV, index column included
phase2_csv_path = r"C:\Users\A4023862\OneDrive - Astellas Pharma Inc\LRF\Synchronization CSP & AP\output\output_phase2.csv"
output_df2.to_csv(phase2_csv_path)

In [None]:
# Analysis
strategic_products = ['xtandi', 'xospata', 'padcev', 'evrenzo', 'fezo', 'zolbe']


def print_sales_analysis(df, year=2025):
    """Print rounded sales totals for one year, section by section.

    Four sections are printed, in this order: 'Sales 10th' (column
    sales_RA_10), 'Sales 50th' (sales_RA_50), 'Sales 90th' (sales_RA_90) and
    'Sales' (sales). Each section starts with a blank separator and its
    heading, followed by twelve lines:

    1. one total per entry of ``strategic_products``, in list order;
    2. the combined total over all ``strategic_products``;
    3. one total each for mirabegron, tacrolimus, lexiscan and others;
    4. the total over every row of the year.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with 'year' and 'product' columns plus the four value columns
        named above.
    year : int, default 2025
        Year whose rows are summarised.

    Returns
    -------
    None
        Output is written with ``print`` only.
    """
    other_products = ['mirabegron', 'tacrolimus', 'lexiscan', 'others']
    sections = [
        ('Sales 10th', 'sales_RA_10'),
        ('Sales 50th', 'sales_RA_50'),
        ('Sales 90th', 'sales_RA_90'),
        ('Sales', 'sales'),
    ]

    # Filter on the year once; every figure below is a subset of these rows.
    year_rows = df[df['year'] == year]

    for heading, column in sections:

        def rounded_total(rows):
            # Builtin sum/round are kept so the printed values match the
            # earlier line-by-line version exactly.
            return round(sum(rows[column]))

        print('\n')
        print(heading)
        for product_name in strategic_products:
            print(rounded_total(year_rows[year_rows['product'] == product_name]))
        print(rounded_total(year_rows[year_rows['product'].isin(strategic_products)]))
        for product_name in other_products:
            print(rounded_total(year_rows[year_rows['product'] == product_name]))
        print(rounded_total(year_rows))


print_sales_analysis(output_df2, year=2025)