In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
from datetime import datetime
import time
import os
import warnings
warnings.filterwarnings("ignore")

######################
# DATA
######################
input_folder = r'C:\Users\A4023862\OneDrive - Astellas Pharma Inc\ForecastAI\xTrend\inputs'
output_folder = r'C:\Users\A4023862\OneDrive - Astellas Pharma Inc\ForecastAI\xTrend\outputs'

######################
# READ IN DATA
######################
input_file = 'inputs_v1.xlsx'
path = os.path.join(input_folder, input_file)

data=pd.read_excel(path, sheet_name='rev_fct', engine='openpyxl')
unc_pr=pd.read_excel(path, sheet_name='unc_pr', engine='openpyxl')
unc_base_df=pd.read_excel(path, sheet_name='unc_base', engine='openpyxl')

data.head()

  from pandas.core.computation.check import NUMEXPR_INSTALLED


Unnamed: 0,ACCOUNT,COST_OBJECTS,PRODUCT_PC,MONTH,AMOUNT,AMOUNT_2,CURRENCY
0,REVENUE,D_ACN - ACN (China),P_ADG_TOT - Advagraf/GRA Total,FY2021.APR,250886900.0,250886900.0,JPY
1,REVENUE,D_ACN - ACN (China),P_ADG_TOT - Advagraf/GRA Total,FY2021.MAY,248038800.0,498925700.0,JPY
2,REVENUE,D_ACN - ACN (China),P_ADG_TOT - Advagraf/GRA Total,FY2021.JUN,280602400.0,779528100.0,JPY
3,REVENUE,D_ACN - ACN (China),P_ADG_TOT - Advagraf/GRA Total,FY2021.JUL,317798200.0,1097326000.0,JPY
4,REVENUE,D_ACN - ACN (China),P_ADG_TOT - Advagraf/GRA Total,FY2021.AUG,281006400.0,1378333000.0,JPY


In [2]:
####################
# DATA PREP
####################
# Fix dates
data['ds'] = ""

# Extract substring of ds to parse into date
for i in range(0,len(data)):
    data['ds'][i]=data['MONTH'][i].split('FY',1)[1]
    
# Convert to date
data['ds'] =  pd.to_datetime(data['ds'], format='%Y.%b')
data['year'], data['month'], data['day'] = data['ds'].dt.year, data['ds'].dt.month, data['ds'].dt.day

# Address fiscal year dates
for i in range(0,len(data['ds'])):
    if data['month'][i] <4:
        data['year'][i]=data['year'][i]+1
        
# Create new date
data['ds']=pd.to_datetime(data[['year', 'month', 'day']])

# Subset Columns
data=data[['CURRENCY', 'COST_OBJECTS', 'PRODUCT_PC', 'year', 'ds', 'AMOUNT']]
data.columns = ['currency', 'region', 'product', 'fiscal_year', 'ds', 'value']
data.shape

# Save new sales data
output_file = 'output1.csv'
path = os.path.join(output_folder, output_file)
data.to_csv(path)

In [3]:
####################
# xTREND
####################
# Retain original data
data1=data
print(data.shape)
print(data1.shape)

# Create column for xTrend vector
data1['imp_xt']=1

for i in range(0, len(unc_pr)):
    print(i)

    # Params
    product = unc_pr['product'][i]
    region = unc_pr['region'][i]
    sdate = unc_pr['sdate'][i]
    edate = unc_pr['edate'][i]
    simp = unc_pr['simp'][i]
    eimp = unc_pr['eimp'][i]
    base_sales = unc_pr['base_sales'][i]

    # Locate impacted values
    data2=data1[(data1['region']==region) & (data1['product']==product)].reset_index(drop=True)

    # Update orig_df
    data3=data1[(data1['region']!=region) | (data1['product']!=product)].reset_index(drop=True)

    # Update base sales if needed
    if (base_sales == 'last'):
        data2.loc[((data2['ds']>=sdate) & (data2['ds']<=edate)), 'value'] = data2.loc[data2['ds']==sdate, 'value'].values[0]

    # Set start and end values with log transformation
    data2.loc[data2['ds']==sdate, 'imp_xt'] = np.log(simp*100)
    data2.loc[data2['ds']==edate, 'imp_xt'] = np.log(eimp*100)

    # Replace values in-between with NA
    data2.loc[((data2['ds']>sdate) & (data2['ds']<edate)), 'imp_xt'] = np.nan

    # Linear interpolate values between sdate and edate
    data2.loc[((data2['ds']>sdate) & (data2['ds']<edate)), 'imp_xt']=data2['imp_xt'].interpolate(method ='linear', limit_direction ='forward')
    
    # Invert previous log transformation including sdate and edate
    data2.loc[((data2['ds']>=sdate) & (data2['ds']<=edate)), 'imp_xt'] = np.exp(data2['imp_xt'])/100
    
    # Update sales values
    data2['value']=data2['value']*data2['imp_xt']
    
    # Add back to orig df
    data1=pd.concat([data2, data3], ignore_index=True, axis=0)

print(data1.shape)


# Save new sales data
output_file = 'output2.csv'
path = os.path.join(output_folder, output_file)
data2.to_csv(path)

(125136, 6)
(125136, 6)
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
(125136, 7)


In [4]:
old=sum(data1[(data1['product']==unc_pr['product'][0]) & (data1['region']==unc_pr['region'][0]) & (data1['fiscal_year']==2031)]['value'])
new=sum(data[(data['product']==unc_pr['product'][0]) & (data['region']==unc_pr['region'][0]) & (data['fiscal_year']==2031)]['value'])

print(old)
print(new)

1609002572.8635244
10296068323.042477


In [5]:
####################
# BASELINE UNCERTAINTY
####################
data1['value_xt_ra_10']=0
data1['value_xt_ra_25']=0
data1['value_xt_ra_50']=0
data1['value_xt_ra_75']=0
data1['value_xt_ra_90']=0
data1['value_xt_ra_mu']=0
data1['value_xt_ra_sd']=0

# Separate forcasted and actuals
data_act = data1[data1['ds']<'2023-03-01'].reset_index(drop=True)
data_fct = data1[data1['ds']>='2023-03-01'].reset_index(drop=True)

for i in range(0, len(data_fct)):
# for i in range(0, 2):

    product = data_fct['product'][i]
    region = data_fct['region'][i]
    
    n=100
    sales_ra = []
        
    for j in range(n):

        # Get Base Uncertainty
        unc_base=unc_base_df[(unc_base_df['product'] ==product) & (unc_base_df['region'] ==region)]['unc_base']
        # unc_base=0.03
        unc_base_prob=np.random.uniform(low=-unc_base, high=unc_base)

        # Generate RA sales
        sales=data_fct['value'][i]
        unc_all=float(sales) + float(sales*unc_base_prob)
        # print(unc_all)
        sales_ra.append(unc_all)
        
    data_fct['value_xt_ra_10'][i] = np.percentile(sales_ra, 10)
    data_fct['value_xt_ra_25'][i] = np.percentile(sales_ra, 25)
    data_fct['value_xt_ra_50'][i] = np.percentile(sales_ra, 50)
    data_fct['value_xt_ra_75'][i] = np.percentile(sales_ra, 75)
    data_fct['value_xt_ra_90'][i] = np.percentile(sales_ra, 90)
    data_fct['value_xt_ra_mu'][i] = np.mean(sales_ra)
    data_fct['value_xt_ra_sd'][i] = np.std(sales_ra)

# Update actuals
data_act['value_xt_ra_mu'] = data_act['value']
data_act['value_xt_ra_10'] = data_act['value']
data_act['value_xt_ra_25'] = data_act['value']
data_act['value_xt_ra_50'] = data_act['value']
data_act['value_xt_ra_75'] = data_act['value']
data_act['value_xt_ra_90'] = data_act['value']

# Stack
output3=pd.concat([data_act, data_fct], ignore_index=True, axis=0)
    
# Save new sales data
output_file = 'output3.csv'
path = os.path.join(output_folder, output_file)
output3.to_csv(path)

PermissionError: [Errno 13] Permission denied: 'C:\\Users\\A4023862\\OneDrive - Astellas Pharma Inc\\ForecastAI\\xTrend\\outputs\\output3.csv'

In [7]:
####################
# UPDATE DELIVERABLE
####################
output4=output3[['currency','region','product','fiscal_year','ds','value','value_xt_ra_10','value_xt_ra_25','value_xt_ra_50','value_xt_ra_75','value_xt_ra_90','value_xt_ra_mu','value_xt_ra_sd']]

# Save new sales data
output_file = 'output4.csv'
path = os.path.join(output_folder, output_file)
output4.to_csv(path)