**Importing Libraries**

In [None]:
import pandas as pd
import numpy as np
import dtale
import statsmodels.graphics.tsaplots as tsa_plots
from statsmodels.tsa.arima.model import ARIMA
from matplotlib import pyplot
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose

**Reading Data**

In [None]:
# Load the raw CSV into a DataFrame; the relative path is resolved against
# the notebook's current working directory.
data = pd.read_csv("Pharma_Bounce_Rate.csv")

In [None]:
# Print column names, dtypes and non-null counts of the raw frame.
data.info()

In [None]:
# Summary statistics of the raw frame (pandas defaults decide which columns are included).
data.describe()

In [None]:
# Number of distinct values in the DrugName column of the raw data.
data['DrugName'].nunique()

**Data Exploration : Checking & dropping duplicates**

In [None]:
# Boolean mask over the raw rows: True where a row repeats an earlier row in every column.
duplicate = data.duplicated()
duplicate

In [None]:
# Count the duplicated rows.  Series.sum() reduces the boolean mask in one
# vectorised step instead of iterating it element by element with builtin sum().
duplicate.sum()

In [None]:
# Row count per drug name, computed on the raw frame (i.e. before de-duplication).
unique = data["DrugName"].value_counts()
# Keep one copy of each fully duplicated row (pandas default keeps the first occurrence).
data1 = data.drop_duplicates()

In [None]:
# Sanity check: after drop_duplicates() no duplicated rows should remain.
# Series.sum() counts the True entries in one vectorised step instead of
# iterating the mask with builtin sum().
duplicate1 = data1.duplicated()
duplicate1.sum()

**Data Exploration : Checking for missing values**

In [None]:
# Missing-value count per column of the de-duplicated frame.
data1.isna().sum()

In [None]:
# Drop every row that contains at least one missing value.  The name is
# rebound to the returned frame rather than using inplace=True, so the frame
# produced by drop_duplicates() is not mutated behind the reader's back.
data1 = data1.dropna()

In [None]:
# Re-check the per-column missing-value counts after dropping incomplete rows.
data1.isna().sum()

**Data Preprocessing : Converting the datatype of Date column from Object to Datetime format**

In [None]:
# Parse the bill-date column into datetime64 values.  assign() returns a new
# frame instead of writing a column into data1, which is itself the result of
# row filtering; writing into such a frame can raise SettingWithCopyWarning
# on pandas versions without Copy-on-Write.
data1 = data1.assign(Dateofbill=pd.to_datetime(data1['Dateofbill']))

**Data Preprocessing : Sorting the data by the date column**

In [None]:
# Order the rows by bill date (ascending by default) and preview the first five.
data1 = data1.sort_values(by='Dateofbill')
data1.head()

**Auto EDA using dtale**

In [None]:
# Start a D-Tale session over the cleaned frame for interactive exploration.
d = dtale.show(data1)
# Open that session in a browser tab.
d.open_browser()

**Time Series Analysis**

In [None]:
# Bar chart of the ten most frequent drug names (row counts, not summed quantities).
data1['DrugName'].value_counts().head(10).plot(kind = 'bar')

# Keep only the modelling columns and index the frame by bill date.
# The guard makes the cell re-runnable: once 'Dateofbill' has become the
# index it is no longer a column, and selecting it again would raise KeyError.
if data1.index.name != 'Dateofbill':
    data1 = data1[["Dateofbill","Quantity", "DrugName"]].set_index('Dateofbill')

# Ensure a DatetimeIndex (a no-op when the dates were already parsed);
# the monthly resampling done later requires one.
data1.index = pd.to_datetime(data1.index)

# One group of rows per drug name.
grouped_data = data1.groupby('DrugName')

**Forecasting For Top 5 drugs**

In [None]:
# Extract the rows of five hard-coded drug names from the per-drug groups.
# get_group raises KeyError when a name is not present in the data.
# NOTE(review): presumably these are meant to be the five most frequent drugs
# announced by the section heading - confirm against the value_counts() chart.
A = grouped_data.get_group('SODIUM CHLORIDE IVF 100ML')
A1 = grouped_data.get_group('MULTIPLE ELECTROLYTES 500ML IVF')
A2 = grouped_data.get_group('PARACETAMOL 1GM IV INJ')
A3 = grouped_data.get_group('SODIUM CHLORIDE 0.9%')
A4 = grouped_data.get_group('PARACETAMOL 150MG')

In [None]:
# Monthly demand series for this drug: keep only Quantity, label the column
# with the drug name, and sum the quantities per month ('M' = month-end bins).
# Written as one chain instead of rename(inplace=True) on a column selection,
# which can emit SettingWithCopyWarning on pandas versions without Copy-on-Write.
A = (
    A[["Quantity"]]
    .rename(columns={"Quantity": "SODIUM CHLORIDE IVF 100ML"})
    .resample('M')
    .sum()
)
A.plot()

In [None]:
# Monthly demand series for this drug: keep only Quantity, label the column
# with the drug name, and sum the quantities per month ('M' = month-end bins).
# Written as one chain instead of rename(inplace=True) on a column selection,
# which can emit SettingWithCopyWarning on pandas versions without Copy-on-Write.
A1 = (
    A1[["Quantity"]]
    .rename(columns={"Quantity": "MULTIPLE ELECTROLYTES 500ML IVF"})
    .resample('M')
    .sum()
)
A1.plot()

In [None]:
# Monthly demand series for this drug: keep only Quantity, label the column
# with the drug name, and sum the quantities per month ('M' = month-end bins).
# Written as one chain instead of rename(inplace=True) on a column selection,
# which can emit SettingWithCopyWarning on pandas versions without Copy-on-Write.
A2 = (
    A2[["Quantity"]]
    .rename(columns={"Quantity": "PARACETAMOL 1GM IV INJ"})
    .resample('M')
    .sum()
)
A2.plot()

In [None]:
# Monthly demand series for this drug: keep only Quantity, label the column
# with the drug name, and sum the quantities per month ('M' = month-end bins).
# Written as one chain instead of rename(inplace=True) on a column selection,
# which can emit SettingWithCopyWarning on pandas versions without Copy-on-Write.
A3 = (
    A3[["Quantity"]]
    .rename(columns={"Quantity": "SODIUM CHLORIDE 0.9%"})
    .resample('M')
    .sum()
)
A3.plot()

In [None]:
# Monthly demand series for this drug: keep only Quantity, label the column
# with the drug name, and sum the quantities per month ('M' = month-end bins).
# Written as one chain instead of rename(inplace=True) on a column selection,
# which can emit SettingWithCopyWarning on pandas versions without Copy-on-Write.
A4 = (
    A4[["Quantity"]]
    .rename(columns={"Quantity": "PARACETAMOL 150MG"})
    .resample('M')
    .sum()
)
A4.plot()

In [None]:
from statsmodels.tsa.stattools import adfuller

In [None]:
def adf_test(series):
    """Run the Augmented Dickey-Fuller test on ``series`` and print the verdict.

    Prints the test statistic and the p-value, followed by a stationarity
    message chosen at the 0.05 significance level.  Nothing is returned.
    """
    adf_output = adfuller(series)
    statistic, p_value = adf_output[0], adf_output[1]

    print('ADF Statistics: {}'.format(statistic))
    print('p- value: {}'.format(p_value))

    # A p-value at or below 0.05 rejects the unit-root null hypothesis.
    verdict = (
        "Strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root and is stationary"
        if p_value <= 0.05
        else "Weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary "
    )
    print(verdict)

In [None]:
# Run the stationarity check on each of the five monthly drug series, in order.
for monthly_series in (A, A1, A2, A3, A4):
    adf_test(monthly_series)

In [None]:
def decompose(df, period=1):
    """Additively decompose a time series and plot the components.

    Parameters
    ----------
    df : time series (or single-column frame) passed to ``seasonal_decompose``.
    period : int, default 1
        Length of the seasonal cycle, in observations.  The default of 1
        reproduces the previous hard-coded behaviour; with a cycle of one
        observation there is no within-cycle position to estimate, so the
        seasonal component is flat.  Pass e.g. 12 for a yearly cycle in
        monthly data - statsmodels then needs at least two full cycles.

    Returns
    -------
    The result object produced by ``seasonal_decompose``.

    Side effects
    ------------
    Sets the global matplotlib default figure size to (20, 10), which also
    affects figures created afterwards, and shows the decomposition figure.
    """
    result_add = seasonal_decompose(df, model = 'additive', extrapolate_trend='freq',  period=period)

    plt.rcParams.update({'figure.figsize': (20, 10)})
    result_add.plot().suptitle('Additive Decompose', fontsize=30)
    plt.show()

    return  result_add

In [None]:
# Decompose and plot each of the five monthly drug series in turn.
# Only the return value of the final call is echoed as the cell's output.
decompose(A)
decompose(A1)
decompose(A2)
decompose(A3)
decompose(A4)

In [None]:
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf

# One autocorrelation plot per drug, titled with the series' column name so
# the five figures can be told apart.  plot_acf returns the Figure, so the
# cell ends with plt.show() rather than a bare call: a trailing Figure
# expression would be rendered a second time by the notebook.
for monthly_series in (A, A1, A2, A3, A4):
    plot_acf(monthly_series, title='Autocorrelation - {}'.format(monthly_series.columns[0]))
plt.show()

In [None]:
# One partial-autocorrelation plot (5 lags) per drug, titled with the series'
# column name so the five figures can be told apart.  plot_pacf returns the
# Figure, so the cell ends with plt.show() rather than a bare call: a trailing
# Figure expression would be rendered a second time by the notebook.
for monthly_series in (A, A1, A2, A3, A4):
    plot_pacf(
        monthly_series,
        lags=5,
        title='Partial Autocorrelation - {}'.format(monthly_series.columns[0]),
    )
plt.show()

In [None]:
def MAPE(pred, org):
    """Mean absolute percentage error of ``pred`` relative to ``org``, in percent.

    Each element contributes ``|(pred - org) / org| * 100``; the mean of those
    values is returned.  There is no guard for zeros in ``org``.
    """
    relative_error = (pred - org) / org
    percentage_error = np.abs(relative_error) * 100
    return np.mean(percentage_error)

In [None]:
from pmdarima import auto_arima

import warnings
# NOTE: this silences every warning for the rest of the kernel session,
# not only the ones raised while searching for model orders.
warnings.filterwarnings("ignore")

# Automatic order search per drug.  Each summary is printed explicitly:
# a bare `.summary()` in the middle of a cell is evaluated and thrown away,
# and only the last expression of a cell is echoed.
Am = auto_arima(A, suppress_warnings=True)
print(Am.summary())

Am1 = auto_arima(A1, suppress_warnings=True)
print(Am1.summary())

Am2 = auto_arima(A2, suppress_warnings=True)
print(Am2.summary())

Am3 = auto_arima(A3, suppress_warnings=True)
print(Am3.summary())

Am4 = auto_arima(A4, suppress_warnings=True)
print(Am4.summary())

In [None]:
# Fit ARIMA(p=1, d=1, q=12) on the first drug's monthly series.
# NOTE(review): the order is hard-coded rather than taken from the automatic
# order search, and q=12 is large for a short monthly series - confirm.
Drug1=ARIMA(A,order=(1, 1, 12))
Drug1=Drug1.fit()
# Printed explicitly: a bare summary() in the middle of a cell is discarded.
print(Drug1.summary())

# Predictions for integer positions 1..12 of the series.  No `typ` argument
# is passed: it is not a parameter of this ARIMA results API.
# NOTE(review): end=12 is one step past the sample if the series has 12 rows - confirm.
pred=Drug1.predict(start=1,end=12).rename('ARIMA predictions')

# Draw the predictions on the same axes as the observed series.
ax = A.plot()
pred.plot(ax=ax, legend=True)
plt.show()

In [None]:
from sklearn.metrics import mean_absolute_percentage_error

# Score only the months that have both an observation and a prediction.
# scikit-learn compares the two inputs by position, not by index label, so
# they are first restricted to their shared dates; otherwise row 0 of the
# observations would be scored against the prediction for position 1.
actual = A['SODIUM CHLORIDE IVF 100ML']
scored_months = actual.index.intersection(pred.index)
# scikit-learn returns the error as a fraction (0.10 means 10 %).
print('MAPE (fraction):', mean_absolute_percentage_error(actual.loc[scored_months], pred.loc[scored_months]))

# Predictions for positions 12..24.  No `typ` argument is passed: it is not
# a parameter of this ARIMA results API.
predf=Drug1.predict(start=12,end=24).rename('ARIMA predictions')
# Persist the fitted model, then echo the forecast as the cell output.
Drug1.save('drug_1.pkl')
predf

In [None]:
# Fit ARIMA(p=4, d=1, q=12) on the second drug's monthly series.
# NOTE(review): the order is hard-coded rather than taken from the automatic
# order search, and q=12 is large for a short monthly series - confirm.
Drug2 = ARIMA(A1,order=(4, 1, 12))
Drug2 = Drug2.fit()
# Printed explicitly: a bare summary() in the middle of a cell is discarded.
print(Drug2.summary())

# Predictions for integer positions 1..12 of the series.  No `typ` argument
# is passed: it is not a parameter of this ARIMA results API.
pred=Drug2.predict(start=1,end=12).rename('ARIMA predictions')

# Draw the predictions on the same axes as the observed series.
ax = A1.plot()
pred.plot(ax=ax, legend=True)
plt.show()

In [None]:
# A1's only column is named 'MULTIPLE ELECTROLYTES 500ML IVF'; looking up any
# other drug name on it raises KeyError.
actual = A1['MULTIPLE ELECTROLYTES 500ML IVF']
# scikit-learn compares by position, not by index label, so restrict both
# series to the dates they share before scoring.
scored_months = actual.index.intersection(pred.index)
# scikit-learn returns the error as a fraction (0.10 means 10 %).
print('MAPE (fraction):', mean_absolute_percentage_error(actual.loc[scored_months], pred.loc[scored_months]))

# Predictions for positions 12..24.  No `typ` argument is passed: it is not
# a parameter of this ARIMA results API.
predf=Drug2.predict(start=12,end=24).rename('ARIMA predictions')
# Persist the fitted model, then echo the forecast as the cell output.
Drug2.save('drug_2.pkl')
predf

In [None]:
# Fit ARIMA(p=3, d=1, q=12) on the third drug's monthly series.
# NOTE(review): the order is hard-coded rather than taken from the automatic
# order search, and q=12 is large for a short monthly series - confirm.
Drug3 = ARIMA(A2,order=(3, 1, 12))
Drug3 = Drug3.fit()
# Printed explicitly: a bare summary() in the middle of a cell is discarded.
print(Drug3.summary())

# Predictions for integer positions 1..12 of the series.  No `typ` argument
# is passed: it is not a parameter of this ARIMA results API.
pred=Drug3.predict(start=1,end=12).rename('ARIMA predictions')

# Draw the predictions on the same axes as the observed series.
ax = A2.plot()
pred.plot(ax=ax, legend=True)
plt.show()

In [None]:
# A2's only column is named 'PARACETAMOL 1GM IV INJ'; looking up any other
# drug name on it raises KeyError.
actual = A2['PARACETAMOL 1GM IV INJ']
# scikit-learn compares by position, not by index label, so restrict both
# series to the dates they share before scoring.
scored_months = actual.index.intersection(pred.index)
# scikit-learn returns the error as a fraction (0.10 means 10 %).
print('MAPE (fraction):', mean_absolute_percentage_error(actual.loc[scored_months], pred.loc[scored_months]))

# Predictions for positions 12..24.  No `typ` argument is passed: it is not
# a parameter of this ARIMA results API.
predf=Drug3.predict(start=12,end=24).rename('ARIMA predictions')
# Persist the fitted model, then echo the forecast as the cell output.
Drug3.save('drug_3.pkl')
predf

In [None]:
# Fit ARIMA(p=2, d=1, q=12) on the fourth drug's monthly series.
# NOTE(review): the order is hard-coded rather than taken from the automatic
# order search, and q=12 is large for a short monthly series - confirm.
Drug4 = ARIMA(A3,order=(2, 1, 12))
Drug4 = Drug4.fit()
# Printed explicitly: a bare summary() in the middle of a cell is discarded.
print(Drug4.summary())

# Predictions for integer positions 1..12 of the series.  No `typ` argument
# is passed: it is not a parameter of this ARIMA results API.
pred=Drug4.predict(start=1,end=12).rename('ARIMA predictions')

# Draw the predictions on the same axes as the observed series.
ax = A3.plot()
pred.plot(ax=ax, legend=True)
plt.show()

In [None]:
# A3's only column is named 'SODIUM CHLORIDE 0.9%'; looking up any other
# drug name on it raises KeyError.
actual = A3['SODIUM CHLORIDE 0.9%']
# scikit-learn compares by position, not by index label, so restrict both
# series to the dates they share before scoring.
scored_months = actual.index.intersection(pred.index)
# scikit-learn returns the error as a fraction (0.10 means 10 %).
print('MAPE (fraction):', mean_absolute_percentage_error(actual.loc[scored_months], pred.loc[scored_months]))

# Predictions for positions 12..24.  No `typ` argument is passed: it is not
# a parameter of this ARIMA results API.
predf=Drug4.predict(start=12,end=24).rename('ARIMA predictions')
# Persist the fitted model, then echo the forecast as the cell output.
Drug4.save('drug_4.pkl')
predf

In [None]:
# Fit ARIMA(p=1, d=1, q=12) on the fifth drug's monthly series.
# NOTE(review): the order is hard-coded rather than taken from the automatic
# order search, and q=12 is large for a short monthly series - confirm.
Drug5 = ARIMA(A4,order=(1, 1, 12))
Drug5 = Drug5.fit()
# Printed explicitly: a bare summary() in the middle of a cell is discarded.
print(Drug5.summary())

# Predictions for integer positions 1..12 of the series.  No `typ` argument
# is passed: it is not a parameter of this ARIMA results API.
pred=Drug5.predict(start=1,end=12).rename('ARIMA predictions')

# Draw the predictions on the same axes as the observed series.
ax = A4.plot()
pred.plot(ax=ax, legend=True)
plt.show()

In [None]:
# A4's only column is named 'PARACETAMOL 150MG'; looking up any other drug
# name on it raises KeyError.
actual = A4['PARACETAMOL 150MG']
# scikit-learn compares by position, not by index label, so restrict both
# series to the dates they share before scoring.
scored_months = actual.index.intersection(pred.index)
# scikit-learn returns the error as a fraction (0.10 means 10 %).
print('MAPE (fraction):', mean_absolute_percentage_error(actual.loc[scored_months], pred.loc[scored_months]))

# Predictions for positions 12..24.  No `typ` argument is passed: it is not
# a parameter of this ARIMA results API.
predf=Drug5.predict(start=12,end=24).rename('ARIMA predictions')
# Persist the fitted model, then echo the forecast as the cell output.
Drug5.save('drug_5.pkl')
predf

In [None]:
import statsmodels.api as sm

# Reload the five fitted models from disk.
# SECURITY NOTE: sm.load unpickles the file, and unpickling can execute
# arbitrary code - only load .pkl files that this notebook wrote itself.
model1 = sm.load('drug_1.pkl')
model2 = sm.load('drug_2.pkl')
model3 = sm.load('drug_3.pkl')
model4 = sm.load('drug_4.pkl')
model5 = sm.load('drug_5.pkl')

# Predictions for positions 12..24 from each reloaded model.  Every forecast
# is printed explicitly: only the last bare expression of a cell is echoed,
# so the first four would otherwise be computed and silently discarded.
predf1=model1.predict(start=12,end=24).rename('ARIMA predictions')
print(predf1)

predf2=model2.predict(start=12,end=24).rename('ARIMA predictions')
print(predf2)

predf3=model3.predict(start=12,end=24).rename('ARIMA predictions')
print(predf3)

predf4=model4.predict(start=12,end=24).rename('ARIMA predictions')
print(predf4)

predf5=model5.predict(start=12,end=24).rename('ARIMA predictions')
print(predf5)