<a href="https://colab.research.google.com/github/VejAlDatt/Prediction-System-for-Emergency-Operators/blob/main/ForcastBaselineModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Baseline Model**

The purpose of this baseline model is to find the number and percentage of days with a shortage or a surplus of emergency standby drivers, as well as the average activation rate.

In [None]:
#Importation of libraries
import numpy, pandas
import sklearn 
from sklearn import *

In [None]:
#Load the sickness table CSV into a dataframe, using the file's first column as the index
sickness_csv_path = '/content/sickness_table.csv'
sicknessDataframe = pandas.read_csv(
    sickness_csv_path,
    index_col=0,
)

In [None]:
#Positional row at which the testing period starts; every row from here to the end
#of the dataset is evaluated below
TEST_START_ROW = 922

#Independent copy of the testing rows, so the columns added below do not touch sicknessDataframe
testdataframe = sicknessDataframe.iloc[TEST_START_ROW:].copy()

#Root mean squared error (RMSE) between the 'n_sby' and 'sby_need' columns of the testing rows
rmse = (sklearn.metrics.mean_squared_error(testdataframe['n_sby'], testdataframe['sby_need']))**0.5
print(f'Testing period RMSE: {rmse}')

#Flag every row: 'shortage' is 1.0 when n_sby is below sby_need, 'surplus' is 1.0 when it is above.
#Rows where both values are equal get 0.0 in both columns.
testdataframe['shortage'] = (testdataframe['n_sby'] < testdataframe['sby_need']).astype(float)
testdataframe['surplus'] = (testdataframe['n_sby'] > testdataframe['sby_need']).astype(float)

#Number of flagged rows and their share of all testing rows, in percent
total_rows = len(testdataframe)

shortage_count = (testdataframe['shortage'] == 1).sum()
shortage_pct = shortage_count / total_rows * 100
print(f'Shortage days: {shortage_count}, {shortage_pct}%')

surplus_count = (testdataframe['surplus'] == 1).sum()
surplus_pct = surplus_count / total_rows * 100
print(f'Surplus days: {surplus_count}, {surplus_pct}%')

#Per-row ratio of n_sby to sby_need.
#NOTE(review): an activation rate would normally be activated / available standbys,
#i.e. sby_need / n_sby - confirm that this direction is the intended one.
standby_ratio = testdataframe['n_sby'] / testdataframe['sby_need']

#Dividing by a zero 'sby_need' gives +/-inf; those rows are counted as 0.0.
#NaN values are left untouched and are skipped by mean() below.
#The cleaned Series is assigned explicitly instead of calling
#test_df.pctage.where(..., inplace=True): that call edits an intermediate Series and
#leaves test_df unchanged when pandas Copy-on-Write is active.
standby_ratio = standby_ratio.where(~numpy.isinf(standby_ratio), 0.0)

#test_df is a copy of testdataframe with the cleaned ratio added as column 'pctage'
test_df = testdataframe.assign(pctage=standby_ratio)

#Mean of the 'pctage' column expressed in percent
avg_activation = test_df.pctage.mean() * 100

print(f'Average activation rate: {avg_activation:.2f}%')


Testing period RMSE: 112.51639494062536
Shortage days: 48, 20.869565217391305%
Surplus days: 182, 79.13043478260869%
Average activation rate: 50.66%
