# ANNEX C
# PREDICTION CODE FOR WORLDWIDE COVID-19 CASES
## SUBMITTED BY: FAISAL JAVED
### MACHINE LEARNING PROJECT (MS RIME 2020)

In [1]:
import numpy as np 
import pandas as pd

## DATA SET DETAILS 
### GATHERING & CLEANING

In [2]:
# Load the country-wise COVID data from the Excel workbook into a pandas DataFrame
Data = pd.read_excel(io="Country wise Covid Data.xlsx")

In [3]:
# Preview the first five rows to sanity-check the loaded columns
Data.head(n=5)

Unnamed: 0,CONTINENT,LOCATION,DATE,COUNTRY_SERIES,DATE_CODE,COUNTRY_CODE,POPULATION DENSITY,HUMAN_DEVELOPMENT_INDEX,TEMPERATURES,CASES
0,Asia,Afghanistan,2020-01-23,0,0,0,54.422,0.498,7.0,0
1,Asia,Afghanistan,2020-01-24,1,1,0,54.422,0.498,7.0,0
2,Asia,Afghanistan,2020-01-25,2,2,0,54.422,0.498,13.0,0
3,Asia,Afghanistan,2020-01-26,3,3,0,54.422,0.498,2.0,0
4,Asia,Afghanistan,2020-01-27,4,4,0,54.422,0.498,6.0,0


In [4]:
# Prepend a constant column of ones (named '00') to act as the bias/intercept
# feature: the bias weight multiplied by 1 is just the bias weight itself.
Data = pd.concat(
    [pd.Series(1, index=Data.index, name='00'), Data],
    axis='columns',
)

In [5]:
# Shuffle all rows before the train/validation/test split. A fixed random_state
# makes the shuffle (and therefore the splits) reproducible across kernel restarts.
Data = Data.sample(frac=1, random_state=42)

In [6]:
# Build the feature frame by dropping the columns that are not used as inputs,
# including the target column CASES.
# `columns=` is used because passing the axis positionally (`drop(labels, 1)`)
# is rejected by pandas >= 2.0.
X = Data.drop(columns=['LOCATION', 'CONTINENT', 'DATE', 'COUNTRY_SERIES', 'CASES'])

### Defining New Features

In [7]:
# Engineered features: an HDI x population-density interaction term plus
# squared terms for temperature and population density.
X['HDI'] = X['HUMAN_DEVELOPMENT_INDEX'].mul(X['POPULATION DENSITY'])
X['Temperature_sq'] = X['TEMPERATURES'].pow(2)
X['Population_Density_sq'] = X['POPULATION DENSITY'].pow(2)

In [8]:
# Output variable: the CASES column of the data set
Y = Data.loc[:, 'CASES']


### SCALING OF DATA

In [9]:
# Scale every feature column by its own maximum. np.spacing(0) (the smallest
# positive float) is added to the divisor so an all-zero column does not give 0/0.
x = X.div(X.max(axis=0) + np.spacing(0), axis='columns')


### SPLITTING OF DATA SET INTO TRAIN, VALIDATION & TEST DATA

In [10]:
# Row positions where the training part (first 60%) and the validation part
# (the following 20%) end; everything after data_valid is the test part.
data_train = round(len(Data) * 0.6)
data_valid = round(len(Data) * 0.2 + data_train)

In [11]:
# Positional row slices of the scaled features: train / validation / test
train_x = x.iloc[:data_train]
valid_x = x.iloc[data_train:data_valid]
test_x = x.iloc[data_valid:]

In [12]:
# Matching positional slices of the target: train / validation / test
train_y = Y.iloc[:data_train]
valid_y = Y.iloc[data_train:data_valid]
test_y = Y.iloc[data_valid:]

### USING THETAS FROM TRAINING MODEL:

In [13]:
# Model coefficients, to be imported from the training models.
# Nine weights, multiplied element-wise with the nine feature values later on.
theeta = np.array([
    0, 1509, 263,
    0, 538, -102,
    0, 0, 0,
])

In [14]:
# Echo the coefficient vector so the values in use are visible in the output
theeta

array([   0, 1509,  263,    0,  538, -102,    0,    0,    0])

In [15]:
def Date_Code(year,month,day):
    """Return the number of days between 2020-01-23 and the given date.

    The offset is also printed. Dates before 2020-01-23 give a negative value.
    """
    import datetime
    reference_day = datetime.date(2020, 1, 23)
    elapsed = datetime.date(year, month, day) - reference_day
    print ('Date_code is:'  ,elapsed.days)
    return elapsed.days

In [16]:
# Read the target date, convert it to a day offset and scale it by the largest
# DATE_CODE in X. A result above 1 means the date is later than that maximum.
year, month, day = (int(input(f'Enter a {unit}: ')) for unit in ('year', 'month', 'day'))
d = Date_Code(year, month, day) / X['DATE_CODE'].max()
d

Enter a year: 2020
Enter a month: 12
Enter a day: 29
Date_code is: 341


1.0689655172413792

In [17]:
# Read the temperature and scale it by the largest temperature in X
t = float(input('Enter value of Temperature: ')) / X['TEMPERATURES'].max()
t

Enter value of Temperature: -3


-0.057692307692307696

In [18]:
# Read the country/state code and scale it by the largest COUNTRY_CODE in X
s = float(input('Enter state Code: ')) / X['COUNTRY_CODE'].max()
s

Enter state Code: 1


0.0058823529411764705

In [19]:
# Read the population density and scale it by the largest density in X
p = float(input('Enter population Density: ')) / X['POPULATION DENSITY'].max()
p

Enter population Density: 54.422


0.002812869879829435

In [20]:
# Read the Human Development Index and scale it by the largest HDI value in X
hdi = float(input('Enter value of Human Devlopment Index: ')) / X['HUMAN_DEVELOPMENT_INDEX'].max()
hdi

Enter value of Human Devlopment Index: 0.498


0.5225603357817419

In [21]:
# Feature row for the prediction, in this order: bias, date, country code,
# population density, HDI, temperature, HDI*density, temperature^2, density^2.
# The outer one-element tuple (the original trailing comma) matters: it makes
# theeta * x_a a (1, 9) array, which np.sum(..., axis=1) and len(x_a) rely on.
# NOTE(review): the last three terms are built from already-scaled inputs;
# confirm this matches how the training model scaled the engineered columns.
x_a = ([1, d, s, p, hdi, t, hdi * p, t ** 2, p ** 2],)
x_a

([1,
  1.0689655172413792,
  0.0058823529411764705,
  0.002812869879829435,
  0.5225603357817419,
  -0.057692307692307696,
  0.0014698942289140175,
  0.003328402366863906,
  7.91223696085166e-06],)

### PREDICTION CODE

In [22]:
def prediction(theeta,features):
    """Return the element-wise product of the weights and the feature values.

    The per-feature terms are not summed here; the result has one entry per
    feature and must be added up by the caller to get the final estimate.
    """
    return theeta*features

In [23]:
# Weight each feature, then add the per-feature terms of the single input row
y_hatt = prediction(theeta, x_a).sum(axis=1)
print ("The Estimated count of Covid Cases is:   " , y_hatt)

The Estimated count of Covid Cases is:    [1901.63810038]


### ERROR FROM ACTUAL CASES (IF AVAILABLE)

In [24]:
# Actual case count for the same inputs, entered manually when it is available;
# it is compared against the prediction in the error cells below.
actual_y = float(input('Enter value of Actual Cases: '))
actual_y

Enter value of Actual Cases: 241


241.0

In [25]:
### ERROR ON ACTUAL CASES vs PREDICTED CASES
def RMSE(y_hatt, actual_y):
    """Root-mean-square error between predicted and actual values.

    Parameters
    ----------
    y_hatt : array-like
        Predicted values.
    actual_y : array-like or scalar
        Actual values, broadcastable against ``y_hatt``.

    Returns
    -------
    numpy.float64
        sqrt(mean((y_hatt - actual_y) ** 2)).
    """
    squared_errors = (np.asarray(y_hatt, dtype=float) - np.asarray(actual_y, dtype=float)) ** 2
    # Average the squared errors first, then take the root. Taking the root of
    # each squared error before averaging would give the mean absolute error.
    return np.sqrt(squared_errors.mean())
# Error of the prediction above against the entered actual value
rmse_val = RMSE(np.asarray(y_hatt), np.asarray(actual_y))
print(f" Rms error is:  {rmse_val}")

 Rms error is:  1660.638100375963


In [26]:
def MSE(y_hatt, actual_y):
    """Mean squared error between predicted and actual values.

    Parameters
    ----------
    y_hatt : array-like
        Predicted values.
    actual_y : array-like or scalar
        Actual values, broadcastable against ``y_hatt``.

    Returns
    -------
    numpy.float64
        mean((y_hatt - actual_y) ** 2).
    """
    squared_errors = (np.asarray(y_hatt, dtype=float) - np.asarray(actual_y, dtype=float)) ** 2
    # Average over the predictions themselves rather than over the length of an
    # unrelated global feature container.
    return np.mean(squared_errors)
# Squared error of the prediction above against the entered actual value
mse_val = MSE(np.asarray(y_hatt), np.asarray(actual_y))
print(f" Mean Square Error is:  {mse_val}")

 Mean Square Error is:  2757718.9004202867
