# ANNEX: C
# PREDICTION CODE FOR US COVID 19 CASES
## SUBMITTED BY: FAISAL JAVED
### MACHINE LEARNING PROJECT (MS RIME 2020)

In [1]:
import pandas as pd
import numpy as np

## DATA SET DETAILS 
### GATHERING & CLEANING

In [2]:
Data = pd.read_excel("USA Statewise Covid Data.xlsx")
Data.head()

Unnamed: 0,Date,Xo,Date_Code,States_Code,Temperatures,Humidity,LandArea,Population_Density,Cases_per_day
0,2020-12-06,1,0,1,11.6,77.1,570641,1.2863,757
1,2020-12-06,1,0,2,46.6,71.6,50645,96.9221,2288
2,2020-12-06,1,0,3,41.3,70.9,52035,58.403,1542
3,2020-12-06,1,0,4,38.0,80.0,77,716.0,0
4,2020-12-06,1,0,5,43.6,38.5,113594,64.9549,5376


In [3]:
Data = Data.sample(frac=1)

In [4]:
X = Data.drop(['Date','Cases_per_day','LandArea'],1)

In [5]:
Y = Data['Cases_per_day']

In [6]:
X['Population_Density']= X['Population_Density'].astype(float)

### Defining New Features

In [7]:
X['Weather']=X['Temperatures']*X['Humidity']
X['Temperature_sq'] = X['Temperatures']**2
X['Humidity_sq'] = X['Humidity']**2
X['Population_Density_sq'] = X['Population_Density']**2

In [8]:
X.head()

Unnamed: 0,Xo,Date_Code,States_Code,Temperatures,Humidity,Population_Density,Weather,Temperature_sq,Humidity_sq,Population_Density_sq
14612,1,260,53,41.4,71.4,117.3273,2955.96,1713.96,5097.96,13765.695325
11216,1,200,17,62.5,70.9,228.0246,4431.25,3906.25,5026.81,51995.218205
4295,1,76,40,73.3,64.0,57.6546,4691.2,5372.89,4096.0,3324.052901
9852,1,175,53,59.6,71.4,117.3273,4255.44,3552.16,5097.96,13765.695325
12033,1,214,50,62.9,68.7,218.4404,4321.23,3956.41,4719.69,47716.208352


### SCALING OF DATA

In [9]:
x = X/(X.max(axis=0) + np.spacing(0))

### SPLITTING OF DATA SET INTO TRAIN,TEST & VALID DATA

In [10]:
data_train = round(0.6*len(Data))
data_valid = round(data_train+0.2*len(Data))

In [11]:
train_x = x[:data_train]
valid_x = x[data_train:data_valid]
test_x = x[data_valid:]

In [12]:
train_y = Y[:data_train]
valid_y = Y[data_train:data_valid]
test_y = Y[data_valid:]

### USING THEETAS FROM TRAINING MODEL:

In [13]:
theeta = np.array([ 365, -115,   82,  156,  201,    0,  135,  100,  154,    0])
#theeta to be imported from training models

In [14]:
theeta

array([ 365, -115,   82,  156,  201,    0,  135,  100,  154,    0])

### TAKING INPUT FROM THE USER

In [15]:
prediction_df = pd.read_excel('Book2.xlsx')
prediction_df

Unnamed: 0,state_code,state_name,population
0,1,Alaska,1.2863
1,2,Alabama,96.9221
2,3,Arkansas,58.403
3,4,American Samoa,716.0
4,5,Arizona,64.9549
5,6,California,256.373
6,7,Colorado,56.4012
7,8,Connecticut,735.87
8,9,District Of Columbia,11535.0
9,10,Delaware,504.307


In [16]:
def Date_Code(year,month,day):
    import datetime
    date = datetime.date(2020, 12, 6)
    date1 = datetime.date(year,month,day)
    date_code = (date - date1)
    dayss = date_code.days
    print ('Date_code is:'  ,dayss)
    return dayss

In [18]:
year = int(input('Enter a year: '))
month = int(input('Enter a month: '))
day = int(input('Enter a day: '))
d = Date_Code(year,month,day)
d = d/X['Date_Code'].max()
d

Enter a year: 2020
Enter a month: 12
Enter a day: 30
Date_code is: -24


-0.09090909090909091

In [19]:
t = float(input('Enter value of Temperature: '))
t = t/X['Temperatures'].max()
t

Enter value of Temperature: -3


-0.036231884057971016

In [20]:
hum = float(input('Enter value of Humidity: '))
hum = hum/X['Humidity'].max()
hum

Enter value of Humidity: 90


1.125

In [21]:
s = float(input('Enter state Code: '))
s = s/X['States_Code'].max()
s

Enter state Code: 56


1.0

In [22]:
p = float(input('Enter population Density: '))
p = p/X['Population_Density'].max()
p

Enter population Density: 5


0.00043346337234503684

In [23]:
x_a = [1,d,s,t,hum,p,t*hum,t**2,d**2,p**2],
x_a

([1,
  -0.09090909090909091,
  1.0,
  -0.036231884057971016,
  1.125,
  0.00043346337234503684,
  -0.04076086956521739,
  0.0013127494223902543,
  0.008264462809917356,
  1.8789049516473205e-07],)

### PREDICTION CODE

In [24]:
def prediction(theeta,features):
    estimated_count = theeta*features
    return estimated_count

In [25]:
y_hatt = prediction(theeta,x_a)
y_hatt = np.sum(y_hatt,axis=1)
print ("The Estimated count of Covid Cases is:   " , y_hatt)

The Estimated count of Covid Cases is:    [673.82865637]


### ERROR FROM ACTUAL CASES (IF AVAILABLE)

In [26]:
### INPUT ACTUAL CASES IF AVAILABLE
actual_y = float(input('Enter value of Actual Cases: '))
actual_y

Enter value of Actual Cases: 2000


2000.0

In [27]:
### ERROR ON ACTUAL CASES vs PREDICTED CASES
def RMSE(y_hatt, actual_y):
    return np.sqrt((y_hatt - actual_y) ** 2).mean()
rmse_val = RMSE(np.array(y_hatt), np.array(actual_y))
print(f" Rms error is:  {rmse_val}")

 Rms error is:  1326.1713436348361


In [28]:
def MSE(y_hatt, actual_y):
    return (1/len(x_a))*np.sum((y_hatt - actual_y) ** 2)
mse_val = MSE(np.array(y_hatt), np.array(actual_y))
print(f" Mean Square Error is:  {mse_val}")

 Mean Square Error is:  1758730.4326782266
