# Import Package

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn import metrics
from sklearn.metrics import r2_score

# from sklearn.externals import joblib
import joblib

import pickle
import uuid



# Data

## Download Data

In [2]:
import os.path
if not os.path.isfile('Covid_Data.zip'):
  !gdown --id 1oPQDuqWnH9v72qJiiyqI5LXhdV0yGaPE
  !unzip 'Covid_Data.zip'


In [3]:
descriptor = ("d", "r", "c")
filenames = [r'time_series_19-covid-Deaths.csv',
             r'time_series_19-covid-Recovered.csv',
             r'time_series_19-covid-Confirmed.csv']

print(zip(descriptor, filenames))



<zip object at 0x7f3ff5ee23c0>


## Load Data & Feature Enginering;dgk

In [4]:
dfs = []
DATAs = []
for descriptor_, fname in zip(descriptor, filenames):
    print("Working on:",'Input Data/'+ fname)
    df = pd.read_csv('Input Data/'+ fname)
    df.drop(columns=["Province/State","Country/Region"], inplace=True)
    DATA = np.array((0,0,0,0))

    
    for i, j in df.iterrows():
        # print(i, j)
        latitude = j['Lat']
        longitude = j['Long']
        
        for k,l in j.iteritems():
            if k=='Lat':
                continue
            if k=='Long':
                continue
            date = datetime.strptime(k, '%m/%d/%y')
            day = date - date.strptime("01/22/20", '%m/%d/%y')
            days = day.days
            #print(days)
            temp = np.array([j["Lat"], j['Long'], days, l])
            #print(temp)
            DATA = np.vstack((DATA,temp))  
            
    DATA = np.delete(DATA, 0,0)
    np.random.shuffle(DATA)
    
    dfs.append(df)
    DATAs.append(DATA)


Working on: Input Data/time_series_19-covid-Deaths.csv
Working on: Input Data/time_series_19-covid-Recovered.csv
Working on: Input Data/time_series_19-covid-Confirmed.csv


In [5]:
dfs[0]

Unnamed: 0,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20
0,15.0000,101.0000,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
1,36.0000,138.0000,0,0,0,0,0,0,0,0,...,6,6,6,6,6,6,10,10,15,16
2,1.2833,103.8333,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,28.1667,84.2500,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2.5000,112.5000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401,42.5922,-83.3362,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
402,42.2791,-83.3362,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
403,39.5393,-75.6674,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
404,22.0000,-80.0000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
DATAs

[array([[ 40.5726, -74.4927,  39.    ,   0.    ],
        [  8.6195,   0.8248,  50.    ,   0.    ],
        [ 64.9631, -19.0208,  27.    ,   0.    ],
        ...,
        [ 30.5927, -81.8224,  46.    ,   0.    ],
        [ 42.5063,   1.5218,  19.    ,   0.    ],
        [ 42.632 , -70.7829,  32.    ,   0.    ]]),
 array([[  22.1667,  113.55  ,   27.    ,    5.    ],
        [  47.175 , -120.9319,   27.    ,    0.    ],
        [  40.4483,  -86.1345,    6.    ,    0.    ],
        ...,
        [  42.632 ,  -70.7829,   40.    ,    0.    ],
        [  38.7646, -121.9018,   35.    ,    0.    ],
        [  38.7849,  -76.8721,   50.    ,    0.    ]]),
 array([[  44.2998,  -99.4388,    1.    ,    0.    ],
        [  35.4437,  139.638 ,   21.    ,    0.    ],
        [  37.6681,  -84.6701,   30.    ,    0.    ],
        ...,
        [  39.3054,  117.323 ,   33.    ,  135.    ],
        [  37.0454, -121.958 ,   47.    ,    0.    ],
        [  38.9072,  -77.0369,   26.    ,    0.    ]])]

In [7]:
DATAs[0]

array([[ 40.5726, -74.4927,  39.    ,   0.    ],
       [  8.6195,   0.8248,  50.    ,   0.    ],
       [ 64.9631, -19.0208,  27.    ,   0.    ],
       ...,
       [ 30.5927, -81.8224,  46.    ,   0.    ],
       [ 42.5063,   1.5218,  19.    ,   0.    ],
       [ 42.632 , -70.7829,  32.    ,   0.    ]])

# Models

## Confirmed

In [8]:
DATA = DATAs[2]
input_data = DATA[:,:-1]
print(input_data.shape)
output_data = DATA[:, -1]
print(output_data.shape)
input_train, input_test, output_train, output_test = train_test_split(input_data, output_data)

(20706, 3)
(20706,)


In [9]:
clf = MLPRegressor(hidden_layer_sizes = (4,4,4,4),
                   activation='relu',
                   solver='lbfgs',
                   learning_rate='constant',
                   learning_rate_init=0.1,
                   alpha=0.0001,
                   max_iter=10000000)
clf.fit(input_train, output_train)

MLPRegressor(hidden_layer_sizes=(4, 4, 4, 4), learning_rate_init=0.1,
             max_iter=10000000, solver='lbfgs')

In [10]:
clf.score(input_train, output_train)


0.9690185166634807

In [11]:
clf.score(input_test, output_test)

0.9236193967959558

## Recovered

In [12]:
DATA = DATAs[1]
input_data = DATA[:,:-1]
print(input_data.shape)
output_data = DATA[:, -1]
print(output_data.shape)
input_train, input_test, output_train, output_test = train_test_split(input_data, output_data)

(20706, 3)
(20706,)


In [13]:
clf = MLPRegressor(hidden_layer_sizes = (4,4,4,4),
                   activation='relu',
                   solver='lbfgs',
                   learning_rate='constant',
                   learning_rate_init=0.5,
                   alpha=0.001,
                   max_iter=1000000000)
clf.fit(input_train, output_train)

MLPRegressor(alpha=0.001, hidden_layer_sizes=(4, 4, 4, 4),
             learning_rate_init=0.5, max_iter=1000000000, solver='lbfgs')

In [14]:
clf.score(input_test, output_test)

0.052095222238248806


## Dead

In [15]:
DATA = DATAs[0]
input_data = DATA[:,:-1]
print(input_data.shape)
output_data = DATA[:, -1]
print(output_data.shape)
input_train, input_test, output_train, output_test = train_test_split(input_data, output_data)

(20706, 3)
(20706,)


In [16]:
clf = MLPRegressor(hidden_layer_sizes = (4,4,4,4),
                   activation='relu',
                   solver='lbfgs',
                   learning_rate='adaptive',
                   learning_rate_init=0.01,
                   alpha=0.01,
                   max_iter=1000000000)
clf.fit(input_train, output_train)

MLPRegressor(alpha=0.01, hidden_layer_sizes=(4, 4, 4, 4),
             learning_rate='adaptive', learning_rate_init=0.01,
             max_iter=1000000000, solver='lbfgs')

In [17]:
clf.score(input_test, output_test)

-0.049701352027102574