In [1]:
!pip install pandas

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing as pre

In [3]:
covid_data = pd.read_csv('covid_data_clean.csv')
countries_data = pd.read_csv('countries_data_clean.csv',decimal=',')

In [4]:
covid_data.head(10)

Unnamed: 0.1,Unnamed: 0,Country/Region,Date,Confirmed,Deaths,Recovered,Ellapsed Days
0,0,Afghanistan,2020-02-24,1,0,0,0
1,1,Afghanistan,2020-02-25,1,0,0,1
2,2,Afghanistan,2020-02-26,1,0,0,2
3,3,Afghanistan,2020-02-27,1,0,0,3
4,4,Afghanistan,2020-02-28,1,0,0,4
5,5,Afghanistan,2020-02-29,1,0,0,5
6,6,Afghanistan,2020-03-01,1,0,0,6
7,7,Afghanistan,2020-03-02,1,0,0,7
8,8,Afghanistan,2020-03-03,1,0,0,8
9,9,Afghanistan,2020-03-04,1,0,0,9


In [5]:
countries_data.head(10)

Unnamed: 0.1,Unnamed: 0,Country,Population,Pop. Density (per sq. mi.),Infant mortality (per 1000 births),GDP ($ per capita),Deathrate
0,0,Afghanistan,31056997,48.0,163.07,700.0,20.34
1,1,Albania,3581655,124.6,21.52,4500.0,5.22
2,2,Algeria,32930091,13.8,31.0,6000.0,4.61
3,3,Samoa,57794,290.4,9.27,8000.0,3.27
4,4,Andorra,71201,152.1,4.05,19000.0,6.25
5,5,Angola,12127071,9.7,191.19,1900.0,24.2
6,7,Antigua and Barbuda,69108,156.0,19.46,11000.0,5.37
7,8,Argentina,39921833,14.4,15.18,11200.0,7.55
8,9,Armenia,2976372,99.9,23.28,3500.0,8.23
9,11,Australia,20264082,2.6,4.69,29000.0,7.51


In [6]:
#Merge the two columns together and drop uncessary columns
merged_inner = pd.merge(left=covid_data, right=countries_data, left_on='Country/Region', right_on='Country').drop(columns=['Country', 'Date', 'Unnamed: 0_x', 'Unnamed: 0_y'])

#Drop rows with at least one missing value
merged_inner.dropna(inplace=True)

In [7]:
#Get set with the values that the model receives
X = merged_inner.drop(columns=['Country/Region', 'Confirmed'])

#Values the model should predict
Y = {'Cases' : merged_inner['Confirmed'] , 'Deaths' : merged_inner['Deaths']}

In [8]:

#Test case
variables = ['Cases' , 'Deaths']

variablesScores = {}

iteration_scores = []
    
#different nr of iterations to compare results
max_iter_array = [50, 100, 150, 200, 250]

for variable in variables:
    print('Starting training for ' , variable, '.')
    
    #Setting up training sets
    X_train, X_test, y_train, y_test = train_test_split(X,Y[variable])
    
    scaler = pre.StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.fit_transform(X_test)
    
    for it in max_iter_array:
        mlpr = MLPRegressor(max_iter = it)
        print('On iteration ', it)
        
        #need to fix date
        mlpr.fit(X_train , y_train)
        
        #predict model
        #predict_train = mlpr.predict(X_train_scaled)
        
        #predict test
        predict_test = mlpr.predict(X_test_scaled)
        print('Predtic: ' , predict_test)
        
        score = mlpr.score(X_test, y_test)
        print(score)
        iteration_scores.append(score)   
        
    variablesScores[variable] = (iteration_scores)
print(variablesScores)    
print("Done")

Starting training for  Cases .
On iteration  50




Predtic:  [ 2.21574068  1.92785255 -0.69394925 ... -0.57689311  0.290135
 -0.86146124]
0.38862399609945497
On iteration  100




Predtic:  [ 4.35703622  3.43601621 -4.77735382 ... -2.79416475  0.01273547
 -5.54074461]
0.6832684451996909
On iteration  150
Predtic:  [ 0.97430349  2.34703453 -4.09693845 ... -2.53187188 -0.12356761
 -5.29269611]
-3.8532817055762623
On iteration  200
Predtic:  [ 3.72327728  2.2343074  -0.20009435 ... -0.71595522 -0.07977032
 -0.16837258]
0.4054800177428044
On iteration  250
Predtic:  [ 2.46762631  1.04251091 -1.16927508 ... -1.19470798 -0.79805999
 -1.20331283]
-37.47181181754443
Starting training for  Deaths .
On iteration  50
Predtic:  [0.45548216 0.34095933 1.59628617 ... 0.32155684 0.50853445 0.32013396]
-786.5996055467817
On iteration  100
Predtic:  [-0.35466214 -0.30404802 -0.39877188 ... -0.48051424 -0.29560096
 -0.38780269]
-232.3526034667456
On iteration  150
Predtic:  [ 0.62957727 -0.15055834  6.44548589 ...  0.07281443  0.29515046
 -0.03218325]
-0.39741871306724974
On iteration  200
Predtic:  [-0.22327471 -0.22685368  2.77781009 ... -0.0854578  -0.24262497
 -0.08212772]
0.