In [1]:
!pip install --upgrade pip
!pip install tensorflow keras pandas numpy sklearn matplotlib

Requirement already up-to-date: pip in /home/nbuser/anaconda3_501/lib/python3.6/site-packages (20.1.1)


In [122]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
import keras.backend as K
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam, SGD
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt

np.set_printoptions(precision = 3)

In [3]:
dataset_path = './data/covid_19_data.csv'
dataset = pd.read_csv(
    dataset_path, 
    usecols = [
        'Country/Region',
        'Confirmed',
        'Deaths',
        'Recovered'
    ]
)
dataset.head()

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered
0,Mainland China,1,0,0
1,Mainland China,14,0,0
2,Mainland China,6,0,0
3,Mainland China,1,0,0
4,Mainland China,0,0,0


In [4]:
def preprocess_dataset(dataset, column_index):
    for i in range(len(dataset[column_index])):
        if dataset[column_index][i] == "('St. Martin',)":
            dataset[column_index][i] = 'St. Martin'
        if dataset[column_index][i] == " Azerbaijan":
            dataset[column_index][i] = 'Azerbaijan'
        if dataset[column_index][i] == "Bahamas, The":
            dataset[column_index][i] = "Bahamas"
        
    encoded_countries = {id : country for id, country in enumerate(set(sorted(dataset[column_index])))}
    decoded_countries = {country : id for id, country in enumerate(set(sorted(dataset[column_index])))}
    dataset = dataset.sort_values(by = column_index)
        
    new_dataset = {c : None for c in set(sorted(dataset[column_index]))}
    confirmed = []
    deaths = []
    recovered = []
    
    cached_country = dataset[column_index][0]
    for i, country in enumerate(dataset[column_index]):
        if cached_country == country:
            confirmed.append(dataset['Confirmed'][i])
            deaths.append(dataset['Deaths'][i])
            recovered.append(dataset['Recovered'][i])

        else:
            if i == len(dataset[column_index]):
                cached_country = country
      
            new_dataset.update(
                { 
                    cached_country : [
                        np.nan_to_num(np.mean(confirmed)),
                        np.nan_to_num(np.mean(deaths)),
                        np.nan_to_num(np.mean(recovered))
                    ]
                }
            )
                        
            confirmed = []
            deaths = []
            recovered = []
            
            cached_country = country
    return encoded_countries, decoded_countries, pd.DataFrame(data = new_dataset)

In [5]:
encoded_countries, decoded_countries, preprocessed_dataset = preprocess_dataset(dataset, 'Country/Region')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [113]:
def normalize(x, index):
    return (x - dataset.describe()[index]['mean']) / dataset.describe()[index]['std']

def create_features_labels(preprocessed_dataset):
    features = np.array(list(encoded_countries.keys()))
    
    confirmed = np.nan_to_num(np.array(preprocessed_dataset, "float")[0])
    confirmed = normalize(confirmed, "Confirmed")
    
    deaths = np.nan_to_num(np.array(preprocessed_dataset, "float")[1])
    deaths = normalize(deaths, "Deaths")
    
    recovered = np.nan_to_num(np.array(preprocessed_dataset, "float")[2])
    recovered = normalize(recovered, "Recovered")
 
    labels = []
    
    for c, d, r in zip(confirmed, deaths, recovered):
        labels.append([c, d, r])
        
    labels = np.array(labels)
        
    return features, labels

x, y = create_features_labels(preprocessed_dataset)

In [85]:
def det_coeff(y_true, y_pred):
    u = K.sum(K.square(y_true - y_pred))
    v = K.sum(K.square(y_true - K.mean(y_true)))
    return K.ones_like(v) - (u / v)

In [114]:
def build_model():
    model = Sequential()
    
    input_layer = Dense(
        128,
        input_dim = 1,
        activation = 'relu',
    )
    
    dropout1 = Dropout(0.1)
    batch_norm_layer = BatchNormalization(
        momentum = 0.99,
        trainable = True,
    )
        
    hl1 = Dense(
        128,
        activation = 'relu',
#         kernel_regularizer=keras.regularizers.l1_l2(l1=1e-5, l2=1e-4),
#         bias_regularizer=keras.regularizers.l2(1e-4)
    )
    
    dropout2 = Dropout(0.2)
    
    hl2 = Dense(
        32,
        activation = 'relu'
    )
    
    output_layer = Dense(
        3,
        activation = 'linear'
    )
    
    model.add(input_layer)
#     model.add(dropout1)
#     model.add(batch_norm_layer)
    model.add(hl1)
#     model.add(dropout2)
    model.add(hl2)
    model.add(output_layer)
    
    model.compile(
        loss = 'mse',
        optimizer = Adam(
            learning_rate=0.001
        ),
        metrics = ['mse']
#         metrics = ['acc']
    )
    
    return model

In [115]:
model = build_model()
model.summary()

Model: "sequential_37"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_145 (Dense)            (None, 128)               256       
_________________________________________________________________
dense_146 (Dense)            (None, 128)               16512     
_________________________________________________________________
dense_147 (Dense)            (None, 32)                4128      
_________________________________________________________________
dense_148 (Dense)            (None, 3)                 99        
Total params: 20,995
Trainable params: 20,995
Non-trainable params: 0
_________________________________________________________________


In [25]:
EPOCHS = 100

In [116]:
model.fit(
    x = x,
    y = y,
    batch_size = 8,
    validation_split = 0.1,
    epochs = EPOCHS
)

Train on 198 samples, validate on 22 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Ep

<keras.callbacks.callbacks.History at 0x7ff000e79588>

In [117]:
encoded_countries

{0: 'Belize',
 1: 'Ecuador',
 2: 'Others',
 3: 'Serbia',
 4: 'Djibouti',
 5: 'Hungary',
 6: 'Gambia',
 7: 'Iraq',
 8: 'West Bank and Gaza',
 9: 'Iceland',
 10: 'UK',
 11: 'Zimbabwe',
 12: 'Romania',
 13: 'Somalia',
 14: 'Rwanda',
 15: 'Bhutan',
 16: 'Kuwait',
 17: 'Niger',
 18: 'Liberia',
 19: 'Seychelles',
 20: 'Singapore',
 21: 'Jersey',
 22: 'Sao Tome and Principe',
 23: 'France',
 24: 'Cape Verde',
 25: 'Congo (Kinshasa)',
 26: 'Bahrain',
 27: 'Senegal',
 28: 'El Salvador',
 29: 'Republic of Ireland',
 30: 'Taiwan',
 31: 'Czech Republic',
 32: 'Hong Kong',
 33: 'Cameroon',
 34: 'Greece',
 35: 'Slovakia',
 36: 'East Timor',
 37: 'Tunisia',
 38: 'occupied Palestinian territory',
 39: 'Ghana',
 40: 'Sweden',
 41: 'Vatican City',
 42: 'Lebanon',
 43: 'Tanzania',
 44: 'Barbados',
 45: 'Mongolia',
 46: 'Turkey',
 47: 'Mauritius',
 48: 'Sri Lanka',
 49: 'Guatemala',
 50: 'Burkina Faso',
 51: 'Madagascar',
 52: 'Jordan',
 53: 'Cabo Verde',
 54: 'Western Sahara',
 55: 'Curacao',
 56: 'Gibra

In [119]:
y_pred = model.predict([138])