In [44]:
import os
import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_log_error
from keras.layers import LSTM, Dense, Input, Bidirectional
from keras.optimizers import Adagrad, Adam
from keras.utils import to_categorical
from keras.models import load_model, Model


In [45]:
# Directory (relative to the notebook's working directory) that holds the
# input files train.csv and test.csv read by load_initial_data.
DATA_DIR = 'data'

def load_initial_data(path):
    """Read the training and test tables stored under ``path``.

    Args:
        path: Directory containing ``train.csv`` and ``test.csv``.

    Returns:
        A ``(train_data, test_data)`` tuple of DataFrames, in that order.
    """
    frames = tuple(
        pd.read_csv(os.path.join(path, file_name))
        for file_name in ('train.csv', 'test.csv')
    )
    return frames

In [46]:
def _labels_match(left, right):
    """Return True when two region/country labels denote the same place.

    Two missing labels (NaN/None) count as equal; plain ``==`` cannot express
    that because NaN never compares equal to itself.
    """
    left_missing, right_missing = pd.isna(left), pd.isna(right)
    if left_missing or right_missing:
        return bool(left_missing and right_missing)
    return left == right


def extract_days_from_first_infection(data, write_file=False):
    """Add a 'Days since first infection' column to ``data`` in place.

    Rows are read by position: column 1 is the region label, column 2 the
    country label and column 6 the case count (presumably province/state,
    country/region and confirmed cases -- confirm against the CSV header).
    Consecutive rows sharing the same (region, country) pair form one group.
    Within a group the counter stays at 0 up to and including the first row
    with a non-zero case count, then grows by one for every later row whose
    case count is non-zero.

    Labels are compared by value, with two missing labels treated as equal,
    so the grouping does not depend on equal strings being the same object.

    Args:
        data: DataFrame to annotate; rows of one group must be contiguous.
        write_file: When True, also write the annotated frame to
            'added_days_train.csv' in the current working directory.

    Returns:
        None. ``data`` is modified in place.
    """
    result = []
    prev_region, prev_country = None, None
    current_count = 0
    found_case = False
    for value in data.values:
        region, country, cases = value[1], value[2], value[6]
        same_group = (_labels_match(region, prev_region)
                      and _labels_match(country, prev_country))
        if not same_group:
            # New (region, country) pair: restart the counter.
            found_case = False
            current_count = 0
        if cases == 0:
            # No cases reported on this row: the counter does not advance.
            pass
        elif not found_case:
            # First row with cases is day 0 of the outbreak for this group.
            found_case = True
        else:
            current_count += 1
        result.append(current_count)
        prev_region, prev_country = region, country
    data['Days since first infection'] = result
    if write_file:
        data.to_csv('added_days_train.csv', index=False)
    

def add_days_from_first_infection_test(train_data, test_data, write_file=False):
    """Continue the 'Days since first infection' counter onto ``test_data``.

    For every (region, country) pair the value of the training frame's
    'Days since first infection' column on that pair's last training row is
    remembered. The first test row of a pair gets that value plus one, and
    each following consecutive row of the same pair adds one more.

    Region and country are read by position (columns 1 and 2) in both
    frames and converted with ``str`` so that missing labels become the
    text 'nan' and can be used as dictionary keys. The pair is kept as a
    tuple so that e.g. ('AB', 'C') and ('A', 'BC') stay distinct.

    Args:
        train_data: Training frame that already has the
            'Days since first infection' column.
        test_data: Test frame to annotate in place; rows of one pair must
            be contiguous.
        write_file: When True, also write the annotated test frame to
            'added_days_test.csv' in the current working directory.

    Returns:
        None. ``test_data`` is modified in place.

    Raises:
        KeyError: If the training frame lacks the day-count column, or a
            pair in ``test_data`` never occurs in ``train_data``.
    """
    days_column = 'Days since first infection'
    # Look the column up by name rather than assuming it is the last one.
    days_idx = train_data.columns.get_loc(days_column)

    max_counts = {}
    for value in train_data.values:
        # Later rows overwrite earlier ones, leaving the last recorded count.
        max_counts[(str(value[1]), str(value[2]))] = value[days_idx]

    result = []
    previous_key = None
    current_count = 0
    for value in test_data.values:
        key = (str(value[1]), str(value[2]))
        if key != previous_key:
            if key not in max_counts:
                raise KeyError(
                    'no training rows for region=%r, country=%r' % key)
            current_count = max_counts[key] + 1
        else:
            current_count += 1
        result.append(current_count)
        previous_key = key
    test_data[days_column] = result
    if write_file:
        test_data.to_csv('added_days_test.csv', index=False)

In [None]:
def build_model(train_data):
    """Build and compile a two-layer bidirectional LSTM regressor.

    The network stacks a 32-unit and a 16-unit bidirectional LSTM (both with
    'softsign' activation) followed by a single linear output unit, and is
    compiled with the Adagrad optimizer and Keras's built-in mean squared
    logarithmic error loss.

    The loss is given by its Keras identifier rather than
    ``sklearn.metrics.mean_squared_log_error``: the scikit-learn function
    works on NumPy arrays and cannot be used as a Keras training loss.

    Args:
        train_data: Indexable collection of samples; the input shape is
            taken from ``train_data[0].shape``.
            # assumes a NumPy array shaped (samples, timesteps, features)
            # -- TODO confirm against the caller

    Returns:
        The compiled Keras ``Model``.
    """
    inputs = Input(shape=train_data[0].shape)
    # The first recurrent layer returns the full sequence so that the second
    # recurrent layer receives one vector per timestep.
    lstm_1 = Bidirectional(LSTM(units=32, activation='softsign', return_sequences=True))(inputs)
    lstm_2 = Bidirectional(LSTM(units=16, activation='softsign'))(lstm_1)
    output = Dense(1)(lstm_2)
    model = Model(inputs=inputs, outputs=output)
    # NOTE(review): 'acc' is kept so existing history keys do not change, but
    # accuracy carries little meaning for a single-unit regression output.
    model.compile(optimizer=Adagrad(), loss='mean_squared_logarithmic_error', metrics=['acc'])
    return model

In [47]:
# Load both CSV files, then derive the 'Days since first infection' column:
# first on the training frame, then carried forward onto the test frame.
# Both helper calls modify the frame they are given in place.
train, test = load_initial_data(path=DATA_DIR)
extract_days_from_first_infection(data=train)
add_days_from_first_infection_test(train_data=train, test_data=test)


{'nanAfghanistan': 28, 'nanAlbania': 14, 'nanAlgeria': 27, 'nanAndorra': 21, 'nanAntigua and Barbuda': 10, 'nanArgentina': 20, 'nanArmenia': 22, 'nanAruba': 0, 'Australian Capital TerritoryAustralia': 10, 'From Diamond PrincessAustralia': 0, 'New South WalesAustralia': 57, 'Northern TerritoryAustralia': 15, 'QueenslandAustralia': 54, 'South AustraliaAustralia': 51, 'TasmaniaAustralia': 21, 'VictoriaAustralia': 57, 'Western AustraliaAustralia': 23, 'nanAustria': 27, 'nanAzerbaijan': 22, 'nanBahrain': 28, 'nanBangladesh': 15, 'nanBarbados': 6, 'nanBelarus': 24, 'nanBelgium': 48, 'nanBenin': 7, 'nanBhutan': 17, 'nanBolivia': 12, 'nanBosnia and Herzegovina': 18, 'nanBrazil': 26, 'nanBrunei': 14, 'nanBulgaria': 15, 'nanBurkina Faso': 13, 'nanCambodia': 56, 'nanCameroon': 0, 'AlbertaCanada': 17, 'British ColumbiaCanada': 55, 'Grand PrincessCanada': 10, 'ManitobaCanada': 10, 'New BrunswickCanada': 12, 'Newfoundland and LabradorCanada': 8, 'Nova ScotiaCanada': 0, 'OntarioCanada': 57, 'Prince E