In [4]:
import json
import csv
import numpy as np
import matplotlib.pyplot as plt
import requests
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import sklearn.metrics as sm
import pymongo
from pymongo import MongoClient
import datetime
%matplotlib inline

# The connection string is a placeholder token; it is passed to MongoClient
# exactly as written here.
CONNECTION_STRING = '%MONGO_DB_CONNECTION_STRING%'
client = MongoClient(CONNECTION_STRING)
# Handle to the collection that the cells below read from and write to.
collection = client.get_database('prediction-db').get_collection('predictions')

ModuleNotFoundError: No module named 'sklearn'

In [None]:
def prepare_data(countryInfo):
    """Dump a country's history to ``./data.csv`` and load it back as a DataFrame.

    Args:
        countryInfo: mapping with a ``'history'`` entry holding a sequence of
            dict rows. The keys of the first row become the CSV header; every
            row's values are written in that row's own key order.

    Returns:
        pandas.DataFrame parsed from the written CSV, or an empty DataFrame
        when the history has no rows.

    Side effects:
        Overwrites ``./data.csv`` in the current working directory.
    """
    history = countryInfo['history']

    # `with` closes the file even when a malformed row raises mid-write, and
    # the explicit UTF-8 encoding matches what pd.read_csv assumes by default.
    with open('./data.csv', 'w', newline='', encoding='utf-8') as data_file:
        csv_writer = csv.writer(data_file)
        for index, row in enumerate(history):
            if index == 0:
                csv_writer.writerow(row.keys())
            csv_writer.writerow(row.values())

    if not history:
        # pd.read_csv raises EmptyDataError on a zero-byte file; hand back an
        # empty frame instead so callers get a predictable type.
        return pd.DataFrame()

    return pd.read_csv('./data.csv')

def prepare_model_data(df, learningField, test_size=0.2, random_state=None):
    """Build next-row training pairs for one target column and split them.

    Each sample pairs the feature values of row ``i`` (every column except
    ``'day'``) with the value of ``learningField`` in row ``i + 1``, so the
    last row of ``df`` is only ever used as a target.

    Args:
        df: DataFrame containing a ``'day'`` column and ``learningField``.
        learningField: name of the column to predict.
        test_size: forwarded to ``train_test_split`` (default 0.2, the value
            that used to be hard-coded).
        random_state: forwarded to ``train_test_split``; pass an int to get a
            reproducible split. ``None`` keeps the previous unseeded behaviour.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` as produced by
        ``train_test_split``.
    """
    # Features: all rows but the last, without the 'day' column.
    X = list(df[:-1].drop('day', axis=1).values)
    # Targets: the learning field shifted by one row (row i -> value at i + 1).
    y = list(df[learningField])[1:]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

def prepare_model(X_train, y_train, n_estimators=500, random_state=None):
    """Fit a random-forest regressor on the given training data.

    Args:
        X_train: training feature rows.
        y_train: training target values, aligned with ``X_train``.
        n_estimators: number of trees (default 500, the value that used to be
            hard-coded).
        random_state: seed forwarded to the regressor; pass an int for
            reproducible models. ``None`` keeps the previous unseeded behaviour.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    # n_jobs=-1 is passed through unchanged from the original configuration.
    regressor = RandomForestRegressor(
        n_estimators=n_estimators, n_jobs=-1, random_state=random_state
    )
    regressor.fit(X_train, y_train)
    return regressor

In [None]:
# predict_file = open('./predict.csv', 'w', newline='')
# writer = csv.writer(predict_file)
# writer.writerow([
#     'country', 
#     'Daily Dead Explain variance score', 'Daily Dead R2 score', 'daily dead in daily dead', 'daily infected in daily dead', 'daily recovered in daily dead', 'weekly dead in daily dead', 'weekly infected in daily dead', 'weekly recovered in daily dead',
#     'Daily Infected Explain variance score', 'Daily Infected R2 score', 'daily dead in Daily Infected', 'daily infected in Daily Infected', 'daily recovered in Daily Infected', 'weekly dead in Daily Infected', 'weekly infected in Daily Infected', 'weekly recovered in Daily Infected',
#     'Daily Recovered Explain variance score', 'Daily Recovered R2 score', 'daily dead in Daily Recovered', 'daily infected in Daily Recovered', 'daily recovered in Daily Recovered', 'weekly dead in Daily Recovered', 'weekly infected in Daily Recovered', 'weekly recovered in Daily Recovered',
#     'Weekly Dead Explain variance score', 'Weekly Dead R2 score', 'daily dead in Weekly Dead', 'daily infected in Weekly Dead', 'daily recovered in Weekly Dead', 'weekly dead in Weekly Dead', 'weekly infected in Weekly Dead', 'weekly recovered in Weekly Dead',
#     'Weekly Infected Explain variance score', 'Weekly Infected R2 score', 'daily dead in Weekly Infected', 'daily infected in Weekly Infected', 'daily recovered in Weekly Infected', 'weekly dead in Weekly Infected', 'weekly infected in Weekly Infected', 'weekly recovered in Weekly Infected',
#     'Weekly Recovered Explain variance score', 'Weekly Recovered R2 score', 'daily dead in Weekly Recovered', 'daily infected in Weekly Recovered', 'daily recovered in Weekly Recovered', 'weekly dead in Weekly Recovered', 'weekly infected in Weekly Recovered', 'weekly recovered in Weekly Recovered',
# ])

# Metrics that serve both as model inputs and as prediction targets; one
# regressor is trained per entry.
TARGET_FIELDS = [
    'dailyDead', 'dailyInfected', 'dailyRecovered',
    'weeklyDead', 'weeklyInfected', 'weeklyRecovered',
]
FORECAST_DAYS = 7  # number of future days predicted for every country

countries_response = requests.get('http://localhost:3000/covid/countries')
countries_response.raise_for_status()
# De-duplicate the country codes returned by the API.
countries = list({country['country'] for country in countries_response.json()})

for c in countries:
    # Step 1: fetch the country's history and persist it.
    try:
        # `params` URL-encodes the country code instead of splicing it into the URL.
        response = requests.get('http://localhost:3000/covid/prediction-data', params={'country': c})
        response.raise_for_status()
        countryInfo = response.json()
        # Upsert instead of insert so re-running the cell refreshes the existing
        # document rather than adding a duplicate per country on every run.
        collection.update_one(
            {'iso2': c},
            {'$set': {'history': countryInfo['history'], 'lastUpdated': countryInfo['lastUpdated']}},
            upsert=True,
        )
    except Exception as e:
        print(f"An error occurred for country {c}: {e}")
        continue

    # Step 2: train one model per target and roll the forecast forward. A
    # failure here (e.g. too little history to split) is reported for this
    # country only, so the remaining countries are still processed.
    try:
        df = prepare_data(countryInfo)

        # prepare_model_data trains on every column except 'day', in DataFrame
        # column order; prediction inputs must follow the same order.
        feature_names = [column for column in df.columns if column != 'day']
        if set(feature_names) != set(TARGET_FIELDS):
            raise ValueError(f"unexpected history columns: {feature_names}")

        regressors = {}
        importances = {}
        variance = {}
        r2 = {}
        for field in TARGET_FIELDS:
            X_train, X_test, y_train, y_test = prepare_model_data(df, field)
            regressor = prepare_model(X_train, y_train)
            y_pred = regressor.predict(X_test)
            regressors[field] = regressor
            importances[field] = pd.DataFrame({
                'feature': feature_names,
                'importance': np.round(regressor.feature_importances_, 3),
            }).set_index('feature')
            # float() keeps the stored scores plain Python floats.
            variance[field] = round(float(sm.explained_variance_score(y_test, y_pred)), 2)
            r2[field] = round(float(sm.r2_score(y_test, y_pred)), 2)

        # Disabled CSV export of the per-target metrics (pairs with the
        # commented-out writer set up before this loop):
        # writer.writerow([c] + [
        #     value
        #     for field in TARGET_FIELDS
        #     for value in (variance[field], r2[field], *importances[field]['importance'])
        # ])

        lastDay = countryInfo['history'][-1]
        data = [[lastDay[name] for name in feature_names]]
        day_text = str(lastDay['day'])  # expected as YYYYMMDD
        day = datetime.date(year=int(day_text[0:4]), month=int(day_text[4:6]), day=int(day_text[6:8]))

        predicted = []
        for _ in range(FORECAST_DAYS):
            # Every model predicts from the same input row; round(float(...))
            # yields plain Python ints for the database write.
            step = {
                field: round(float(regressors[field].predict(data)[0]))
                for field in TARGET_FIELDS
            }
            # Feed the predictions back in as the next day's input.
            data = [[step[name] for name in feature_names]]
            day += datetime.timedelta(days=1)
            predicted.append({"day": day.strftime('%Y%m%d'), **step})

        # The R2 score of each model is stored as its accuracy.
        accuracy = {field: r2[field] for field in TARGET_FIELDS}
        collection.update_one({'iso2': c}, {"$set": {'predicted': predicted, 'accuracy': accuracy}})
    except Exception as e:
        print(f"Prediction failed for country {c}: {e}")
        continue

# predict_file.close()

In [None]:
# c = 'BY'
# countryInfo = requests.get(f'http://localhost:3000/covid/prediction-data?country={c}').json()
   
# df = prepare_data(countryInfo)

# X_train, X_test, y_train, y_test = prepare_model_data(df, 'dead')
# deadRegressor = prepare_model(X_train, y_train)
# y_pred = deadRegressor.predict(X_test)
# print("Dead Explain variance score =", round(sm.explained_variance_score(y_test, y_pred), 2)) 
# print("Dead R2 score =", round(sm.r2_score(y_test, y_pred), 2))

# X_train, X_test, y_train, y_test = prepare_model_data(df, 'infected')
# infectedRegressor = prepare_model(X_train, y_train)
# y_pred = infectedRegressor.predict(X_test)
# print("Infected Explain variance score =", round(sm.explained_variance_score(y_test, y_pred), 2)) 
# print("Infected R2 score =", round(sm.r2_score(y_test, y_pred), 2))

# X_train, X_test, y_train, y_test = prepare_model_data(df, 'recovered')
# recoveredRegressor = prepare_model(X_train, y_train)
# y_pred = recoveredRegressor.predict(X_test)
# print("Recovered Explain variance score =", round(sm.explained_variance_score(y_test, y_pred), 2)) 
# print("Recovered R2 score =", round(sm.r2_score(y_test, y_pred), 2))

# X_train, X_test, y_train, y_test = prepare_model_data(df, 'vaccinated')
# vaccinatedRegressor = prepare_model(X_train, y_train)
# y_pred = vaccinatedRegressor.predict(X_test)
# print("Vaccinated Explain variance score =", round(sm.explained_variance_score(y_test, y_pred), 2)) 
# print("Vaccinated R2 score =", round(sm.r2_score(y_test, y_pred), 2))
### plot graphs for visual interpretation of accuracy

In [None]:
# Single-country experiment: train a dailyDead model for one country and
# compare the number of predictions with the number of held-out targets.
c = 'PL'
countryInfo = requests.get(f'http://localhost:3000/covid/prediction-data?country={c}').json()
df = prepare_data(countryInfo)
X_train, X_test, y_train, y_test = prepare_model_data(df, 'dailyDead')

# max_features=1.0 considers all features at every split, which is what the
# 'auto' alias meant for regressors; 'auto' itself is rejected by current
# scikit-learn releases.
regressor1 = RandomForestRegressor(n_estimators = 500, max_features=1.0, n_jobs = -1)
regressor1.fit(X_train, y_train)
y = regressor1.predict(X_test)

print(len(y), len(y_test))

# fig, ax = plt.subplots()  # Create a figure containing a single axes.
# ax.plot();  # Plot some data on the axes.
# ax.set_xlabel('max_depth')  # Add an x-label to the axes.
# ax.set_ylabel('oob score')  # Add a y-label to the axes.
# ax.set_title("max_depth for weekly dead")

######
# X_train, X_test, y_train, y_test = prepare_model_data(df, 'dailyDead')

# regressor1 = RandomForestRegressor(n_estimators = 1000, max_depth=None, oob_score=True, n_jobs = -1)
# regressor1.fit(X_train, y_train)
# r1 = round(regressor1.oob_score_, 2)


# fig, ax = plt.subplots()  # Create a figure containing a single axes.
# ax.plot(['None', 1, 2, 3, 4, 5], [r1, r2, r3, r4, r5, r6]);  # Plot some data on the axes.
# ax.set_xlabel('day')  # Add an x-label to the axes.
# ax.set_ylabel('value')  # Add a y-label to the axes.
# ax.set_title("Actual and predicted result for every param")