<a href="https://colab.research.google.com/github/akitim/comptech-winter-school-2022_covid19-prediction/blob/main/predict_to_csv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
# !pip install httpx
# !pip install prophet
# !mkdir tmp


In [30]:
from prophet import Prophet
import pandas as pd
import httpx
import numpy as np
import json
import pickle
from datetime import date, timedelta, datetime
from statsmodels.tsa.holtwinters import ExponentialSmoothing as HWES
import statsmodels.api as sm

def get_cases(days):
  data = httpx.get(f"https://storage.yandexcloud.net/covid-19/cases{\
      ('_' + str(days)) if days != 0 else ''}.pickle").content
  return pickle.loads(data)

# ================================PROPHET================================
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

def get_train_data(countries, source_country, cases, field):
    df = pd.DataFrame(index=cases[cases['location'] == source_country]["date"],
                      columns=['y', 'ds'] + [i for i, _ in countries])
    df['y'] = cases[cases['location'] == source_country].reset_index().set_index("date")[field]
    df['ds'] = cases[cases['location'] == source_country].reset_index().set_index("date").index
    c = cases["location"].unique()

    for country, info in countries:
        if country in c:
            df[country] = cases[cases["location"] == country][field]
        else:
            del df[country]
    return df.fillna(method='ffill').dropna()


def get_df_future(days_predict, m, countries, cases, field, train_last):
    d = pd.date_range(train_last, periods=days_predict + 1, closed='right')
    future = pd.DataFrame()
    future["ds"] = d

    g = cases['location'].unique()

    for country, info in countries:
        if country in g:
            future[country] = cases[cases['location'] == country][field].iloc[
                              -days_predict + info["lag"]: info["lag"]].values

    return future.fillna(method='ffill')


def get_predict(days_predict, source_country, cases, lags) -> np.array:
    countries = sorted(filter(lambda x: -x[1]["lag"] > days_predict, lags[source_country].items()),
                       key=lambda x: x[1]["similarity"])[-2:]
    field = 'new_cases_smoothed'
    train = get_train_data(countries, source_country, cases, field)

    m = Prophet(
        daily_seasonality=False,
        yearly_seasonality=True,
        weekly_seasonality=True,
        changepoint_prior_scale=0.8,
        seasonality_mode='multiplicative'
    )
    for i in train.columns:
        if i != 'ds' and i != 'y':
            m.add_regressor(i, mode="multiplicative")
    m.fit(train)
    future = get_df_future(days_predict, m, countries, cases, field, train.index[-1])
    forecast = m.predict(future)

    return forecast[['ds', 'yhat']]
# ///////////////////////////////////////////////////////////////////////
# =======================================================================

# =================================MEAN==================================
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
  
def get_predict_mean(country, n_days, need_predict, cases_all, use_filter=True, pred_from=None):

    field_name = ("filtered_" if use_filter else "") + "history"
    threshold = date(year=2021, month=1, day=1)

    just_cases = cases_all[country][field_name]
    if pred_from:
        mean_value = np.mean(just_cases[pred_from - n_days : pred_from].values)
    else:
        mean_value = np.mean(just_cases[- n_days : ].values)

    predict = np.asarray([mean_value for i in range(0, need_predict)])

    return predict

# ///////////////////////////////////////////////////////////////////////
# =======================================================================

# ================================LINEAR=================================
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

# ///////////////////////////////////////////////////////////////////////
# =======================================================================

# =============================HOLT-WINTERS==============================
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

def hwes_predict(data, country, days):
 
    # create time series for country
    df = data[data['location'] == country]
    ts = df.set_index(pd.DatetimeIndex(df['date'])).asfreq('D').total_cases
  
    # split the time series
    train = ts.iloc[:-days]

    # fit the model
    model = HWES(train, seasonal_periods=4, trend='mul', seasonal='add')
    fitted = model.fit()

    predict = fitted.forecast(days).values

    return predict
# ///////////////////////////////////////////////////////////////////////
# =======================================================================

# ================================SARIMA=================================
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

# ///////////////////////////////////////////////////////////////////////
# =======================================================================

if __name__ == "__main__":
    with open("people_structure.csv", 'wb') as f:
        f.write(httpx.get("https://storage.yandexcloud.net/covid-19/people_structure.csv").content)

    df_sec = pd.read_csv("people_structure.csv")
    lags = json.loads(httpx.get("https://storage.yandexcloud.net/covid-19/lags.json").content)
    cases = get_cases(0)
    days_predict = 30

    index = None
    df = pd.DataFrame()
    for country in lags.keys():
        try:
            predict = get_predict(days_predict, country, df_sec, lags)
            if index is None:
                index = predict["ds"]

            d = pd.DataFrame()
            d["Date"] = index
            d["Country"] = country

            d["Mean_7"] = get_predict_mean(country, 7, 30, cases, use_filter=True)
            d["Holt-Winters"] = hwes_predict(df_sec, country, 30)
            d["Prophet"] = predict["yhat"].values
            # Здесь добавляй в Linear, Mean+, Holt предикты

            #####

            df = pd.concat([df, d], ignore_index=True)
        except:
            continue

    df.to_csv("tmp/predict.csv")



In [31]:
data_ = pd.read_csv("tmp/predict.csv")
# data_["Mean"] = 0
data_[data_.Country == 'Honduras']
# data_.Country.unique()

Unnamed: 0.1,Unnamed: 0,Date,Country,Mean_7,Holt_Winters,Prophet
0,0,2022-02-04,Honduras,315.607944,379716.17767,456.548368
1,1,2022-02-05,Honduras,315.607944,379861.0929,452.645061
2,2,2022-02-06,Honduras,315.607944,380109.92824,448.088367
3,3,2022-02-07,Honduras,315.607944,380096.421134,443.176556
4,4,2022-02-08,Honduras,315.607944,380410.817253,438.493282
5,5,2022-02-09,Honduras,315.607944,380555.951001,433.329024
6,6,2022-02-10,Honduras,315.607944,380805.004926,427.805901
7,7,2022-02-11,Honduras,315.607944,380791.716475,425.231192
8,8,2022-02-12,Honduras,315.607944,381106.331316,419.158911
9,9,2022-02-13,Honduras,315.607944,381251.683856,412.845689


In [32]:
all(data_['Holt_Winters'].isna())

False

In [33]:
hwes_predict(df_sec, 'Honduras', 30)



array([379716.17766953, 379861.09290048, 380109.92824017, 380096.42113442,
       380410.81725294, 380555.95100071, 380805.00492597, 380791.71647454,
       381106.33131618, 381251.68385586, 381500.95664186, 381487.88712003,
       381802.72096012, 381948.29256715, 382197.78448942, 382184.93417278,
       382499.98728703, 382645.7782372 , 382895.4895716 , 382882.85873612,
       383198.13140057, 383344.14197   , 383594.07299276, 383581.66191473,
       383897.15440577, 384043.38487095, 384293.53585862, 384281.34481471,
       384597.05740908, 384743.50804683])