# 1. Initialization

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
import math

In [5]:
import statsmodels.api as sm

In [6]:
%matplotlib inline

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# 2. Data Loading and Pre-processing

## 2.1 Loading dataset and cleaning

In [8]:
def load_dataset(location = None, path = 'data.csv'):
    """Read the dataset CSV and optionally keep the rows of a single location.

    Parameters
    ----------
    location : optional
        When not None, only rows whose ``location`` column equals this value
        are returned. When None, the whole file is returned.
    path : str or path-like, default 'data.csv'
        CSV file to read. The default is the file name this notebook has
        always used, so existing calls behave exactly as before.

    Returns
    -------
    pandas.DataFrame
        The (possibly filtered) frame. The original row index is kept, so a
        filtered frame generally does not have a 0..n-1 index.
    """
    df = pd.read_csv(path)
    if location is not None:
        df = df[df['location'] == location]
    return df

In [10]:
def get_stats(X):
    """Summarise a one-dimensional numeric sequence.

    Parameters
    ----------
    X : pandas.Series or numpy.ndarray
        Numeric observations. ``X`` must provide ``.min()`` and ``.max()``.

    Returns
    -------
    dict
        ``{'mean', 'sd', 'min', 'max'}`` where ``sd`` is the sample standard
        deviation (divisor ``n - 1``). ``sd`` is NaN when there is only one
        observation, because the sample SD is undefined in that case.

    Raises
    ------
    ValueError
        If ``X`` is empty.
    """
    values = np.asarray(X, dtype=float)
    if values.size == 0:
        raise ValueError('get_stats() requires at least one observation')

    # Vectorised NumPy reductions instead of Python-level loops over the data.
    mean = values.mean()
    # ddof=1 gives the sample (n - 1) standard deviation.
    sd = values.std(ddof=1) if values.size > 1 else float('nan')
    return {'mean': mean, 'sd': sd, 'min': X.min(), 'max': X.max()}

In [11]:
def get_stats_all(data, attrs=None):
    """Compute per-column summary statistics with ``get_stats``.

    Parameters
    ----------
    data : mapping of column name -> sequence (e.g. a pandas.DataFrame)
        Source of the columns to summarise.
    attrs : iterable of str, optional
        Column names to summarise. Defaults to ``'x1'`` .. ``'x6'``, the
        columns this function has always used.

    Returns
    -------
    dict
        ``{column name: get_stats(data[column])}`` for every requested column.
    """
    if attrs is None:
        attrs = [f'x{i}' for i in range(1, 7)]
    return {attr: get_stats(data[attr]) for attr in attrs}

In [12]:
def smoothen_outliers(X, mean, sd, sd_mult=3, win_size=7):
    """Replace outliers by the average of their neighbours.

    A value is an outlier when it lies outside
    ``[mean - sd * sd_mult, mean + sd * sd_mult]``. Each outlier is replaced
    by the mean of up to ``win_size // 2`` original values on either side of
    it (the outlier itself is excluded). Non-outliers are passed through.

    Parameters
    ----------
    X : sequence of numbers (pandas.Series, numpy.ndarray, list, ...)
        Observations in order.
    mean, sd : float
        Centre and spread used to define the outlier band.
    sd_mult : float, default 3
        Half-width of the band in units of ``sd``.
    win_size : int, default 7
        Nominal window size; ``win_size // 2`` neighbours are taken per side.

    Returns
    -------
    list of float
        Same length as ``X``.
    """
    # Work on a plain positional float array. Adding two pandas Series slices
    # aligns them by index label (giving all-NaN for disjoint slices), so the
    # neighbours are gathered positionally with NumPy instead.
    values = np.asarray(X, dtype=float)
    n = values.shape[0]
    wins_half = win_size // 2
    lower = mean - sd * sd_mult
    upper = mean + sd * sd_mult

    out = values.tolist()
    for i in np.flatnonzero((values < lower) | (values > upper)):
        # Clamp the window to the series bounds; a negative slice start would
        # otherwise wrap around to the end of the array.
        start = max(0, i - wins_half)
        stop = min(n, i + wins_half + 1)
        neighbours = np.concatenate((values[start:i], values[i + 1:stop]))
        neighbours = neighbours[~np.isnan(neighbours)]
        # Average over the neighbours actually available, so truncated
        # windows at the edges are not biased towards zero.
        if neighbours.size:
            out[i] = float(neighbours.mean())
    return out

In [13]:
def smoothen_outliers_all(data, stats, sd_mult=3, win_size=7):
    """Apply ``smoothen_outliers`` to every column listed in ``stats``.

    ``stats`` maps a column name to a dict holding that column's ``'mean'``
    and ``'sd'``. Each such column of ``data`` is overwritten in place with
    its smoothed values, and the same ``data`` object is returned.
    """
    for column, summary in stats.items():
        series = data[column]
        data[column] = smoothen_outliers(series, summary['mean'], summary['sd'], sd_mult, win_size)
    return data

In [14]:
def plot_features_single_location(data, location, stats=None, smoothen=True, outfile=None):
    """Plot features ``x1`` .. ``x6`` for one location on a 2x3 grid.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``location`` column and columns ``x1`` .. ``x6``.
    location :
        Value of the ``location`` column to plot; also shown in the title.
    stats : dict, optional
        Per-column statistics as produced by ``get_stats_all``. When omitted
        and smoothing is requested, they are computed from ``data`` as passed
        in (i.e. before filtering by location).
    smoothen : bool, default True
        Whether to run ``smoothen_outliers_all`` on the filtered rows before
        plotting.
    outfile : str or path-like, optional
        If given, the figure is also saved to this path.
    """
    fig, axs = plt.subplots(2,3, figsize=(16,10))
    fig.suptitle(f'Plots at location {location}')

    # Filter on the requested location (not a fixed one) and copy, because
    # smoothing writes into the frame it is given.
    filtered_data = data[data['location'] == location].copy()

    if smoothen:
        if stats is None:
            stats = get_stats_all(data)
        filtered_data = smoothen_outliers_all(filtered_data, stats)

    for i in range(6):
        a, b = i // 3, i % 3
        attr = f"x{i+1}"
        axs[a][b].plot(filtered_data[attr].tolist())
        axs[a][b].set_title(attr)

    if outfile:
        fig.savefig(outfile)

    plt.show()

In [15]:
def plot_correlation(data, location=None, outfile=None):
    if location is not None:
        filtered_data = data[data['location'] == 6]
    else:
        filtered_data = data
    filtered_data = filtered_data.drop(['location', 'date'], axis=1)
    corr = filtered_data.corr()
    return corr.style.background_gradient(cmap='coolwarm')

## 2.2 PyTorch Datasets

In [None]:
def create_dataset(data, attrs, target='x6', horizon=20):
    """Build supervised (features, future-targets) arrays from a time-ordered frame.

    Sample ``i`` pairs the feature row at position ``i`` with the next
    ``horizon`` values of the target column, i.e. positions
    ``i+1`` .. ``i+horizon``.

    Parameters
    ----------
    data : mapping of column name -> sequence (e.g. a pandas.DataFrame)
        Rows are assumed to be in time order.
    attrs : sequence of str
        Feature column names; must contain at least one name.
    target : str, default 'x6'
        Column whose future values are predicted.
    horizon : int, default 20
        Number of future target values per sample.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, len(attrs))
    y : numpy.ndarray, shape (n_samples, horizon)
        ``n_samples`` is ``max(n_rows - horizon, 0)``: every row that still
        has ``horizon`` later rows available.
    """
    # Each column is 1-D, so stack them side by side into (n_rows, n_attrs).
    features = np.stack([np.asarray(data[attr]) for attr in attrs], axis=1)
    targets = np.asarray(data[target])

    # The last usable start i satisfies i + horizon <= n_rows - 1.
    n_samples = max(features.shape[0] - horizon, 0)
    X = features[:n_samples]
    if n_samples == 0:
        y = np.empty((0, horizon), dtype=targets.dtype)
    else:
        y = np.stack([targets[i + 1:i + 1 + horizon] for i in range(n_samples)])
    return X, y
    
    

# 3. Evaluation

In [16]:
def mae(x, y):
    """Sum of absolute differences between ``x`` and ``y`` divided by ``x.shape[0]``.

    For one-dimensional arrays of equal length this is the mean absolute
    error.
    """
    abs_err = np.abs(x - y)
    n = x.shape[0]
    return abs_err.sum() / n

In [17]:
def amae(preds_batch, targets_batch):
    """Average of the per-row ``mae`` over a batch.

    Rows of ``preds_batch`` and ``targets_batch`` are paired in order, ``mae``
    is evaluated on each pair, and the total is divided by
    ``preds_batch.shape[0]``.
    """
    total = 0
    for pred_row, target_row in zip(preds_batch, targets_batch):
        total += mae(pred_row, target_row)
    return total / preds_batch.shape[0]

# 4. Modeling

In [18]:
# Location whose rows are used for modelling; change it here rather than inline.
MODEL_LOCATION = 6
dataset = load_dataset(location=MODEL_LOCATION)

In [None]:
# Install the forecasting dependencies with pinned versions.
# `%pip` installs into the environment of the running kernel, whereas `!pip`
# runs whichever `pip` happens to be first on the shell PATH.
%pip install pystan==2.19.1.1
%pip install prophet==1.0.1

Collecting pystan==2.19.1.1
  Using cached pystan-2.19.1.1-cp38-cp38-win_amd64.whl (79.8 MB)
Installing collected packages: pystan


ERROR: Could not install packages due to an OSError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\aklag\\anaconda3\\envs\\torch-nlp\\Lib\\site-packages\\pystan\\stan\\src\\stan\\io\\var_context.hpp'
Consider using the `--user` option or check the permissions.



Collecting pystan==2.19.1.1
  Using cached pystan-2.19.1.1-cp38-cp38-win_amd64.whl (79.8 MB)
Installing collected packages: pystan
Successfully installed pystan-2.19.1.1


  ERROR: Command errored out with exit status 1:
   command: 'C:\Users\aklag\anaconda3\envs\torch-nlp\python.exe' -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '"'"'C:\\Users\\aklag\\AppData\\Local\\Temp\\pip-install-s0frvm6t\\prophet_38dc900f5ee4499d8c70cc7c1264142d\\setup.py'"'"'; __file__='"'"'C:\\Users\\aklag\\AppData\\Local\\Temp\\pip-install-s0frvm6t\\prophet_38dc900f5ee4499d8c70cc7c1264142d\\setup.py'"'"';f=getattr(tokenize, '"'"'open'"'"', open)(__file__);code=f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' bdist_wheel -d 'C:\Users\aklag\AppData\Local\Temp\pip-wheel-ybhmopjd'
       cwd: C:\Users\aklag\AppData\Local\Temp\pip-install-s0frvm6t\prophet_38dc900f5ee4499d8c70cc7c1264142d\
  Complete output (44 lines):
  running bdist_wheel
  running build
  running build_py
  creating build
  creating build\lib
  creating build\lib\prophet
  creating build\lib\prophet\stan_model
  Traceback (most recent call last):
    

Collecting prophet
  Using cached prophet-1.0.1.tar.gz (65 kB)
Collecting cmdstanpy==0.9.68
  Using cached cmdstanpy-0.9.68-py3-none-any.whl (49 kB)
Collecting pystan~=2.19.1.1
  Using cached pystan-2.19.1.1-cp38-cp38-win_amd64.whl (79.8 MB)
Collecting LunarCalendar>=0.0.9
  Using cached LunarCalendar-0.0.9-py2.py3-none-any.whl (18 kB)
Collecting holidays>=0.10.2
  Using cached holidays-0.13-py3-none-any.whl (172 kB)
Building wheels for collected packages: prophet
  Building wheel for prophet (setup.py): started
  Building wheel for prophet (setup.py): finished with status 'error'
  Running setup.py clean for prophet
Failed to build prophet
Installing collected packages: pystan, LunarCalendar, holidays, cmdstanpy, prophet


  ERROR: Command errored out with exit status 1:
   command: 'C:\Users\aklag\anaconda3\envs\torch-nlp\python.exe' -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '"'"'C:\\Users\\aklag\\AppData\\Local\\Temp\\pip-install-hrwn_s9y\\prophet_b7e0698c06d34baca403e53760e71e93\\setup.py'"'"'; __file__='"'"'C:\\Users\\aklag\\AppData\\Local\\Temp\\pip-install-hrwn_s9y\\prophet_b7e0698c06d34baca403e53760e71e93\\setup.py'"'"';f=getattr(tokenize, '"'"'open'"'"', open)(__file__);code=f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' bdist_wheel -d 'C:\Users\aklag\AppData\Local\Temp\pip-wheel-9ek_y4yn'
       cwd: C:\Users\aklag\AppData\Local\Temp\pip-install-hrwn_s9y\prophet_b7e0698c06d34baca403e53760e71e93\
  Complete output (44 lines):
  running bdist_wheel
  running build
  running build_py
  creating build
  creating build\lib
  creating build\lib\prophet
  creating build\lib\prophet\stan_model
  Traceback (most recent call last):
    

Collecting prophet
  Downloading prophet-1.0.1.tar.gz (65 kB)
Collecting Cython>=0.22
  Downloading Cython-0.29.28-py2.py3-none-any.whl (983 kB)
Collecting cmdstanpy==0.9.68
  Downloading cmdstanpy-0.9.68-py3-none-any.whl (49 kB)
Collecting pystan~=2.19.1.1
  Downloading pystan-2.19.1.1-cp38-cp38-win_amd64.whl (79.8 MB)
Collecting LunarCalendar>=0.0.9
  Downloading LunarCalendar-0.0.9-py2.py3-none-any.whl (18 kB)
Collecting convertdate>=2.1.2
  Downloading convertdate-2.4.0-py3-none-any.whl (47 kB)
Collecting holidays>=0.10.2
  Downloading holidays-0.13-py3-none-any.whl (172 kB)
Collecting setuptools-git>=1.2
  Downloading setuptools_git-1.2-py2.py3-none-any.whl (10 kB)
Collecting ujson
  Downloading ujson-5.2.0-cp38-cp38-win_amd64.whl (37 kB)
Collecting pymeeus<=1,>=0.3.13
  Downloading PyMeeus-0.5.11.tar.gz (5.4 MB)
Collecting hijri-converter
  Downloading hijri_converter-2.2.3-py3-none-any.whl (14 kB)
Collecting korean-lunar-calendar
  Downloading korean_lunar_calendar-0.2.1-py3-non