<a href="https://colab.research.google.com/github/iamMaverick/laughing-waffle/blob/master/darts_deep_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install darts
!pip install pyyaml==5.4.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyyaml==5.4.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 5.2 MB/s 
[?25hInstalling collected packages: pyyaml
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 6.0
    Uninstalling PyYAML-6.0:
      Successfully uninstalled PyYAML-6.0
Successfully installed pyyaml-5.4.1


In [None]:
!pip install pymannkendall

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pymannkendall
  Downloading pymannkendall-1.4.2-py3-none-any.whl (12 kB)
Installing collected packages: pymannkendall
Successfully installed pymannkendall-1.4.2


In [None]:
#data processing
import pandas as pd
import numpy as np
# import tensorflow as tf
import os

#mongodb client
#from pymongo import MongoClient

# common set of functions
#import common

#data visualization functions
import matplotlib as mp
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 20,10

#Machine learning
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
#statsmodels
from statsmodels.tsa.seasonal import seasonal_decompose, STL


#darts models
import darts
from darts import TimeSeries
from darts.metrics import mape
from darts import models as darts_models
from darts.utils.statistics import check_seasonality, remove_seasonality, plot_acf, remove_trend, stationarity_test_adf, stationarity_test_kpss
#model maker
import pickle

# from prophet import Prophet

In [None]:


def pre_process(df:pd.DataFrame)->pd.DataFrame:
    """Return a datetime-indexed copy of ``df`` with ``close`` cast to float.

    The ``datetime`` column must contain strings. Every ``"T"`` is replaced by
    a space and the final character is dropped before the text is parsed with
    ``pd.to_datetime`` (presumably ISO-8601 stamps with a trailing ``Z`` --
    confirm against the CSV exports).

    The input frame is left untouched, so the function can safely be re-run on
    the same object (e.g. when a notebook cell is executed twice).

    Parameters
    ----------
    df : pd.DataFrame
        Frame with at least a string ``datetime`` column and a ``close`` column.

    Returns
    -------
    pd.DataFrame
        New frame indexed by the parsed ``datetime`` values. The previous index
        is kept as a regular column (named ``index`` for an unnamed index),
        exactly as ``reset_index`` produces it.
    """
    cleaned_stamps = (
        df['datetime']
        .str.replace("T", " ", regex=False)
        .str[:-1]  # drop the trailing character of every stamp
    )
    return (
        df.assign(
            datetime=pd.to_datetime(cleaned_stamps),
            close=df['close'].astype('float'),
        )
        .reset_index()
        .set_index('datetime')
    )

def convert_to_pd_series(df:pd.DataFrame)->pd.Series:
    """Return the ``close`` values of ``df`` on a synthetic, gap-free hourly index.

    The frame's own timestamps are NOT reused. The new index starts at the
    earliest value of ``df.index`` and advances exactly one hour per row, so
    values are placed by row position; gaps or unordered rows in the source
    index are not reflected in the result.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a ``close`` column and a datetime-like index.

    Returns
    -------
    pd.Series
        ``close`` values indexed by an hourly ``DatetimeIndex`` of equal length.
    """
    hourly_index = pd.date_range(
        start=df.index.min(),
        periods=len(df.index),
        # An offset object instead of the 'H' string alias: the alias is
        # deprecated since pandas 2.2, while the offset works on old and new
        # pandas versions and yields the same hourly frequency.
        freq=pd.offsets.Hour(),
    )
    return pd.Series(df.close.values, index=hourly_index)

def fill_missing_data(ts:TimeSeries)->TimeSeries:
    """Pass ``ts`` through darts' ``fill_missing_values`` (default settings) and return the result."""
    from darts.utils.missing_values import fill_missing_values as _fill_gaps
    return _fill_gaps(ts)

def get_timeseries(obj)->TimeSeries:
    """Build a darts ``TimeSeries`` from a pandas DataFrame or Series.

    A DataFrame is converted with ``TimeSeries.from_dataframe`` and then passed
    through ``fill_missing_data``; a Series is converted with
    ``TimeSeries.from_series`` as-is (no missing-value filling on that path).

    Parameters
    ----------
    obj : pd.DataFrame or pd.Series
        Source data.

    Returns
    -------
    TimeSeries

    Raises
    ------
    TypeError
        If ``obj`` is neither a DataFrame nor a Series.
    """
    if isinstance(obj, pd.DataFrame):
        return fill_missing_data(TimeSeries.from_dataframe(obj))
    if isinstance(obj, pd.Series):
        return TimeSeries.from_series(obj)
    # Fail with an explicit message instead of falling through to an unbound local.
    raise TypeError(
        f'get_timeseries expects a pandas DataFrame or Series, got {type(obj).__name__}'
    )

def split_series(ts:TimeSeries, split_point=0.85)->tuple:
    """Split ``ts`` into a training part and a validation part.

    Parameters
    ----------
    ts : TimeSeries
        Series to split.
    split_point : optional
        Forwarded unchanged to ``TimeSeries.split_before``. The default of
        ``0.85`` keeps the first 85% of the series for training. Other kinds of
        split point are whatever ``split_before`` accepts -- see the darts
        documentation.

    Returns
    -------
    tuple
        ``(train, val)`` as returned by ``split_before``.
    """
    train, val = ts.split_before(split_point)
    return (train, val)

def trend_test(ts:TimeSeries):
    """Run the original Mann-Kendall trend test, plot the tested data and print the result.

    Parameters
    ----------
    ts : TimeSeries
        Univariate darts series. Any other object (list, NumPy array, pandas
        Series) is handed to pymannkendall unchanged.

    Returns
    -------
    None
        The test result is printed and shown in the plot title.
    """
    import pymannkendall as mk

    # pymannkendall is documented for plain vectors (list, NumPy array, pandas
    # Series), so a darts series is unwrapped to its 1-D values first.
    values = ts.univariate_values() if isinstance(ts, TimeSeries) else ts
    test = mk.original_test(values)

    # Plot the data that was tested. The result object itself mixes text,
    # booleans and floats, so it is reported in the title instead of plotted.
    plt.plot(values)
    plt.title(f'Mann-Kendall trend: {test.trend} (p-value: {test.p})')
    print(test)

def stationarity_test(timeseries: pd.Series)-> None:
    """Print the first four values of an Augmented Dickey-Fuller test on ``timeseries``.

    The test is run through statsmodels' ``adfuller`` with ``autolag='AIC'``.
    The leading four entries of its result are labelled and printed as a
    pandas Series; nothing is returned.
    """
    from statsmodels.tsa.stattools import adfuller

    labels = ['Test Statistics','p-value','Number of lags used','Number of observations']
    adf_result = adfuller(timeseries, autolag='AIC')
    summary = pd.Series(adf_result[:4], index=labels)
    print(summary)

def remove_seasonality_trend(ts:TimeSeries)->TimeSeries:
    """Remove trend, then seasonality, from ``ts`` and report an ADF stationarity check.

    Both darts helpers (``remove_trend``, ``remove_seasonality``) are called
    with their default settings.

    Parameters
    ----------
    ts : TimeSeries
        Series to de-trend and de-seasonalise.

    Returns
    -------
    TimeSeries
        The transformed series. The ADF statistic and p-value of the
        transformed series are printed as a side effect.
    """
    detrended = remove_trend(ts)
    deseasonalised = remove_seasonality(detrended)

    # stationarity_test_adf only returns its result; print the headline numbers
    # so the check is visible instead of being silently discarded.
    adf_result = stationarity_test_adf(deseasonalised)
    print(f'ADF statistic: {adf_result[0]}, p-value: {adf_result[1]}')
    return deseasonalised

def eval_model(model:darts_models, train:TimeSeries, val:TimeSeries)->pd.DataFrame:
    """Fit ``model`` on ``train``, forecast ``len(val)`` steps and print the MAPE against ``val``.

    Parameters
    ----------
    model
        Object exposing ``fit(series)`` and ``predict(n)`` (a darts forecasting model).
    train : TimeSeries
        Series the model is fitted on.
    val : TimeSeries
        Hold-out series; its length sets the forecast horizon and it is the
        reference for the MAPE.

    Returns
    -------
    pd.DataFrame
        The forecast converted with ``pd_dataframe()``.
    """
    horizon = len(val)

    print(f'Filling model {str(model)}.\n')
    model.fit(train)

    print('Making predictions.\n')
    predicted = model.predict(horizon)
    predicted_frame = predicted.pd_dataframe()

    print(f'Model {model} obtains MAPE {mape(val, predicted)}')
    return predicted_frame


In [None]:


# Hourly CSV exports (presumably one per trading pair); only the first file is used in this cell.
data_files = ['SANDUSDT_1h.csv','ETHUSDT_1h.csv','MANAUSDT_1h.csv']
cols = ['datetime','close']

# Empty placeholder for predictions. It gets its own object: a chained
# assignment would bind one and the same DataFrame to several names.
df_predictions = pd.DataFrame()

# Load -> clean -> regular hourly series -> de-trended / de-seasonalised darts series.
df_main = pre_process(pd.read_csv(data_files[0], usecols=cols))
series = convert_to_pd_series(df_main)
ts = remove_seasonality_trend(get_timeseries(series))

# Train / validation split, plus pandas views of both parts.
train , val = split_series(ts)
df_train, df_val = train.pd_dataframe(), val.pd_dataframe()

In [None]:
# N-BEATS forecaster: 6-step input chunk, 3-step output chunk, fixed random seed.
model = darts_models.NBEATSModel(
    input_chunk_length=6,
    output_chunk_length=3,
    random_state=42,
)
# Train on the training split for 50 epochs with progress output enabled.
model.fit(
    train,
    epochs=50,
    verbose=True,
)

2022-07-14 08:40:13 darts.models.forecasting.torch_forecasting_model INFO: Train dataset contains 13943 samples.
2022-07-14 08:40:13 darts.models.forecasting.torch_forecasting_model INFO: Time series values are 64-bits; casting model to float64.
2022-07-14 08:40:13 pytorch_lightning.utilities.rank_zero INFO: GPU available: False, used: False
2022-07-14 08:40:13 pytorch_lightning.utilities.rank_zero INFO: TPU available: False, using: 0 TPU cores
2022-07-14 08:40:13 pytorch_lightning.utilities.rank_zero INFO: IPU available: False, using: 0 IPUs
2022-07-14 08:40:13 pytorch_lightning.utilities.rank_zero INFO: HPU available: False, using: 0 HPUs
2022-07-14 08:40:14 pytorch_lightning.callbacks.model_summary INFO: 
  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | stacks        | ModuleList       | 6.1 M 


Training: 0it [00:00, ?it/s]