In [1]:
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import numpy as np
from icecream import ic


def plot_data(df, x_label, y_label, x_col_name, y_col_name):
    """Draw a line plot of one column of ``df`` against another and show it.

    Args:
        df: Mapping-like table (e.g. a pandas DataFrame) indexable by column name.
        x_label: Text for the horizontal axis label.
        y_label: Text for the vertical axis label.
        x_col_name: Name of the column plotted along the x axis.
        y_col_name: Name of the column plotted along the y axis.

    Returns:
        None. The figure is displayed through ``plt.show()``.
    """
    fig, ax = plt.subplots()
    # Each label goes on its own axis (the x label on the x axis, the y label on the y axis).
    ax.set_xlabel(x_label, fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    # Read the columns from the ``df`` argument rather than from an outer-scope name.
    ax.plot(df[x_col_name], df[y_col_name])
    plt.show()

def process_crypto_data(source):
    """Load a crypto price CSV and append cyclically encoded time features.

    The CSV must provide the columns ``time_close``, ``price_close``,
    ``volume_traded`` and ``trades_count``; every other column is dropped.
    ``time_close`` values are strings that, once ``'T'`` is replaced by a space
    and everything from the first ``'.'`` onward is removed, match
    ``'%Y-%m-%d %H:%M:%S'``.

    Args:
        source: Anything accepted by ``pandas.read_csv`` (path or file-like object).

    Returns:
        A DataFrame with the four kept columns plus ``date_time`` (normalised
        timestamp string), ``sin_hour``/``cos_hour``, ``sin_day``/``cos_day``,
        ``sin_month``/``cos_month`` and ``year``. Any NaN is replaced by ``-1``.

    Raises:
        KeyError: If one of the required columns is missing.
        ValueError: If a normalised timestamp does not match the expected format.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    # .copy() makes the column subset an independent frame, so the column
    # assignments below never write into a slice of the raw frame.
    df = pd.read_csv(source)[["time_close", "price_close", "volume_traded", "trades_count"]].copy()

    # "2021-01-15T06:00:00.000Z" -> "2021-01-15 06:00:00"
    df['date_time'] = [t.replace('T', ' ').split('.')[0] for t in df['time_close']]

    # Parse every timestamp exactly once and derive all calendar fields from it.
    parsed = [datetime.datetime.strptime(t, timestamp_format) for t in df['date_time']]
    timestamps_hour = np.array([p.hour for p in parsed], dtype=float)
    timestamps_day = np.array([p.day for p in parsed], dtype=float)
    timestamps_month = np.array([p.month for p in parsed], dtype=float)
    timestamps_year = np.array([p.year for p in parsed], dtype=int)

    # Periods used for the cyclical (sin/cos) encoding.
    hours_in_day = 24
    # NOTE(review): with a period of 30, day 31 lands on the same angle as day 1.
    # Kept as-is so the produced features stay identical to previous runs.
    days_in_month = 30
    month_in_year = 12

    df['sin_hour'] = np.sin(2*np.pi*timestamps_hour/hours_in_day)
    df['cos_hour'] = np.cos(2*np.pi*timestamps_hour/hours_in_day)
    df['sin_day'] = np.sin(2*np.pi*timestamps_day/days_in_month)
    df['cos_day'] = np.cos(2*np.pi*timestamps_day/days_in_month)
    df['sin_month'] = np.sin(2*np.pi*timestamps_month/month_in_year)
    df['cos_month'] = np.cos(2*np.pi*timestamps_month/month_in_year)
    df['year'] = timestamps_year

    # Replace any missing value left in the frame with the sentinel -1.
    df = df.fillna(-1)
    return df

In [2]:
# Build the feature frame from the raw daily BTC history file.
df = process_crypto_data('./Data/BTC_Daily_History_20150101_20210427.csv')

# The first 80% of the rows become the training set, the remaining rows the test set.
split_point = int(0.8 * len(df))
df_train, df_test = df.iloc[:split_point], df.iloc[split_point:]

# Persist both partitions as CSV files.
df_train.to_csv('./Data/btc_daily_train.csv')
df_test.to_csv('./Data/btc_daily_test.csv')

In [3]:
from model import Transformer
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import logging
import time # debugging
from plot import plot_training_crypto, plot_loss
from helpers import log_loss, clean_directory
from joblib import load
from icecream import ic
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
import math, random

import argparse
# from train_teacher_forcing import *
from btc_train_with_sampling import transformer
from CryptoDataLoader import CryptoDataSet
from torch.utils.data import DataLoader
import torch.nn as nn
import torch
from helpers import clean_directory
from btc_inference import inference

# Logging setup: INFO level, each record rendered as
# "<timestamp> [<LEVEL>] <logger name> <message>" with a bracketed timestamp.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s %(message)s",
    datefmt="[%Y-%m-%d %H:%M:%S]",
)

# Logger for this notebook, named after the current module.
logger = logging.getLogger(__name__)

def flip_from_probability(p):
    """Return True with probability ``p``, using one draw from ``random.random()``.

    Args:
        p: Threshold compared against a uniform draw from ``[0.0, 1.0)``.
            Values ``<= 0`` always give False; values ``>= 1`` always give True.

    Returns:
        bool: True when the draw is strictly below ``p``, otherwise False.
    """
    # The comparison already yields a bool, so no conditional expression is needed.
    return random.random() < p

In [4]:
# --- Run configuration ---
epoch = 30
k = 60  # passed to transformer(...) in the (currently commented-out) training call
batch_size = 1
# frequency = 100
training_length = 30 # 1 month data for training
forecast_window = 1 # 1 day data for prediction
train_csv = "btc_daily_train.csv"
test_csv = "btc_daily_test.csv"
path_to_save_model = "save_model/"
path_to_save_loss = "save_loss/"
path_to_save_predictions = "save_predictions/"
device = "cpu"

# --- Datasets and loaders ---
# Both loaders take their batch size from the `batch_size` setting above instead
# of a separate hard-coded literal, so changing the setting takes effect.
train_dataset = CryptoDataSet(csv_name = train_csv, root_dir = "Data/", training_length = training_length, forecast_window = forecast_window)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = CryptoDataSet(csv_name = test_csv, root_dir = "Data/", training_length = training_length, forecast_window = forecast_window)
# NOTE(review): the test loader is shuffled as well; confirm that is intended for inference.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [5]:
# training
# clean_directory()
# best_model = transformer(train_dataloader, epoch, k, path_to_save_model, path_to_save_loss, path_to_save_predictions, device)

[2021-08-15 19:38:17] [INFO] btc_train_with_sampling Epoch: 0
[2021-08-15 19:38:19] [INFO] CryptoDataLoader idx is --- 1100, start is --- 1306
[2021-08-15 19:38:19] [INFO] CryptoDataLoader idx is --- 100, start is --- 261
[2021-08-15 19:38:23] [INFO] CryptoDataLoader idx is --- 1300, start is --- 377
[2021-08-15 19:38:24] [INFO] CryptoDataLoader idx is --- 1000, start is --- 1178
[2021-08-15 19:38:24] [INFO] CryptoDataLoader idx is --- 1200, start is --- 1081
[2021-08-15 19:38:25] [INFO] CryptoDataLoader idx is --- 600, start is --- 1241
[2021-08-15 19:38:29] [INFO] CryptoDataLoader idx is --- 1700, start is --- 31
[2021-08-15 19:38:30] [INFO] CryptoDataLoader idx is --- 1400, start is --- 1766
[2021-08-15 19:38:33] [INFO] CryptoDataLoader idx is --- 500, start is --- 69
[2021-08-15 19:38:34] [INFO] CryptoDataLoader idx is --- 400, start is --- 595
[2021-08-15 19:38:35] [INFO] CryptoDataLoader idx is --- 900, start is --- 1291
[2021-08-15 19:38:36] [INFO] CryptoDataLoader idx is --- 80

In [5]:
# Checkpoint file name passed to inference() as its final argument.
# Hard-coded here because the transformer(...) training call that would
# otherwise assign best_model is commented out.
best_model = 'best_train_13.pth'

In [6]:
# Run inference over the test loader with the checkpoint named by best_model
# (presumably loaded from path_to_save_model — confirm in btc_inference).
# NOTE(review): the recorded run of this cell ended with
# "IndexError: index 1 is out of bounds for dimension 0 with size 1".
inference(path_to_save_predictions, forecast_window, test_dataloader, device, path_to_save_model, best_model)

[2021-08-15 20:14:30] [INFO] btc_inference data len is 462
[PLOT 0/25]:   0%|          | 0/462 [00:00<?, ?it/s, data length=1][2021-08-15 20:14:30] [INFO] btc_inference prediction is tensor([[[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]],

        [[0.4519]]], dtype=torch.float64)
[PLOT 0/25]:   0%|          | 1/462 [00:00<00:19, 23.74it/s, data length=1]


IndexError: index 1 is out of bounds for dimension 0 with size 1

In [10]:
# Scratch check: range(1) yields only 0, so the body runs exactly once.
for i in range(1):
    print(i)

0
