In [42]:
import pandas as pd
import modules.preprocessing.sampling as sampling
import modules.preprocessing.scaling as scaling
import modules.constants as const

import numpy as np
import modules.training.LSTMmodels as LSTMmodels
import torch.nn as nn
import torch.optim as optim
import modules.training.training as training

import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit
import torch

import modules.utils as utils
import modules.plot_utils as plutils
import modules.plot_constants as pltconst
from modules.plot_constants import uzh_colors

from tqdm import tqdm
import os
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error


In [64]:
pltconst.set_plot_parameters()

In [43]:
# Input features
nft_predictions_dir = "/mnt/Ivana/Results/Tezos/NFT/Predictions/"
nft_metrics_dir = "/mnt/Ivana/Results/Tezos/NFT/Metrics/"

nft_dir = const.tezos_dir + "DataDuringProcessing/NFT/Target_tokens/"

NFT_name = "Lost control_Iskra Velitchkova"

market_df = pd.read_csv(const.input_X_dir + "Market.csv")
technical_df = pd.read_csv(const.input_X_dir + "TechnicalIndicators.csv")
dates = pd.read_csv(const.input_y_dir + "Dates.csv")

dates.Date =  pd.to_datetime(dates.Date)
end_date =  max(dates.Date)

def shorten_df(df, dates, start_date, end_date):
    if "Date" not in df.columns:
        df = pd.concat([dates, df], axis = 1)
    
    df.Date = pd.to_datetime(df.Date)
    df = df[(df.Date >= start_date) & (df.Date <= end_date)]
    df.drop("Date", axis = 1, inplace=True)
    return df


data_names = ["market", "TI", "NFT"]




In [44]:
TEST_SIZE = 0.2
WINDOW_SIZE = 10
STEP_SIZE = 1
OUTPUT_DIM = 1

mse_loss = nn.MSELoss()

N_EPOCHS = 80
N_HIDDEN = 256
LR = 0.001

DO_SEGMENTATION = False

In [47]:
def get_all_relevant_metrics(row, targets_all, CUTOFF = 0):
    prediction = row[4]
    targets = targets_all[-len(prediction):]
    
    if CUTOFF > 0:
        prediction = prediction[:CUTOFF]
        targets = targets[:CUTOFF]
        
    row["mse"] = mean_squared_error(y_true = targets, y_pred = prediction)
    row["rmse"] = mean_squared_error(y_true = targets, y_pred = prediction, squared= False)
    row["mae"] = mean_absolute_error(y_true = targets, y_pred = prediction)
    row["mape"] = mean_absolute_percentage_error(y_true = targets, y_pred = prediction)

    return row

In [53]:

all_results = []
print(f"Running the following configuration: n hidden 1 = {N_HIDDEN}, epochs = {N_EPOCHS}, lr = {LR},  segmentation = {DO_SEGMENTATION}")


for file_name in os.listdir(nft_dir):
    if file_name.startswith("Strange Creatures"): continue
    NFT_name = file_name.replace(".csv", "")
    nft_df = pd.read_csv(nft_dir + file_name)
    nft_df.Date = pd.to_datetime(nft_df.Date)

    start_date = min(nft_df.Date)

    nft_df_short = shorten_df(nft_df, dates, start_date, end_date)
    market_df_short = shorten_df(market_df, dates, start_date, end_date)
    technical_df_short = shorten_df(technical_df, dates, start_date, end_date)

    all_dfs = [market_df_short, technical_df_short, nft_df_short]
    y = nft_df_short.price.values.reshape(-1,1)
    
    dest_file_predictions = nft_predictions_dir + f"Predictions_{NFT_name}.npy"
    dest_file_metrics = nft_metrics_dir + f"Metrics_{NFT_name}.csv"

    all_predictions = []

    for i in tqdm(range(1, 8)):
        # Choose which data combinations to exclude: dont include data set combinations if they do not include either market data or TI data
        if i % 2 == 0: continue

        X_data, dict_chosen = utils.get_data_from_combination_number(i, all_dfs, data_names=data_names)
        X_train, y_train, X_test, y_test, scaler = sampling.prepare_input_data(X_data, y, test_size=TEST_SIZE, window_size=WINDOW_SIZE, step_size=STEP_SIZE, do_segmentation=DO_SEGMENTATION)
        train_loader = sampling.make_data_loader(X_train, y_train, batch_size=X_train.shape[0])

        model = LSTMmodels.LSTMSimple(input_size=X_train.shape[2], hidden_size=N_HIDDEN, output_size=OUTPUT_DIM)
        optimizer = optim.Adam(model.parameters(), lr=LR)

        # Train the model
        model, _ = training.train_model(model, train_loader, n_epochs=N_EPOCHS, optimizer=optimizer, loss_fn = mse_loss)
        
        # Predict on the full test set and unscale the values
        predictions, _ = training.make_prediction(model, X_test, y_test, mse_loss)
        predictions_unsc = scaler.inverse_transform(predictions.reshape(-1,1))

        all_predictions.append({**dict_chosen, **{"prediction": predictions_unsc.flatten()}})

        # Convert predictions to numpy format and save numpy file
        all_predictions_df = pd.DataFrame.from_dict(all_predictions)
    

    pred_all = all_predictions_df.apply(lambda x: get_all_relevant_metrics(x, y[-len(x[4]):]), axis = 1)
    pred_all.drop(pred_all.columns[4], axis = 1, inplace=True)
    
    pred_all.to_csv(dest_file_metrics, index = False)
    np.save(dest_file_predictions,all_predictions_df.values)

        

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop("Date", axis = 1, inplace=True)


Running the following configuration: n hidden 1 = 256, epochs = 80, lr = 0.001,  segmentation = False


  0%|          | 0/7 [00:00<?, ?it/s]

100%|██████████| 7/7 [00:34<00:00,  4.87s/it]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop("Date", axis = 1, inplace=True)
100%|██████████| 7/7 [00:45<00:00,  6.44s/it]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop("Date", axis = 1, inplace=True)
100%|██████████| 7/7 [00:43<00:00,  6.26s/it]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop("Date", axis = 1, inplace=True)
100%|██████████| 7/7 [00:40<00:00,  5.85s/it]


## Plot predictions

In [54]:
predictions = np.load(dest_file_predictions, allow_pickle=True)
pred = pd.DataFrame(predictions)
pred

Unnamed: 0,0,1,2,3,4
0,1,0,0,1,"[7.159170774992273, 7.183108475485501, 7.20418..."
1,3,0,1,1,"[5.64284667017527, 5.666270547740018, 5.653182..."
2,5,1,0,1,"[7.4860135397992895, 7.278619493425124, 6.7349..."
3,7,1,1,1,"[6.32293141435874, 6.026575275142432, 5.700425..."


In [1]:
for file_name in os.listdir(nft_predictions_dir):
    predictions = np.load(nft_predictions_dir + file_name, allow_pickle=True)


    y_df = pd.read_csv(nft_dir + file_name.replace("Predictions_", "").replace(".npy", "") + ".csv")
    y_df.Date = pd.to_datetime(y_df.Date)
    y_df = shorten_df(y_df, dates = dates, start_date=min(y_df.Date), end_date=end_date)
    y = y_df.price.values

    pred = pd.DataFrame(predictions)
    NFT_name  = file_name.replace(".csv", "")

    y_pred = pred.loc[0, 4]
    y_tgt = y[-len(y_pred):]
    x = y_df.Date.values[-len(y_pred):]
    plt.figure(figsize = (14, 8))
    plt.plot(x, y_tgt, color="blue", label = "Target")
    for i, row in pred.iterrows():
        y_pred = pred.loc[i, 4]
        indexes_included = np.where(pred.loc[i, 1:3].values == 1)[0]
        label = "+".join(np.array(data_names)[indexes_included])
        plt.plot(x, y_pred,  label=label, linestyle="--")

    plt.title(NFT_name)
    plt.grid(zorder=100, lw =0.5, color = 'lightgray')
    leg = plt.legend(frameon=True,facecolor='white', framealpha=1, loc='upper right', fontsize=12, ncol = 2)
    frame = leg.get_frame()
    frame.set_linewidth(0)
    plt.xlabel("Date")
    plt.ylabel("Price in Tezos")
    plt.show()

NameError: name 'os' is not defined