In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

from sklearn.preprocessing import MinMaxScaler
from copy import deepcopy as dc
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [None]:
# Directory containing the yearly CSV files that the rest of the notebook reads.
# Alternative data set (swap the two assignments to use it):
# temp_path = '/workspaces/Futures-First/Prediction/LSTM/data/Corn/ZC Jul-Sep'
temp_path = '/workspaces/Futures-First/Prediction/LSTM/data/Copper/HG Dec-Mar'

# One CSV per year, 2015 through 2024 inclusive, in chronological order.
# Later cells train on every entry except the last and evaluate on the last one.
files = [f'{temp_path}/{year}.csv' for year in range(2015, 2025)]


In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

In [None]:
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer that emits one value per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_stacked_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_stacked_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_stacked_layers = num_stacked_layers

        # batch_first=True: inputs are laid out as (batch, time, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_stacked_layers,
                            batch_first=True)

        # Maps the hidden state of the final time step to a single output.
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return a (batch, 1) tensor computed from the last time step of `x`.

        `x` is a batch-first sequence tensor, (batch, time, input_size).
        """
        batch_size = x.size(0)
        # Build the zero initial state on the input's own device and dtype so the
        # module does not depend on a notebook-global `device` variable.
        h0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size,
                         device=x.device, dtype=x.dtype)
        c0 = torch.zeros_like(h0)

        out, _ = self.lstm(x, (h0, c0))
        # Keep only the output at the last time step.
        return self.fc(out[:, -1, :])


In [None]:
# Price columns available in each CSV; `tag_index` picks the one that later
# cells rename to 'Price' and model.
tag_list = ['Open', 'High', 'Low', 'Close']
tag_index = tag_list.index('Low')

In [None]:
# Function updates
def train_one_epoch(train_loader, model, optimizer, loss_function, epoch):
    """Run one optimisation pass over `train_loader`.

    Args:
        train_loader: Iterable of batches; `batch[0]` is the input tensor and
            `batch[1]` the target tensor.
        model: Module being trained; it is switched to training mode.
        optimizer: Optimizer that updates `model`'s parameters.
        loss_function: Callable `(output, target) -> scalar loss tensor`.
        epoch: Zero-based epoch index; printed as `epoch + 1`.

    Prints the mean loss of every completed group of 100 batches. Returns None.
    """
    model.train(True)
    print(f'Epoch: {epoch + 1}')

    # Move batches to wherever the model lives instead of relying on a
    # notebook-global `device`; a parameter-less model falls back to the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    running_loss = 0.0

    for batch_index, batch in enumerate(train_loader):
        x_batch, y_batch = batch[0].to(target_device), batch[1].to(target_device)

        output = model(x_batch)
        loss = loss_function(output, y_batch)
        running_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_index % 100 == 99:  # print every 100 batches
            avg_loss_across_batches = running_loss / 100
            print(f'Batch {batch_index + 1}, Loss: {avg_loss_across_batches:.3f}')
            running_loss = 0.0
    print()

def validate_one_epoch(test_loader, model, loss_function):
    """Evaluate `model` on `test_loader` and print the mean per-batch loss.

    Args:
        test_loader: Iterable of batches; `batch[0]` is the input tensor and
            `batch[1]` the target tensor.
        model: Module to evaluate; it is left in evaluation mode.
        loss_function: Callable `(output, target) -> scalar loss tensor`.

    No gradients are recorded. An empty loader reports a loss of NaN instead of
    raising ZeroDivisionError. Returns None.
    """
    model.train(False)

    # Move batches to wherever the model lives instead of relying on a
    # notebook-global `device`; a parameter-less model falls back to the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    running_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for batch in test_loader:
            x_batch, y_batch = batch[0].to(target_device), batch[1].to(target_device)
            output = model(x_batch)
            running_loss += loss_function(output, y_batch).item()
            num_batches += 1

    avg_loss_across_batches = running_loss / num_batches if num_batches else float('nan')

    print(f'Val Loss: {avg_loss_across_batches:.3f}')
    print('***************************************************')
    print()

class TimeSeriesDataset(Dataset):
    """Pairs each entry of `X` with the entry of `y` at the same index."""

    def __init__(self, X, y):
        # Both containers are stored as given; nothing is copied or converted.
        self.X, self.y = X, y

    def __len__(self):
        """Number of samples, taken from `X`."""
        return len(self.X)

    def __getitem__(self, i):
        """Return the `(X[i], y[i])` pair."""
        features = self.X[i]
        target = self.y[i]
        return features, target
# Main loop: fit one fresh LSTM per historical file and save its weights.
# The last entry of `files` is excluded here (`files[:-1]`).


def prepare_dataframe_for_lstm(df, n_steps):
    """Return a Date-indexed copy of `df` with `n_steps` lagged 'Price' columns.

    The result has columns 'Price', 'Price(t-1)', ..., 'Price(t-n_steps)'.
    Rows containing NaN (which includes the first `n_steps` rows, since they
    lack a full lag history) are dropped. The input frame is not modified.
    """
    df = dc(df)
    df.set_index('Date', inplace=True)

    for i in range(1, n_steps + 1):
        df[f'Price(t-{i})'] = df['Price'].shift(i)

    df.dropna(inplace=True)
    return df


# Settings that are identical for every file are defined once, outside the loop.
lookback = 10
batch_size = 16
learning_rate = 0.001
num_epochs = 100
loss_function = nn.MSELoss()
price_tag = tag_list[tag_index]  # name of the column actually being modelled

for file in files[:-1]:  # Exclude the current year's data for training
    # Load the file and keep only the date plus the selected price column.
    data = pd.read_csv(file)
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.rename(columns={price_tag: 'Price'})
    data = data[['Date', 'Price']]

    shifted_df = prepare_dataframe_for_lstm(data, lookback)

    # Scale every column to [-1, 1]; the scaler is fitted on this file alone.
    shifted_df_as_np = shifted_df.to_numpy()
    scaler = MinMaxScaler(feature_range=(-1, 1))
    shifted_df_as_np = scaler.fit_transform(shifted_df_as_np)

    # Column 0 is the target; the remaining columns are the lags t-1..t-lookback.
    X = shifted_df_as_np[:, 1:]
    y = shifted_df_as_np[:, 0]
    # Reverse the lag columns so each window runs oldest -> newest. The copy
    # turns the flipped view into a regular array before it becomes a tensor.
    X = dc(np.flip(X, axis=1))

    X = X.reshape((-1, lookback, 1))
    y = y.reshape((-1, 1))

    X = torch.tensor(X).float()
    y = torch.tensor(y).float()

    train_dataset = TimeSeriesDataset(X, y)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Sanity check: print the shapes of the first batch.
    for _, batch in enumerate(train_loader):
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)
        print(x_batch.shape, y_batch.shape)
        break

    # A new model is created for every file, so weights are not carried over.
    model = LSTM(1, 4, 1)
    model.to(device)
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        train_one_epoch(train_loader, model, optimizer, loss_function, epoch)
        # No separate validation split exists: the printed "Val Loss" is the
        # loss on the training data with the model in evaluation mode.
        validate_one_epoch(train_loader, model, loss_function)

    # Saved into the current working directory as lstm_model_<file stem>.pt.
    torch.save(model.state_dict(), f'lstm_model_{file.split("/")[-1].split(".")[0]}.pt')

    model.eval()
    with torch.no_grad():
        predicted = model(X.to(device)).to('cpu').numpy()

    # Both series are in scaled units, and the labels name the modelled column
    # instead of a hard-coded "Close".
    plt.plot(y, label=f'Actual {price_tag}')
    plt.plot(predicted, label=f'Predicted {price_tag}')
    plt.xlabel('Day')
    plt.ylabel(f'{price_tag} (scaled)')
    plt.legend()
    plt.show()


In [None]:
import torch
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from copy import deepcopy as dc
import plotly.graph_objects as go

# Function to prepare data for LSTM
def prepare_dataframe_for_lstm(df, n_steps):
    """Build a lagged-feature frame from a frame with 'Date' and 'Price' columns.

    Works on a deep copy, so `df` is left untouched. The copy is indexed by
    'Date' and gains the columns 'Price(t-1)' .. 'Price(t-n_steps)', each
    holding 'Price' shifted down by that many rows. Rows containing NaN are
    removed before the frame is returned.
    """
    windowed = dc(df).set_index('Date')
    for lag in range(1, n_steps + 1):
        windowed[f'Price(t-{lag})'] = windowed['Price'].shift(lag)
    return windowed.dropna()

# Load the last entry of `files` and keep only the date and the selected
# price column (renamed to 'Price').
data = pd.read_csv(files[-1])
data['Date'] = pd.to_datetime(data['Date'])
data = data.rename(columns={tag_list[tag_index]: 'Price'})[['Date', 'Price']]

# Build the lagged frame: 'Price' plus its `lookback` previous values.
lookback = 10
shifted_df = prepare_dataframe_for_lstm(data, lookback)

# Scale every column to [-1, 1]. The scaler is fitted on this file's own values.
scaler = MinMaxScaler(feature_range=(-1, 1))
shifted_df_as_np = scaler.fit_transform(shifted_df.to_numpy())

# Column 0 is the target. The lag columns are reversed so each window runs
# oldest -> newest, then copied so the flipped view becomes a regular array.
y = shifted_df_as_np[:, 0].reshape((-1, 1))
X = np.flip(shifted_df_as_np[:, 1:], axis=1).copy().reshape((-1, lookback, 1))

X_test = torch.tensor(X, dtype=torch.float32)
y_test = torch.tensor(y, dtype=torch.float32)

# Load the trained model
model_file = '/workspaces/Futures-First/Prediction/LSTM/lstm_model_2023.pt'
model = LSTM(input_size=1, hidden_size=4, num_stacked_layers=1).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU machine still loads on a CPU-only one.
model.load_state_dict(torch.load(model_file, map_location=device))
model.eval()


def _unscale_price(scaled_values):
    """Convert scaled 'Price' values (column 0 of the scaler) to original units.

    `scaler` was fitted on `lookback + 1` columns, so the values are placed in
    column 0 of a zero-padded array, inverse-transformed, and column 0 is
    returned as a new 1-D float array.
    """
    scaled_values = np.asarray(scaled_values, dtype=float).reshape(-1)
    padded = np.zeros((scaled_values.shape[0], lookback + 1))
    padded[:, 0] = scaled_values
    return scaler.inverse_transform(padded)[:, 0].copy()


# Predict every window of the loaded file (no gradients needed for inference).
with torch.no_grad():
    test_predictions = model(X_test.to(device)).cpu().numpy().flatten()

# Inverse transform the predictions and the actual targets.
test_predictions = _unscale_price(test_predictions)
new_y_test = _unscale_price(y_test.numpy().flatten())

# Prepare for the iterated forecast: start from the last available window.
# NOTE(review): X_test[-1] holds lags t-1..t-lookback of the final row, so the
# first value produced below is the model's estimate for that final row itself
# (the same input as test_predictions[-1]), not for a day beyond the data.
# Confirm this alignment is intended before shifting it.
last_window = X_test[-1].cpu().numpy()  # shape (lookback, 1)

n_future_steps = 10
future_predictions = []

# Roll the window forward `n_future_steps` times, feeding each prediction back in.
with torch.no_grad():
    for _ in range(n_future_steps):
        # Predict the next (scaled) price from the current window.
        window_batch = torch.tensor(last_window).unsqueeze(0).float().to(device)
        pred = model(window_batch).cpu().numpy().flatten()[0]

        # Record the prediction in original price units.
        future_predictions.append(_unscale_price([pred])[0])

        # Drop the oldest step and append the new (scaled) prediction.
        last_window = np.roll(last_window, -1, axis=0)
        last_window[-1, 0] = pred

# Combine all predictions
all_predictions = np.concatenate((test_predictions, np.array(future_predictions)))

# Create plotly figure
# Labels are derived from the configuration so they always describe the data
# directory and price column that were actually used.
price_tag = tag_list[tag_index]
contract_name = temp_path.rstrip('/').split('/')[-1]

fig = go.Figure()

# Actual values of the modelled column, in original units.
fig.add_trace(go.Scatter(
    x=np.arange(len(new_y_test)),
    y=new_y_test,
    mode='lines',
    name=f'Actual {price_tag}'
))

# Model output for every window of the loaded file.
fig.add_trace(go.Scatter(
    x=np.arange(len(test_predictions)),
    y=test_predictions,
    mode='lines',
    name=f'Predicted {price_tag} (Past)'
))

# Iterated forecast, drawn after the end of the in-sample predictions.
fig.add_trace(go.Scatter(
    x=np.arange(len(test_predictions), len(all_predictions)),
    y=future_predictions,
    mode='lines',
    name=f'Predicted {price_tag} (Future)',
    # line=dict(dash='dash')
))

# Update layout
fig.update_layout(
    title=f"{contract_name} Futures Price Prediction ({price_tag})",
    xaxis_title="Day",
    yaxis_title="Price",
    legend_title="Legend"
)

# Show plot
fig.show()


In [None]:
# Render each forecast with four decimals and display the list next to the
# name of the price column it was produced for.
future_num = [format(number, '.4f') for number in future_predictions]
(future_num, tag_list[tag_index])

HG-

OPEN =    -0.0390,-0.0404,-0.0393,-0.0384

HIGH =    -0.0385,-0.0365,-0.0355,-0.0346

LOW =     -0.0420,-0.0419,-0.0409,-0.0401

CLOSE=    -0.0405,-0.0388,-0.0379,-0.0370




ZW Dec Mar- 

OPEN -  -21.00
        -19.0099  -18.7063  -18.4336

HIGH - -20.75
       -18.3430   -18.1017  -17.9478

LOW-    -21.25
        -19.6267  -19.2615  -18.9909

CLOSE-  -21.00
        -19.3360  -18.9063  -18.6242

ZC
OPEN - 1.25
       1.1289  1.3446  1.5681

HIGH - 3.25
       0.9690  1.0174  1.0735

LOW-   1.25
       0.5462  0.7300  0.9205

CLOSE- 3.00
       2.0662     2.7128    3.4458

In [None]:
import plotly.graph_objs as go

# OHLC values for four bars; position k in each list belongs to bar k.
ohlc = {
    'Open': [-0.0390,-0.0404,-0.0393,-0.0384],
    'High': [-0.0385,-0.0365,-0.0355,-0.0346],
    'Low': [-0.0420,-0.0419,-0.0409,-0.0401],
    'Close': [-0.0405,-0.0388,-0.0379,-0.0370]
}

# Calendar dates for the bars. The chart below does not use them; it labels
# the bars by their position relative to day T instead.
dates = ['2024-08-25','2024-08-26', '2024-08-27','2024-08-28']

# One candlestick per bar, labelled T_real, T-1, T and T+1.
bar_labels = ['T_real','T-1','T','T+1']
candle = go.Candlestick(
    x=bar_labels,
    open=ohlc['Open'],
    high=ohlc['High'],
    low=ohlc['Low'],
    close=ohlc['Close'],
)

# Wrap the single trace in a figure, set the titles, and render it.
layout_options = dict(title='Candlesticks', xaxis_title='Date', yaxis_title='Price')
fig = go.Figure(data=[candle])
fig.update_layout(**layout_options)
fig.show()
