# Function Libraries

In [None]:
## Install Libraries
# NOTE(review): fastapi, python-multipart and uvicorn are not referenced anywhere
# in the visible notebook — confirm they are still needed before keeping this line.
!pip install fastapi python-multipart uvicorn
# kaleido is presumably here for the static image export done by
# `fig.write_image` in `candle_sticks` — confirm. -U upgrades an existing install.
!pip install -U kaleido

In [None]:
## Import Libraries
# Misc. Libraries (standard library)
import glob
import os
from datetime import datetime

# These will be used to load data
import numpy as np
import pandas as pd
# These will be used to create images
import matplotlib.pyplot as plt
# These are used to export an image
import plotly.io
from plotly.io import to_image
# This is the library for candlestick charts
import plotly.graph_objects as go
# Needed by CandlestickDataset, whose cell runs before the "Model Libraries" cell
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset

# Labeling Function

In [None]:
def labeling(df, percentiles=(.67, .33)):
    """Label every bar by its price change over the following three bars.

    Each distinct ``df['Date']`` is processed on its own so the look-ahead
    never reaches into the next day. Within a day, rows with a null
    ``Close_2m`` are discarded and ``SixMinChange`` is the close three rows
    later minus the current close (six minutes, assuming 2-minute bars).
    The quantile thresholds are then computed over all days together.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'Date'`` and ``'Close_2m'`` columns. Not modified.
    percentiles : sequence of two floats, optional
        ``(bull_quantile, bear_quantile)``. A change strictly above the first
        quantile is ``'Bullish'``, strictly below the second is ``'Bearish'``,
        anything else ``'Neutral'``. Defaults to the top / bottom third.

    Returns
    -------
    pandas.DataFrame
        Rows grouped by date (dates in order of first appearance) with the
        ``'SixMinChange'`` and ``'SixMinLabel'`` columns added. Rows without
        a three-bar look-ahead are dropped and the index is reset to 0..n-1.
        An input with no usable dates yields an empty frame that still has
        both new columns.
    """
    day_frames = []
    # groupby(sort=False) visits the dates in order of first appearance, the
    # same order a loop over df['Date'].unique() would use.
    for _, day_rows in df.groupby('Date', sort=False):
        single_day = day_rows.dropna(subset=['Close_2m'])
        # Calculate the 6 minute change in price
        six_min_change = single_day['Close_2m'].shift(-3) - single_day['Close_2m']
        day_frames.append(single_day.assign(SixMinChange=six_min_change))

    if not day_frames:
        # Nothing to label: hand back an empty frame with the expected schema
        # instead of failing on the missing 'SixMinChange' column.
        empty = df.iloc[0:0].copy()
        empty['SixMinChange'] = pd.Series(dtype='float64')
        empty['SixMinLabel'] = pd.Series(dtype='object')
        return empty.reset_index(drop=True)

    # Concatenate once; growing a frame inside the loop copies it every pass.
    labeled = pd.concat(day_frames, ignore_index=True)

    # These are the price changes that separate the labels (NaNs are ignored)
    bull_condition = labeled['SixMinChange'].quantile(percentiles[0])
    bear_condition = labeled['SixMinChange'].quantile(percentiles[1])

    # Create 'Label' column based on conditions
    labeled['SixMinLabel'] = 'Neutral'
    labeled.loc[labeled['SixMinChange'] > bull_condition, 'SixMinLabel'] = 'Bullish'
    labeled.loc[labeled['SixMinChange'] < bear_condition, 'SixMinLabel'] = 'Bearish'

    # Drop the trailing bars of each day, which have no look-ahead value
    return labeled.dropna(subset=['SixMinChange']).reset_index(drop=True)

# Momentum Indicator Creation

In [None]:
def _above_below(left, right):
  """Return 'Above', 'Below' or 'Neutral' per row for ``left`` relative to ``right``.

  Rows where either side is NaN compare as neither and stay 'Neutral'.
  """
  relation = pd.Series('Neutral', index=left.index)
  relation.loc[left > right] = 'Above'
  relation.loc[left < right] = 'Below'
  return relation


def momentum_columns(df):
  """Add moving-average and hourly-change features plus their categorical labels.

  Each distinct ``df['Date']`` is processed on its own so the rolling windows
  and the 30-row look-back never span two days. Window names assume
  2-minute bars (5 rows = 10 minutes, 30 rows = one hour).

  Numeric columns added: ``TenMinMovingAvg``, ``TwentyMinMovingAvg``,
  ``ThirtyMinMovingAvg`` (rolling means of ``Close_2m`` over 5 / 10 / 15 rows)
  and ``HourChange``.

  Label columns added, each 'Above' / 'Below' / 'Neutral':
  ``TenMinMALabel``, ``TwentyMinMALabel``, ``ThirtyMinMALabel`` (close vs. the
  moving average), ``TenCrossTwenty``, ``TenCrossThirty`` (10-minute average
  vs. the slower one) and ``HourChangeLabel`` (``HourChange`` vs. zero).

  The comparisons are made on the raw values; the original multiplied both
  sides by the same positive constant, which does not affect the ordering.

  Parameters
  ----------
  df : pandas.DataFrame
      Must contain ``'Date'`` and ``'Close_2m'`` columns. Not modified.

  Returns
  -------
  pandas.DataFrame
      Rows grouped by date (dates in order of first appearance) with the
      columns above. Rows lacking a 30-row look-back are dropped. The frame
      also carries an ``'index'`` column holding each surviving row's position
      before that drop.
  """
  day_frames = []
  # groupby(sort=False) visits the dates in order of first appearance.
  for _, day_rows in df.groupby('Date', sort=False):
    close = day_rows['Close_2m']
    day_frames.append(day_rows.assign(
        TenMinMovingAvg=close.rolling(window=5).mean(),
        TwentyMinMovingAvg=close.rolling(window=10).mean(),
        ThirtyMinMovingAvg=close.rolling(window=15).mean(),
        # NOTE(review): this is the close 30 rows earlier minus the current
        # close, so a positive value means price FELL over the hour. Kept
        # as-is because existing features may rely on this orientation.
        HourChange=close.shift(30) - close,
    ))

  if day_frames:
    # Concatenate once; growing a frame inside the loop copies it every pass.
    combined = pd.concat(day_frames, ignore_index=True)
  else:
    # No usable dates: build an empty frame with the same numeric columns so
    # the labelling below works instead of raising KeyError.
    no_values = pd.Series(dtype='float64')
    combined = df.iloc[0:0].assign(
        TenMinMovingAvg=no_values,
        TwentyMinMovingAvg=no_values,
        ThirtyMinMovingAvg=no_values,
        HourChange=no_values,
    ).reset_index(drop=True)

  ## Create Labels for Dummy Variables
  close = combined['Close_2m']
  ten_ma = combined['TenMinMovingAvg']
  twenty_ma = combined['TwentyMinMovingAvg']
  thirty_ma = combined['ThirtyMinMovingAvg']

  # Close relative to each moving average
  combined['TenMinMALabel'] = _above_below(close, ten_ma)
  combined['TwentyMinMALabel'] = _above_below(close, twenty_ma)
  combined['ThirtyMinMALabel'] = _above_below(close, thirty_ma)

  # Ten-minute average relative to the twenty- and thirty-minute averages
  combined['TenCrossTwenty'] = _above_below(ten_ma, twenty_ma)
  combined['TenCrossThirty'] = _above_below(ten_ma, thirty_ma)

  # Hourly change relative to zero
  combined['HourChangeLabel'] = _above_below(combined['HourChange'], 0)

  # Drop the first 30 rows of each day, which have no hourly look-back.
  # NOTE(review): reset_index() without drop=True keeps the old positions in an
  # 'index' column; preserved so the returned columns do not change.
  return combined.dropna(subset=['HourChange']).reset_index()

# Data Cleaning

In [None]:
def intraday_cleaning(data):
    """Split the ``Datetime`` column into UTC ``Date`` and ``Time`` columns.

    The frame is modified in place and also returned. ``Datetime`` is parsed
    as timezone-aware and converted to UTC, its calendar date and wall-clock
    time are stored in new ``Date`` and ``Time`` columns, and the
    ``Datetime`` and ``Adj Close`` columns are then removed.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``'Datetime'`` and ``'Adj Close'`` columns.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    # Parse once, normalised to UTC, and reuse for both derived columns
    utc_stamps = pd.to_datetime(data['Datetime'], utc=True)
    data['Datetime'] = utc_stamps
    data['Date'] = utc_stamps.dt.date
    data['Time'] = utc_stamps.dt.time
    # Adj Close is not used, and Datetime is now redundant
    data.drop(columns=['Datetime', 'Adj Close'], inplace=True)
    return data

# Candlestick Creation

In [None]:
def candle_sticks(data, image_folder_base, ticker, candles, step):
    """Render sliding-window candlestick charts and save them by label.

    For every distinct ``data['Date']`` a window of ``candles`` consecutive
    rows is moved through that day's rows ``step`` rows at a time. Each
    window is drawn as a plotly candlestick chart and written to
    ``{image_folder_base}/{label}/``, where ``label`` is the ``SixMinLabel``
    of the window's last row. Days with fewer than ``candles`` rows produce
    no images.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs ``'Date'``, ``'Time'``, ``'Open_2m'``, ``'High_2m'``,
        ``'Low_2m'``, ``'Close_2m'`` and ``'SixMinLabel'`` columns.
    image_folder_base : str
        Folder that receives one sub-folder per label (created on demand).
    ticker : str
        Instrument name, used only in the file names.
    candles : int
        Number of rows per chart; must be at least 1.
    step : int
        Rows to advance between consecutive charts; must be at least 1.

    Raises
    ------
    ValueError
        If ``candles`` or ``step`` is smaller than 1. A non-positive step
        would otherwise never advance the window.
    """
    if candles < 1:
        raise ValueError(f"candles must be at least 1, got {candles}")
    if step < 1:
        raise ValueError(f"step must be at least 1, got {step}")

    for date in data['Date'].unique():
        day_df = data.loc[data['Date'] == date]

        # Every start position whose window still fits inside the day
        for start_candle in range(0, len(day_df) - candles + 1, step):
            end_candle = start_candle + candles
            sequence = day_df.iloc[start_candle:end_candle]

            # Create candlestick chart
            fig = go.Figure(data=[go.Candlestick(x=sequence['Time'],
                                                 open=sequence['Open_2m'],
                                                 high=sequence['High_2m'],
                                                 low=sequence['Low_2m'],
                                                 close=sequence['Close_2m'])])

            # The label of the last candlestick decides the target folder
            label = sequence['SixMinLabel'].iloc[-1]
            output_folder = f'{image_folder_base}/{label}'
            # write_image does not create missing directories
            os.makedirs(output_folder, exist_ok=True)

            # Write to a JPEG file
            image_file = f"{output_folder}/candles_{date}_{ticker}_{start_candle}_{end_candle}_label_{label}.jpeg"
            fig.write_image(image_file)

# Candlestick Image Dataset

This class loads the candlestick images into a dataset, using the label folders stored in Google Drive.

In [None]:
class CandlestickDataset(Dataset):
    """Image dataset read from a folder that has one sub-folder per label.

    Every visible file inside ``root_dir/<label>/`` becomes one sample whose
    class is ``<label>``. Label names are converted to integer codes with a
    ``LabelEncoder``; the fitted encoder is kept on ``self.label_encoder`` so
    codes can be mapped back to names.

    Attributes
    ----------
    root_dir : str
        Folder that was scanned.
    transform : callable or None
        Applied to each RGB PIL image in ``__getitem__`` when given.
    image_paths : list of str
        Sample file paths, sorted by label folder and then by file name.
    labels : array of int
        Encoded label for each entry of ``image_paths``.
    label_encoder : LabelEncoder
        Encoder fitted on the label folder names.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform

        self.image_paths, label_names = self._collect_samples(root_dir)

        # Encode labels; keep the encoder so codes can be decoded later
        self.label_encoder = LabelEncoder()
        self.labels = self.label_encoder.fit_transform(label_names)

    @staticmethod
    def _collect_samples(root_dir):
        """Return ``(image_paths, label_names)`` for every visible file under ``root_dir``.

        Listings are sorted because ``os.listdir`` order is arbitrary, which
        would make the sample order differ between runs. Hidden entries
        (names starting with '.', e.g. ``.ipynb_checkpoints``) are skipped so
        they cannot turn into an extra class or an unreadable sample.
        """
        image_paths = []
        label_names = []
        for label in sorted(os.listdir(root_dir)):
            label_dir = os.path.join(root_dir, label)
            if label.startswith('.') or not os.path.isdir(label_dir):
                continue
            for image_file in sorted(os.listdir(label_dir)):
                image_path = os.path.join(label_dir, image_file)
                if image_file.startswith('.') or not os.path.isfile(image_path):
                    continue
                image_paths.append(image_path)
                label_names.append(label)  # Store original labels
        return image_paths, label_names

    def __len__(self):
        """Number of samples found under ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is loaded as RGB and passed through ``self.transform`` when
        one was supplied; ``label`` is the encoded integer class.
        """
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # convert() reads the pixel data into a new image, so the file can be
        # closed straight away instead of staying open until garbage collection
        with Image.open(img_path) as source_image:
            image = source_image.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label

# Model

In [None]:
## Model Libraries
# PyTorch and dependencies
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
# Model
import torchvision.models as models
from torchvision.models import densenet121
# For image/data loading and export
from PIL import Image
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

This is a sample workflow that loads images from a folder, runs them through the model, and outputs the predictions.

In [None]:
## Colab only: mounts the Google Drive that holds the images, weights and results.
## force_remount=True re-mounts even when the drive is already attached.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

## Preprocessing applied to every image: resize to 224x224, convert to a tensor,
## then normalise each of the three channels with mean 0.5 and std 0.5.
## NOTE(review): this should match the preprocessing used when the weights were
## trained — confirm.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
## Number of images per inference batch
test_batch_size = 256

## Google Drive folder with the Nasdaq (NQF) test images, one sub-folder per label
nq_path =  '/content/drive/MyDrive/Algo_Trader/FuturesTrading/CNN Model/Images/TestImages/NQF'
## Build the dataset with the CandlestickDataset class and the preprocessing above
nq_set = CandlestickDataset(root_dir=nq_path, transform=test_transform)
## Wrap the dataset in a DataLoader so images are served in batches.
## No shuffle argument is passed, so the loader's default ordering applies.
nq_loader = DataLoader(nq_set, batch_size=test_batch_size, pin_memory=True)

In [None]:
## Setting up the model
## This is the file path to the model weights
model_weights = '/content/drive/MyDrive/Algo_Trader/FuturesTrading/CNN Model/Data/Weights/DenseNet-121_epoch_19.pt'

## Use the GPU when one is available, otherwise fall back to the CPU.
## (Previously `device` was never defined, so this cell failed on a fresh kernel.)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## This loads the model architecture without pretrained weights. Calling
## densenet121() with no arguments does that on both the old `pretrained=` and
## the newer `weights=` torchvision APIs, and avoids the deprecation warning.
model = densenet121()

## The original architecture has 1000 neurons in the output layer, we only need 3 so this adjusts that
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, 3)

## This loads the weights into the model. map_location lets a checkpoint that
## was saved on a GPU load on a CPU-only runtime as well.
## NOTE: torch.load unpickles the file — only load checkpoints you trust.
loaded_weights = torch.load(model_weights, map_location=device)
model.load_state_dict(loaded_weights)

## This moves the model to the selected device. Assigning the result keeps the
## cell from printing the full model structure as its output.
model = model.to(device)

In [None]:
all_predictions = []  # Store predictions for individual images
output_values = []  # Store output values for individual images
true_labels = [] # Store the true labels for individual images
model.eval()  # Set model to evaluation mode

## Run inference on whichever device the model's parameters live on, so this
## cell does not depend on a separately defined `device` variable.
inference_device = next(model.parameters()).device

## This is the inference/prediction code
with torch.no_grad():
    for images, labels in nq_loader:
        # Only the images need to be on the model's device; the labels are
        # just recorded, so they are not moved.
        images = images.to(inference_device)

        # Predicts values for each class
        outputs = model(images)

        # Calculates the probabilities for each class using Softmax
        probabilities = F.softmax(outputs, dim=1)

        # Finds the class with the highest probability for each image
        _, predicted = probabilities.max(1)

        # Collect predictions and output values
        all_predictions.extend(predicted.cpu().numpy())
        output_values.extend(probabilities.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

# Stack the per-image probability vectors into an (images, 3) array
output_values_flat = np.array(output_values).reshape(-1, 3)  # Assuming 3 classes

# Columns of the results table: predicted and actual class codes plus the
# probability the model assigned to each of the three classes
data = {
    'Predicted_Class': all_predictions,
    'Actual_Class':true_labels,
    'Class_0_Score': output_values_flat[:, 0],
    'Class_1_Score': output_values_flat[:, 1],
    'Class_2_Score': output_values_flat[:, 2]
}

# Create a DataFrame with above outputs generated by the model
df = pd.DataFrame(data)

# Export to a CSV file
df.to_csv('/content/drive/MyDrive/Algo_Trader/FuturesTrading/CNN Model/Data/Results/model_results.csv')