In [2]:
import cv2
import pandas as pd
import re
import numpy as np
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score, precision_score, recall_score, fbeta_score, roc_auc_score
from sklearn.model_selection import train_test_split
from keras.models import Sequential, load_model
from keras.layers import Conv2D, LeakyReLU, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.initializers import GlorotUniform

2023-12-30 16:15:21.464138: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
TIMEFRAMES = [14, 30, 90, 180, 365]
PREDICTIONS = [5, 30, 90]
IMG_TYPES = ['OHLC', 'ColoredOHLC', 'Line', 'AlgoTrading']

labels = pd.read_csv('labels.csv')

In [31]:
# Find bounding boxes of all 4 image sizes
def find_bounding_box(img_path):
    # Load the image
    img = cv2.imread(img_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Find all non-black pixels
    coords = cv2.findNonZero(gray)
    
    # Find the bounding box of those pixels
    x, y, w, h = cv2.boundingRect(coords)
    print(x, y, w, h)

# Define the paths to your images
image_paths = [
    'images/Line/AI&Robotics_2019-01-15 00:00:00_14.png',
    'images/Line/AI&Robotics_2019-01-24 00:00:00_30.png',
    'images/Line/AI&Robotics_2019-03-28 00:00:00_90.png',
    'images/Line/AI&Robotics_2019-06-20 00:00:00_180.png', 
    'images/Line/AI&Robotics_2019-12-26 00:00:00_365.png'   
]

# Crop each image and save the result
for path in image_paths:
    cropped_image = find_bounding_box(path)

80 100 85 120
80 100 132 120
80 100 226 120
80 100 414 120
80 100 602 120


In [5]:
# Crop the images using the bounding boxes
def crop_image(img_path):
    # Load the image in grayscale
    img = cv2.imread(img_path, 0)

    # Check if the image was loaded correctly
    if img is None:
        raise ValueError(f"Image at {img_path} not found. Please check the path.")

    # Use regular expression to match numbers followed by ".png" at the end of the filename
    match = re.search(r'(\d+)(?=\.png$)', img_path)
    
    # Check if we found a match
    if match:
        # Extract the number from the matched group
        number = int(match.group(1))
        
        # Check if the number is one of the specified values
        if number == 14:
            # Crop the image using the bounding rectangle
            crop = img[100:100+120, 80:80+85]
        elif number == 30:
            # Crop the image using the bounding rectangle
            crop = img[100:100+120, 80:80+132]
        elif number == 90:
            # Crop the image using the bounding rectangle
            crop = img[100:100+120, 80:80+226]
        elif number == 180:
            # Crop the image using the bounding rectangle
            crop = img[100:100+120, 80:80+414]
        elif number == 365:
            # Crop the image using the bounding rectangle
            crop = img[100:100+120, 80:80+602]
    return crop

# Example usage:
filenames = labels['Image'].values.tolist()

# Testing the function with the provided list of filenames
for name in filenames:
    try:
        cropped_image = crop_image(name)
        # Construct the new path for the cropped image
        new_path = name.replace('.png', '_cropped.png')
        # Save the cropped image
        cv2.imwrite(new_path, cropped_image)
    except ValueError as e:
        print(e)


In [6]:
# Create a new column called 'Image' that contains the path to the cropped image but only if they 
labels['Image'] = labels['Image'].str.replace('.png', '_cropped.png')

  labels['Image'] = labels['Image'].str.replace('.png', '_cropped.png')


In [7]:
# Function to load and convert an image to grayscale
def load_image(image_path):
    # Load image in grayscale
    image = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError(f"Unable to load image at path: {image_path}")
    return image

filenames = labels['Image'].values.tolist()

images = []

for name in filenames:
    try:
        img = load_image(name)
        images.append(img)
    except ValueError as e:
        print(e)

# Add a new column to the labels DataFrame to store the image arrays
labels['Image_Array'] = images

In [8]:
# Sort the DataFrame by date
labels['Date'] = labels['Image'].str.extract(r'(\d{4}-\d{2}-\d{2})')
labels['Date'] = pd.to_datetime(labels['Date'])
labels = labels.sort_values(by='Date') 
print(labels.head())

                                                  Image  TimePrediction  \
0     images/OHLC/S&P500_2019-02-01 00:00:00_14_crop...               5   
1149  images/OHLC/DAX_2019-02-01 00:00:00_180_croppe...              90   
6531  images/Line/EmergingMarkets_2019-02-01 00:00:0...              30   
6530  images/Line/EmergingMarkets_2019-02-01 00:00:0...               5   
6500  images/Line/EmergingMarkets_2019-02-01 00:00:0...               5   

       LastPrice  FuturePrice  Label  \
0     270.059998   270.140015      1   
1149   25.042000    27.209999      1   
6531   42.759998    42.549999      0   
6530   42.759998    42.400002      0   
6500   42.759998    42.400002      0   

                                            Image_Array       Date  
0     [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 2019-02-01  
1149  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 2019-02-01  
6531  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13... 2019-02-01  
6530  [[0, 0, 0, 0, 0, 0, 0, 0, 

In [9]:
# Show the number of 1s and 0s in the dataset
print(labels['Label'].value_counts())

1    6620
0    3780
Name: Label, dtype: int64


In [39]:
'''def create_cnn_model(num_blocks, initial_filters=64, input_shape=()):
    model = Sequential()

    for i in range(num_blocks):
        # Adjust the number of filters
        filters = initial_filters * (2 ** i)

        # Set dilation rate and strides
        strides = (1, 3)

        # Add Convolutional layer
        model.add(Conv2D(filters, kernel_size=(5, 3), strides=strides, padding='same',
                         input_shape=input_shape if i == 0 else model.output_shape[1:]))

        # Add LeakyReLU layer
        model.add(LeakyReLU(alpha=0.01))

        # Add MaxPooling layer only if dimensions allow
        if model.output_shape[1] >= 2 and model.output_shape[2] >= 2:
            model.add(MaxPooling2D(pool_size=(2, 2)))

    # Add Flatten and Dense layers for final prediction
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='softmax')) 

    # Compile the model with binary cross-entropy loss and Adam optimizer
    model.compile(optimizer=Adam(learning_rate=1e-5), loss='binary_crossentropy', metrics=['accuracy'])

    return model'''

In [17]:
def create_cnn_model(num_blocks, initial_filters=64, input_shape=()):
    # Determine the number of blocks based on the input image size
    if num_blocks == 2:
        dilation_rate = (1, 1)
    elif num_blocks == 3:
        dilation_rate = (1, 2)
    else:
        dilation_rate = (1, 3)

    model = Sequential()

    for i in range(num_blocks):
        # Adjust the number of filters
        filters = initial_filters * (2 ** i)

        # Add Convolutional layer
        if i == 0:  # Apply specific strides and dilation rate only for the first layer
            model.add(Conv2D(filters, kernel_size=(5, 3), strides=(1, 3), padding='same', 
                             input_shape=input_shape if i == 0 else None))
            model.add(Conv2D(filters, kernel_size=(5, 3), strides=(1, 1), dilation_rate=dilation_rate, padding='same', 
                             input_shape=input_shape if i == 0 else None))
        else:
            model.add(Conv2D(filters, kernel_size=(5, 3), padding='same'))

        # Add LeakyReLU layer
        model.add(LeakyReLU(alpha=0.01))

        # Add MaxPooling layer
        model.add(MaxPooling2D(pool_size=(2, 1)))

    # Add Flatten and Dense layers for final prediction
    dense_units = {2: 64, 3: 128, 4: 264}.get(num_blocks, 64) 
    model.add(Flatten())
    model.add(Dense(dense_units, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [18]:
# Train and evaluate the model for each img_type, timeframe, and prediction

evaluation_df = pd.DataFrame(columns=['Image_Type', 'Timeframe', 'Prediction', 'Accuracy', 'Precision', 'Recall', 'F1_Score', 'Hit_Rate', 'Average_RoR'])

for img_type in IMG_TYPES:
    for timeframe in TIMEFRAMES:
        for prediction in PREDICTIONS:
            if prediction < timeframe:
                print(f"Predicting {prediction} days ahead using {img_type} images with {timeframe} days timeframe.")
                
                # Filter your data based on prediction, img_type, and timeframe
                data = labels[(labels['TimePrediction'] == prediction) & (labels['Image'].str.contains(img_type)) & (labels['Image'].str.contains(f'_{timeframe}_'))]
                data = data.reset_index(drop=True)  # Reset the index to maintain temporal order
                
                # Create and compile your CNN model based on timeframe
                if timeframe == 14:
                    model = create_cnn_model(2, input_shape=(120, 85, 1))
                elif timeframe == 30:
                    model = create_cnn_model(3, input_shape=(120, 132, 1))
                elif timeframe == 90:
                    model = create_cnn_model(3, input_shape=(120, 226, 1))
                elif timeframe == 180:
                    model = create_cnn_model(4, input_shape=(120, 414, 1))
                elif timeframe == 365:
                    model = create_cnn_model(4, input_shape=(120, 602, 1))
                
                X = np.array(data['Image_Array'].tolist()) / 255.0
                lastPrice = np.array(data['LastPrice'].tolist())
                futurePrice = np.array(data['FuturePrice'].tolist())
                y = data['Label']
                # Split the data into training and validation sets in temporal order
                split_index = int(len(X) * 0.7)
                X_train, X_test = X[:split_index], X[split_index:]
                y_train, y_test = y[:split_index], y[split_index:]
                lastPrice_test = lastPrice[split_index:]
                futurePrice_test = futurePrice[split_index:]

                # Shuffle test data
                X_test, y_test, lastPrice_test, futurePrice_test = shuffle(X_test, y_test, lastPrice_test, futurePrice_test, random_state=0)
                
                # Train the model on 10 windows of data
                window_size = len(X_train)/10
                num_windows = 10

                for epoch in range(10):  # 10 epochs
                    start = 0
                    end = int(window_size * (epoch + 1))
                    X_temp = X_train[start:end]
                    y_temp = y_train[start:end]
                    split_index = int(len(X) * 0.9)
                    X_train_temp, X_val_temp = X_temp[:split_index], X_temp[split_index:]
                    y_train_temp, y_val_temp = y_temp[:split_index], y_temp[split_index:]
                    model.fit(X_train_temp, y_train_temp, batch_size=32, epochs=1, validation_data=(X_val_temp, y_val_temp))            
                
                # Evaluate the model on test data
                y_pred = model.predict(X_test)
                # Convert predictions to binary: if > 0.5 then 1 else 0
                y_pred_binary = np.where(y_pred > 0.5, 1, 0)
                
                accuracy = accuracy_score(y_test, y_pred_binary)
                precision = precision_score(y_test, y_pred_binary)
                recall = recall_score(y_test, y_pred_binary)
                f1_score = fbeta_score(y_test, y_pred_binary, beta=1)

                y_test_array = y_test.values.ravel()  # Convert y_test to a 1D NumPy array if it's a pandas Series
                correct_predictions = np.sum(y_pred_binary.ravel() == y_test_array)
                hit_rate = correct_predictions / len(y_test_array)

                # Calculate Rate of Return RoR
                ror = np.where(y_pred_binary == 1, ((futurePrice_test - lastPrice_test)/lastPrice_test), 0)
                # Drop all 0s from the array
                ror = ror[ror != 0]
                # Calculate average RoR
                avg_ror = np.mean(ror)
                
                print("Evaluation Metrics:")
                print(f"Accuracy: {accuracy}")
                print(f"Precision: {precision}")
                print(f"Recall: {recall}")
                print(f"F1 Score: {f1_score}")
                print(f"Hit Rate: {hit_rate}")
                print(f"Average RoR: {avg_ror}")
                
                # Add the evaluation metrics to the DataFrame
                evaluation_df = evaluation_df.append({
                    'Image_Type': img_type,
                    'Timeframe': timeframe,
                    'Prediction': prediction,
                    'Accuracy': accuracy,
                    'Precision': precision,
                    'Recall': recall,
                    'F1_Score': f1_score,
                    'Hit_Rate': hit_rate,
                    'Average_RoR': avg_ror
                }, ignore_index=True)
                
                # Save the model
                model.save(f"models/{img_type}_{timeframe}_{prediction}.h5")
                print(f"Model saved as {img_type}_{timeframe}_{prediction}.h5")
                print("--------------------------------------------------")

# Save the evaluation DataFrame to a CSV file
evaluation_df.to_csv('evaluation_scores.csv', index=False)
print("Evaluation scores saved to 'evaluation_scores.csv'.")

Predicting 5 days ahead using OHLC images with 14 days timeframe.
Evaluation Metrics:
Accuracy: 0.8194444444444444
Precision: 0.93
Recall: 0.8303571428571429
F1 Score: 0.8773584905660378
Hit Rate: 0.8194444444444444
Average RoR: 0.011492027359999997


  saving_api.save_model(


Model saved as OHLC_14_5.h5
--------------------------------------------------
Predicting 5 days ahead using OHLC images with 30 days timeframe.
Evaluation Metrics:
Accuracy: 0.5785714285714286
Precision: 0.9574468085106383
Recall: 0.4411764705882353
F1 Score: 0.6040268456375839
Hit Rate: 0.5785714285714286
Average RoR: 0.010986253385571325


  saving_api.save_model(


Model saved as OHLC_30_5.h5
--------------------------------------------------
Predicting 5 days ahead using OHLC images with 90 days timeframe.
Evaluation Metrics:
Accuracy: 0.562962962962963
Precision: 0.8888888888888888
Recall: 0.425531914893617
F1 Score: 0.5755395683453238
Hit Rate: 0.562962962962963
Average RoR: 0.010929760158341078


  saving_api.save_model(


Model saved as OHLC_90_5.h5
--------------------------------------------------
Predicting 30 days ahead using OHLC images with 90 days timeframe.
Evaluation Metrics:
Accuracy: 0.9037037037037037
Precision: 0.9425287356321839
Recall: 0.9111111111111111
F1 Score: 0.9265536723163842
Hit Rate: 0.9037037037037037
Average RoR: 0.013044802382188627


  saving_api.save_model(


Model saved as OHLC_90_30.h5
--------------------------------------------------
Predicting 5 days ahead using OHLC images with 180 days timeframe.
Evaluation Metrics:
Accuracy: 0.5538461538461539
Precision: 0.6805555555555556
Recall: 0.5833333333333334
F1 Score: 0.6282051282051283
Hit Rate: 0.5538461538461539
Average RoR: 0.007769163611704977


  saving_api.save_model(


Model saved as OHLC_180_5.h5
--------------------------------------------------
Predicting 30 days ahead using OHLC images with 180 days timeframe.
Evaluation Metrics:
Accuracy: 0.6923076923076923
Precision: 0.7435897435897436
Recall: 0.7435897435897436
F1 Score: 0.7435897435897437
Hit Rate: 0.6923076923076923
Average RoR: 0.011473286946343236


  saving_api.save_model(


Model saved as OHLC_180_30.h5
--------------------------------------------------
Predicting 90 days ahead using OHLC images with 180 days timeframe.
Evaluation Metrics:
Accuracy: 0.9076923076923077
Precision: 0.926829268292683
Recall: 0.926829268292683
F1 Score: 0.926829268292683
Hit Rate: 0.9076923076923077
Average RoR: 0.014310004436722477


  saving_api.save_model(


Model saved as OHLC_180_90.h5
--------------------------------------------------
Predicting 5 days ahead using OHLC images with 365 days timeframe.
Evaluation Metrics:
Accuracy: 0.41379310344827586
Precision: 0.3103448275862069
Recall: 0.391304347826087
F1 Score: 0.34615384615384615
Hit Rate: 0.41379310344827586
Average RoR: -0.002821983920101746


  saving_api.save_model(


Model saved as OHLC_365_5.h5
--------------------------------------------------
Predicting 30 days ahead using OHLC images with 365 days timeframe.
Evaluation Metrics:
Accuracy: 0.4051724137931034
Precision: 0.3465346534653465
Recall: 0.9210526315789473
F1 Score: 0.5035971223021584
Hit Rate: 0.4051724137931034
Average RoR: -0.016397223272783493


  saving_api.save_model(


Model saved as OHLC_365_30.h5
--------------------------------------------------
Predicting 90 days ahead using OHLC images with 365 days timeframe.
Evaluation Metrics:
Accuracy: 0.7327586206896551
Precision: 0.6
Recall: 0.8372093023255814
F1 Score: 0.6990291262135923
Hit Rate: 0.7327586206896551
Average RoR: -0.03139105716259891


  saving_api.save_model(


Model saved as OHLC_365_90.h5
--------------------------------------------------
Predicting 5 days ahead using ColoredOHLC images with 14 days timeframe.
Evaluation Metrics:
Accuracy: 0.7222222222222222
Precision: 0.95
Recall: 0.6785714285714286
F1 Score: 0.7916666666666667
Hit Rate: 0.7222222222222222
Average RoR: 0.011492027359999997


  saving_api.save_model(


Model saved as ColoredOHLC_14_5.h5
--------------------------------------------------
Predicting 5 days ahead using ColoredOHLC images with 30 days timeframe.
Evaluation Metrics:
Accuracy: 0.6714285714285714
Precision: 0.8181818181818182
Recall: 0.7058823529411765
F1 Score: 0.7578947368421053
Hit Rate: 0.6714285714285714
Average RoR: 0.010997937347095874


  saving_api.save_model(


Model saved as ColoredOHLC_30_5.h5
--------------------------------------------------
Predicting 5 days ahead using ColoredOHLC images with 90 days timeframe.
Evaluation Metrics:
Accuracy: 0.7794117647058824
Precision: 0.9
Recall: 0.7659574468085106
F1 Score: 0.8275862068965516
Hit Rate: 0.7794117647058824
Average RoR: 0.010929760158341077


  saving_api.save_model(


Model saved as ColoredOHLC_90_5.h5
--------------------------------------------------
Predicting 30 days ahead using ColoredOHLC images with 90 days timeframe.
Evaluation Metrics:
Accuracy: 0.8676470588235294
Precision: 0.86
Recall: 0.9555555555555556
F1 Score: 0.9052631578947369
Hit Rate: 0.8676470588235294
Average RoR: 0.012140858235687321


  saving_api.save_model(


Model saved as ColoredOHLC_90_30.h5
--------------------------------------------------
Predicting 5 days ahead using ColoredOHLC images with 180 days timeframe.
Evaluation Metrics:
Accuracy: 0.5692307692307692
Precision: 0.625
Recall: 0.8333333333333334
F1 Score: 0.7142857142857143
Hit Rate: 0.5692307692307692
Average RoR: 0.007900970443083374


  saving_api.save_model(


Model saved as ColoredOHLC_180_5.h5
--------------------------------------------------
Predicting 30 days ahead using ColoredOHLC images with 180 days timeframe.
Evaluation Metrics:
Accuracy: 0.5846153846153846
Precision: 0.6363636363636364
Recall: 0.717948717948718
F1 Score: 0.674698795180723
Hit Rate: 0.5846153846153846
Average RoR: 0.01150961685661354


  saving_api.save_model(


Model saved as ColoredOHLC_180_30.h5
--------------------------------------------------
Predicting 90 days ahead using ColoredOHLC images with 180 days timeframe.
Evaluation Metrics:
Accuracy: 0.8769230769230769
Precision: 0.9024390243902439
Recall: 0.9024390243902439
F1 Score: 0.9024390243902439
Hit Rate: 0.8769230769230769
Average RoR: 0.01483491280607999


  saving_api.save_model(


Model saved as ColoredOHLC_180_90.h5
--------------------------------------------------
Predicting 5 days ahead using ColoredOHLC images with 365 days timeframe.
Evaluation Metrics:
Accuracy: 0.3620689655172414
Precision: 0.3409090909090909
Recall: 0.6521739130434783
F1 Score: 0.44776119402985076
Hit Rate: 0.3620689655172414
Average RoR: -0.0029026247221197953


  saving_api.save_model(


Model saved as ColoredOHLC_365_5.h5
--------------------------------------------------
Predicting 30 days ahead using ColoredOHLC images with 365 days timeframe.
Evaluation Metrics:
Accuracy: 0.39655172413793105
Precision: 0.35185185185185186
Recall: 1.0
F1 Score: 0.5205479452054794
Hit Rate: 0.39655172413793105
Average RoR: -0.01633733295140135


  saving_api.save_model(


Model saved as ColoredOHLC_365_30.h5
--------------------------------------------------
Predicting 90 days ahead using ColoredOHLC images with 365 days timeframe.
Evaluation Metrics:
Accuracy: 0.39655172413793105
Precision: 0.375
Recall: 1.0
F1 Score: 0.5454545454545454
Hit Rate: 0.39655172413793105
Average RoR: -0.03241503116509543


  saving_api.save_model(


Model saved as ColoredOHLC_365_90.h5
--------------------------------------------------
Predicting 5 days ahead using Line images with 14 days timeframe.
Evaluation Metrics:
Accuracy: 0.875
Precision: 0.9607843137254902
Recall: 0.875
F1 Score: 0.9158878504672897
Hit Rate: 0.875
Average RoR: 0.011492027359999997


  saving_api.save_model(


Model saved as Line_14_5.h5
--------------------------------------------------
Predicting 5 days ahead using Line images with 30 days timeframe.
Evaluation Metrics:
Accuracy: 0.6857142857142857
Precision: 0.9166666666666666
Recall: 0.6346153846153846
F1 Score: 0.75
Hit Rate: 0.6857142857142857
Average RoR: 0.011793101932310339


  saving_api.save_model(


Model saved as Line_30_5.h5
--------------------------------------------------
Predicting 5 days ahead using Line images with 90 days timeframe.
Evaluation Metrics:
Accuracy: 0.6764705882352942
Precision: 0.8157894736842105
Recall: 0.6739130434782609
F1 Score: 0.7380952380952381
Hit Rate: 0.6764705882352942
Average RoR: 0.010372807192088364


  saving_api.save_model(


Model saved as Line_90_5.h5
--------------------------------------------------
Predicting 30 days ahead using Line images with 90 days timeframe.
Evaluation Metrics:
Accuracy: 0.75
Precision: 0.8461538461538461
Recall: 0.75
F1 Score: 0.7951807228915662
Hit Rate: 0.75
Average RoR: 0.011541999111647444


  saving_api.save_model(


Model saved as Line_90_30.h5
--------------------------------------------------
Predicting 5 days ahead using Line images with 180 days timeframe.
Evaluation Metrics:
Accuracy: 0.47692307692307695
Precision: 0.5952380952380952
Recall: 0.5952380952380952
F1 Score: 0.5952380952380952
Hit Rate: 0.47692307692307695
Average RoR: 0.007514036864494548


  saving_api.save_model(


Model saved as Line_180_5.h5
--------------------------------------------------
Predicting 30 days ahead using Line images with 180 days timeframe.
Evaluation Metrics:
Accuracy: 0.6461538461538462
Precision: 0.66
Recall: 0.8461538461538461
F1 Score: 0.7415730337078651
Hit Rate: 0.6461538461538462
Average RoR: 0.011245496535023968


  saving_api.save_model(


Model saved as Line_180_30.h5
--------------------------------------------------
Predicting 90 days ahead using Line images with 180 days timeframe.
Evaluation Metrics:
Accuracy: 0.8769230769230769
Precision: 0.9024390243902439
Recall: 0.9024390243902439
F1 Score: 0.9024390243902439
Hit Rate: 0.8769230769230769
Average RoR: 0.01600471594096922


  saving_api.save_model(


Model saved as Line_180_90.h5
--------------------------------------------------
Predicting 5 days ahead using Line images with 365 days timeframe.
Evaluation Metrics:
Accuracy: 0.41379310344827586
Precision: 0.4
Recall: 0.9565217391304348
F1 Score: 0.5641025641025641
Hit Rate: 0.41379310344827586
Average RoR: -0.00292423359258412


  saving_api.save_model(


Model saved as Line_365_5.h5
--------------------------------------------------
Predicting 30 days ahead using Line images with 365 days timeframe.
Evaluation Metrics:
Accuracy: 0.25862068965517243
Precision: 0.2727272727272727
Recall: 0.8333333333333334
F1 Score: 0.4109589041095891
Hit Rate: 0.25862068965517243
Average RoR: -0.017273287091736842


  saving_api.save_model(


Model saved as Line_365_30.h5
--------------------------------------------------
Predicting 90 days ahead using Line images with 365 days timeframe.
Evaluation Metrics:
Accuracy: 0.6896551724137931
Precision: 0.5517241379310345
Recall: 0.7619047619047619
F1 Score: 0.64
Hit Rate: 0.6896551724137931
Average RoR: -0.03309965395855883


  saving_api.save_model(


Model saved as Line_365_90.h5
--------------------------------------------------
Predicting 5 days ahead using AlgoTrading images with 14 days timeframe.
Evaluation Metrics:
Accuracy: 0.8194444444444444
Precision: 0.9387755102040817
Recall: 0.8214285714285714
F1 Score: 0.8761904761904763
Hit Rate: 0.8194444444444444
Average RoR: 0.011492027359999994


  saving_api.save_model(


Model saved as AlgoTrading_14_5.h5
--------------------------------------------------
Predicting 5 days ahead using AlgoTrading images with 30 days timeframe.
Evaluation Metrics:
Accuracy: 0.7285714285714285
Precision: 0.9696969696969697
Recall: 0.64
F1 Score: 0.7710843373493975
Hit Rate: 0.7285714285714285
Average RoR: 0.010493673635674302


  saving_api.save_model(


Model saved as AlgoTrading_30_5.h5
--------------------------------------------------
Predicting 5 days ahead using AlgoTrading images with 90 days timeframe.
Evaluation Metrics:
Accuracy: 0.6470588235294118
Precision: 0.7169811320754716
Recall: 0.8085106382978723
F1 Score: 0.76
Hit Rate: 0.6470588235294118
Average RoR: 0.011151480207671276


  saving_api.save_model(


Model saved as AlgoTrading_90_5.h5
--------------------------------------------------
Predicting 30 days ahead using AlgoTrading images with 90 days timeframe.
Evaluation Metrics:
Accuracy: 0.7352941176470589
Precision: 0.9354838709677419
Recall: 0.6444444444444445
F1 Score: 0.763157894736842
Hit Rate: 0.7352941176470589
Average RoR: 0.012832535901444167


  saving_api.save_model(


Model saved as AlgoTrading_90_30.h5
--------------------------------------------------
Predicting 5 days ahead using AlgoTrading images with 180 days timeframe.
Evaluation Metrics:
Accuracy: 0.6461538461538462
Precision: 0.6461538461538462
Recall: 1.0
F1 Score: 0.7850467289719627
Hit Rate: 0.6461538461538462
Average RoR: 0.007900970443083376


  saving_api.save_model(


Model saved as AlgoTrading_180_5.h5
--------------------------------------------------
Predicting 30 days ahead using AlgoTrading images with 180 days timeframe.
Evaluation Metrics:
Accuracy: 0.6
Precision: 0.6
Recall: 1.0
F1 Score: 0.7499999999999999
Hit Rate: 0.6
Average RoR: 0.01143695703607293


  saving_api.save_model(


Model saved as AlgoTrading_180_30.h5
--------------------------------------------------
Predicting 90 days ahead using AlgoTrading images with 180 days timeframe.
Evaluation Metrics:
Accuracy: 0.8769230769230769
Precision: 0.9024390243902439
Recall: 0.9024390243902439
F1 Score: 0.9024390243902439
Hit Rate: 0.8769230769230769
Average RoR: 0.014834912806079991


  saving_api.save_model(


Model saved as AlgoTrading_180_90.h5
--------------------------------------------------
Predicting 5 days ahead using AlgoTrading images with 365 days timeframe.
Evaluation Metrics:
Accuracy: 0.4482758620689655
Precision: 0.41509433962264153
Recall: 0.9565217391304348
F1 Score: 0.5789473684210527
Hit Rate: 0.4482758620689655
Average RoR: -0.002815080789779799


  saving_api.save_model(


Model saved as AlgoTrading_365_5.h5
--------------------------------------------------
Predicting 30 days ahead using AlgoTrading images with 365 days timeframe.
Evaluation Metrics:
Accuracy: 0.3275862068965517
Precision: 0.3275862068965517
Recall: 1.0
F1 Score: 0.49350649350649356
Hit Rate: 0.3275862068965517
Average RoR: -0.016095763437278768


  saving_api.save_model(


Model saved as AlgoTrading_365_30.h5
--------------------------------------------------
Predicting 90 days ahead using AlgoTrading images with 365 days timeframe.
Evaluation Metrics:
Accuracy: 0.3793103448275862
Precision: 0.3793103448275862
Recall: 1.0
F1 Score: 0.5499999999999999
Hit Rate: 0.3793103448275862
Average RoR: -0.030515262990759927


  saving_api.save_model(


Model saved as AlgoTrading_365_90.h5
--------------------------------------------------
Evaluation scores saved to 'evaluation_scores.csv'.


In [None]:
# Train models on all images types and evaluate them based on timeframe and prediction

evaluation_df = pd.DataFrame(columns=['Timeframe', 'Prediction', 'Accuracy', 'Precision', 'Recall', 'F1_Score', 'Hit_Rate', 'Average_RoR'])

for timeframe in TIMEFRAMES:
    for prediction in PREDICTIONS:
        if prediction < timeframe:
            print(f"Predicting {prediction} days ahead with {timeframe} days timeframe.")
            
            # Filter your data based on prediction, img_type, and timeframe
            data = labels[(labels['TimePrediction'] == prediction) & (labels['Image'].str.contains(f'_{timeframe}_'))]
            data = data.reset_index(drop=True)  # Reset the index to maintain temporal order
            
            # Create and compile your CNN model based on timeframe
            if timeframe == 14:
                model = create_cnn_model(2, input_shape=(120, 85, 1))
            elif timeframe == 30:
                model = create_cnn_model(3, input_shape=(120, 132, 1))
            elif timeframe == 90:
                model = create_cnn_model(3, input_shape=(120, 226, 1))
            elif timeframe == 180:
                model = create_cnn_model(4, input_shape=(120, 414, 1))
            elif timeframe == 365:
                model = create_cnn_model(4, input_shape=(120, 602, 1))
            
            X = np.array(data['Image_Array'].tolist()) / 255.0
            lastPrice = np.array(data['LastPrice'].tolist())
            futurePrice = np.array(data['FuturePrice'].tolist())
            y = data['Label']
            # Split the data into training and validation sets in temporal order
            split_index = int(len(X) * 0.7)
            X_train, X_test = X[:split_index], X[split_index:]
            y_train, y_test = y[:split_index], y[split_index:]
            lastPrice_test = lastPrice[split_index:]
            futurePrice_test = futurePrice[split_index:]
            
            window_size = len(X_train)/10
            num_windows = 10

            for epoch in range(10):  # 10 epochs
                start = 0
                end = int(window_size * (epoch + 1))
                X_temp = X_train[start:end]
                y_temp = y_train[start:end]
                split_index = int(len(X) * 0.9)
                X_train_temp, X_val_temp = X_temp[:split_index], X_temp[split_index:]
                y_train_temp, y_val_temp = y_temp[:split_index], y_temp[split_index:]
                model.fit(X_train_temp, y_train_temp, batch_size=32, epochs=1, validation_data=(X_val_temp, y_val_temp))            
            
            # Evaluate the model
            y_pred = model.predict(X_test)
            # Convert predictions to binary: if > 0.5 then 1 else 0
            y_pred_binary = np.where(y_pred > 0.5, 1, 0)
            
            accuracy = accuracy_score(y_test, y_pred_binary)
            precision = precision_score(y_test, y_pred_binary)
            recall = recall_score(y_test, y_pred_binary)
            f1_score = fbeta_score(y_test, y_pred_binary, beta=1)

            y_test_array = y_test.values.ravel()  # Convert y_test to a 1D NumPy array if it's a pandas Series
            correct_predictions = np.sum(y_pred_binary.ravel() == y_test_array)
            hit_rate = correct_predictions / len(y_test_array)

            # Calculate Rate of Return RoR
            ror = np.where(y_pred_binary == 1, ((futurePrice_test - lastPrice_test)/lastPrice_test), 0)
            # Drop all 0s from the array
            ror = ror[ror != 0]
            # Calculate average RoR
            avg_ror = np.mean(ror)
            
            print("Evaluation Metrics:")
            print(f"Accuracy: {accuracy}")
            print(f"Precision: {precision}")
            print(f"Recall: {recall}")
            print(f"F1 Score: {f1_score}")
            print(f"Hit Rate: {hit_rate}")
            print(f"Average RoR: {avg_ror}")
            
            # Add the evaluation metrics to the DataFrame
            evaluation_df = evaluation_df.append({
                'Timeframe': timeframe,
                'Prediction': prediction,
                'Accuracy': accuracy,
                'Precision': precision,
                'Recall': recall,
                'F1_Score': f1_score,
                'Hit_Rate': hit_rate,
                'Average_RoR': avg_ror
            }, ignore_index=True)
            
            # Save the model
            model.save(f"models/combined_{timeframe}_{prediction}.h5")
            print(f"Model saved as combined_{timeframe}_{prediction}.h5")
            print("--------------------------------------------------")

# Save the evaluation DataFrame to a CSV file
evaluation_df.to_csv('combined_model_evaluation_scores.csv', index=False)
print("Evaluation scores saved to 'combined_model_evaluation_scores.csv'.")