In [None]:
import cv2
import pandas as pd
import re
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, fbeta_score
from keras.models import load_model

In [21]:
TIMEFRAMES = [7, 14, 30, 90, 180]
PREDICTIONS = [5, 30, 90]
IMG_TYPES = ['MPA', 'CPA', 'MLC', 'API']

TRANSFER = True
if TRANSFER:
    labels = pd.read_csv('labels/transfer_labels.csv')
else:
    labels = pd.read_csv('labels/labels.csv')

In [None]:
# Crop the images using the bounding boxes
def crop_image(img_path):
    # Load the image in grayscale
    img = cv2.imread(img_path, 0)

    # Check if the image was loaded correctly
    if img is None:
        raise ValueError(f"Image at {img_path} not found. Please check the path.")

    # Use regular expression to match numbers followed by ".png" at the end of the filename
    match = re.search(r'(\d+)(?=\.png$)', img_path)
    
    # Check if we found a match
    if match:
        # Extract the number from the matched group
        number = int(match.group(1))
        
        # Check if the number is one of the specified values
        if number == 7:
            # Crop the image using the bounding rectangle
            crop = img[105:105+115, 80:80+38]
        elif number == 14:
            # Crop the image using the bounding rectangle
            crop = img[100:100+120, 80:80+85]
        elif number == 30:
            # Crop the image using the bounding rectangle
            crop = img[100:100+120, 80:80+132]
        elif number == 90:
            # Crop the image using the bounding rectangle
            crop = img[100:100+120, 80:80+226]
        elif number == 180:
            # Crop the image using the bounding rectangle
            crop = img[100:100+120, 80:80+414]
    return crop

# Example usage:
filenames = labels['Image'].values.tolist()

# Testing the function with the provided list of filenames
for name in filenames:
    try:
        cropped_image = crop_image(name)
        # Construct the new path for the cropped image
        new_path = name.replace('.png', '_cropped.png')
        # Save the cropped image
        cv2.imwrite(new_path, cropped_image)
    except ValueError as e:
        print(e)


In [22]:
# Create a new column called 'Image' that contains the path to the cropped image but only if they 
labels['Image'] = labels['Image'].str.replace('.png', '_cropped.png')

  labels['Image'] = labels['Image'].str.replace('.png', '_cropped.png')


In [23]:
# Function to load and convert an image to grayscale
def load_image(image_path):
    # Load image in grayscale
    image = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError(f"Unable to load image at path: {image_path}")
    return image

filenames = labels['Image'].values.tolist()

images = []

for name in filenames:
    try:
        img = load_image(name)
        images.append(img)
    except ValueError as e:
        print(e)

# Add a new column to the labels DataFrame to store the image arrays
labels['Image_Array'] = images

In [24]:
# Show the number of 1s and 0s in the dataset
print(labels['Label'].value_counts())

1    4956
0    2968
Name: Label, dtype: int64


In [25]:
# Sort the DataFrame by date
labels['Date'] = labels['Image'].str.extract(r'(\d{4}-\d{2}-\d{2})')
labels['Date'] = pd.to_datetime(labels['Date'])
labels = labels.sort_values(by='Date') 
print(labels.head())

                                                  Image  TimePrediction  \
0     images/OHLC/SmallCap_2019-02-01 00:00:00_7_cro...               5   
7844  images/AlgoTrading/CleanEnergy_2019-02-01 00:0...              30   
5094  images/Line/NASDAQ_2019-02-01 00:00:00_7_cropp...               5   
5124  images/Line/NASDAQ_2019-02-01 00:00:00_14_crop...               5   
910   images/OHLC/Healthcare_2019-02-01 00:00:00_30_...              30   

        LastPrice  FuturePrice  Label  \
0       76.620003    76.860001      1   
7844     9.500000     9.710000      1   
5094  7263.870117  7288.350098      1   
5124  7263.870117  7288.350098      1   
910    170.380005   173.130005      1   

                                            Image_Array       Date  
0     [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 2019-02-01  
7844  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 2019-02-01  
5094  [[27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... 2019-02-01  
5124  [[0, 0, 0, 0, 0, 0, 

In [None]:
def get_investment_return(y_pred_binary, lastPrice, futurePrice):
    # Calculate Rate of Return RoR for each trade independently
    # Long positions
    long_ror = []
    long_trades = []
    long_investment = 0
    # Short positions
    short_ror = []
    short_trades = []
    short_investment = 0

    y_pred_binary = y_pred_binary.tolist()
    for i in range(len(y_pred_binary)):
        # Long position
        if y_pred_binary[i][0] == 1:
            long_return_i = ((futurePrice[i] - lastPrice[i])/lastPrice[i])
            long_ror.append(long_return_i)
            long_trades.append(100*(long_return_i))
            long_investment += 100
        # Short position
        else:
            short_return_i = ((lastPrice[i] - futurePrice[i])/lastPrice[i])
            short_ror.append(short_return_i)
            short_trades.append(100*(short_return_i))
            short_investment += 100
    
    if long_investment > 0:
        # Calculate average long RoR
        long_avg_ror = np.mean(long_ror)
        # Sum all long trades
        long_trades = np.sum(long_trades)
    else:
        long_avg_ror = 0
        long_trades = 0
    
    if short_investment > 0:
        # Calculate average short RoR
        short_avg_ror = np.mean(short_ror)
        # Sum all short trades
        short_trades = np.sum(short_trades)
    else:
        short_avg_ror = 0
        short_trades = 0

    return long_avg_ror, long_trades, long_investment, short_avg_ror, short_trades, short_investment

In [None]:
evaluation_df = pd.DataFrame(columns=['Image_Type', 'Timeframe', 'Prediction', 'Accuracy', 'Precision', 'Recall', 'F1_Score', 'Hit_Rate', 'Long_Average_RoR', 'Long_Investment', 'Long_Return', 'Short_Average_RoR', 'Short_Investment', 'Short_Return', 'Total_Investment', 'Total_Return'])

TIMEFRAMES = [7, 14, 30, 90, 180]
PREDICTIONS = [5, 30, 90]
IMG_TYPES = ['MPA', 'CPA', 'MLC', 'API']

for img_type in IMG_TYPES:
    for timeframe in TIMEFRAMES:
        for prediction in PREDICTIONS:
            if prediction <= timeframe:
                print(f"Evaluating model predicting {prediction} days ahead using {img_type} images with {timeframe} days timeframe.")

                # Filter the data
                data = labels[(labels['TimePrediction'] == prediction) &
                              (labels['Image'].str.contains(f'/{img_type}/')) &
                              (labels['Image'].str.contains(f'_{timeframe}_'))]
                data = data.reset_index(drop=True)

                # Start at same index as test data
                split_index = int(len(data) * 0.8)
                data = data[split_index:]

                # Load the model
                model_filename = f"models/noWeights_{img_type}_{timeframe}_{prediction}.h5"
                model = load_model(model_filename)

                X = np.array(data['Image_Array'].tolist()) / 255.0
                lastPrice = data['LastPrice'].tolist()
                futurePrice = data['FuturePrice'].tolist()
                y = data['Label'].values

                # Evaluate the model on test data
                y_pred = model.predict(X)
                # Convert predictions to binary: if > 0.5 then 1 else 0
                y_pred_binary = np.where(y_pred > 0.5, 1, 0)  
                
                accuracy = accuracy_score(y, y_pred_binary)
                precision = precision_score(y, y_pred_binary)
                recall = recall_score(y, y_pred_binary)
                f1_score = fbeta_score(y, y_pred_binary, beta=1)

                y_test_array = y.ravel()  # Convert y_test to a 1D NumPy array if it's a pandas Series
                correct_predictions = np.sum(y_pred_binary.ravel() == y_test_array)
                hit_rate = correct_predictions / len(y_test_array)

                long_avg_ror, long_trades, long_investment, short_avg_ror, short_trades, short_investment = get_investment_return(y_pred_binary, lastPrice, futurePrice)
                # Total investment
                total_investment = long_investment + short_investment
                total_returns = long_trades + short_trades
                
                
                print("Evaluation Metrics:")
                print(f"Accuracy: {accuracy}")
                print(f"Precision: {precision}")
                print(f"Recall: {recall}")
                print(f"F1 Score: {f1_score}")
                print(f"Hit Rate: {hit_rate}")
                print(f"Average Long RoR: {long_avg_ror}")
                print(f"Long Investment: {long_investment}")
                print(f"Long Return: {long_trades}")
                print(f"Average Short RoR: {short_avg_ror}")
                print(f"Short Investment: {short_investment}")
                print(f"Short Return: {short_trades}")
                print(f"Total Investment: {total_investment}")
                print(f"Total Return: {total_returns}")

                
                # Add the evaluation metrics to the DataFrame
                evaluation_df = evaluation_df.append({
                    'Image_Type': img_type,
                    'Timeframe': timeframe,
                    'Prediction': prediction,
                    'Accuracy': accuracy,
                    'Precision': precision,
                    'Recall': recall,
                    'F1_Score': f1_score,
                    'Hit_Rate': hit_rate,
                    'Long_Average_RoR': long_avg_ror,
                    'Long_Investment': long_investment,
                    'Long_Return': long_trades,
                    'Short_Average_RoR': short_avg_ror,
                    'Short_Investment': short_investment,
                    'Short_Return': short_trades, 
                    'Total_Investment': total_investment,
                    'Total_Return': total_returns
                }, ignore_index=True)

# Save the evaluation DataFrame to a CSV file
if TRANSFER:
    evaluation_df.to_csv('evaluation/separate/transfer_evaluation_scores.csv', index=False)
    print("Evaluation scores saved to 'evaluation/separate/tansfer_evaluation_scores.csv'.")
else:
    evaluation_df.to_csv('evaluation/separate/test_evaluation_scores.csv', index=False)
    print("Evaluation scores saved to 'evaluation/separate/test_evaluation_scores.csv'.")

In [None]:
evaluation_df = pd.DataFrame(columns=['Image_Type', 'Timeframe', 'Prediction', 'Accuracy', 'Precision', 'Recall', 'F1_Score', 'Hit_Rate', 'Long_Average_RoR', 'Long_Investment', 'Long_Return', 'Short_Average_RoR', 'Short_Investment', 'Short_Return', 'Total_Investment', 'Total_Return'])

TIMEFRAMES = [7, 14, 30, 90, 180]
PREDICTIONS = [5, 30, 90]

IMG_TYPES = ['MPA', 'CPA', 'MLC', 'API']

for img_type in IMG_TYPES:
    for timeframe in TIMEFRAMES:
        for prediction in PREDICTIONS:
            if prediction <= timeframe:
                print(f"Evaluating model predicting {prediction} days ahead using {img_type} images with {timeframe} days timeframe.")

                # Filter the data
                data = labels[(labels['TimePrediction'] == prediction) &
                              (labels['Image'].str.contains(f'/{img_type}/')) &
                              (labels['Image'].str.contains(f'_{timeframe}_'))]
                data = data.reset_index(drop=True)

                # Start at same index as test data
                split_index = int(len(data) * 0.8)
                data = data[split_index:]

                # Load the model
                model_filename = f"models/noWeights_combined_{timeframe}_{prediction}.h5"
                model = load_model(model_filename)

                X = np.array(data['Image_Array'].tolist()) / 255.0
                lastPrice = data['LastPrice'].tolist()
                futurePrice = data['FuturePrice'].tolist()
                y = data['Label'].values

                # Evaluate the model on test data
                y_pred = model.predict(X)
                # Convert predictions to binary: if > 0.5 then 1 else 0
                y_pred_binary = np.where(y_pred > 0.5, 1, 0)  
                
                accuracy = accuracy_score(y, y_pred_binary)
                precision = precision_score(y, y_pred_binary)
                recall = recall_score(y, y_pred_binary)
                f1_score = fbeta_score(y, y_pred_binary, beta=1)

                y_test_array = y.ravel()  # Convert y_test to a 1D NumPy array if it's a pandas Series
                correct_predictions = np.sum(y_pred_binary.ravel() == y_test_array)
                hit_rate = correct_predictions / len(y_test_array)

                long_avg_ror, long_trades, long_investment, short_avg_ror, short_trades, short_investment = get_investment_return(y_pred_binary, lastPrice, futurePrice)
                # Total investment
                total_investment = long_investment + short_investment
                total_returns = long_trades + short_trades
                
                
                print("Evaluation Metrics:")
                print(f"Accuracy: {accuracy}")
                print(f"Precision: {precision}")
                print(f"Recall: {recall}")
                print(f"F1 Score: {f1_score}")
                print(f"Hit Rate: {hit_rate}")
                print(f"Average Long RoR: {long_avg_ror}")
                print(f"Long Investment: {long_investment}")
                print(f"Long Return: {long_trades}")
                print(f"Average Short RoR: {short_avg_ror}")
                print(f"Short Investment: {short_investment}")
                print(f"Short Return: {short_trades}")
                print(f"Total Investment: {total_investment}")
                print(f"Total Return: {total_returns}")

                
                # Add the evaluation metrics to the DataFrame
                evaluation_df = evaluation_df.append({
                    'Image_Type': img_type,
                    'Timeframe': timeframe,
                    'Prediction': prediction,
                    'Accuracy': accuracy,
                    'Precision': precision,
                    'Recall': recall,
                    'F1_Score': f1_score,
                    'Hit_Rate': hit_rate,
                    'Long_Average_RoR': long_avg_ror,
                    'Long_Investment': long_investment,
                    'Long_Return': long_trades,
                    'Short_Average_RoR': short_avg_ror,
                    'Short_Investment': short_investment,
                    'Short_Return': short_trades, 
                    'Total_Investment': total_investment,
                    'Total_Return': total_returns
                }, ignore_index=True)

# Save the evaluation DataFrame to a CSV file
if TRANSFER:
    evaluation_df.to_csv('evaluation/combined/transfer_evaluation_scores.csv', index=False)
    print("Evaluation scores saved to 'evaluation/combined/tansfer_evaluation_scores.csv'.")
else:
    evaluation_df.to_csv('evaluation/combined/test_evaluation_scores.csv', index=False)
    print("Evaluation scores saved to 'evaluation/combined/test_evaluation_scores.csv'.")

In [26]:
ror_df = pd.DataFrame(columns=['Image_Type', 'Timeframe', 'Prediction', 'Average_RoR'])

for img_type in IMG_TYPES:
    for timeframe in TIMEFRAMES:
        for prediction in PREDICTIONS:
            if prediction <= timeframe:

                # Filter your data based on prediction, img_type, and timeframe
                data = labels[(labels['TimePrediction'] == prediction) &
                              (labels['Image'].str.contains(f'/{img_type}/')) &
                              (labels['Image'].str.contains(f'_{timeframe}_'))]                
                data = data.reset_index(drop=True)
                split_index = int(len(data) * 0.8)
                lastPrice = data['LastPrice'][split_index:]
                futurePrice = data['FuturePrice'][split_index:]
                # Calculate Rate of Return RoR
                ror = np.array((futurePrice - lastPrice)/lastPrice)
                # Calculate average RoR
                avg_ror = np.mean(ror)
                #print(f"Average RoR for {img_type} {timeframe} {prediction}: {avg_ror}")
                # Add the evaluation metrics to the DataFrame
                ror_df = ror_df.append({
                    'Image_Type': img_type,
                    'Timeframe': timeframe,
                    'Prediction': prediction,
                    'Average_RoR': avg_ror
                }, ignore_index=True)

# Save the evaluation DataFrame to a CSV file
ror_df.to_csv('transfer_ror.csv', index=False)