In [1]:
import cv2
import pandas as pd
import re
import numpy as np
from sklearn.utils import shuffle
from keras.models import Sequential
from keras.layers import Conv2D, LeakyReLU, MaxPooling2D, Flatten, Dense



In [2]:
TIMEFRAMES = [30, 90, 180, 365]
PREDICTIONS = [5, 30, 90]
IMG_TYPES = ['OHLC', 'ColoredOHLC', 'Line', 'AlgoTrading']
NEW_DATA = ['NASDAQ']

labels = pd.read_csv('labels.csv')

In [5]:
# Crop the images using the bounding boxes
def crop_image(img_path):
    # Load the image in grayscale
    img = cv2.imread(img_path, 0)

    # Check if the image was loaded correctly
    if img is None:
        raise ValueError(f"Image at {img_path} not found. Please check the path.")

    # Use regular expression to match numbers followed by ".png" at the end of the filename
    match = re.search(r'(\d+)(?=\.png$)', img_path)
    
    # Check if we found a match
    if match:
        # Extract the number from the matched group
        number = int(match.group(1))
        
        # Check if the number is one of the specified values
        if number == 30:
            # Crop the image using the bounding rectangle
            crop = img[100:100+120, 80:80+132]
        elif number == 90:
            # Crop the image using the bounding rectangle
            crop = img[100:100+120, 80:80+226]
        elif number == 180:
            # Crop the image using the bounding rectangle
            crop = img[100:100+120, 80:80+414]
        elif number == 365:
            # Crop the image using the bounding rectangle
            crop = img[100:100+120, 80:80+602]
    return crop

# Example usage:
filenames = labels['Image'].values.tolist()

# Only keep filenames that is in NEW_DATA
filenames = [f for f in filenames if f.split('/')[0] in NEW_DATA]

# Testing the function with the provided list of filenames
for name in filenames:
    try:
        cropped_image = crop_image(name)
        # Construct the new path for the cropped image
        new_path = name.replace('.png', '_cropped.png')
        # Save the cropped image
        cv2.imwrite(new_path, cropped_image)
    except ValueError as e:
        print(e)


In [6]:
# Create a new column called 'Image' that contains the path to the cropped image but only if they 
labels['Image'] = labels['Image'].str.replace('.png', '_cropped.png')

  labels['Image'] = labels['Image'].str.replace('.png', '_cropped.png')


In [7]:
# Function to load and convert an image to grayscale
def load_image(image_path):
    # Load image in grayscale
    image = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError(f"Unable to load image at path: {image_path}")
    return image

filenames = labels['Image'].values.tolist()

images = []

for name in filenames:
    try:
        img = load_image(name)
        images.append(img)
    except ValueError as e:
        print(e)

# Add a new column to the labels DataFrame to store the image arrays
labels['Image_Array'] = images

In [8]:
# Divide data based on timeframes
labels_dict = {
"30" : labels[labels['Image'].str.contains('_30_')],
"90" : labels[labels['Image'].str.contains('_90_')],
"180" : labels[labels['Image'].str.contains('_180_')],
"365" : labels[labels['Image'].str.contains('_365_')]
}

In [9]:
for key in labels_dict:
    # Convert the Image_Array column to a numpy array
    labels_dict[key]['Image_Array'] = np.array(labels_dict[key]['Image_Array']) / 255.0
    # Shuffle the DataFrame
    #labels_dict[key] = shuffle(labels_dict[key], random_state=42).reset_index(drop=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  labels_dict[key]['Image_Array'] = np.array(labels_dict[key]['Image_Array']) / 255.0


In [10]:
from keras.models import load_model
from pathlib import Path

# Base directory where the models are stored
base_dir = Path('models/')

# Collect all .h5 model files in the directory
model_files = list(base_dir.glob('*.h5'))

# Load the models
models = {model_file.stem: load_model(model_file) for model_file in model_files}

# Return the names of the loaded models
list(models.keys())

['Line_365_30',
 'AlgoTrading_180_30',
 'Line_90_5',
 'OHLC_365_30',
 'AlgoTrading_90_5',
 'ColoredOHLC_90_30',
 'OHLC_180_30',
 'AlgoTrading_365_5',
 'Line_180_30',
 'AlgoTrading_365_30',
 'OHLC_90_30',
 'ColoredOHLC_180_90',
 'AlgoTrading_180_5',
 'OHLC_30_5',
 'Line_90_30',
 'ColoredOHLC_365_90',
 'ColoredOHLC_30_5',
 'Line_180_5',
 'ColoredOHLC_180_30',
 'ColoredOHLC_90_5',
 'ColoredOHLC_365_30',
 'ColoredOHLC_365_5',
 'AlgoTrading_90_30',
 'OHLC_90_5',
 'OHLC_180_5',
 'OHLC_365_5',
 'OHLC_365_90',
 'Line_365_90',
 'AlgoTrading_180_90',
 'ColoredOHLC_180_5',
 'Line_30_5',
 'Line_180_90',
 'AlgoTrading_365_90',
 'Line_365_5',
 'OHLC_180_90',
 'AlgoTrading_30_5']

In [None]:
from sklearn.model_selection import train_test_split

TIMEFRAMES = [30, 90, 180, 365]
PREDICTIONS = [5, 30, 90]
IMG_TYPES = ['OHLC', 'ColoredOHLC', 'Line', 'AlgoTrading']

for img_type in IMG_TYPES:
    for timeframe in TIMEFRAMES:
        for prediction in PREDICTIONS:
            if prediction < timeframe:
                print(f"Evaluating model predicting {prediction} days ahead using {img_type} images with {timeframe} days timeframe.")

                # Filter the data
                data = labels[(labels['TimePrediction'] == prediction) &
                              (labels['Image'].str.contains(img_type)) &
                              (labels['Image'].str.contains(f'_{timeframe}_'))]

                # Normalize image arrays
                data = shuffle(data, random_state=42).reset_index(drop=True)

                # Load the model
                model_filename = f"models/{img_type}_{timeframe}_{prediction}.h5"
                model = load_model(model_filename)

                # Prepare data for evaluation
                X = np.array(data['Image_Array'].tolist())
                y = data['Label']
                X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)

                # Evaluate the model
                loss, accuracy = model.evaluate(X_val, y_val)
                print(f"Loss: {loss}, Accuracy: {accuracy}")

                print("--------------------------------------------------")