# Image Generation

In [None]:
# Mount to Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Move to your current working directory
%cd drive/MyDrive/image_processing

Mounted at /content/drive
/content/drive/MyDrive/image_processing


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

## Sampling images

In [None]:
# Read the price table, discard rows with missing values, and renumber rows from 0
df = pd.read_csv("data/2330_data.csv").dropna().reset_index(drop=True)
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,20_MA,Label
0,2013-07-26,103.0,103.0,101.0,102.5,75.580215,31781000,105.585,0
1,2013-07-29,100.0,101.5,100.0,100.0,73.736786,38265000,105.185,0
2,2013-07-30,100.5,102.5,100.5,102.0,75.211548,33818000,104.785,0
3,2013-07-31,101.5,103.0,101.0,102.5,75.580215,36945000,104.560,0
4,2013-08-01,102.0,102.0,100.0,100.5,74.105476,27849000,104.235,0
...,...,...,...,...,...,...,...,...,...
2676,2024-07-15,1040.0,1045.0,1025.0,1040.0,1040.000000,38393309,992.150,0
2677,2024-07-16,1040.0,1070.0,1035.0,1055.0,1055.000000,32125609,997.750,0
2678,2024-07-17,1035.0,1045.0,1020.0,1030.0,1030.000000,54022399,1000.200,0
2679,2024-07-18,988.0,1005.0,986.0,1005.0,1005.000000,81990095,1001.400,0


In [None]:
def _scale_to_rows(values, span, offset):
    """Min-max normalise `values` and map them onto integer pixel rows.

    The smallest value maps to `offset` and the largest to `offset + span`
    (truncated to int). A window whose values are all equal has no range to
    normalise by, so every value is placed on the lowest row (`offset`).

    Raises:
        ValueError: if the window contains NaN or infinite values.
    """
    lowest = np.min(values)
    value_range = np.max(values) - lowest
    if not np.isfinite(value_range):
        raise ValueError("window contains NaN or infinite values; drop incomplete rows first")
    if value_range == 0:
        # Flat window: avoid 0/0, which would otherwise produce NaN pixel rows.
        normalised = np.zeros_like(values, dtype=float)
    else:
        normalised = (values - lowest) / value_range
    return (normalised * span + offset).astype(int)


def single_symbol_image(tabular_df, start_date, end_date, training, stock, image_size=(96, 180)):
    """Render one grayscale chart image per trading day and save it as a PNG.

    For every row `d` whose 'Date' lies in [start_date, end_date], the
    `lookback` rows immediately before `d` (row `d` itself is excluded) are
    drawn into an image. Each day takes three pixel columns: open on the left,
    the low-high bar in the middle, close on the right. The price data fills
    the upper two thirds of the saved image and the volume bars the lower
    third. The file is written to 'img/training' or 'img/testing' and is named
    after the 'Label' and 'Date' of row `d`.

    Args:
        tabular_df: DataFrame with columns 'Date', 'Open', 'High', 'Low',
            'Close', '20_MA', 'Volume' and 'Label'. Dates are compared with
            `<` / `>` against `start_date` / `end_date`, and iteration stops at
            the first date after `end_date`, so rows are expected in ascending
            date order.
        start_date: first date (inclusive) to generate an image for.
        end_date: last date (inclusive) to generate an image for.
        training: True to save under 'img/training', False for 'img/testing'.
        stock: ticker symbol used in the file name.
        image_size: (height, width) of the image in pixels; width // 3 is the
            number of days drawn.

    Raises:
        ValueError: if `image_size` is narrower than 3 pixels or a window
            contains NaN / infinite values.
    """
    import os  # local import: only needed here to create the output folder

    height, width = image_size
    lookback = width // 3  # three pixel columns per trading day
    if lookback < 1:
        raise ValueError("image_size width must be at least 3 pixels")

    out_dir = 'img/training' if training else 'img/testing'
    os.makedirs(out_dir, exist_ok=True)  # plt.imsave does not create missing folders

    # Pull the columns out once instead of building a row Series on every iteration.
    dates = tabular_df['Date'].to_numpy()
    labels = tabular_df['Label'].to_numpy()
    prices = tabular_df[['Open', 'High', 'Low', 'Close', '20_MA']].to_numpy(dtype=float)
    volumes = tabular_df['Volume'].to_numpy(dtype=float)

    for d in range(lookback, len(tabular_df)):
        if dates[d] < start_date:
            continue
        if dates[d] > end_date:
            break

        # Split prices and volume, normalise each over the window, and scale to pixel rows:
        # prices land in rows [height/3, height-1], volume in rows [0, height/3-1].
        price_rows = _scale_to_rows(prices[d - lookback:d], height / 3 * 2 - 1, height / 3)
        volume_rows = _scale_to_rows(volumes[d - lookback:d], height / 3 - 1, 0)

        image = np.zeros((height, width))
        for i in range(lookback):
            open_row, high_row, low_row, close_row, ma_row = price_rows[i]
            left = i * 3
            # Draw price data
            image[open_row, left] = 255
            image[low_row:high_row + 1, left + 1] = 255
            image[close_row, left + 2] = 255
            # Draw indicators
            # NOTE(review): this slice colours only the first two of the day's three
            # columns; confirm whether `left + 3` was intended.
            image[ma_row, left:left + 2] = 255
            # Draw volume bars
            image[:volume_rows[i], left + 1] = 255

        # Row 0 is the top of a saved image, so flip to put low values at the bottom.
        image = np.flipud(image)
        plt.imsave(f'{out_dir}/[{labels[d]}]_{stock}_{dates[d]}.png', image, cmap='gray')

# Generate and display the image
# start_date = '2024-06-01'
# end_date = '2024-06-05'
# training = True
# stock = '2330'
# dataset = single_symbol_image(df, start_date, end_date, training, stock)

# Display the first generated image
# plt.imshow(dataset[0][0], cmap='gray')
# plt.axis('off')
# plt.show()
# plt.imsave('img/test.png', dataset[0][0], cmap='gray')

## Save all images

In [None]:
# Collect ticker symbols: the first four characters of every line after the header
with open('data/companies.csv', 'r') as companies_file:
    next(companies_file, None)  # skip the header line (no-op for an empty file)
    tickers = [row[:4] for row in companies_file]
print(tickers)

['1101', '1102', '1216', '1301', '1303', '1326', '1402', '2002', '2105', '2201', '2207', '2227', '2301', '2303', '2308', '2317', '2327', '2330', '2354', '2357', '2382', '2408', '2409', '2412', '2474', '2454', '2609', '2615', '2633', '2801', '2880', '2881', '2882', '2883', '2884', '2885', '2886', '2887', '2888', '2890', '2891', '2892', '2912', '3008', '3045', '3481', '3711', '4904', '4938', '5871', '5880', '6415', '6505', '6669', '6770', '8454', '9904', '9910']


In [None]:
# Render the training and testing image sets for every ticker; a failure on one
# ticker is reported and does not stop the remaining tickers.
for ticker in tickers:
    try:
        data = pd.read_csv(f'data/{ticker}_data.csv')

        # training data: 2014-01-01 through 2021-12-31
        single_symbol_image(data, '2014-01-01', '2021-12-31', True, ticker)
        print(f"training images for {ticker} saved")

        # testing data: 2022-01-01 through 2024-06-30
        single_symbol_image(data, '2022-01-01', '2024-06-30', False, ticker)
        print(f"testing images for {ticker} saved")

    except Exception as err:
        print(f"Failed to save images for {ticker}: {err}")

training image for 1101 saved
testing image for 1101 saved
training image for 1102 saved
testing image for 1102 saved
training image for 1216 saved
testing image for 1216 saved
training image for 1301 saved
testing image for 1301 saved
training image for 1303 saved
testing image for 1303 saved
training image for 1326 saved
testing image for 1326 saved
training image for 1402 saved
testing image for 1402 saved
training image for 2002 saved
testing image for 2002 saved
training image for 2105 saved
testing image for 2105 saved
training image for 2201 saved
testing image for 2201 saved
training image for 2207 saved
testing image for 2207 saved
training image for 2227 saved
testing image for 2227 saved
training image for 2301 saved
testing image for 2301 saved
training image for 2303 saved
testing image for 2303 saved
training image for 2308 saved
testing image for 2308 saved
training image for 2317 saved
testing image for 2317 saved
training image for 2327 saved
testing image for 2327 sav

In [None]:
# Function to find the first date with data for a stock
def find_first_valid_date(df):
    """Return the first non-null value (in row order) of the 'Date' column.

    Returns the date value itself rather than its index label, so the result
    can be used directly as a start date. Returns None when the column has no
    non-null entries.
    """
    dates = df['Date']
    present = dates.notna()
    if not present.any():
        return None
    # Positional lookup so the result does not depend on the frame's index labels.
    return dates[present].iloc[0]

# Tickers handled separately: their rows are cleaned with dropna() and the
# training range starts at the first available date instead of 2014-01-01.
except_tickers = ['2633', '6415', '6669', '6770', '8454']
for ticker in except_tickers:
    try:
        filepath = f'data/{ticker}_data.csv'
        # Load this ticker's own file; without this read the loop would reuse
        # whatever `data` frame an earlier cell left behind.
        data = pd.read_csv(filepath).dropna().reset_index(drop=True)
        first_date = find_first_valid_date(data)

        # training data
        start_date = str(first_date)
        end_date = '2021-12-31'
        training = True
        single_symbol_image(data, start_date, end_date, training, ticker)
        print(f"training images for {ticker} saved")

        # testing data
        start_date = '2022-01-01'
        end_date = '2024-06-30'
        training = False
        single_symbol_image(data, start_date, end_date, training, ticker)
        print(f"testing images for {ticker} saved")

    except Exception as e:
        print(f"Failed to save images for {ticker}: {e}")

training image for 6415 saved
testing image for 6415 saved
training image for 6669 saved
testing image for 6669 saved
training image for 6770 saved
testing image for 6770 saved
training image for 8454 saved
testing image for 8454 saved
