In [1]:
import numbers
import numpy as np
import os
import pandas as pd

from feature_engineering import (
    add_return_data,
    add_simple_moving_average,
    generate_image_data,
)
from pathlib import Path
from PIL import Image
from tqdm.notebook import tqdm


### Image Config

In [2]:
def get_image_config(days: int):
    '''
    Look up the image layout configured for a lookback window.

    days: length of the lookback window; only 5, 20, 60 and 250 are configured
    return: image_height, volume_image_height, gap_height
    raise: Exception when no layout is configured for `days`
    '''
    # One row per supported window: (days, (image_height, volume_image_height, gap_height)).
    layouts = (
        (5, (32, 6, 2)),       # 5d - 32x15
        (20, (64, 13, 1)),     # 20d - 64x60
        (60, (96, 19, 1)),     # 60d - 96x180
        (250, (256, 51, 1)),   # 250d - 256x756
    )

    # Scan with == (rather than a dict lookup) so any value that compares
    # equal to a configured window is accepted.
    for configured_days, layout in layouts:
        if days == configured_days:
            return layout

    raise Exception("Image Size is not configured")

### Data Processing

In [3]:
# add return_date and return_value and moving average data
def calculate_df(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Pass the frame through the return and moving-average feature helpers.

    The frame is threaded through add_return_data for the 5, 20 and 60
    horizons and then through add_simple_moving_average for the 50, 100 and
    200 periods, in that order; each call receives the previous call's result.

    NOTE(review): the names of the columns the helpers add are not visible
    here (the original notes mention return_5/20/60 and ma_50/100/200) -
    confirm against feature_engineering.

    df: input price data
    return: the frame returned by the last helper call
    '''
    for return_days in (5, 20, 60):
        df = add_return_data(df, return_days)

    for ma_days in (50, 100, 200):
        df = add_simple_moving_average(df, ma_days)

    return df

In [4]:
# generate images dataset with labels
def create_ohlc_graphs(
        dataset: pd.DataFrame, # raw dataset
        ticker: str,
        lookback_days: int,
        ma_line: int,
        image_height: int,
        volume_image_height: int, # put 0 if we don't want volume barchart
        gap_height: int, # put 0 if we don't want volume barchart
        output_root_path: str,
        dry_run: bool):
    '''
    Render one image per rolling lookback window of a ticker and save it as PNG.

    dataset: raw rows of one ticker; the 'volume' and 'date' columns are read
        here, and add_simple_moving_average is expected to provide an 'ma' column
    ticker: used in the output file name and in the error messages
    lookback_days: number of rows per window; must be 5, 20, 60 or 250
    ma_line: moving-average period; must be 50, 100 or 200
    image_height: total image height; the volume and gap heights are subtracted
        from it before it is passed to generate_image_data
    volume_image_height: height passed on for the volume area
    gap_height: height passed on for the gap
    output_root_path: folder that receives '<ticker>_<date of last row>.png';
        it is not created here
    dry_run: when True the image data is still generated but nothing is saved

    raise: ValueError (still an Exception) for an unsupported lookback_days
        or ma_line

    A window is skipped when its last row has a NaN or zero volume, or when
    fewer than int(lookback_days * 0.6) of its rows have a real, non-zero
    volume. A failure inside one window is reported and the loop continues
    with the next window (best effort).
    '''
    if lookback_days not in [5, 20, 60, 250]:
        raise ValueError(f'Invalid lookback_days: {lookback_days}')

    if ma_line not in [50, 100, 200]:
        raise ValueError(f'Invalid ma_line period: {ma_line}')

    # Rows without a moving-average value are dropped before windowing.
    dataset = add_simple_moving_average(dataset, ma_line).dropna(subset=['ma'])

    min_traded_days = int(lookback_days * 0.6)
    chart_height = image_height - volume_image_height - gap_height

    # Only start positions that leave room for a full window are visited.
    for i in range(len(dataset) - lookback_days + 1):
        try:
            subset = dataset.iloc[i:i + lookback_days]

            last_volume = subset['volume'].iloc[-1]
            if np.isnan(last_volume) or last_volume == 0:
                continue

            # NaN is a numbers.Number and compares != 0, so it has to be
            # excluded explicitly or missing volumes would count as traded days.
            traded_days = sum(
                1 for v in subset['volume'].to_list()
                if isinstance(v, numbers.Number) and not pd.isna(v) and v != 0
            )
            if traded_days < min_traded_days:
                continue

            imgData = generate_image_data(
                subset,
                chart_height,
                volume_image_height,
                gap_height
            )
            if not dry_run:
                last_date = str(subset['date'].iloc[-1])[:10]
                img = Image.fromarray(imgData)
                img.save(os.path.join(output_root_path, f'{ticker}_{last_date}.png'))
        except Exception as exc:
            # Best effort per window, but keep KeyboardInterrupt/SystemExit
            # working and report why the window failed.
            print(f'Error detected: {i} @{ticker}: {exc!r}')

### Demo

In [None]:
# Load the demo ticker, drop incomplete rows, order by date, then run calculate_df.
dataPath = '../../../dataset/market_data/output/nikkei_225/CONSTITUENTS_DAILY_9984.T.csv'
df = (
    pd.read_csv(dataPath, parse_dates=['date'])
    .dropna(how='any')  # remove row with null
    .sort_values(by=['date'], ascending=True)
    .pipe(calculate_df)
)

In [None]:
# example: 50d-ma + ohlc in 250 days
# Renders a 250-row slice of df using the full image height configured for 250 days;
# the volume height and gap height are both passed as 0.
subset = df[1002:1252]
# NOTE(review): the three trailing booleans are passed positionally; presumably they
# toggle the 50/100/200 moving-average lines (only the first is on here) - confirm
# against generate_image_data in feature_engineering.
imgData = generate_image_data(subset, get_image_config(250)[0], 0, 0, True, False, False)
img = Image.fromarray(imgData)
display(img)

In [None]:
# example: 100d-ma + ohlc + volume in 250 days
# Same 250-row slice as the previous example, now with the volume and gap heights
# taken from get_image_config(250) and subtracted from the total image height.
subset = df[1002:1252]
image_height, volume_image_height, gap_height = get_image_config(250)
# NOTE(review): the three trailing booleans are passed positionally; presumably they
# toggle the 50/100/200 moving-average lines (only the second is on here) - confirm
# against generate_image_data in feature_engineering.
imgData = generate_image_data(
    subset,
    image_height - volume_image_height - gap_height,
    volume_image_height,
    gap_height,
    False,
    True,
    False
)
img = Image.fromarray(imgData)
display(img)

In [None]:
# create demo images and metas
# NOTE(review): add_features is neither imported nor defined in the visible notebook,
# so this cell presumably raises NameError on a fresh kernel - import it from its
# module (feature_engineering?) or remove the cell.
# NOTE(review): all arguments are positional and their meaning is not visible here;
# confirm the order against the add_features signature.
dataset_raw = df[0:4000]
image_height, volume_image_height, gap_height = get_image_config(20)
dataset = add_features(
    dataset_raw,
    20,
    20,
    image_height - volume_image_height - gap_height,
    volume_image_height,
    gap_height,
    True,
    False,
    False,
    -0.5,
    0.5,
    False,
    False,
    '9984_T'
)
dataset.sample(5)

### Graph Generator

In [7]:
def generate_ohlc_graphs(market: str, lookback_days: int, ma_line: int):
    '''
    Create the OHLC graph images for every constituent file of a market.

    market: name of the sub-folder under ../../../dataset/market_data/output
    lookback_days: window length, forwarded to get_image_config and create_ohlc_graphs
    ma_line: moving-average period, forwarded to create_ohlc_graphs

    raise: RuntimeError when the market's source folder does not exist

    Images are written to ../../../dataset/ohlc_graphs/<market>/i<lookback_days>-ma<ma_line>
    (created if missing). Both folders are resolved against the current
    working directory.
    '''
    # input
    csv_dir = os.path.abspath(f'../../../dataset/market_data/output/{market}')
    if not os.path.exists(csv_dir):
        raise RuntimeError(f'Path {csv_dir} does not exist')

    # output
    png_dir = os.path.abspath(f'../../../dataset/ohlc_graphs/{market}/i{lookback_days}-ma{ma_line}')
    os.makedirs(png_dir, exist_ok=True)

    total_height, volume_height, gap = get_image_config(lookback_days)

    for file_name in tqdm(os.listdir(csv_dir)):
        # Only the per-constituent files are processed.
        if not file_name.startswith('CONSTITUENTS'):
            continue

        # The ticker is what remains of the file name without prefix and extension.
        ticker = file_name.replace('CONSTITUENTS_DAILY_', '').replace('.csv', '')
        csv_path = os.path.join(csv_dir, file_name)
        print(f'Read raw data from {csv_path}')

        raw_prices = pd.read_csv(csv_path, parse_dates=['date'])
        prices_by_date = raw_prices.sort_values(by=['date'], ascending=True)

        create_ohlc_graphs(
            dataset=prices_by_date,
            ticker=ticker,
            lookback_days=lookback_days,
            ma_line=ma_line,
            image_height=total_height,
            volume_image_height=volume_height,
            gap_height=gap,
            output_root_path=png_dir,
            dry_run=False,
        )

In [11]:
# Generate the kospi_100 graphs for each lookback window with the 50-period moving average.
for demo_lookback_days in (5, 20, 60):
    generate_ohlc_graphs('kospi_100', demo_lookback_days, 50)
# generate_ohlc_graphs('hsi', 250, 50)

  0%|          | 0/101 [00:00<?, ?it/s]

Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_000100.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_000270.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_000660.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_000720.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\

Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_018260.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_018880.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_021240.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_022100.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\

Error detected: 5461 @105560.KS
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_128940.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_138040.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_161390.KS.csv
Error detected: 2719 @161390.KS
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_180640.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Docum

  0%|          | 0/101 [00:00<?, ?it/s]

Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_000100.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_000270.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_000660.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_000720.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\

Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_021240.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_022100.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_024110.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_028050.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\

Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_180640.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_207940.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_241560.KS.csv
Error detected: 1456 @241560.KS
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_251270.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(Comp

  0%|          | 0/101 [00:00<?, ?it/s]

Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_000100.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_000270.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_000660.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_000720.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\

Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_021240.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_022100.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_024110.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_028050.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\

Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_180640.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_207940.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_241560.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\output\kospi_100\CONSTITUENTS_DAILY_251270.KS.csv
Read raw data from C:\Users\User\OneDrive\OneDrive - The University of Hong Kong - Connect\Documents\HKU MScCS\COMP7705 MSc(CompSc) Project\dataset\market_data\