In [None]:
import math
import numbers
import os
import pandas as pd
import platform
import shutil
import time

from datetime import date
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

## Split Data & Create the Folders for Training and Testing

In [None]:
def _label_for_return(return_val, margins):
    """Return the class label of a return: 0 below margins[0], 2 above margins[1], else 1."""
    if return_val < margins[0]:
        return 0
    if return_val > margins[1]:
        return 2
    return 1


def _copy_graphs(files, graph_data_folder, return_data, return_column, margins,
                 labelled_folder_path, portfolio_folder_path, portfolio_cutoff):
    """Copy every usable graph in `files` to the portfolio folder or to its class sub-folder."""
    for file in tqdm(files):
        # rsplit keeps a ticker that itself contains '_' in one piece.
        ticker, dt = file.replace('.png', '').rsplit('_', 1)
        return_val = return_data.loc[dt, ticker][return_column]
        # Graphs whose return is NaN or not a plain number cannot be labelled; skip them.
        if not isinstance(return_val, numbers.Number) or math.isnan(return_val):
            continue
        src_path = os.path.join(graph_data_folder, file)
        if date.fromisoformat(dt) > portfolio_cutoff:
            # Dated after the cutoff: goes to the portfolio input, unlabelled.
            dst_path = os.path.join(portfolio_folder_path, file)
        else:
            label = _label_for_return(return_val, margins)
            dst_path = os.path.join(labelled_folder_path, str(label), file)
        shutil.copy(src_path, dst_path)


def create_training_space(market_index: str, lookback_days: int, forecast_days: int, ma_line: int, margins: tuple,
                          *, test_size: float = 0.2, random_state=None,
                          portfolio_cutoff: date = date(2014, 3, 31)):
    """Split the OHLC graphs into train/test sets and copy them into labelled folders.

    Graph files are read from
    ``../../../dataset/ohlc_graphs/{market_index}/i{lookback_days}-ma{ma_line}`` and are
    expected to be named ``<ticker>_<ISO date>.png``. Returns are loaded from every
    ``.pkl`` file in ``../../../dataset/returns/{market_index}``; the combined frame must
    provide the columns ``date``, ``ticker`` and ``r{forecast_days}``.

    For each graph with a numeric, non-NaN return:
      * a graph dated after ``portfolio_cutoff`` is copied to the portfolio input folder;
      * any other graph is copied to ``input/train/<label>`` or ``input/test/<label>``
        of the training workspace, where label is 0 (return < margins[0]),
        2 (return > margins[1]) or 1 (otherwise).

    Args:
        market_index: Name of the market index; used as a folder name.
        lookback_days: Lookback window encoded in the folder names.
        forecast_days: Forecast horizon; selects the ``r{forecast_days}`` return column.
        ma_line: Moving-average setting encoded in the folder names.
        margins: ``(lower, upper)`` return thresholds separating the three classes.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``. Pass a fixed value to make the
            split reproducible; the output folders are reused and earlier copies are not
            removed, so re-running with a different split can leave the same graph in
            both the train and the test folder.
        portfolio_cutoff: Graphs dated strictly after this date go to the portfolio folder.

    Raises:
        FileNotFoundError: If the returns folder contains no ``.pkl`` file.
    """
    run_name = f'i{lookback_days}-r{forecast_days}-ma{ma_line}'
    workspace = os.path.abspath(f'../../../training/{market_index}/{run_name}')
    os.makedirs(workspace, exist_ok=True)

    train_folder_path = os.path.join(workspace, 'input/train')
    test_folder_path = os.path.join(workspace, 'input/test')
    portfolio_folder_path = os.path.abspath(f'../../../portfolio/{market_index}/{run_name}/input')
    os.makedirs(portfolio_folder_path, exist_ok=True)
    for cls_label in (0, 1, 2):
        os.makedirs(os.path.join(train_folder_path, str(cls_label)), exist_ok=True)
        os.makedirs(os.path.join(test_folder_path, str(cls_label)), exist_ok=True)

    graph_data_folder = f'../../../dataset/ohlc_graphs/{market_index}/i{lookback_days}-ma{ma_line}'
    train_files, test_files = train_test_split(
        os.listdir(graph_data_folder), test_size=test_size, random_state=random_state)

    # Load every returns file first and concatenate once, instead of growing a frame per file.
    returns_folder = f'../../../dataset/returns/{market_index}'
    frames = [
        pd.read_pickle(os.path.abspath(item.path))
        for item in os.scandir(returns_folder)
        if item.path.endswith('.pkl')
    ]
    if not frames:
        raise FileNotFoundError(f'No .pkl return files found in {os.path.abspath(returns_folder)}')
    return_data = pd.concat(frames, ignore_index=True).set_index(['date', 'ticker'])

    return_column = f'r{forecast_days}'
    for files, labelled_folder_path in ((train_files, train_folder_path), (test_files, test_folder_path)):
        _copy_graphs(files, graph_data_folder, return_data, return_column, margins,
                     labelled_folder_path, portfolio_folder_path, portfolio_cutoff)

# Copy the graphs into the /training (and /portfolio) folders.
# One call per forecast horizon; only the 250-day horizon is enabled below.
# create_training_space('nikkei_mid_small_cap', 250, 5, 50, (-0.015891, 0.016609))
# create_training_space('nikkei_mid_small_cap', 250, 20, 50, (-0.033320, 0.044304))
# create_training_space('nikkei_mid_small_cap', 250, 60, 50, (-0.054173, 0.092287))
create_training_space(
    market_index='nikkei_mid_small_cap',
    lookback_days=250,
    forecast_days=250,
    ma_line=50,
    margins=(-0.072364, 0.273452),
)
