In [None]:
import numpy as np
import pandas as pd
import os
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

In [None]:
import time
import logging
# Unix timestamp (whole seconds) of when this cell ran; it tags the log file
# name so each run of the cell writes to its own file.
time_now = int(time.time())
# Route every record of level DEBUG and above into that file.
logging.basicConfig(
    level=logging.DEBUG,
    encoding='utf-8',
    filename=f'portfolio_analysys.{time_now}.log',
)
# The empty name selects the root logger, i.e. the one configured just above.
logger = logging.getLogger('')

In [None]:
# Walk the DataFrame row by row and simulate the portfolio's capital over time
def portfolio_analysis(
        _data: pd.DataFrame,
        initial_capital = 200000,
        threshold = 0.7,
        logging = False,
        skipping = 5,
        _enable_short = True,
    ):
    """Simulate a threshold-based long/short portfolio over per-ticker signals.

    Every column of ``_data`` other than ``'date'`` is treated as one ticker.
    String cells are parsed as ``'[f0,f1,f2,f3]'``: the surrounding characters
    are stripped and the rest is split on commas. The code reads ``f0`` as the
    short score, ``f2`` as the long score and ``f3`` as the return applied when
    the ticker is selected; ``f1`` is never read. Non-string cells (NaN, None,
    numbers) are ignored.

    Only rows whose index label is a multiple of ``skipping`` are evaluated;
    all other rows are dropped from the output. For an evaluated row:

    * a ticker joins the short basket when ``f0 >= threshold`` and the long
      basket when ``f2 >= threshold`` (it may join both);
    * each basket's return is the mean of ``f3`` over its members;
    * if the long basket is empty, or shorting is enabled and the short basket
      is empty, no trade happens: capital is carried over and both counts are
      recorded as 0;
    * otherwise, with shorting enabled, half the capital earns
      ``1 + long_return`` and half earns ``1 - short_return``; with shorting
      disabled all capital earns ``1 + long_return``.

    Args:
        _data: Frame with a ``'date'`` column plus one column per ticker.
        initial_capital: Capital at the start of the simulation.
        threshold: Minimum score for a ticker to enter a basket.
        logging: When true, trace the run through the notebook-level
            ``logger``. Note that inside this function the name shadows the
            ``logging`` module.
        skipping: Row stride; rows with ``index % skipping != 0`` are skipped.
        _enable_short: Whether the short leg is traded.

    Returns:
        dict of equal-length numpy arrays keyed ``'date'``, ``'capital'``,
        ``'long_count'`` and ``'short_count'``. Element 0 is a seed point dated
        one day before the first row, holding ``initial_capital`` and zero
        counts.

    Raises:
        ValueError: If ``_data`` has no rows.
    """
    if len(_data) == 0:
        raise ValueError('_data must contain at least one row')
    if logging:
        logger.info(f'-----------------------')
        logger.info(f'-----Start Logging-----')
        logger.info(f'-----------------------')
    # Positional lookup: a label lookup (`[0]`) raises KeyError on frames whose
    # index does not contain the label 0, e.g. after filtering or slicing.
    dates = [_data['date'].iloc[0] - pd.tseries.offsets.Day()]
    capitals = [initial_capital]
    long_counts = [0]
    short_counts = [0]
    current = initial_capital
    for index, row in _data.iterrows():
        if index % skipping != 0:
            continue
        s_return = 0
        s_count = 0
        l_return = 0
        l_count = 0
        for i, _v in row.items():
            if i == 'date':
                if logging:
                    logger.info(f'')
                    logger.info(f'---{_v}---')
                continue
            if not isinstance(_v, str):
                continue
            # parse array-format-string to actual array
            v = _v[1:-1].split(',')
            # short
            if float(v[0]) >= threshold:
                if logging:
                    logger.info(f'{i} is included in [Short] - p:{v[0]}, r:{v[3]} ')
                # incremental mean of the returns seen so far in this basket
                s_return = (s_return * s_count + float(v[3])) / (s_count + 1)
                s_count += 1
            # long
            if float(v[2]) >= threshold:
                if logging:
                    logger.info(f'{i} is included in [Long] - p:{v[2]}, r:{v[3]} ')
                l_return = (l_return * l_count + float(v[3])) / (l_count + 1)
                l_count += 1
        # skip if no short or long
        if l_count == 0 or (s_count == 0 and _enable_short):
            if logging:
                logger.info(f'Not doing anything: l/s count: {l_count} / {s_count}, _enable_short: {str(_enable_short)}')
                logger.info(f'Final Capital: {current}')
            dates.append(row['date'])
            capitals.append(current)
            # counts are reported as 0 because nothing was traded on this row
            long_counts.append(0)
            short_counts.append(0)
            continue
        if _enable_short:
            # capital is split evenly between the two legs
            l_capital = (current / 2) * (1 + l_return)
            s_capital = (current / 2) * (1 - s_return)
        else:
            l_capital = current * (1 + l_return)
            s_capital = 0
        if logging:
            logger.info(f'current: {current}')
            logger.info(f'l_return: {l_return}')
            logger.info(f's_return: {s_return}')
            logger.info(f'l_capital: {l_capital}')
            logger.info(f's_capital: {s_capital}')
        current = l_capital + s_capital
        dates.append(row['date'])
        capitals.append(current)
        long_counts.append(l_count)
        short_counts.append(s_count)
        if logging:
            logger.info(f'Final Capital: {current}')

    data = {
        'date': np.array(dates),
        'capital': np.array(capitals),
        'long_count': np.array(long_counts),
        'short_count': np.array(short_counts),
    }
    return data

In [None]:
# long-short
# Run the long/short simulation for every model CSV in ./portfolio, over a grid
# of thresholds, and write one chart plus one result CSV per (file, threshold).
MODEL_DATA_FOLDER_PATH = os.path.abspath('./portfolio')
initial_capital = 200000
enable_short = True
# Deliberately not named `logging`: that would rebind the notebook-level
# `logging` module imported earlier to a bool.
enable_logging = False

# Create the output folder up front so savefig/to_csv do not fail on a fresh checkout.
output_dir = os.path.join(MODEL_DATA_FOLDER_PATH, 'output')
os.makedirs(output_dir, exist_ok=True)

for source_file in tqdm(os.listdir(MODEL_DATA_FOLDER_PATH)):
    if not source_file.endswith('.csv'):
        continue
    file_name = source_file.replace('.csv', '')
    # Row stride handed to portfolio_analysis, chosen from the tag in the file name.
    skipping = 60 if 'r60ma' in file_name else 20 if 'r20ma' in file_name else 5
    df = pd.read_csv(os.path.join(MODEL_DATA_FOLDER_PATH, source_file), parse_dates=['date'])
    for threshold in [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8]:
        portfolio_result = portfolio_analysis(df, initial_capital, threshold, enable_logging, skipping, enable_short)
        ts = pd.Series(portfolio_result['capital'], index=portfolio_result['date'])
        output_stem = os.path.join(output_dir, file_name + '-' + str(threshold))
        ts.plot()
        plt.savefig(output_stem + '.png')
        # Clear the shared figure so the next threshold starts from an empty canvas.
        plt.clf()
        # NOTE: there is no separator between the threshold and 'portfolio_result';
        # kept as-is so existing output file names do not change.
        pd.DataFrame.from_dict(portfolio_result).to_csv(output_stem + 'portfolio_result.csv')
        

In [None]:
# long only
# Run the long-only simulation for the 'nikkei_mid_small_cap-i5r5*' model CSVs
# in ./portfolio, over a grid of thresholds, writing one chart plus one result
# CSV per (file, threshold).
MODEL_DATA_FOLDER_PATH = os.path.abspath('./portfolio')
initial_capital = 200000
enable_short = False
# Deliberately not named `logging`: that would rebind the notebook-level
# `logging` module imported earlier to a bool.
enable_logging = False

# Create the output folder up front so savefig/to_csv do not fail on a fresh checkout.
output_dir = os.path.join(MODEL_DATA_FOLDER_PATH, 'output')
os.makedirs(output_dir, exist_ok=True)

for source_file in tqdm(os.listdir(MODEL_DATA_FOLDER_PATH)):
    if not (source_file.endswith('.csv') and source_file.startswith('nikkei_mid_small_cap-i5r5')):
        continue
    file_name = source_file.replace('.csv', '')
    # Row stride handed to portfolio_analysis, chosen from the tag in the file name.
    skipping = 60 if 'r60ma' in file_name else 20 if 'r20ma' in file_name else 5
    df = pd.read_csv(os.path.join(MODEL_DATA_FOLDER_PATH, source_file), parse_dates=['date'])
    for threshold in [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8]:
        portfolio_result = portfolio_analysis(df, initial_capital, threshold, enable_logging, skipping, enable_short)
        ts = pd.Series(portfolio_result['capital'], index=portfolio_result['date'])
        output_stem = os.path.join(output_dir, file_name + '-' + str(threshold))
        ts.plot()
        plt.savefig(output_stem + '-long.png')
        # Clear the shared figure so the next threshold starts from an empty canvas.
        plt.clf()
        # NOTE: there is no separator between the threshold and 'portfolio_result_long';
        # kept as-is so existing output file names do not change.
        pd.DataFrame.from_dict(portfolio_result).to_csv(output_stem + 'portfolio_result_long.csv')
            
        

In [None]:
# hsi sector only
# long only
# Restrict each 'hsi-i5r5*' model CSV to the tickers of one sector, then run the
# long-only simulation over a grid of thresholds, writing one chart plus one
# result CSV per (file, threshold).
MODEL_DATA_FOLDER_PATH = os.path.abspath('./portfolio')
initial_capital = 200000
enable_short = False
# Deliberately not named `logging`: that would rebind the notebook-level
# `logging` module imported earlier to a bool.
enable_logging = False

sector_mapping = pd.read_csv('./hsi_sector_mapping.csv')
sector = 'Utilities'

# Single .loc call (row mask + column) instead of chained indexing.
tickers = sector_mapping.loc[sector_mapping['Merged Grouping'] == sector, 'Ticker'].to_list()
print(tickers)

# add the necessary column name
tickers = ['date', *tickers]

# Create the output folder up front so savefig/to_csv do not fail on a fresh checkout.
output_dir = os.path.join(MODEL_DATA_FOLDER_PATH, 'output')
os.makedirs(output_dir, exist_ok=True)

for source_file in tqdm(os.listdir(MODEL_DATA_FOLDER_PATH)):
    if not (source_file.endswith('.csv') and source_file.startswith('hsi-i5r5')):
        continue
    file_name = source_file.replace('.csv', '')
    # Row stride handed to portfolio_analysis, chosen from the tag in the file name.
    skipping = 60 if 'r60ma' in file_name else 20 if 'r20ma' in file_name else 5
    df = pd.read_csv(os.path.join(MODEL_DATA_FOLDER_PATH, source_file), parse_dates=['date'])
    # filtering non-sector column (raises KeyError if a mapped ticker is absent from the CSV)
    df = df[tickers]
    for threshold in [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8]:
        portfolio_result = portfolio_analysis(df, initial_capital, threshold, enable_logging, skipping, enable_short)
        ts = pd.Series(portfolio_result['capital'], index=portfolio_result['date'])
        output_stem = os.path.join(output_dir, file_name + '-' + str(threshold))
        ts.plot()
        plt.savefig(output_stem + '-long-' + sector + '.png')
        # Clear the shared figure so the next threshold starts from an empty canvas.
        plt.clf()
        # NOTE: there is no separator between the threshold and 'portfolio_result_long-';
        # kept as-is so existing output file names do not change.
        pd.DataFrame.from_dict(portfolio_result).to_csv(
            output_stem + 'portfolio_result_long-' + sector + '.csv')
        


In [None]:
# hsi sector only
# long short
# Restrict each 'hsi-i5r5*' model CSV to the tickers of one sector, then run the
# long/short simulation over a grid of thresholds, writing one chart plus one
# result CSV per (file, threshold).
MODEL_DATA_FOLDER_PATH = os.path.abspath('./portfolio')
initial_capital = 200000
enable_short = True
# Deliberately not named `logging`: that would rebind the notebook-level
# `logging` module imported earlier to a bool.
enable_logging = False

sector_mapping = pd.read_csv('./hsi_sector_mapping.csv')
sector = 'Real Estate'

# Single .loc call (row mask + column) instead of chained indexing.
tickers = sector_mapping.loc[sector_mapping['Merged Grouping'] == sector, 'Ticker'].to_list()

# add the necessary column name
tickers = ['date', *tickers]

# Create the output folder up front so savefig/to_csv do not fail on a fresh checkout.
output_dir = os.path.join(MODEL_DATA_FOLDER_PATH, 'output')
os.makedirs(output_dir, exist_ok=True)

for source_file in tqdm(os.listdir(MODEL_DATA_FOLDER_PATH)):
    if not (source_file.endswith('.csv') and source_file.startswith('hsi-i5r5')):
        continue
    file_name = source_file.replace('.csv', '')
    # Row stride handed to portfolio_analysis, chosen from the tag in the file name.
    skipping = 60 if 'r60ma' in file_name else 20 if 'r20ma' in file_name else 5
    df = pd.read_csv(os.path.join(MODEL_DATA_FOLDER_PATH, source_file), parse_dates=['date'])
    # filtering non-sector column (raises KeyError if a mapped ticker is absent from the CSV)
    df = df[tickers]
    for threshold in [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8]:
        portfolio_result = portfolio_analysis(df, initial_capital, threshold, enable_logging, skipping, enable_short)
        ts = pd.Series(portfolio_result['capital'], index=portfolio_result['date'])
        output_stem = os.path.join(output_dir, file_name + '-' + str(threshold))
        ts.plot()
        plt.savefig(output_stem + '-' + sector + '.png')
        # Clear the shared figure so the next threshold starts from an empty canvas.
        plt.clf()
        # NOTE: there is no separator between the threshold and 'portfolio_result-';
        # kept as-is so existing output file names do not change.
        pd.DataFrame.from_dict(portfolio_result).to_csv(
            output_stem + 'portfolio_result-' + sector + '.csv')
        

In [None]:
# use this to inspect one csv
# Runs a single simulation with tracing switched on, so every pick and capital
# update is written through the notebook-level logger, and saves the chart and
# result table into the working directory.
DATA_PATH = os.path.abspath('./portfolio/nifty_midcap_100-i20r5ma50-1720842029.csv')
df = pd.read_csv(DATA_PATH, parse_dates=['date'])
threshold = 0.5
# Deliberately not named `logging`: that would rebind the notebook-level
# `logging` module imported earlier to a bool.
enable_logging = True
skipping = 5
enable_short = False

portfolio_result = portfolio_analysis(df, 200000, threshold, enable_logging, skipping, enable_short)
ts = pd.Series(portfolio_result['capital'], index=portfolio_result['date'])
ts.plot();
plt.savefig('./inspecting_model.png')
plt.clf()
pd.DataFrame.from_dict(portfolio_result).to_csv('./inspecting_model_portfolio_result.csv')

In [None]:
# generate chart/data for index
# Builds a benchmark capital curve: CAPITAL scaled by each day's close relative
# to the first retained day's open, preceded by a seed point one day earlier
# holding CAPITAL itself.
index_path = '../../../dataset/market_data/output/kosdaq_150/INDEX_DAILY_KOSDAQ_150.csv'
output_name = 'kosdaq_150'
START_DATE = '2014-04-01'
CAPITAL = 200000
INDEX_OUTPUT_DIR = './index'

index_df = (
    pd.read_csv(index_path, parse_dates=['date'])
    .drop(columns=['adj_close', 'volume', 'high', 'low'])
    .dropna(how='any')
    .loc[lambda frame: frame['date'] > START_DATE]
    .sort_values(by=['date'], ascending=True)
    .reset_index(drop=True)
)
if index_df.empty:
    # Without this guard the first-row lookups below fail with an opaque error.
    raise ValueError(f'no index rows after {START_DATE} in {index_path}')

base = index_df['open'].iloc[0]

# Vectorised equivalent of appending row by row: same values, same order.
dates = [index_df['date'].iloc[0] - pd.tseries.offsets.Day(), *index_df['date'].tolist()]
capitals = [float(CAPITAL), *(CAPITAL * index_df['close'] / base).astype(float).tolist()]

# Create the output folder up front so to_csv/savefig do not fail on a fresh checkout.
os.makedirs(INDEX_OUTPUT_DIR, exist_ok=True)

pd.DataFrame.from_dict({
    'date': dates,
    'capital': capitals
}).to_csv(os.path.join(INDEX_OUTPUT_DIR, output_name + '.csv'))

ts = pd.Series(np.array(capitals), index=np.array(dates))
ts.plot()
plt.savefig(os.path.join(INDEX_OUTPUT_DIR, output_name + '.png'))
plt.clf()