In [None]:
import numpy as np
import pandas as pd
import os
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

In [None]:
import time
import logging
# The epoch-second timestamp is baked into the log file name, so each
# kernel start that reaches this cell targets a distinct file.
time_now = int(time.time())
logging.basicConfig(
    filename='portfolio_analysys.{}.log'.format(time_now),
    encoding='utf-8',
    level=logging.DEBUG,
)
# NOTE(review): basicConfig() is a no-op when the root logger already has
# handlers (e.g. this cell is re-run) - confirm whether force=True is wanted.
# The root logger is used, so DEBUG records from every library land in the file.
logger = logging.getLogger()

In [None]:
# Loop dataframe to get portfolio result
def _average_side_returns(row, threshold, log_enabled):
    """Average the return field of every column selected for each side of one row.

    Every string cell other than 'date' is expected to look like '[a,b,c,d]'.
    Field 0 is compared against ``threshold`` to select the column for the short
    side, field 2 for the long side, and field 3 is the value that gets averaged.
    (Presumably fields 0/2 are model probabilities and field 3 the realised
    return - verify against the code that produces the CSV.)

    Returns:
        Tuple ``(l_return, l_count, s_return, s_count)``.

    Raises:
        IndexError / ValueError: if a string cell has too few fields or a
            needed field is not numeric (same as before; not swallowed).
    """
    s_return = 0
    s_count = 0
    l_return = 0
    l_count = 0
    for column, cell in row.items():
        if column == 'date':
            if log_enabled:
                logger.info(f'')
                logger.info(f'---{cell}---')
            continue
        # Non-string cells (e.g. NaN for a missing prediction) carry no signal.
        if not isinstance(cell, str):
            continue
        # parse array-format-string to actual array (strip the surrounding brackets)
        fields = cell[1:-1].split(',')
        # short
        if float(fields[0]) >= threshold:
            if log_enabled:
                logger.info(f'{column} is included in [Short] - p:{fields[0]}, r:{fields[3]} ')
            # incremental mean keeps the result numerically identical to the old code
            s_return = (s_return * s_count + float(fields[3])) / (s_count + 1)
            s_count += 1
        # long
        if float(fields[2]) >= threshold:
            if log_enabled:
                logger.info(f'{column} is included in [Long] - p:{fields[2]}, r:{fields[3]} ')
            l_return = (l_return * l_count + float(fields[3])) / (l_count + 1)
            l_count += 1
    return l_return, l_count, s_return, s_count


def portfolio_analysis(
        _data: pd.DataFrame,
        initial_capital = 200000,
        threshold = 0.7,
        logging = False,
        skipping = 5,
        enable_short = True,
    ) -> dict:
    """Simulate the capital curve obtained by trading every ``skipping``-th row.

    Args:
        _data: frame with a 'date' column plus one column per instrument whose
            string cells are parsed by ``_average_side_returns``.
        initial_capital: starting capital; it is the first point of the curve.
        threshold: minimum value of field 0 / field 2 for a column to be
            included in the short / long side.
        logging: when true, progress is written through the module-level
            ``logger``. (The name shadows the ``logging`` module inside this
            function; it is kept for backward compatibility.)
        skipping: only rows whose position is a multiple of this value are traded.
        enable_short: when true, capital is split half long / half short and a
            row is traded only if both sides are non-empty; when false, all
            capital goes long.

    Returns:
        Dict with equally long numpy arrays under 'date' and 'capital'. The
        first entry is the day before the first row paired with
        ``initial_capital``. Both arrays are empty when ``_data`` has no rows.
    """
    if logging:
        logger.info(f'-----------------------')
        logger.info(f'-----Start Logging-----')
        logger.info(f'-----------------------')
    # No rows means there is no first date to anchor the curve on.
    if len(_data) == 0:
        return {'date': np.array([]), 'capital': np.array([])}
    # .iloc: first row by position; a plain [0] is a label lookup and raises
    # KeyError when the index does not contain the label 0.
    dates = [_data['date'].iloc[0] - pd.tseries.offsets.Day()]
    capitals = [initial_capital]
    current = initial_capital
    # Step by row position, not index label, so the cadence is still right for
    # frames whose index is not a 0-based RangeIndex.
    for position, (_, row) in enumerate(_data.iterrows()):
        if position % skipping != 0:
            continue
        l_return, l_count, s_return, s_count = _average_side_returns(row, threshold, logging)
        # skip if no short or long
        if l_count == 0 or (s_count == 0 and enable_short):
            if logging:
                logger.info(f'Not doing anything: l/s count: {l_count} / {s_count}, enable_short: {str(enable_short)}')
                logger.info(f'Final Capital: {current}')
            dates.append(row['date'])
            capitals.append(current)
            continue
        if enable_short:
            # Half of the capital on each side; the short leg gains when returns fall.
            l_capital = (current / 2) * (1 + l_return)
            s_capital = (current / 2) * (1 - s_return)
        else:
            l_capital = current * (1 + l_return)
            s_capital = 0
        if logging:
            logger.info(f'current: {current}')
            logger.info(f'l_return: {l_return}')
            logger.info(f's_return: {s_return}')
            logger.info(f'l_capital: {l_capital}')
            logger.info(f's_capital: {s_capital}')
        current = l_capital + s_capital
        dates.append(row['date'])
        capitals.append(current)
        if logging:
            logger.info(f'Final Capital: {current}')

    data = {
        'date': np.array(dates),
        'capital': np.array(capitals),
    }
    return data

In [None]:
MODEL_DATA_FOLDER_PATH = os.path.abspath('./portfolio')
# Plots and result CSVs go to this sub-folder. Create it up front so the first
# savefig()/to_csv() does not fail on a fresh checkout.
OUTPUT_FOLDER_PATH = os.path.join(MODEL_DATA_FOLDER_PATH, 'output')
os.makedirs(OUTPUT_FOLDER_PATH, exist_ok=True)

# Sorted so the processing order does not depend on the file system.
for source_file in tqdm(sorted(os.listdir(MODEL_DATA_FOLDER_PATH))):
    if not source_file.endswith('.csv'):
        continue
    # splitext removes only the trailing extension; replace() would also strip
    # a '.csv' that appears in the middle of the name.
    file_name = os.path.splitext(source_file)[0]
    print('Current in: ' + file_name)
    # Rebalancing step is derived from a tag in the file name; 5 rows otherwise.
    skipping = 60 if 'r60ma' in file_name else 20 if 'r20ma' in file_name else 5
    df = pd.read_csv(os.path.join(MODEL_DATA_FOLDER_PATH, source_file), parse_dates=['date'])
    for threshold in [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8]:
        portfolio_result = portfolio_analysis(df, 200000, threshold, True, skipping, True)
        ts = pd.Series(portfolio_result['capital'], index=portfolio_result['date'])
        output_prefix = os.path.join(OUTPUT_FOLDER_PATH, file_name + '-' + str(threshold))
        # Explicit figure per plot, closed right after saving, so figures do not
        # pile up or bleed into each other across iterations.
        fig, ax = plt.subplots()
        ts.plot(ax=ax)
        fig.savefig(output_prefix + '.png')
        plt.close(fig)
        # File name kept exactly as before (no separator before 'portfolio_result')
        # so existing consumers of the output folder keep working.
        pd.DataFrame.from_dict(portfolio_result).to_csv(output_prefix + 'portfolio_result.csv')
        

In [None]:
# Single-file inspection: run the analysis on one chosen CSV at one threshold.
DATA_PATH = os.path.abspath('./portfolio/hsi-i60r60ma50-1720459342.csv')
threshold = 0.7
df = pd.read_csv(DATA_PATH, parse_dates=['date'])

# Logging is on and only every 60th row is traded.
portfolio_result = portfolio_analysis(
    df,
    initial_capital=200000,
    threshold=threshold,
    logging=True,
    skipping=60,
)

# Capital curve indexed by date, saved as a picture next to the notebook.
ts = pd.Series(portfolio_result['capital'], index=portfolio_result['date'])
ts.plot()
plt.savefig('./inspecting_model.png')
plt.clf()

# The same result as a table.
pd.DataFrame(portfolio_result).to_csv('./inspecting_model_portfolio_result.csv')