# Portfolio

### Setup

In [None]:
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from tqdm.notebook import tqdm
from torchvision.transforms import transforms

In [None]:
# configs
# Every relative path below is turned into an absolute one with os.path.abspath,
# i.e. it is resolved against the current working directory when this cell runs.

# Device used both for building the model and as map_location when loading the checkpoint.
device = torch.device('cpu')
# Checkpoint file; load_model reads its 'model_state_dict' entry.
MODEL_PTH_PATH = os.path.abspath('../../../dataset/nikkei_mid_small_cap-i5r5ma50/nikkei_mid_small_cap-i5r5ma50-1719447843.pth')
# Folder of input images, looked up as '<stock>_<YYYY-MM-DD>.png'.
IMAGE_FOLDER = os.path.abspath('../../../dataset/nikkei_mid_small_cap-i5r5ma50/input')
# Folder of per-stock return tables, looked up as 'RETURN_<stock>.pkl'.
RETURN_FOLDER = os.path.abspath('../../../dataset/nikkei_mid_small_cap-i5r5ma50/return')

# Folder whose 'CONSTITUENTS_DAILY_<ticker>.csv' file names provide the stock tickers;
# it also contains the index CSV named by INDEX_DATA.
RAW_DATA_FOLDER_PATH = os.path.abspath('../../../dataset/market_data/output/nikkei_mid_small_cap')
# Index CSV whose 'date' column provides the list of dates to evaluate.
INDEX_DATA = 'INDEX_DAILY_NIKKEI_MID_SMALL_CAP.csv'
# Only dates strictly after this day are kept.
START_DATE = '2014-04-01'
# Column of the return tables that is appended to each prediction
# (presumably the 5-day return, matching the 'r5' in the dataset name - TODO confirm).
RETURN_DATA_TYPE = 'r5'


In [None]:
# load model and predict function

# copy from cnn_model
class I5_CNNet(nn.Module):
    """Two-stage CNN that maps a single-channel image to three probabilities.

    Each stage is Conv2d (5x3 kernel, padding (2, 1), stride 1) -> BatchNorm2d
    -> leaky ReLU -> MaxPool2d((2, 1)), so a stage keeps the width and halves
    the height. The pooled maps are flattened, passed through dropout (p=0.5)
    and a linear layer that expects 15360 features, i.e. 128 * (H // 4) * W
    (for example a 1x32x15 input). The output is soft-maxed over dim 1.

    The attribute names (conv1, conv2, bn1, bn2, fc) double as the state_dict
    keys, so they must not be renamed if saved checkpoints are to keep loading.
    """

    def __init__(self):
        super().__init__()
        # Both convolutions share the same geometry; only the channel counts differ.
        conv_geometry = dict(kernel_size=(5, 3), padding=(2, 1), stride=(1, 1), dilation=(1, 1))
        self.conv1 = nn.Conv2d(1, 64, **conv_geometry)
        self.conv2 = nn.Conv2d(64, 128, **conv_geometry)
        self.bn1 = nn.BatchNorm2d(64, affine=True)
        self.bn2 = nn.BatchNorm2d(128, affine=True)
        self.pool = nn.MaxPool2d((2, 1))
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(15360, 3)
        # Re-initialise the learnable layers in a fixed order (conv1, conv2, fc).
        for layer in (self.conv1, self.conv2, self.fc):
            self.init_weights(layer)

    def init_weights(self, m):
        """Xavier-uniform the weight and set every bias entry to 0.01.

        Modules that are neither nn.Linear nor nn.Conv2d are left untouched.
        """
        if not isinstance(m, (nn.Linear, nn.Conv2d)):
            return
        nn.init.xavier_uniform_(m.weight)
        nn.init.constant_(m.bias, 0.01)

    def forward(self, x):
        """Return a (batch, 3) tensor of soft-max probabilities for batch `x`."""
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            x = self.pool(F.leaky_relu(norm(conv(x))))
        # Flatten everything except the batch dimension before the classifier.
        flat = x.view(x.size(0), -1)
        logits = self.fc(self.dropout(flat))
        return F.softmax(logits, dim=1)

def load_model(
        _model_pth_path: str,
    ):
    """Build an I5_CNNet on `device` and restore its weights from a checkpoint.

    The checkpoint at `_model_pth_path` must be a mapping with a
    'model_state_dict' entry. The network is returned in eval mode.
    """
    network = I5_CNNet().to(device)
    checkpoint = torch.load(_model_pth_path, map_location=device)
    weights = checkpoint['model_state_dict']
    network.load_state_dict(weights)
    # eval() disables dropout and makes batch-norm use its running statistics,
    # so the same image always yields the same prediction.
    return network.eval()

def predict(
        _model,
        _image,
        _transformer,
    ):
    """Run `_model` on a single image and return the first output row as a list.

    Args:
        _model: callable applied to the batched tensor. With I5_CNNet the
            result is the three class probabilities [p|0, p|1, p|2].
        _image: object handed unchanged to `_transformer`.
        _transformer: callable turning `_image` into a tensor without a batch
            dimension.

    Returns:
        `output.tolist()[0]`, i.e. the model output for the single image as a
        plain Python list.
    """
    with torch.no_grad():
        # Out-of-place unsqueeze: never mutate the tensor owned by the
        # transformer (it may be cached or shared by the caller).
        _image_tensor = _transformer(_image).unsqueeze(0)
        # Feed the input on the same device as the model's weights so the call
        # keeps working when the model is not on the CPU. Plain callables
        # without parameters() are used as-is.
        parameters = getattr(_model, 'parameters', None)
        first_param = next(iter(parameters()), None) if callable(parameters) else None
        if first_param is not None:
            _image_tensor = _image_tensor.to(first_param.device)
        output = _model(_image_tensor)
    return output.tolist()[0]


### Generate Portfolio Raw Data

In [None]:
# 1. build the date table: read the index CSV, keep rows strictly after START_DATE,
#    keep only the column(s) whose name matches 'date', then sort ascending and
#    renumber the rows from 0

df = (
    pd.read_csv(RAW_DATA_FOLDER_PATH + "/" + INDEX_DATA, parse_dates=['date'])
    .loc[lambda frame: frame['date'] > START_DATE]
    .filter(regex='date')
    .sort_values(by=['date'], ascending=True)
    .reset_index(drop=True)
)
df.head(3)

In [None]:
# 2. derive the ticker list from the raw data file names:
#    'CONSTITUENTS_DAILY_<ticker>.csv' -> '<ticker>'
#    (the index file is excluded explicitly, although the CONSTITUENTS prefix
#    check should already filter it out)
stocks = [
    file_name.replace('CONSTITUENTS_DAILY_', '').replace('.csv', '')
    for file_name in os.listdir(RAW_DATA_FOLDER_PATH)
    if file_name != INDEX_DATA and file_name.startswith('CONSTITUENTS')
]

print("Total stocks: #" + str(len(stocks)))
print("eg: " + str(stocks[0:3]))

In [None]:
# 3. loop stocks and get prediction using the trained model
#
# For every stock a column is added to df. Each cell is either
# [p|0, p|1, p|2, return] (model output plus the value of the RETURN_DATA_TYPE
# column for that date) or None when the image or the return value is missing.

model = load_model(MODEL_PTH_PATH)
transformer = transforms.Compose([transforms.Grayscale(), transforms.ToTensor()])

data = {}

for stock in tqdm(stocks):
    try:
        return_df = pd.read_pickle(RETURN_FOLDER + '/RETURN_' + stock + '.pkl')
    except Exception:
        # `except Exception` instead of a bare `except` keeps this best-effort
        # (any load failure skips the stock) while still letting
        # KeyboardInterrupt / SystemExit stop the run.
        print("Return data not found: " + stock)
        continue
    results = []
    for date in tqdm(df['date']):
        day = str(date.date())
        try:
            # Context manager: release the image file handle right after use
            # instead of relying on garbage collection across many files.
            with Image.open(IMAGE_FOLDER + f'/{stock}_{day}.png') as image:
                prediction = predict(model, image, transformer)
            return_value = return_df.loc[return_df['date'] == day][RETURN_DATA_TYPE]
            # iloc[0] raises when the date has no return row -> handled below.
            prediction.append(float(return_value.iloc[0]))
            results.append(prediction)
        except Exception:
            # Keep one entry per date so the column stays aligned with df.
            print('error on: ' + stock + ' ' + day)
            results.append(None)
    data[stock] = results

df = df.assign(**data)

df.to_csv('out.csv') # TODO: update name


In [None]:
# Sanity check: print the first 10 rows of the table built above
# (the 'date' column plus one column per stock).
print(df.head(10))

### Portfolio Testing

In [None]:
# Loop dataframe to get portfolio result

# TODO: import the df to save time rerunning the code

def portfolio_analysis(
        _data: pd.DataFrame,
        initial_capital = 200000,
        threshold = 0.7,
    ):
    """Simulate an equal-split long/short portfolio over the prediction table.

    Every column except 'date' is expected to hold, per row, either a list
    `[p0, p1, p2, realised_return]` or a non-list placeholder (ignored).
    For each row:

    * stocks with `p0 >= threshold` form the short basket,
    * stocks with `p2 >= threshold` form the long basket,
    * the return of a basket is the plain average of its members' returns,
    * half of the capital earns the long return and the other half earns the
      negated short return.

    Rows where either basket is empty are skipped, leaving the capital
    unchanged. Entries whose realised return is NaN are ignored, because a
    single NaN would otherwise turn the capital into NaN for good.

    NOTE(review): the capital is compounded on every traded row. If the return
    column is a multi-day return, consecutive rows overlap - confirm that this
    is intended.

    Args:
        _data: table with a 'date' column and one column per stock.
        initial_capital: starting capital.
        threshold: minimum probability for a stock to enter a basket.

    Returns:
        The capital after the last row.
    """
    import math  # local import: this notebook cell does not import math itself

    current = initial_capital
    for _, row in tqdm(_data.iterrows(), total=len(_data)):
        short_returns = []
        long_returns = []
        for column, v in row.items():
            if column == 'date' or not isinstance(v, list):
                continue
            realised = v[3]
            if math.isnan(realised):
                # Unknown outcome: cannot be traded in this back-test.
                continue
            # short
            if v[0] >= threshold:
                short_returns.append(realised)
            # long
            if v[2] >= threshold:
                long_returns.append(realised)
        # Trade only when BOTH baskets have at least one stock.
        if not short_returns or not long_returns:
            continue
        l_return = sum(long_returns) / len(long_returns)
        s_return = sum(short_returns) / len(short_returns)
        l_capital = (current / 2) * (1 + l_return)
        s_capital = (current / 2) * (1 - s_return)
        current = l_capital + s_capital
        print('doing trade on ' + str(row['date']))
        print('current: ' + str(current))

    return current

# Run the back-test on the prediction table with the default initial_capital
# and threshold; the call evaluates to the final capital.
portfolio_analysis(df)