In [10]:
import numpy as np
import pandas as pd
import gym
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score, accuracy_score

from datetime import datetime, timedelta

# Load the closing-price table; the 'Dates' column is converted to datetimes
# and then promoted to the index.
daily_prices = (
    pd.read_csv('../data/Untitled Folder/price.csv', index_col=0)
    .assign(Dates=lambda frame: pd.to_datetime(frame['Dates']))
    .set_index('Dates')
)

# Column read from the per-stock files when the price columns are replaced
# (alternative previously tried: "PX_LAST").
LABEL = "PX_OPEN"
LABEL_ADJUSTED = f'{LABEL}_ADJUSTED'

# Year whose 1 January separates the train window from the eval window
# (alternative previously tried: 2010).
SPLIT_YEAR = 2018

# Sub-folder of ../data/directions holding the prediction files to score.
# Alternatives previously tried: f'{SPLIT_YEAR}split_open_prices',
# f'{SPLIT_YEAR}split', "olivier".
name = f'{SPLIT_YEAR}split_open_prices_with_sp'

In [11]:
symbols = [
    "AMM", "CIMB", "DIGI", "GAM", "GENM", "GENT", "HLBK", "IOI", "KLK", "MAY",
    "MISC", "NESZ", "PBK", "PEP", "PETD", "PTG", "RHBBANK", "ROTH", "T", "TNB",
]

# Overwrite each symbol's column in daily_prices with the LABEL series from
# that symbol's own file. Assignment aligns on the date index, so only dates
# already present in daily_prices receive values.
for stock in symbols:
    daily_prices[stock] = (
        pd.read_csv(f'../data/Day Data with Volatility/{stock} MK Equity.csv')
        .assign(Dates=lambda frame: pd.to_datetime(frame['Dates']))
        .set_index('Dates')[LABEL]
    )

In [12]:
# Directional accuracy of the saved predictions, reported per symbol for the
# train window and then for the eval window.
#
# NOTE: pandas label slices include BOTH endpoints, so a row dated exactly
# SPLIT_YEAR-01-01 (if one exists) is scored in both windows.
_PRICE_START = datetime(2000, 4, 24)
_SPLIT_DATE = datetime(SPLIT_YEAR, 1, 1)
_EVAL_END = datetime(2020, 1, 1)


def _load_directions(sym):
    """Read the predictions file for `sym` and index it by its 'Dates' column."""
    directions = pd.read_csv(f'../data/directions/{name}/Directions {sym}.csv')
    directions['Dates'] = pd.to_datetime(directions['Dates'])
    return directions.set_index('Dates')


def _direction_accuracy(sym, directions, start, end, price_start):
    """Fraction of days on which sign(AVG) matches the sign of the price change.

    Parameters
    ----------
    sym : column label in `daily_prices` to score.
    directions : DataFrame from `_load_directions(sym)`; must contain 'AVG'.
    start, end : inclusive date bounds of the prediction window
        (`start=None` means "from the first available row").
    price_start : inclusive lower bound applied to `daily_prices` before the
        prediction dates are looked up in it.

    Returns
    -------
    float : `accuracy_score` over the days whose price actually changed.

    Raises
    ------
    KeyError : if a prediction date is absent from the price window.
    """
    window = directions.loc[start:end]

    # Look prices up on exactly the prediction dates: the predictions file
    # lacks a few trading days that the price table has.
    prices = daily_prices.loc[price_start:end].loc[window.index, sym].values

    # Return between consecutive rows; the first row has no predecessor, so
    # both series are one element shorter than the window.
    actual = prices[1:] / prices[:-1] - 1
    predicted = window['AVG'].values[1:]

    # Days with no price change have no direction and are excluded. After this
    # filter the true labels are only -1/+1, so a prediction of exactly 0
    # (np.sign(0) == 0) is always counted as a miss.
    moved = actual != 0
    return accuracy_score(np.sign(actual[moved]), np.sign(predicted[moved]))


def _report_accuracy(title, directions_by_symbol, start, end, price_start):
    """Print `title`, a rule, then one accuracy per symbol in dict order."""
    print(title)
    print("="*60)
    for sym, directions in directions_by_symbol.items():
        print(_direction_accuracy(sym, directions, start, end, price_start))


# Parse every predictions file once and reuse it for both windows.
_directions_by_symbol = {sym: _load_directions(sym) for sym in daily_prices}

_report_accuracy("TRAIN PREDICTIONS: model acc", _directions_by_symbol,
                 None, _SPLIT_DATE, _PRICE_START)

print("\n\n")
_report_accuracy("EVAL PREDICTIONS: model acc", _directions_by_symbol,
                 _SPLIT_DATE, _EVAL_END, _SPLIT_DATE)


TRAIN PREDICTIONS: model acc
0.4100052938062467
0.42080956261885355
0.4217005439450329
0.4172901080631754
0.4053760521314146
0.39741170126718794
0.4296429642964296
0.4088752442087636
0.4461361677678828
0.40573414422241527
0.44089732528041414
0.4451966473243069
0.41408114558472553
0.4075856212850269
0.43764501160092806
0.448938611589214
0.4125235151840903
0.40550724637681157
0.42897808141189864
0.4251082251082251



EVAL PREDICTIONS: model acc
0.40714285714285714
0.39518072289156625
0.4305555555555556
0.4018475750577367
0.4099526066350711
0.38963963963963966
0.44819819819819817
0.4386792452830189
0.43875278396436523
0.38028169014084506
0.44269662921348313
0.43537414965986393
0.4
0.4666666666666667
0.47098214285714285
0.42437923250564336
0.417607223476298
0.3969298245614035
0.38850574712643676
0.41935483870967744
