In [1]:
import pandas as pd
import numpy as np
import os, itertools
os.environ['MPLCONFIGDIR'] = os.getcwd() + "/configs/"
import matplotlib.pyplot as plt

import datetime
from sklearn.preprocessing import MinMaxScaler
from collections import Counter

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
IMAGE_SIZE = 50

In [3]:
# current working directory from which notebook is located
cur_dir = os.getcwd()
# the data is located in this data_dir
data_dir = os.path.join(cur_dir, 'data/3k')
images_dir = os.path.join(data_dir, 'images')

print("Notebook directory: ", cur_dir)
print("Images directory: ", images_dir)

Notebook directory:  /shared
Images directory:  /shared/data/3k/images


In [4]:
symbols_TSX = pd.read_csv('symbols_TSX.csv', header=None)
symbols_TSX.columns = ['Name']
symbol_list = symbols_TSX['Name'].tolist()
print('Stock symbol_list length: ', len(symbol_list))

Stock symbol_list length:  93


In [5]:
def db_one_symbol(symbol, lowest_date, highest_date):
    query = f"SELECT * FROM daily_price WHERE symbol='{symbol}' "
    df = pd.read_sql(query,engine)
    df.columns = ['Date', 'Symbol', 'Adj_close', 'Close', 'High', 'Low', 'Open','Volume']
    df['Date'] = pd.to_datetime(df['Date']).dt.date
    df.set_index(['Date'],inplace=True)
    df = df.sort_index()
    df.drop(['Symbol'], axis=1, inplace=True)
    df = df.loc[lowest_date:highest_date]
    return df


In [6]:
# REQUIREMENT:  TA-LIB Package to be installed AS ROOT
# https://github.com/mrjbq7/ta-lib

# docker exec --user="root" -it <container_name> /bin/bash
#!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
#!tar -xzf ta-lib-0.4.0-src.tar.gz
# !cd ta-lib/
# !./configure --prefix=/usr
# !make
# !make install
# !pip install ta-lib

In [7]:
def calculate_log_return_series(series:pd.Series, t) -> pd.Series:
    """
    calculate  log returns over past timeperiod t in days
    """
    shifted_series = series.shift(t, axis=0)
    return pd.Series(np.log(series / shifted_series))

def calculate_DollarValue(price:pd.Series, volume:pd.Series) -> pd.Series:
    DollarVolume = price * volume /1000000
    return pd.Series(DollarVolume)


In [8]:
import talib as tb

def _create_ta_indicators(data):
    '''Calculating the features ie. Technical Indicator Values
        Parameters: pandas price dataframe in OHLCV column names '''

    imageList = []
    labelList = []
    axes = ['Date', 'Value']
   
    rsi = tb.RSI(data["Close"], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    wma = tb.WMA(data["Close"], timeperiod=30).to_frame().reset_index().set_axis(axes, axis=1)
    ema = tb.EMA(data["Close"], timeperiod=30).to_frame().reset_index().set_axis(axes, axis=1)
    sma = tb.SMA(data["Close"], timeperiod=30).to_frame().reset_index().set_axis(axes, axis=1)
    roc = tb.ROC(data["Close"], timeperiod=10).to_frame().reset_index().set_axis(axes, axis=1)
    cmo = tb.CMO(data["Close"], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    cci = tb.CCI(data["High"], data["Low"], data["Close"], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    ppo = tb.PPO(data["Close"], fastperiod=12, slowperiod=26, matype=0).to_frame().reset_index().set_axis(axes, axis=1)
    tema = tb.TEMA(data["Close"], timeperiod=30).to_frame( ).reset_index().set_axis(axes, axis=1)
    willr = tb.WILLR(data["High"], data["Low"], data["Close"], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    macd, macdsignal, macdhist = tb.MACD( data["Close"], fastperiod=12, slowperiod=26, signalperiod=9)
    macd = macd.to_frame().reset_index().set_axis(axes, axis=1)
    sar = tb.SAR(data["High"], data["Low"], acceleration=0, maximum=0).to_frame().reset_index().set_axis(axes, axis=1)
    adx = tb.ADX(data["High"], data["Low"], data["Close"], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    std = tb.STDDEV(data['Close'], timeperiod=5, nbdev=1).to_frame( ).reset_index().set_axis(axes, axis=1)
    obv = tb.OBV(data['Close'], data['Volume']).to_frame( ).reset_index().set_axis(axes, axis=1)
    adxr = tb.ADXR(data["High"], data["Low"], data["Close"], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    apo = tb.APO(data['Close'], fastperiod=12, slowperiod=26, matype=0).to_frame().reset_index().set_axis(axes, axis=1)
    aroondown, aroonup = tb.AROON(data["High"], data["Low"], timeperiod=14)
    aroondown = aroondown.to_frame().reset_index().set_axis(axes, axis=1)
    aroonup = aroonup.to_frame().reset_index().set_axis(axes, axis=1)
    aroonosc = tb.AROONOSC(data["High"], data["Low"], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    bop = tb.BOP(data["Open"], data["High"], data["Low"], data["Close"]).to_frame( ).reset_index().set_axis(axes, axis=1)
    dx = tb.DX(data["High"], data["Low"], data["Close"], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    macdext, macdextsignal, macdexthist = tb.MACDEXT( data["Close"], fastperiod=12, fastmatype=0, slowperiod=26, slowmatype=0, signalperiod=9, signalmatype=0)
    macdext = macdext.to_frame().reset_index().set_axis(axes, axis=1)
    macdfix, macdfixsignal, macdfixhist = tb.MACDFIX( data["Close"], signalperiod=9)
    macdfix = macdfix.to_frame().reset_index().set_axis(axes, axis=1)
    mfi = tb.MFI(data["High"], data["Low"], data["Close"], data["Volume"], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    minus_di = tb.MINUS_DI(data["High"], data["Low"], data["Close"], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    minus_dm = tb.MINUS_DM(data["High"], data["Low"], timeperiod=14).to_frame( ).reset_index().set_axis(axes, axis=1)
    mom = tb.MOM(data["Close"], timeperiod=10).to_frame( ).reset_index().set_axis(axes, axis=1)
    plus_di = tb.PLUS_DI(data["High"], data["Low"], data["Close"], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    plus_dm = tb.PLUS_DM(data["High"], data["Low"], timeperiod=14).to_frame( ).reset_index().set_axis(axes, axis=1)
    rocp = tb.ROCP(data["Close"], timeperiod=10).to_frame( ).reset_index().set_axis(axes, axis=1)
    rocr = tb.ROCR(data["Close"], timeperiod=10).to_frame( ).reset_index().set_axis(axes, axis=1)
    rocr100 = tb.ROCR100(data["Close"], timeperiod=10).to_frame( ).reset_index().set_axis(axes, axis=1)
    slowk, slowd = tb.STOCH(data["High"], data["Low"], data["Close"], fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
    slowk = slowk.to_frame().reset_index().set_axis(axes, axis=1)
    slowd = slowd.to_frame().reset_index().set_axis(axes, axis=1)
    fastk, fastd = tb.STOCHF( data["High"], data["Low"], data["Close"], fastk_period=5, fastd_period=3, fastd_matype=0)
    fastk = fastk.to_frame().reset_index().set_axis(axes, axis=1)
    fastd = fastd.to_frame().reset_index().set_axis(axes, axis=1)
    fastkrsi, fastdrsi = tb.STOCHRSI( data["Close"], timeperiod=14, fastk_period=5, fastd_period=3, fastd_matype=0)
    fastkrsi = fastkrsi.to_frame().reset_index().set_axis(axes, axis=1)
    fastdrsi = fastdrsi.to_frame().reset_index().set_axis(axes, axis=1)
    trix = tb.TRIX(data["Close"], timeperiod=30).to_frame( ).reset_index().set_axis(axes, axis=1)
    ultosc = tb.ULTOSC(data["High"], data["Low"], data["Close"], timeperiod1=7, timeperiod2=14, timeperiod3=28).to_frame().reset_index().set_axis(axes, axis=1)
    bbands_upperband, bbands_middleband, bbands_lowerband = tb.BBANDS( data['Close'], timeperiod=5, nbdevup=2, nbdevdn=2, matype=0)
    bbands_upperband = bbands_upperband.to_frame().reset_index().set_axis(axes, axis=1)
    bbands_middleband = bbands_middleband.to_frame().reset_index().set_axis(axes, axis=1)
    bbands_lowerband = bbands_lowerband.to_frame().reset_index().set_axis(axes, axis=1)
    dema = tb.DEMA(data['Close'], timeperiod=30).to_frame( ).reset_index().set_axis(axes, axis=1)
    ht_trendline = tb.HT_TRENDLINE( data['Close']).to_frame().reset_index().set_axis(axes, axis=1)
    kama = tb.KAMA(data['Close'], timeperiod=30).to_frame( ).reset_index().set_axis(axes, axis=1)
    ma = tb.MA(data['Close'], timeperiod=30, matype=0).to_frame( ).reset_index().set_axis(axes, axis=1)
    midpoint = tb.MIDPOINT(data['Close'], timeperiod=14).to_frame( ).reset_index().set_axis(axes, axis=1)
    midprice = tb.MIDPRICE(data["High"], data["Low"], timeperiod=14).to_frame( ).reset_index().set_axis(axes, axis=1)
    sarext = tb.SAREXT(data["High"], data["Low"], startvalue=0, offsetonreverse=0, accelerationinitlong=0, accelerationlong=0, accelerationmaxlong=0, accelerationinitshort=0, accelerationshort=0, accelerationmaxshort=0).to_frame().reset_index().set_axis(axes, axis=1)
    trima = tb.TRIMA(data['Close'], timeperiod=30).to_frame( ).reset_index().set_axis(axes, axis=1)
    ad = tb.AD(data["High"], data["Low"], data['Close'], data['Volume']).to_frame().reset_index().set_axis(axes, axis=1)
    adosc = tb.ADOSC(data["High"], data["Low"], data['Close'], data['Volume'], fastperiod=3, slowperiod=10).to_frame().reset_index().set_axis(axes, axis=1)
    trange = tb.TRANGE(data["High"], data["Low"], data['Close']).to_frame( ).reset_index().set_axis(axes, axis=1)
    avgprice = tb.AVGPRICE(data['Open'], data["High"], data["Low"], data['Close']).to_frame().reset_index().set_axis(axes, axis=1)
    medprice = tb.MEDPRICE(data["High"], data["Low"]).to_frame( ).reset_index().set_axis(axes, axis=1)
    typprice = tb.TYPPRICE(data["High"], data["Low"], data['Close']).to_frame( ).reset_index().set_axis(axes, axis=1)
    wclprice = tb.WCLPRICE(data["High"], data["Low"], data['Close']).to_frame( ).reset_index().set_axis(axes, axis=1)
    beta = tb.BETA(data["High"], data["Low"], timeperiod=5).to_frame( ).reset_index().set_axis(axes, axis=1)
    correl = tb.CORREL(data["High"], data["Low"], timeperiod=30).to_frame( ).reset_index().set_axis(axes, axis=1)
    linearreg = tb.LINEARREG(data['Close'], timeperiod=14).to_frame( ).reset_index().set_axis(axes, axis=1)
    linearreg_angle = tb.LINEARREG_ANGLE( data['Close'], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    linearreg_intercept = tb.LINEARREG_INTERCEPT( data['Close'], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    linearreg_slope = tb.LINEARREG_SLOPE( data['Close'], timeperiod=14).to_frame().reset_index().set_axis(axes, axis=1)
    tsf = tb.TSF(data['Close'], timeperiod=14).to_frame( ).reset_index().set_axis(axes, axis=1)
    var = tb.VAR(data['Close'], timeperiod=5, nbdev=1).to_frame().reset_index().set_axis(axes, axis=1)
    logReturn_1d = calculate_log_return_series(data['Adj_close'],1)
    logReturn_5d = calculate_log_return_series(data['Adj_close'],5)
    logReturn_21d = calculate_log_return_series(data['Adj_close'],21)
    logReturn_Qtr = calculate_log_return_series(data['Adj_close'],60)
    logReturn_6m = calculate_log_return_series(data['Adj_close'],125)
    logReturn_1y = calculate_log_return_series(data['Adj_close'],220)
    # DollarVolume = calculate_DollarValue(data['Adj_close'],data['Volume']).reset_index().set_axis(axes, axis=1)
    
    indicators = [rsi, cmo, plus_di, minus_di, logReturn_1d, logReturn_5d, logReturn_21d, logReturn_Qtr, logReturn_6m, logReturn_1y, willr, cci, ultosc, aroonosc, mfi, mom, macd, macdfix, linearreg_angle, linearreg_slope, rocp, roc, rocr, rocr100, slowk, fastd, slowd, aroonup, aroondown, apo,
                  macdext, fastk, ppo, minus_dm, adosc, fastdrsi, fastkrsi, trange, trix, std, bop, var, plus_dm, correl, ad, beta, wclprice, tsf, typprice, avgprice, medprice, bbands_lowerband, linearreg, obv,
                  bbands_middleband, tema, bbands_upperband, dema, midprice, midpoint, wma, ema, ht_trendline, kama, sma, ma, adxr, adx, trima, linearreg_intercept, dx]

    indicatorValues = []
    frame = data
    for indicator in indicators:
        frame = pd.merge(frame,indicator,on='Date')
    frame1_columns = ['Date', 'Adj_close', 'Close', 'High', 'Low', 'Open', 'Volume']
    frame2_columns = ['rsi','cmo','plus_di','minus_di', 'logReturn_1d', 'logReturn_5d', 'logReturn_21d', 'logReturn_Qtr', 'logReturn_6m', 'logReturn_1y','willr', 'cci','ultosc','aroonosc','mfi','mom','macd','macdfix','linearreg_angle','linearreg_slope','rocp','roc','rocr','rocr100','slowk','fastd','slowd','aroonup','aroondown','apo','macdext','fastk','ppo','minus_dm','adosc','fastdrsi','fastkrsi','trange','trix','std','bop','var','plus_dm','correl','ad','beta','wclprice','tsf','typprice','avgprice','medprice','bbands_lowerband','linearreg','obv','bbands_middleband','tema','bbands_upperband','dema','midprice','midpoint','wma','ema','ht_trendline','kama','sma','ma','adxr','adx','trima','linearreg_intercept','dx']
    frame.columns= frame1_columns + frame2_columns
    frame.set_index(['Date'],inplace=True)
    return frame

In [9]:
def labeling(df, windowSize):
    ''' Labels price data series with Buy/Sell/Hold labels '''
    label_map = {'Hold': 0, 'Buy': 1, 'Sell': 2}
    roll = df['Adj_close'].rolling(windowSize, center=True)
    df['Label'] = roll.apply(lambda x: label_map['Buy'] if np.argmin(x) == windowSize // 2 else (label_map['Sell'] if np.argmax(x) == windowSize // 2 else label_map['Hold']), raw=True)
    for label in label_map:
        df['Label'].replace(label_map[label], label, inplace=True)
    df.dropna(inplace=True)
    #dates = pd.to_datetime(df.index)
    return df

In [12]:
def extend_labels(df):
    '''  Changes the label of the day before and the day after a Buy or Sell price peak or bottom '''
    frame = df.copy()
    before = 1
    after = 1 
    start_row = 3 
    last_row = len(frame)-2
    for i in range(start_row, last_row):
        if frame.iloc[i, int(frame.columns.get_loc('Label'))] == 'Buy':
            frame.iloc[(i-before), int(frame.columns.get_loc('Label'))] = 'Buy'
        elif frame.iloc[i, int(frame.columns.get_loc('Label'))] == 'Buy':
            frame.iloc[(i+after), frame.columns.get_loc('Label')] = 'Buy'
        elif frame.iloc[i, frame.columns.get_loc('Label')] == 'Sell':
            frame.iloc[(i-before), frame.columns.get_loc('Label')] = 'Sell'
        elif frame.iloc[i, frame.columns.get_loc('Label')] == 'Sell':
            frame.iloc[(i+after), frame.columns.get_loc('Label')] = 'Sell'
        else:
            pass
    return frame                

In [13]:
def _scale_Xs_only(df):
    ''' This is to scale data before training test split. Should scale after split to prevent leakage '''
    features = df.iloc[:, :-1].columns
    scaler = MinMaxScaler()
    Xs = df.drop(['Label'], axis=1)
    Xs.dropna(inplace=True)
    ys = df[['Label']]
    Xs.values[:] = scaler.fit_transform(Xs)
    df = pd.merge(Xs, ys, on="Date", how="outer")
    return df

In [14]:
# !pip install sqlalchemy
from sqlalchemy import create_engine
from PIL import Image

engine = create_engine('sqlite:///2000_db_db')

def _make_images(df, directory, sym):
    ''' Create image files from numerical data. 
        ys:Looks for column called Labels.
        Xs ys saved as numpy arrays and Images saved to three diffeerent directories '''
    frame = df.copy()    
    df = df.iloc[0:, 7:(IMAGE_SIZE+7)]
    df.sort_index(ascending=False, inplace=True)
    start_row = 0
    last_row = len(df)-(IMAGE_SIZE+1)
    image_list = []
    image_labels = []
    holds_counter = 0
    count = 0
    for i in range(start_row, last_row):
        img = df.iloc[i:i+IMAGE_SIZE,:]
        img = img.to_numpy()
        # img = np.round(img)
        # img = img.astype(np.uint8)
        numerical = img
        img = Image.fromarray(img, 'L') 
        if frame.iloc[i, frame.columns.get_loc('Label')] == 'Buy':
            # img.save(os.path.join(directory+'/Buy','buy_'+sym+'_'+str(i)+'.png'))
            image_list.append(numerical)            
            image_labels.append(2)
        elif frame.iloc[i, frame.columns.get_loc('Label')] == 'Sell':
            # img.save(os.path.join(directory+'/Sell', 'sell_'+sym+'_'+str(i)+'.png'))
            image_list.append(numerical)            
            image_labels.append(0)
        else:
            if holds_counter <=200:
                # img.save(os.path.join(directory+'/Hold','hold_'+sym+'_'+str(i)+'.png'))
                image_list.append(numerical)            
                image_labels.append(1)
                holds_counter += 1
            else:
                pass
        count +=1

    print(f'Image files processed for:{sym} {count}' )
    return image_list, image_labels

In [15]:
lowest_date, highest_date = datetime.date(year=2000,month=1,day=10), datetime.date(year=2022,month=12,day=15)
Xs = []
ys= []
count = 0
errors = 0

for symbol in symbol_list:
    OHLCV = db_one_symbol(symbol,lowest_date, highest_date)
    if pd.isnull(OHLCV).sum().max()>0:
        errors=+1
        print("Error:",symbol)
        pass
    else:
        frame = _create_ta_indicators(OHLCV)
        frame.dropna(inplace=True)
        single_stock_data = labeling(frame,31)
        single_stock_data = extend_labels(single_stock_data)        
        # single_stock_data = _scale_Xs_only(single_stock_data)
        single_stock_data, labels = _make_images(single_stock_data, images_dir, symbol)
        Xs.extend(single_stock_data)
        ys.extend(labels)
        count +=1
        print("Remaining:", len(symbol_list)-count)
print("Stocks Processed: ",count)
print("Errors: ",errors)

Image files processed for:RY.TO 5462
Remaining: 92


Image files processed for:TD.TO 5462
Remaining: 91


Image files processed for:CNR.TO 5462
Remaining: 90


Image files processed for:ENB.TO 5463
Remaining: 89


Image files processed for:CP.TO 5052
Remaining: 88


Image files processed for:CNQ.TO 5463
Remaining: 87


Image files processed for:BMO.TO 5462
Remaining: 86


Image files processed for:BNS.TO 5462
Remaining: 85


Image files processed for:TRI.TO 3347
Remaining: 84


Image files processed for:SHOP.TO 1601
Remaining: 83


Image files processed for:ATD.TO 5462
Remaining: 82


Image files processed for:TRP.TO 5462
Remaining: 81


Image files processed for:BCE.TO 5463
Remaining: 80


Image files processed for:SU.TO 5463
Remaining: 79


Image files processed for:NTR.TO 945
Remaining: 78


Image files processed for:CM.TO 5463
Remaining: 77


Image files processed for:CVE.TO 2993
Remaining: 76


Image files processed for:MFC.TO 5462
Remaining: 75


Image files processed for:CSU.TO 3500
Remaining: 74


Image files processed for:QSR.TO 1709
Remaining: 73


Image files processed for:L.TO 5463
Remaining: 72


Image files processed for:IMO.TO 5462
Remaining: 71


Image files processed for:SLF.TO 5409
Remaining: 70


Image files processed for:FNV.TO 3473
Remaining: 69


Image files processed for:IFC.TO 4222
Remaining: 68


Image files processed for:AEM.TO 5462
Remaining: 67


Image files processed for:NA.TO 5462
Remaining: 66


Image files processed for:RCI-B.TO 5462
Remaining: 65


Image files processed for:GIB-A.TO 5462
Remaining: 64


Image files processed for:GWO.TO 4835
Remaining: 63


Image files processed for:FTS.TO 5462
Remaining: 62


Image files processed for:WN.TO 5463
Remaining: 61


Image files processed for:PPL.TO 4518
Remaining: 60


Image files processed for:TECK-B.TO 5500
Remaining: 59


Image files processed for:TOU.TO 2727
Remaining: 58


Image files processed for:WPM.TO 4229
Remaining: 57


Image files processed for:MGA.TO 4339
Remaining: 56


Image files processed for:DOL.TO 3008
Remaining: 55


Image files processed for:POW.TO 5462
Remaining: 54


Image files processed for:H.TO 1477
Remaining: 53


Image files processed for:FM.TO 5340
Remaining: 52


Image files processed for:WSP.TO 1945
Remaining: 51


Image files processed for:FFH.TO 5462
Remaining: 50


Image files processed for:MRU.TO 5462
Remaining: 49


Image files processed for:IVN.TO 2251
Remaining: 48


Image files processed for:EMA.TO 5462
Remaining: 47


Image files processed for:SAP.TO 5462
Remaining: 46


Image files processed for:GFL.TO 401
Remaining: 45


Image files processed for:CCO.TO 5462
Remaining: 44


Image files processed for:TFII.TO 4517
Remaining: 43


Image files processed for:ARX.TO 5462
Remaining: 42


Image files processed for:CCL-B.TO 5463
Remaining: 41


Image files processed for:CHP-UN.TO 2071
Remaining: 40


Image files processed for:CU.TO 5311
Remaining: 39


Image files processed for:EMP-A.TO 5462
Remaining: 38


Image files processed for:NPI.TO 4518
Remaining: 37


Image files processed for:IGM.TO 5462
Remaining: 36


Image files processed for:CAE.TO 5463
Remaining: 35


Image files processed for:CTC.TO 5462
Remaining: 34


Image files processed for:WFG.TO 5464
Remaining: 33


Image files processed for:TRQ.TO 5508
Remaining: 32


Image files processed for:RBA.TO 4372
Remaining: 31


Image files processed for:TIH.TO 5462
Remaining: 30


Image files processed for:DOO.TO 2102
Remaining: 29


Image files processed for:IAG.TO 5439
Remaining: 28


Image files processed for:DSG.TO 5462
Remaining: 27


Image files processed for:X.TO 4524
Remaining: 26


Image files processed for:CAR-UN.TO 5462
Remaining: 25


Image files processed for:FSV.TO 1597
Remaining: 24


Image files processed for:EFN.TO 2459
Remaining: 23


Image files processed for:K.TO 4835
Remaining: 22


Image files processed for:STN.TO 5462
Remaining: 21


Image files processed for:YRI.TO 5462
Remaining: 20


Image files processed for:TIXT.TO 169
Remaining: 19


Image files processed for:GIL.TO 5462
Remaining: 18


Image files processed for:ALA.TO 4366
Remaining: 17


Image files processed for:AC.TO 2889
Remaining: 16


Image files processed for:AQN.TO 4518
Remaining: 15


Image files processed for:KEY.TO 4518
Remaining: 14


Image files processed for:LUN.TO 5462
Remaining: 13


Image files processed for:QBR-B.TO 5462
Remaining: 12


Image files processed for:REI-UN.TO 5462
Remaining: 11


Image files processed for:WCP.TO 4954
Remaining: 10


Image files processed for:ACO-X.TO 5464
Remaining: 9


Image files processed for:CPX.TO 3051
Remaining: 8


Image files processed for:PKI.TO 5462
Remaining: 7


Image files processed for:ONEX.TO 5462
Remaining: 6


Image files processed for:CIGI.TO 5462
Remaining: 5


Image files processed for:ATZ.TO 1257
Remaining: 4


Image files processed for:FTT.TO 5462
Remaining: 3


Image files processed for:AGI.TO 4518
Remaining: 2


Image files processed for:PSK.TO 1846
Remaining: 1


Image files processed for:BBD-B.TO 5508
Remaining: 0
Stocks Processed:  93
Errors:  0


In [14]:
print("Class label count:",sorted(Counter(ys).items()))

Class label count: [(0, 17856), (1, 18647), (2, 17725)]


In [15]:
np.save('Xs.npy',Xs)
np.save('ys.npy', ys)
