## Install needed libraries

In [None]:
import os
from google.colab import drive
drive.mount('/content/gdrive')

path = "/content/gdrive/MyDrive/Chuyên đề nghiên cứu 1/vnquant_package"
%cd {path}
!ls

In [None]:
!git clone https://github.com/phamdinhkhanh/vnquant
%cd vnquant
!python setup.py install

In [None]:
%cd ..
!rm -rf vnquant
!ls

In [None]:
!pip freeze | grep vnquant

vnquant==0.1.1


In [None]:
import vnquant
vnquant.__version__

'0.1.1'

## Import libraries

In [None]:
import vnquant.data as dt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

## We choose 10 codes from biggest companies on stock exchange in Vietnam

In [None]:
stock_symbols = ['VIC', 'VCB', 'VNM', 'GAS', 'HVN', 'MWG', 'FPT', 'HPG', 'MSN', 'SAB']

## Prepare the dataset

In [None]:
stock = {}
for i in range(len(stock_symbols)):
  data = dt.DataLoader(symbols=stock_symbols[i],
                        start="2014-01-01",
                        end="2024-01-01",
                        data_source="VND")
  stock[stock_symbols[i]] = data.download()

In [None]:
stock[stock_symbols[0]]

Attributes,high,low,open,close,avg,volume
Symbols,VIC,VIC,VIC,VIC,VIC,VIC
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2014-01-02,70.50,69.50,70.00,70.00,69.92,74950.0
2014-01-03,70.50,69.50,70.00,70.50,70.00,91920.0
2014-01-06,70.50,69.50,70.50,70.50,70.11,111760.0
2014-01-07,70.50,70.00,70.50,70.50,70.47,294120.0
2014-01-08,70.50,70.00,70.00,70.00,70.02,109890.0
...,...,...,...,...,...,...
2023-12-25,43.55,43.00,43.10,43.40,43.34,3364500.0
2023-12-26,43.75,43.35,43.40,43.55,43.55,1806700.0
2023-12-27,43.95,43.60,43.65,43.60,43.76,1920500.0
2023-12-28,44.60,43.60,43.60,44.45,44.32,4359700.0


In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]] = stock[stock_symbols[i]].ewm(alpha=0.65).mean()

stock[stock_symbols[0]].head()

Attributes,high,low,open,close,avg,volume
Symbols,VIC,VIC,VIC,VIC,VIC,VIC
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2014-01-02,70.5,69.5,70.0,70.0,69.92,74950.0
2014-01-03,70.5,69.5,70.0,70.37037,69.979259,87520.37037
2014-01-06,70.5,69.5,70.339559,70.458404,70.068048,103981.918506
2014-01-07,70.5,69.829951,70.445434,70.485853,70.333297,229454.54508
2014-01-08,70.5,69.941067,70.154373,70.168381,70.128579,151327.251584


In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['today', str(stock_symbols[i])] = stock[stock_symbols[i]]['close'].pct_change() * 100

stock[stock_symbols[1]].head()

Attributes,high,low,open,close,avg,volume,today
Symbols,VCB,VCB,VCB,VCB,VCB,VCB,VCB
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2014-01-02,26.9,26.6,26.9,26.7,26.74,333900.0,
2014-01-03,27.196296,26.674074,26.751852,27.07037,26.984444,689055.6,1.387155
2014-01-06,27.334635,26.895416,27.124109,27.090492,27.110458,476379.5,0.074332
2014-01-07,27.707721,27.228392,27.37216,27.492708,27.4863,1727654.0,1.484712
2014-01-08,27.964049,27.471213,27.651724,27.889532,27.730488,1562150.0,1.443379


In [None]:
for i in range(len(stock_symbols)):
  for j in range(1, 6):
    stock[stock_symbols[i]][f'previous{str(j)}', str(stock_symbols[i])] = stock[stock_symbols[i]]['today'].shift(j)

stock[stock_symbols[2]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5
Symbols,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2014-01-02,137.0,135.0,135.0,135.0,135.55,32520.0,,,,,,
2014-01-03,136.259259,135.0,135.0,135.740741,135.527778,31171.851852,0.548697,,,,,
2014-01-06,136.083192,135.0,135.0,135.916808,135.610781,62355.65365,0.129709,0.548697,,,,
2014-01-07,136.028293,135.0,135.659903,135.311804,135.412295,91070.886744,-0.445128,0.129709,0.548697,,,
2014-01-08,136.009806,135.0,135.228701,135.761493,135.534947,56980.762637,0.332336,-0.445128,0.129709,0.548697,,


In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['ema50', str(stock_symbols[i])] = stock[stock_symbols[i]]['close'] / stock[stock_symbols[i]]['close'].ewm(50).mean()
  stock[stock_symbols[i]]['ema21', str(stock_symbols[i])] = stock[stock_symbols[i]]['close'] / stock[stock_symbols[i]]['close'].ewm(21).mean()
  stock[stock_symbols[i]]['ema14', str(stock_symbols[i])] = stock[stock_symbols[i]]['close'] / stock[stock_symbols[i]]['close'].ewm(14).mean()
  stock[stock_symbols[i]]['ema5', str(stock_symbols[i])] = stock[stock_symbols[i]]['close'] / stock[stock_symbols[i]]['close'].ewm(5).mean()

stock[stock_symbols[3]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5
Symbols,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
2014-01-02,66.5,65.5,66.5,66.5,66.08,229950.0,,,,,,,1.0,1.0,1.0,1.0
2014-01-03,66.5,65.5,66.5,66.12963,65.976296,249964.814815,-0.556948,,,,,,0.997235,0.997272,0.997304,0.997461
2014-01-06,66.839559,65.839559,66.160441,66.720713,66.270832,200155.093379,0.893825,-0.556948,,,,,1.00405,1.004019,1.003992,1.003844
2014-01-07,67.275386,66.275386,66.714468,66.905015,66.705819,169367.272952,0.276229,0.893825,-0.556948,,,,1.005058,1.004965,1.004886,1.004477
2014-01-08,67.422156,66.422156,67.22776,67.293797,67.061404,177190.635299,0.581096,0.276229,0.893825,-0.556948,,,1.008612,1.008418,1.008254,1.007419


In [None]:
def rsi(X, window=14):
  delta = X.diff(1)

  gains = delta.where(delta > 0, 0)
  losses = -delta.where(delta < 0, 0)

  avg_gains = gains.rolling(window=window, min_periods=1).mean()
  avg_losses = losses.rolling(window=window, min_periods=1).mean()

  rs = avg_gains / avg_losses
  rsi = 100 - (100 / (1 + rs))

  return rsi

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['rsi', str(stock_symbols[i])] = rsi(stock[stock_symbols[i]]['close'])

stock[stock_symbols[4]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5,rsi
Symbols,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
2017-01-03,39.2,39.2,39.2,39.2,39.2,700.0,,,,,,,1.0,1.0,1.0,1.0,
2017-01-04,43.496296,43.496296,43.496296,43.496296,43.496296,6750.37037,10.95994,,,,,,1.051412,1.050683,1.050072,1.047008,100.0
2017-01-05,49.067572,45.196604,49.067572,47.437691,48.052971,521133.820034,9.061449,10.95994,,,,,1.09222,1.09038,1.08884,1.081195,100.0
2017-01-06,51.13467,44.406962,51.13467,45.16915,46.837449,342822.959663,-4.78215,9.061449,10.95994,,,,1.02939,1.027717,1.026338,1.01981,78.40766
2017-01-09,47.910199,42.180744,47.910199,42.96764,43.843784,223068.513075,-4.873924,-4.78215,9.061449,10.95994,,,0.983469,0.982441,0.981627,0.978254,64.824189


In [None]:
def macd(X, short_window=12, long_window=29, signal=9):
  short_ema = X.ewm(span=short_window, adjust=False).mean()

  long_ema = X.ewm(span=long_window, adjust=False).mean()

  macd = short_ema - long_ema

  signal = macd.ewm(span=signal, adjust=False).mean()

  return signal

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['macd', str(stock_symbols[i])] = macd(stock[stock_symbols[i]]['close'])

stock[stock_symbols[5]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5,rsi,macd
Symbols,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
2014-07-14,81.5,81.0,81.0,81.5,81.45,10660.0,,,,,,,1.0,1.0,1.0,1.0,,0.0
2014-07-15,85.574074,85.444444,85.444444,85.574074,85.561111,12748.888889,4.998864,,,,,,1.024138,1.023804,1.023524,1.022119,100.0,0.071035
2014-07-16,90.617148,90.575552,90.575552,90.617148,90.612988,15832.835314,5.893226,4.998864,,,,,1.054212,1.053216,1.05238,1.048204,100.0,0.271165
2014-07-17,96.478966,96.464819,96.464819,96.478966,96.477551,68154.64654,6.468773,5.893226,4.998864,,,,1.088113,1.086065,1.084349,1.075834,100.0,0.644448
2014-07-18,102.700314,102.695411,102.695411,102.700314,102.699823,111480.682755,6.448398,6.468773,5.893226,4.998864,,,1.121361,1.117879,1.114968,1.100672,100.0,1.215521


In [None]:
def obv(X):
  obv = pd.Series(index=X.index)
  obv.iloc[0] = 0

  for i in range(1, len(X)):
    if (X['close'].iloc[i].values > X['close'].iloc[i-1].values):
      obv.iloc[i] = obv.iloc[i-1] + X['volume'].iloc[i].values
    elif (X['close'].iloc[i].values < X['close'].iloc[i-1].values):
      obv.iloc[i] = obv.iloc[i-1] - X['volume'].iloc[i].values
    else:
      obv.iloc[i] = obv.iloc[i-1]

  return obv

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['obv', str(stock_symbols[i])] = obv(stock[stock_symbols[i]])

stock[stock_symbols[6]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5,rsi,macd,obv
Symbols,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
2014-01-02,47.8,47.2,47.6,47.4,47.55,207470.0,,,,,,,1.0,1.0,1.0,1.0,,0.0,0.0
2014-01-03,47.503704,47.051852,47.377778,47.325926,47.305556,176092.222222,-0.156274,,,,,,0.999226,0.999236,0.999245,0.999289,0.0,-0.001292,-176092.222222
2014-01-06,47.840747,47.016638,47.324958,47.783701,47.34253,261374.244482,0.967282,-0.156274,,,,,1.005851,1.005777,1.005715,1.005393,86.072358,0.00465,85282.02226
2014-01-07,47.945838,47.33561,47.77042,47.596486,47.637817,280917.760455,-0.391797,0.967282,-0.156274,,,,1.001417,1.001344,1.001283,1.000986,63.662614,0.011589,-195635.738195
2014-01-08,47.981229,47.377684,47.528376,47.729469,47.743792,182041.901609,0.279396,-0.391797,0.967282,-0.156274,,,1.003336,1.003232,1.003146,1.002725,69.333942,0.021105,-13593.836586


In [None]:
def emv(X):
  emv = pd.Series(index=X.index)
  emv.iloc[0] = np.nan

  for i in range(1, len(X)):
    dm = 0.5 * ((X['high'].iloc[i].values + X['low'].iloc[i].values) - (X['high'].iloc[i-1].values + X['low'].iloc[i-1].values))
    br = X['volume'].iloc[i].values / (1000000 * (X['high'].iloc[i].values - X['low'].iloc[i].values))
    emv.iloc[i] = dm / br if br != 0 else 0

  return emv

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['emv', str(stock_symbols[i])] = emv(stock[stock_symbols[i]])

In [None]:
stock[stock_symbols[7]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5,rsi,macd,obv,emv
Symbols,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
2014-01-02,41.5,41.0,41.5,41.1,41.13,276250.0,,,,,,,1.0,1.0,1.0,1.0,,0.0,0.0,
2014-01-03,41.574074,41.0,41.277778,41.47037,41.30037,424864.814815,0.901144,,,,,,1.004441,1.004381,1.00433,1.004076,100.0,0.006458,424864.8,0.050044
2014-01-06,42.134975,41.339559,41.768251,42.033786,41.802666,443856.451613,1.358598,0.901144,,,,,1.011865,1.011663,1.011492,1.010634,100.0,0.026481,868721.3,0.806837
2014-01-07,42.573827,41.907366,42.251134,42.143471,42.302433,337198.725563,0.260945,1.358598,0.901144,,,,1.010731,1.010434,1.010185,1.008961,100.0,0.055928,1205920.0,0.994812
2014-01-08,42.590929,41.967896,42.087035,42.376438,42.418461,350568.775959,0.552796,0.260945,1.358598,0.901144,,,1.01288,1.012467,1.012122,1.010445,100.0,0.093794,1556489.0,0.068984


In [None]:
def mfi(X, window=14):
  combine_price = (X['high'] + X['low'] + X['close']) / 3

  raw_money_flow = combine_price * X['volume']

  flow_direction = (combine_price.diff() > 0).astype(int)

  positive_money_flow = flow_direction * raw_money_flow
  negative_money_flow = (1 - flow_direction) * raw_money_flow

  positive = positive_money_flow.rolling(window=window, min_periods=1).sum()
  negative = negative_money_flow.rolling(window=window, min_periods=1).sum()

  mf = positive / negative
  mfi = 100 - (100 / (1 + mf))

  return mfi

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['mfi', str(stock_symbols[i])] = mfi(stock[stock_symbols[i]])

stock[stock_symbols[8]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,previous5,ema50,ema21,ema14,ema5,rsi,macd,obv,emv,mfi
Symbols,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,...,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-02,83.0,81.5,83.0,82.0,82.24,92560.0,,,,,...,,1.0,1.0,1.0,1.0,,0.0,0.0,,0.0
2014-01-03,82.62963,81.5,82.259259,81.62963,81.884444,59922.962963,-0.451671,,,,...,,0.997759,0.997789,0.997814,0.997942,0.0,-0.006458,-59922.962963,-3.490993,0.0
2014-01-06,82.541596,81.5,82.083192,81.881154,81.942547,100389.541596,0.308129,-0.451671,,,...,,1.00055,1.000562,1.000572,1.000614,40.444894,-0.012272,40466.578633,-0.456699,39.671831
2014-01-07,83.174049,81.829951,82.028293,82.289532,82.409397,114122.418543,0.498745,0.308129,-0.451671,,...,,1.004108,1.00406,1.004018,1.003794,64.051241,-0.010242,154588.997176,5.66745,58.494597
2014-01-08,84.040468,82.267783,82.989953,83.733923,83.115498,139748.439155,1.755254,0.498745,0.308129,-0.451671,...,,1.017135,1.016854,1.016614,1.015355,85.033504,0.021847,294337.436332,8.272094,70.060592


In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['volume'] = stock[stock_symbols[i]]['volume'] / stock[stock_symbols[i]]['volume'].ewm(5).mean()

stock[stock_symbols[9]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,previous5,ema50,ema21,ema14,ema5,rsi,macd,obv,emv,mfi
Symbols,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,...,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2016-12-06,132.0,132.0,132.0,132.0,132.0,1.0,,,,,...,,1.0,1.0,1.0,1.0,,0.0,0.0,,0.0
2016-12-07,138.814815,138.814815,138.814815,138.814815,138.814815,0.444413,5.162738,,,,...,,1.024909,1.024565,1.024275,1.022824,100.0,0.118822,802.592593,0.0,21.8999
2016-12-08,147.089983,147.089983,147.089983,147.089983,147.089983,0.640641,5.9613,5.162738,,,...,,1.055156,1.054139,1.053285,1.049022,100.0,0.450786,1739.247941,0.0,38.543338
2016-12-09,156.599192,149.670214,149.670214,156.599192,151.993071,1.329704,6.464892,5.9613,5.162738,,...,,1.088785,1.086712,1.084974,1.076356,100.0,1.065604,4044.108949,18171.910973,60.352065
2016-12-12,167.185317,164.783955,164.783955,167.185317,165.588983,3.025815,6.760013,6.464892,5.9613,5.162738,...,,1.124409,1.120852,1.117878,1.103275,100.0,2.010623,28163.886333,1279.337983,92.07782


In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]].replace(0, np.nan, inplace=True)
  stock[stock_symbols[i]] = stock[stock_symbols[i]].dropna()

stock[stock_symbols[8]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,previous5,ema50,ema21,ema14,ema5,rsi,macd,obv,emv,mfi
Symbols,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,...,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-10,86.183836,84.878233,84.966291,85.496039,85.530877,1.691985,1.10888,0.984626,1.755254,0.498745,...,-0.451671,1.028389,1.027515,1.026775,1.02304,91.258211,0.182444,641409.1,7.509301,82.458817
2014-01-13,86.389389,85.282473,85.638353,85.823688,85.926896,1.115988,0.383232,1.10888,0.984626,1.755254,...,0.308129,1.027883,1.02673,1.025759,1.020983,91.885723,0.300566,799909.8,2.129293,85.26809
2014-01-14,86.461292,85.423877,85.873442,85.9383,85.889911,1.005659,0.133544,0.383232,1.10888,0.984626,...,0.498745,1.025651,1.024276,1.023128,1.017645,92.08448,0.427605,942951.9,0.773506,87.130525
2014-01-15,86.811462,85.798367,86.280716,86.303415,86.162976,0.991841,0.424857,0.133544,0.383232,1.10888,...,1.755254,1.026646,1.025042,1.023713,1.017525,92.657436,0.559383,1083744.0,2.60722,88.560048
2014-01-16,88.559029,86.579436,86.748255,88.381215,87.727557,2.087498,2.407553,0.424857,0.133544,0.383232,...,0.984626,1.045982,1.043831,1.042042,1.033657,94.799598,0.722613,1483848.0,6.255457,91.341773


In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['trend', str(stock_symbols[i])] = (stock[stock_symbols[i]]['today'].iloc[:] > 0).astype(int)

In [None]:
stock[stock_symbols[7]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,ema50,ema21,ema14,ema5,rsi,macd,obv,emv,mfi,trend
Symbols,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,...,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-10,43.646068,42.825369,42.904938,43.034537,43.228282,0.938625,0.283827,1.265564,0.552796,0.260945,...,1.020294,1.019467,1.018778,1.015456,100.0,0.201917,2442550.0,1.288501,90.069613,1
2014-01-13,43.551102,43.068934,43.161786,43.207127,43.235901,0.650725,0.401049,0.283827,1.265564,0.552796,...,1.021054,1.020041,1.019201,1.015223,100.0,0.263866,2677948.0,0.152189,90.880198,1
2014-01-14,44.62297,43.089128,43.251632,44.372586,44.109634,1.519863,2.697378,0.401049,0.283827,1.265564,...,1.04251,1.040977,1.039696,1.033504,100.0,0.345179,3313915.0,1.316932,92.551392,1
2014-01-15,45.648068,44.331229,44.388103,45.430434,45.1109,1.244591,2.384013,2.697378,0.401049,0.283827,...,1.059574,1.057294,1.055379,1.046053,100.0,0.452376,3868339.0,2.692464,93.599613,1
2014-01-16,46.851835,45.545942,45.760849,46.775665,46.254326,1.279353,2.961078,2.384013,2.697378,0.401049,...,1.081099,1.077765,1.074957,1.061284,100.0,0.593065,4478934.0,2.586229,94.479057,1


In [None]:
df = {}
for i in range(len(stock_symbols)):
  df[stock_symbols[i]] = stock[stock_symbols[i]][['today', 'previous1', 'previous2', 'previous3', 'previous4', 'previous5', 'volume',
                                                  'ema50', 'ema21', 'ema14', 'ema5', 'rsi', 'macd', 'obv', 'emv', 'mfi', 'trend']]

df[stock_symbols[6]].head()

Attributes,today,previous1,previous2,previous3,previous4,previous5,volume,ema50,ema21,ema14,ema5,rsi,macd,obv,emv,mfi,trend
Symbols,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
2014-01-10,0.720937,0.91484,0.279396,-0.391797,0.967282,-0.156274,1.793705,1.014882,1.014474,1.014128,1.012381,84.028216,0.068142,809354.3,1.121602,80.348491,1
2014-01-13,0.786173,0.720937,0.91484,0.279396,-0.391797,0.967282,1.773739,1.019741,1.019103,1.018563,1.015853,87.047845,0.108682,1505487.0,0.304941,85.635204,1
2014-01-14,0.405809,0.786173,0.720937,0.91484,0.279396,-0.391797,1.587832,1.020951,1.020088,1.019362,1.01578,88.207699,0.157838,2241394.0,0.413767,88.831288,1
2014-01-15,1.200671,0.405809,0.786173,0.720937,0.91484,0.279396,1.961266,1.029477,1.028273,1.027258,1.022292,90.685569,0.219921,3435020.0,1.081338,91.825401,1
2014-01-16,1.069379,1.200671,0.405809,0.786173,0.720937,0.91484,1.475238,1.036284,1.034659,1.03329,1.026655,92.168772,0.295863,4447635.0,0.337539,93.34907,1


In [None]:
stock_symbols

['VIC', 'VCB', 'VNM', 'GAS', 'HVN', 'MWG', 'FPT', 'HPG', 'MSN', 'SAB']

## Logistic Regression

#### VIC

In [None]:
X = df[stock_symbols[0]].loc[:, df[stock_symbols[0]].columns != ('trend', 'VIC')]
y = df[stock_symbols[0]][('trend', 'VIC')]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0])

In [None]:
y_test.values[:21]

array([1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.72


#### VCB

In [None]:
X = df[stock_symbols[1]].loc[:, df[stock_symbols[1]].columns != ('trend', 'VCB')]
y = df[stock_symbols[1]][('trend', 'VCB')]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0])

In [None]:
y_test.values[:21]

array([0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.69


#### VNM

In [None]:
X = df[stock_symbols[2]].loc[:, df[stock_symbols[2]].columns != ('trend', 'VNM')]
y = df[stock_symbols[2]][('trend', 'VNM')]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0])

In [None]:
y_test.values[:21]

array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.73


#### GAS

In [None]:
X = df[stock_symbols[3]].loc[:, df[stock_symbols[3]].columns != ('trend', 'GAS')]
y = df[stock_symbols[3]][('trend', 'GAS')]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1])

In [None]:
y_test.values[:21]

array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.83


#### HVN

In [None]:
X = df[stock_symbols[4]].loc[:, df[stock_symbols[4]].columns != ('trend', 'HVN')]
y = df[stock_symbols[4]][('trend', 'HVN')]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0])

In [None]:
y_test.values[:21]

array([1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.75


#### MWG

In [None]:
X = df[stock_symbols[5]].loc[:, df[stock_symbols[5]].columns != ('trend', 'MWG')]
y = df[stock_symbols[5]][('trend', 'MWG')]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [None]:
y_test.values[:21]

array([1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.67


#### FPT

In [None]:
X = df[stock_symbols[6]].loc[:, df[stock_symbols[6]].columns != ('trend', 'FPT')]
y = df[stock_symbols[6]][('trend', 'FPT')]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1])

In [None]:
y_test.values[:21]

array([0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.69


#### HPG

In [None]:
X = df[stock_symbols[7]].loc[:, df[stock_symbols[7]].columns != ('trend', 'HPG')]
y = df[stock_symbols[7]][('trend', 'HPG')]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1])

In [None]:
y_test.values[:21]

array([0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.72


#### MSN

In [None]:
X = df[stock_symbols[8]].loc[:, df[stock_symbols[8]].columns != ('trend', 'MSN')]
y = df[stock_symbols[8]][('trend', 'MSN')]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0])

In [None]:
y_test.values[:21]

array([1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.77


#### SAB

In [None]:
X = df[stock_symbols[9]].loc[:, df[stock_symbols[9]].columns != ('trend', 'SAB')]
y = df[stock_symbols[9]][('trend', 'SAB')]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1])

In [None]:
y_test.values[:21]

array([0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.64


## Random Forest

#### VIC

In [None]:
X = df[stock_symbols[0]][[('volume', stock_symbols[0]), ('ema50', stock_symbols[0]), ('ema21', stock_symbols[0]),
                          ('ema14', stock_symbols[0]), ('ema5', stock_symbols[0]), ('rsi', stock_symbols[0]),
                           ('macd', stock_symbols[0]), ('obv', stock_symbols[0]), ('emv', stock_symbols[0]), ('mfi', stock_symbols[0])]]
y = df[stock_symbols[0]][('trend', stock_symbols[0])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

In [None]:
y_test.values[:21]

array([1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.78


#### VCB

In [None]:
X = df[stock_symbols[1]][[('volume', stock_symbols[1]), ('ema50', stock_symbols[1]), ('ema21', stock_symbols[1]),
                          ('ema14', stock_symbols[1]), ('ema5', stock_symbols[1]), ('rsi', stock_symbols[1]),
                           ('macd', stock_symbols[1]), ('obv', stock_symbols[1]), ('emv', stock_symbols[1]), ('mfi', stock_symbols[1])]]
y = df[stock_symbols[1]][('trend', stock_symbols[1])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0])

In [None]:
y_test.values[:21]

array([0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.77


#### VNM

In [None]:
X = df[stock_symbols[2]][[('volume', stock_symbols[2]), ('ema50', stock_symbols[2]), ('ema21', stock_symbols[2]),
                          ('ema14', stock_symbols[2]), ('ema5', stock_symbols[2]), ('rsi', stock_symbols[2]),
                           ('macd', stock_symbols[2]), ('obv', stock_symbols[2]), ('emv', stock_symbols[2]), ('mfi', stock_symbols[2])]]
y = df[stock_symbols[2]][('trend', stock_symbols[2])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0])

In [None]:
y_test.values[:21]

array([0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.80


#### GAS

In [None]:
X = df[stock_symbols[3]][[('volume', stock_symbols[3]), ('ema50', stock_symbols[3]), ('ema21', stock_symbols[3]),
                          ('ema14', stock_symbols[3]), ('ema5', stock_symbols[3]), ('rsi', stock_symbols[3]),
                           ('macd', stock_symbols[3]), ('obv', stock_symbols[3]), ('emv', stock_symbols[3]), ('mfi', stock_symbols[3])]]
y = df[stock_symbols[3]][('trend', stock_symbols[3])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0])

In [None]:
y_test.values[:21]

array([1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.73


#### HVN

In [None]:
X = df[stock_symbols[4]][[('volume', stock_symbols[4]), ('ema50', stock_symbols[4]), ('ema21', stock_symbols[4]),
                          ('ema14', stock_symbols[4]), ('ema5', stock_symbols[4]), ('rsi', stock_symbols[4]),
                           ('macd', stock_symbols[4]), ('obv', stock_symbols[4]), ('emv', stock_symbols[4]), ('mfi', stock_symbols[4])]]
y = df[stock_symbols[4]][('trend', stock_symbols[4])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
y_test.values[:21]

array([0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.76


#### MWG

In [None]:
X = df[stock_symbols[5]][[('volume', stock_symbols[5]), ('ema50', stock_symbols[5]), ('ema21', stock_symbols[5]),
                          ('ema14', stock_symbols[5]), ('ema5', stock_symbols[5]), ('rsi', stock_symbols[5]),
                           ('macd', stock_symbols[5]), ('obv', stock_symbols[5]), ('emv', stock_symbols[5]), ('mfi', stock_symbols[5])]]
y = df[stock_symbols[5]][('trend', stock_symbols[5])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1])

In [None]:
y_test.values[:21]

array([0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.69


#### FPT

In [None]:
X = df[stock_symbols[6]][[('volume', stock_symbols[6]), ('ema50', stock_symbols[6]), ('ema21', stock_symbols[6]),
                          ('ema14', stock_symbols[6]), ('ema5', stock_symbols[6]), ('rsi', stock_symbols[6]),
                           ('macd', stock_symbols[6]), ('obv', stock_symbols[6]), ('emv', stock_symbols[6]), ('mfi', stock_symbols[6])]]
y = df[stock_symbols[6]][('trend', stock_symbols[6])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1])

In [None]:
y_test.values[:21]

array([1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.77


#### HPG

In [None]:
X = df[stock_symbols[7]][[('volume', stock_symbols[7]), ('ema50', stock_symbols[7]), ('ema21', stock_symbols[7]),
                          ('ema14', stock_symbols[7]), ('ema5', stock_symbols[7]), ('rsi', stock_symbols[7]),
                           ('macd', stock_symbols[7]), ('obv', stock_symbols[7]), ('emv', stock_symbols[7]), ('mfi', stock_symbols[7])]]
y = df[stock_symbols[7]][('trend', stock_symbols[7])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1])

In [None]:
y_test.values[:21]

array([1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.77


#### MSN

In [None]:
X = df[stock_symbols[8]][[('volume', stock_symbols[8]), ('ema50', stock_symbols[8]), ('ema21', stock_symbols[8]),
                          ('ema14', stock_symbols[8]), ('ema5', stock_symbols[8]), ('rsi', stock_symbols[8]),
                           ('macd', stock_symbols[8]), ('obv', stock_symbols[8]), ('emv', stock_symbols[8]), ('mfi', stock_symbols[8])]]
y = df[stock_symbols[8]][('trend', stock_symbols[8])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1])

In [None]:
y_test.values[:21]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.76


#### SAB

In [None]:
X = df[stock_symbols[9]][[('volume', stock_symbols[9]), ('ema50', stock_symbols[9]), ('ema21', stock_symbols[9]),
                          ('ema14', stock_symbols[9]), ('ema5', stock_symbols[9]), ('rsi', stock_symbols[9]),
                           ('macd', stock_symbols[9]), ('obv', stock_symbols[9]), ('emv', stock_symbols[9]), ('mfi', stock_symbols[9])]]
y = df[stock_symbols[9]][('trend', stock_symbols[9])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1])

In [None]:
y_test.values[:21]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.77
