## Install needed libraries

In [None]:
import os
from google.colab import drive
drive.mount('/content/gdrive')

path = "/content/gdrive/MyDrive/Chuyên đề nghiên cứu 1/vnquant_package"
%cd {path}
!ls

In [None]:
!git clone https://github.com/phamdinhkhanh/vnquant
%cd vnquant
!python setup.py install

In [None]:
%cd ..
!rm -rf vnquant
!ls

In [None]:
!pip freeze | grep vnquant

vnquant==0.1.1


In [None]:
import vnquant
vnquant.__version__

'0.1.1'

## Import libraries

In [None]:
import vnquant.data as dt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

## We choose 10 codes from biggest companies on stock exchange in Vietnam

In [None]:
stock_symbols = ['VIC', 'VCB', 'VNM', 'GAS', 'HVN', 'MWG', 'FPT', 'HPG', 'MSN', 'SAB']

## Prepare and process the dataset

In [None]:
stock = {}
for i in range(len(stock_symbols)):
  data = dt.DataLoader(symbols=stock_symbols[i],
                        start="2014-01-01",
                        end="2024-01-01",
                        data_source="VND")
  stock[stock_symbols[i]] = data.download()

In [None]:
stock[stock_symbols[0]]

Attributes,high,low,open,close,avg,volume
Symbols,VIC,VIC,VIC,VIC,VIC,VIC
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2014-01-02,70.50,69.50,70.00,70.00,69.92,74950.0
2014-01-03,70.50,69.50,70.00,70.50,70.00,91920.0
2014-01-06,70.50,69.50,70.50,70.50,70.11,111760.0
2014-01-07,70.50,70.00,70.50,70.50,70.47,294120.0
2014-01-08,70.50,70.00,70.00,70.00,70.02,109890.0
...,...,...,...,...,...,...
2023-12-25,43.55,43.00,43.10,43.40,43.34,3364500.0
2023-12-26,43.75,43.35,43.40,43.55,43.55,1806700.0
2023-12-27,43.95,43.60,43.65,43.60,43.76,1920500.0
2023-12-28,44.60,43.60,43.60,44.45,44.32,4359700.0


In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]] = stock[stock_symbols[i]].ewm(alpha=0.65).mean()

stock[stock_symbols[0]].head()

Attributes,high,low,open,close,avg,volume
Symbols,VIC,VIC,VIC,VIC,VIC,VIC
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2014-01-02,70.5,69.5,70.0,70.0,69.92,74950.0
2014-01-03,70.5,69.5,70.0,70.37037,69.979259,87520.37037
2014-01-06,70.5,69.5,70.339559,70.458404,70.068048,103981.918506
2014-01-07,70.5,69.829951,70.445434,70.485853,70.333297,229454.54508
2014-01-08,70.5,69.941067,70.154373,70.168381,70.128579,151327.251584


In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['today', str(stock_symbols[i])] = stock[stock_symbols[i]]['close'].pct_change() * 100

stock[stock_symbols[1]].head()

Attributes,high,low,open,close,avg,volume,today
Symbols,VCB,VCB,VCB,VCB,VCB,VCB,VCB
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2014-01-02,26.9,26.6,26.9,26.7,26.74,333900.0,
2014-01-03,27.196296,26.674074,26.751852,27.07037,26.984444,689055.6,1.387155
2014-01-06,27.334635,26.895416,27.124109,27.090492,27.110458,476379.5,0.074332
2014-01-07,27.707721,27.228392,27.37216,27.492708,27.4863,1727654.0,1.484712
2014-01-08,27.964049,27.471213,27.651724,27.889532,27.730488,1562150.0,1.443379


In [None]:
for i in range(len(stock_symbols)):
  for j in range(1, 6):
    stock[stock_symbols[i]][f'previous{str(j)}', str(stock_symbols[i])] = stock[stock_symbols[i]]['today'].shift(j)

stock[stock_symbols[2]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5
Symbols,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2014-01-02,137.0,135.0,135.0,135.0,135.55,32520.0,,,,,,
2014-01-03,136.259259,135.0,135.0,135.740741,135.527778,31171.851852,0.548697,,,,,
2014-01-06,136.083192,135.0,135.0,135.916808,135.610781,62355.65365,0.129709,0.548697,,,,
2014-01-07,136.028293,135.0,135.659903,135.311804,135.412295,91070.886744,-0.445128,0.129709,0.548697,,,
2014-01-08,136.009806,135.0,135.228701,135.761493,135.534947,56980.762637,0.332336,-0.445128,0.129709,0.548697,,


In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['ema50', str(stock_symbols[i])] = stock[stock_symbols[i]]['close'] / stock[stock_symbols[i]]['close'].ewm(50).mean()
  stock[stock_symbols[i]]['ema21', str(stock_symbols[i])] = stock[stock_symbols[i]]['close'] / stock[stock_symbols[i]]['close'].ewm(21).mean()
  stock[stock_symbols[i]]['ema14', str(stock_symbols[i])] = stock[stock_symbols[i]]['close'] / stock[stock_symbols[i]]['close'].ewm(14).mean()
  stock[stock_symbols[i]]['ema5', str(stock_symbols[i])] = stock[stock_symbols[i]]['close'] / stock[stock_symbols[i]]['close'].ewm(5).mean()

stock[stock_symbols[3]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5
Symbols,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
2014-01-02,66.5,65.5,66.5,66.5,66.08,229950.0,,,,,,,1.0,1.0,1.0,1.0
2014-01-03,66.5,65.5,66.5,66.12963,65.976296,249964.814815,-0.556948,,,,,,0.997235,0.997272,0.997304,0.997461
2014-01-06,66.839559,65.839559,66.160441,66.720713,66.270832,200155.093379,0.893825,-0.556948,,,,,1.00405,1.004019,1.003992,1.003844
2014-01-07,67.275386,66.275386,66.714468,66.905015,66.705819,169367.272952,0.276229,0.893825,-0.556948,,,,1.005058,1.004965,1.004886,1.004477
2014-01-08,67.422156,66.422156,67.22776,67.293797,67.061404,177190.635299,0.581096,0.276229,0.893825,-0.556948,,,1.008612,1.008418,1.008254,1.007419


In [None]:
def rsi(X, window=14):
  delta = X.diff(1)

  gains = delta.where(delta > 0, 0)
  losses = -delta.where(delta < 0, 0)

  avg_gains = gains.rolling(window=window, min_periods=1).mean()
  avg_losses = losses.rolling(window=window, min_periods=1).mean()

  rs = avg_gains / avg_losses
  rsi = 100 - (100 / (1 + rs))

  return rsi

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['rsi', str(stock_symbols[i])] = rsi(stock[stock_symbols[i]]['close'])

stock[stock_symbols[4]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5,rsi
Symbols,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
2017-01-03,39.2,39.2,39.2,39.2,39.2,700.0,,,,,,,1.0,1.0,1.0,1.0,
2017-01-04,43.496296,43.496296,43.496296,43.496296,43.496296,6750.37037,10.95994,,,,,,1.051412,1.050683,1.050072,1.047008,100.0
2017-01-05,49.067572,45.196604,49.067572,47.437691,48.052971,521133.820034,9.061449,10.95994,,,,,1.09222,1.09038,1.08884,1.081195,100.0
2017-01-06,51.13467,44.406962,51.13467,45.16915,46.837449,342822.959663,-4.78215,9.061449,10.95994,,,,1.02939,1.027717,1.026338,1.01981,78.40766
2017-01-09,47.910199,42.180744,47.910199,42.96764,43.843784,223068.513075,-4.873924,-4.78215,9.061449,10.95994,,,0.983469,0.982441,0.981627,0.978254,64.824189


In [None]:
def macd(X, short_window=12, long_window=29, signal=9):
  short_ema = X.ewm(span=short_window, adjust=False).mean()

  long_ema = X.ewm(span=long_window, adjust=False).mean()

  macd = short_ema - long_ema

  signal = macd.ewm(span=signal, adjust=False).mean()

  return signal

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['macd', str(stock_symbols[i])] = macd(stock[stock_symbols[i]]['close'])

stock[stock_symbols[5]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5,rsi,macd
Symbols,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
2014-07-14,81.5,81.0,81.0,81.5,81.45,10660.0,,,,,,,1.0,1.0,1.0,1.0,,0.0
2014-07-15,85.574074,85.444444,85.444444,85.574074,85.561111,12748.888889,4.998864,,,,,,1.024138,1.023804,1.023524,1.022119,100.0,0.071035
2014-07-16,90.617148,90.575552,90.575552,90.617148,90.612988,15832.835314,5.893226,4.998864,,,,,1.054212,1.053216,1.05238,1.048204,100.0,0.271165
2014-07-17,96.478966,96.464819,96.464819,96.478966,96.477551,68154.64654,6.468773,5.893226,4.998864,,,,1.088113,1.086065,1.084349,1.075834,100.0,0.644448
2014-07-18,102.700314,102.695411,102.695411,102.700314,102.699823,111480.682755,6.448398,6.468773,5.893226,4.998864,,,1.121361,1.117879,1.114968,1.100672,100.0,1.215521


In [None]:
window = 6
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['roc', str(stock_symbols[i])] = ((stock[stock_symbols[i]]['close'] - stock[stock_symbols[i]]['close'].shift(window)) / stock[stock_symbols[i]]['close'].shift(window)) * 100

stock[stock_symbols[6]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5,rsi,macd,roc
Symbols,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
2014-01-02,47.8,47.2,47.6,47.4,47.55,207470.0,,,,,,,1.0,1.0,1.0,1.0,,0.0,
2014-01-03,47.503704,47.051852,47.377778,47.325926,47.305556,176092.222222,-0.156274,,,,,,0.999226,0.999236,0.999245,0.999289,0.0,-0.001292,
2014-01-06,47.840747,47.016638,47.324958,47.783701,47.34253,261374.244482,0.967282,-0.156274,,,,,1.005851,1.005777,1.005715,1.005393,86.072358,0.00465,
2014-01-07,47.945838,47.33561,47.77042,47.596486,47.637817,280917.760455,-0.391797,0.967282,-0.156274,,,,1.001417,1.001344,1.001283,1.000986,63.662614,0.011589,
2014-01-08,47.981229,47.377684,47.528376,47.729469,47.743792,182041.901609,0.279396,-0.391797,0.967282,-0.156274,,,1.003336,1.003232,1.003146,1.002725,69.333942,0.021105,


In [None]:
window = 16
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['high-low', str(stock_symbols[i])] = stock[stock_symbols[i]]['high', str(stock_symbols[i])] - stock[stock_symbols[i]]['low', str(stock_symbols[i])]
  stock[stock_symbols[i]]['high-preclose', str(stock_symbols[i])] = abs(stock[stock_symbols[i]]['high', str(stock_symbols[i])] - stock[stock_symbols[i]]['close', str(stock_symbols[i])].shift(1))
  stock[stock_symbols[i]]['low-preclose', str(stock_symbols[i])] = abs(stock[stock_symbols[i]]['low', str(stock_symbols[i])] - stock[stock_symbols[i]]['close', str(stock_symbols[i])].shift(1))

  stock[stock_symbols[i]]['tr', str(stock_symbols[i])] = stock[stock_symbols[i]][[('high-low', str(stock_symbols[i])), ('high-preclose', str(stock_symbols[i])), ('low-preclose', str(stock_symbols[i]))]].max(axis=1)

  stock[stock_symbols[i]]['atr', str(stock_symbols[i])] = stock[stock_symbols[i]]['tr', str(stock_symbols[i])].rolling(window=window).mean()

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]] = stock[stock_symbols[i]].drop([('high-low', str(stock_symbols[i])), ('high-preclose', str(stock_symbols[i])), ('low-preclose', str(stock_symbols[i])), ('tr', str(stock_symbols[i]))], axis=1)

stock[stock_symbols[7]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5,rsi,macd,roc,atr
Symbols,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
2014-01-02,41.5,41.0,41.5,41.1,41.13,276250.0,,,,,,,1.0,1.0,1.0,1.0,,0.0,,
2014-01-03,41.574074,41.0,41.277778,41.47037,41.30037,424864.814815,0.901144,,,,,,1.004441,1.004381,1.00433,1.004076,100.0,0.006458,,
2014-01-06,42.134975,41.339559,41.768251,42.033786,41.802666,443856.451613,1.358598,0.901144,,,,,1.011865,1.011663,1.011492,1.010634,100.0,0.026481,,
2014-01-07,42.573827,41.907366,42.251134,42.143471,42.302433,337198.725563,0.260945,1.358598,0.901144,,,,1.010731,1.010434,1.010185,1.008961,100.0,0.055928,,
2014-01-08,42.590929,41.967896,42.087035,42.376438,42.418461,350568.775959,0.552796,0.260945,1.358598,0.901144,,,1.01288,1.012467,1.012122,1.010445,100.0,0.093794,,


In [None]:
def obv(X):
  obv = pd.Series(index=X.index)
  obv.iloc[0] = 0

  for i in range(1, len(X)):
    if (X['close'].iloc[i].values > X['close'].iloc[i-1].values):
      obv.iloc[i] = obv.iloc[i-1] + X['volume'].iloc[i].values
    elif (X['close'].iloc[i].values < X['close'].iloc[i-1].values):
      obv.iloc[i] = obv.iloc[i-1] - X['volume'].iloc[i].values
    else:
      obv.iloc[i] = obv.iloc[i-1]

  return obv

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['obv', str(stock_symbols[i])] = obv(stock[stock_symbols[i]])

stock[stock_symbols[8]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,previous5,ema50,ema21,ema14,ema5,rsi,macd,roc,atr,obv
Symbols,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,...,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-02,83.0,81.5,83.0,82.0,82.24,92560.0,,,,,...,,1.0,1.0,1.0,1.0,,0.0,,,0.0
2014-01-03,82.62963,81.5,82.259259,81.62963,81.884444,59922.962963,-0.451671,,,,...,,0.997759,0.997789,0.997814,0.997942,0.0,-0.006458,,,-59922.962963
2014-01-06,82.541596,81.5,82.083192,81.881154,81.942547,100389.541596,0.308129,-0.451671,,,...,,1.00055,1.000562,1.000572,1.000614,40.444894,-0.012272,,,40466.578633
2014-01-07,83.174049,81.829951,82.028293,82.289532,82.409397,114122.418543,0.498745,0.308129,-0.451671,,...,,1.004108,1.00406,1.004018,1.003794,64.051241,-0.010242,,,154588.997176
2014-01-08,84.040468,82.267783,82.989953,83.733923,83.115498,139748.439155,1.755254,0.498745,0.308129,-0.451671,...,,1.017135,1.016854,1.016614,1.015355,85.033504,0.021847,,,294337.436332


In [None]:
def cmf(X, window=16):
  money_flow_multiplier = ((X['close'] - X['low']) - (X['high'] - X['close']))/(X['high']-X['low'])

  money_flow_volume = money_flow_multiplier * X['volume']

  cmf = money_flow_volume.rolling(window=window).sum() / X['volume'].rolling(window=window).sum()

  return cmf

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['cmf', str(stock_symbols[i])] = cmf(stock[stock_symbols[i]])

stock[stock_symbols[9]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,ema50,ema21,ema14,ema5,rsi,macd,roc,atr,obv,cmf
Symbols,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,...,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2016-12-06,132.0,132.0,132.0,132.0,132.0,3010.0,,,,,...,1.0,1.0,1.0,1.0,,0.0,,,0.0,
2016-12-07,138.814815,138.814815,138.814815,138.814815,138.814815,802.592593,5.162738,,,,...,1.024909,1.024565,1.024275,1.022824,100.0,0.118822,,,802.592593,
2016-12-08,147.089983,147.089983,147.089983,147.089983,147.089983,936.655348,5.9613,5.162738,,,...,1.055156,1.054139,1.053285,1.049022,100.0,0.450786,,,1739.247941,
2016-12-09,156.599192,149.670214,149.670214,156.599192,151.993071,2304.861008,6.464892,5.9613,5.162738,,...,1.088785,1.086712,1.084974,1.076356,100.0,1.065604,,,4044.108949,
2016-12-12,167.185317,164.783955,164.783955,167.185317,165.588983,24119.777384,6.760013,6.464892,5.9613,5.162738,...,1.124409,1.120852,1.117878,1.103275,100.0,2.010623,,,28163.886333,


In [None]:
def emv(X):
  emv = pd.Series(index=X.index)
  emv.iloc[0] = np.nan

  for i in range(1, len(X)):
    dm = 0.5 * ((X['high'].iloc[i].values + X['low'].iloc[i].values) - (X['high'].iloc[i-1].values + X['low'].iloc[i-1].values))
    br = X['volume'].iloc[i].values / (1000000 * (X['high'].iloc[i].values - X['low'].iloc[i].values))
    emv.iloc[i] = dm / br if br != 0 else 0

  return emv

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['emv', str(stock_symbols[i])] = emv(stock[stock_symbols[i]])

In [None]:
stock[stock_symbols[8]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,ema21,ema14,ema5,rsi,macd,roc,atr,obv,cmf,emv
Symbols,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,...,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-02,83.0,81.5,83.0,82.0,82.24,92560.0,,,,,...,1.0,1.0,1.0,,0.0,,,0.0,,
2014-01-03,82.62963,81.5,82.259259,81.62963,81.884444,59922.962963,-0.451671,,,,...,0.997789,0.997814,0.997942,0.0,-0.006458,,,-59922.962963,,-3.490993
2014-01-06,82.541596,81.5,82.083192,81.881154,81.942547,100389.541596,0.308129,-0.451671,,,...,1.000562,1.000572,1.000614,40.444894,-0.012272,,,40466.578633,,-0.456699
2014-01-07,83.174049,81.829951,82.028293,82.289532,82.409397,114122.418543,0.498745,0.308129,-0.451671,,...,1.00406,1.004018,1.003794,64.051241,-0.010242,,,154588.997176,,5.66745
2014-01-08,84.040468,82.267783,82.989953,83.733923,83.115498,139748.439155,1.755254,0.498745,0.308129,-0.451671,...,1.016854,1.016614,1.015355,85.033504,0.021847,,,294337.436332,,8.272094


In [None]:
window = 16
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['minimum_low', str(stock_symbols[i])] = stock[stock_symbols[i]]['low', str(stock_symbols[i])].rolling(window=window).min()
  stock[stock_symbols[i]]['maximum_high', str(stock_symbols[i])] = stock[stock_symbols[i]]['high', str(stock_symbols[i])].rolling(window=window).max()

  stock[stock_symbols[i]]['stoch', str(stock_symbols[i])] = ((stock[stock_symbols[i]]['close', str(stock_symbols[i])] - stock[stock_symbols[i]]['minimum_low', str(stock_symbols[i])]) / (stock[stock_symbols[i]]['maximum_high', str(stock_symbols[i])] - stock[stock_symbols[i]]['minimum_low', str(stock_symbols[i])])) * 100

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]] = stock[stock_symbols[i]].drop([('minimum_low', str(stock_symbols[i])), ('maximum_high', str(stock_symbols[i]))], axis=1)

stock[stock_symbols[7]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,ema14,ema5,rsi,macd,roc,atr,obv,cmf,emv,stoch
Symbols,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,...,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-02,41.5,41.0,41.5,41.1,41.13,276250.0,,,,,...,1.0,1.0,,0.0,,,0.0,,,
2014-01-03,41.574074,41.0,41.277778,41.47037,41.30037,424864.814815,0.901144,,,,...,1.00433,1.004076,100.0,0.006458,,,424864.8,,0.050044,
2014-01-06,42.134975,41.339559,41.768251,42.033786,41.802666,443856.451613,1.358598,0.901144,,,...,1.011492,1.010634,100.0,0.026481,,,868721.3,,0.806837,
2014-01-07,42.573827,41.907366,42.251134,42.143471,42.302433,337198.725563,0.260945,1.358598,0.901144,,...,1.010185,1.008961,100.0,0.055928,,,1205920.0,,0.994812,
2014-01-08,42.590929,41.967896,42.087035,42.376438,42.418461,350568.775959,0.552796,0.260945,1.358598,0.901144,...,1.012122,1.010445,100.0,0.093794,,,1556489.0,,0.068984,


In [None]:
def mfi(X, window=14):
  combine_price = (X['high'] + X['low'] + X['close']) / 3

  raw_money_flow = combine_price * X['volume']

  flow_direction = (combine_price.diff() > 0).astype(int)

  positive_money_flow = flow_direction * raw_money_flow
  negative_money_flow = (1 - flow_direction) * raw_money_flow

  positive = positive_money_flow.rolling(window=window, min_periods=1).sum()
  negative = negative_money_flow.rolling(window=window, min_periods=1).sum()

  mf = positive / negative
  mfi = 100 - (100 / (1 + mf))

  return mfi

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['mfi', str(stock_symbols[i])] = mfi(stock[stock_symbols[i]])

stock[stock_symbols[6]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,ema5,rsi,macd,roc,atr,obv,cmf,emv,stoch,mfi
Symbols,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,...,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-02,47.8,47.2,47.6,47.4,47.55,207470.0,,,,,...,1.0,,0.0,,,0.0,,,,0.0
2014-01-03,47.503704,47.051852,47.377778,47.325926,47.305556,176092.222222,-0.156274,,,,...,0.999289,0.0,-0.001292,,,-176092.222222,,-0.570221,,0.0
2014-01-06,47.840747,47.016638,47.324958,47.783701,47.34253,261374.244482,0.967282,-0.156274,,,...,1.005393,86.072358,0.00465,,,85282.02226,,0.475832,,40.608254
2014-01-07,47.945838,47.33561,47.77042,47.596486,47.637817,280917.760455,-0.391797,0.967282,-0.156274,,...,1.000986,63.662614,0.011589,,,-195635.738195,,0.460589,,58.674542
2014-01-08,47.981229,47.377684,47.528376,47.729469,47.743792,182041.901609,0.279396,-0.391797,0.967282,-0.156274,...,1.002725,69.333942,0.021105,,,-13593.836586,,0.128415,,65.487696


In [None]:
window = 21
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['combine_price', str(stock_symbols[i])] = (stock[stock_symbols[i]]['high', str(stock_symbols[i])] + stock[stock_symbols[i]]['low', str(stock_symbols[i])] + stock[stock_symbols[i]]['close', str(stock_symbols[i])]) / 3

  stock[stock_symbols[i]]['sma_combine_price', str(stock_symbols[i])] = stock[stock_symbols[i]]['combine_price', str(stock_symbols[i])].rolling(window=window).mean()
  stock[stock_symbols[i]]['mean_deviation', str(stock_symbols[i])] = stock[stock_symbols[i]]['combine_price', str(stock_symbols[i])].rolling(window).apply(lambda x: x.mad())

  stock[stock_symbols[i]]['cci', str(stock_symbols[i])] = (stock[stock_symbols[i]]['combine_price', str(stock_symbols[i])] - stock[stock_symbols[i]]['sma_combine_price', str(stock_symbols[i])]) / (0.015 * stock[stock_symbols[i]]['mean_deviation', str(stock_symbols[i])])

In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]] = stock[stock_symbols[i]].drop([('combine_price', str(stock_symbols[i])), ('sma_combine_price', str(stock_symbols[i])), ('mean_deviation', str(stock_symbols[i]))], axis=1)

stock[stock_symbols[5]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,rsi,macd,roc,atr,obv,cmf,emv,stoch,mfi,cci
Symbols,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,...,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-07-14,81.5,81.0,81.0,81.5,81.45,10660.0,,,,,...,,0.0,,,0.0,,,,0.0,
2014-07-15,85.574074,85.444444,85.444444,85.574074,85.561111,12748.888889,4.998864,,,,...,100.0,0.071035,,,12748.888889,,43.30779,,55.706701,
2014-07-16,90.617148,90.575552,90.575552,90.617148,90.612988,15832.835314,5.893226,4.998864,,,...,100.0,0.271165,,,28581.724203,,13.364772,,74.439041,
2014-07-17,96.478966,96.464819,96.464819,96.478966,96.477551,68154.64654,6.468773,5.893226,4.998864,,...,100.0,0.644448,,,96736.370743,,1.219569,,91.301258,
2014-07-18,102.700314,102.695411,102.695411,102.700314,102.699823,111480.682755,6.448398,6.468773,5.893226,4.998864,...,100.0,1.215521,,,208217.053497,,0.27381,,95.951569,


In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['volume'] = stock[stock_symbols[i]]['volume'] / stock[stock_symbols[i]]['volume'].ewm(5).mean()

stock[stock_symbols[4]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,rsi,macd,roc,atr,obv,cmf,emv,stoch,mfi,cci
Symbols,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,...,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-01-03,39.2,39.2,39.2,39.2,39.2,1.0,,,,,...,,0.0,,,0.0,,,,0.0,
2017-01-04,43.496296,43.496296,43.496296,43.496296,43.496296,1.687507,10.95994,,,,...,100.0,0.07491,,,6750.37037,,0.0,,91.453207,
2017-01-05,49.067572,45.196604,49.067572,47.437691,48.052971,2.498478,9.061449,10.95994,,,...,100.0,0.261951,,,527884.190404,,27.006563,,99.889959,
2017-01-06,51.13467,44.406962,51.13467,45.16915,46.837449,1.361521,-4.78215,9.061449,10.95994,,...,78.40766,0.470344,,,185061.230741,,12.534668,,60.729692,
2017-01-09,47.910199,42.180744,47.910199,42.96764,43.843784,0.915004,-4.873924,-4.78215,9.061449,10.95994,...,64.824189,0.642404,,,-38007.282334,,-69.999734,,48.927543,


In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]].replace(0, np.nan, inplace=True)
  stock[stock_symbols[i]] = stock[stock_symbols[i]].dropna()

stock[stock_symbols[3]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,rsi,macd,roc,atr,obv,cmf,emv,stoch,mfi,cci
Symbols,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,...,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-02-10,77.087841,75.734742,77.013348,76.134529,76.353445,0.481059,-0.327083,-0.925871,-0.235985,-0.669727,...,76.420951,2.766264,0.550935,2.292986,5415093.0,0.030853,-4.502533,73.008832,75.069384,56.746374
2014-02-11,77.680744,75.58216,77.004672,76.047085,76.663206,0.91417,-0.114855,-0.327083,-0.925871,-0.235985,...,71.804258,2.775967,0.623218,2.361648,4986405.0,0.001324,1.077765,70.134633,74.329985,53.488925
2014-02-12,78.538261,76.178756,76.676635,77.96648,77.532122,1.112164,2.523955,-0.114855,-0.327083,-0.925871,...,75.059557,2.789409,0.211887,2.377287,5520134.0,0.037561,3.214161,85.242257,74.252493,67.604111
2014-02-13,81.113391,78.337565,78.511822,80.913268,80.104743,1.326579,3.779558,2.523955,-0.114855,-0.327083,...,77.604675,2.850197,4.700661,2.423413,6201980.0,0.049635,9.636052,98.072188,73.882216,109.447478
2014-02-14,83.639687,81.043148,82.079138,82.594644,82.42866,1.214896,2.077998,3.779558,2.523955,-0.114855,...,75.817909,2.961312,7.129147,2.509629,6854838.0,0.07148,10.404078,91.792829,71.125219,151.394382


In [None]:
for i in range(len(stock_symbols)):
  stock[stock_symbols[i]]['trend', str(stock_symbols[i])] = (stock[stock_symbols[i]]['today'].iloc[:] > 0).astype(int)

In [None]:
stock[stock_symbols[2]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,macd,roc,atr,obv,cmf,emv,stoch,mfi,cci,trend
Symbols,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,...,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-02-10,140.482532,138.480667,140.147984,140.051374,139.450628,1.033771,-0.068078,-0.194131,-0.5516,-0.26005,...,1.461313,-0.189545,2.279177,527310.400136,-0.006045,-8.53916,61.359538,60.928457,43.407333,0
2014-02-11,140.818886,138.818234,139.401794,139.367981,139.59322,1.166398,-0.487959,-0.068078,-0.194131,-0.5516,...,1.460017,-0.989682,2.341504,282401.448555,-0.049409,2.752617,49.03142,54.894444,38.354371,0
2014-02-12,140.28661,138.936382,139.140628,139.778793,139.708127,0.719341,0.294768,-0.487959,-0.068078,-0.194131,...,1.441397,-1.262716,2.282017,425274.081605,-0.056854,-1.956872,51.817299,53.959517,33.522517,1
2014-02-13,140.750314,138.977734,140.34922,139.272578,139.735344,0.765515,-0.362155,0.294768,-0.487959,-0.068078,...,1.400984,-1.363794,2.301833,280141.160038,-0.078973,3.084245,44.754913,49.939644,28.820495,0
2014-02-14,141.56261,138.992207,140.122227,139.745402,140.349371,1.243744,0.339496,-0.362155,0.294768,-0.487959,...,1.354204,-0.479975,2.349392,528186.682587,-0.103236,4.283751,51.351448,52.427265,40.076866,1


In [None]:
df = {}
for i in range(len(stock_symbols)):
  df[stock_symbols[i]] = stock[stock_symbols[i]][['today', 'previous1', 'previous2', 'previous3', 'previous4', 'previous5', 'volume',
                                                  'ema50', 'ema21', 'ema14', 'ema5', 'rsi', 'macd', 'roc', 'atr', 'obv', 'cmf', 'emv', 'stoch', 'mfi', 'cci', 'trend']]

df[stock_symbols[1]].head()

Attributes,today,previous1,previous2,previous3,previous4,previous5,volume,ema50,ema21,ema14,...,macd,roc,atr,obv,cmf,emv,stoch,mfi,cci,trend
Symbols,VCB,VCB,VCB,VCB,VCB,VCB,VCB,VCB,VCB,VCB,...,VCB,VCB,VCB,VCB,VCB,VCB,VCB,VCB,VCB,VCB
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-02-10,0.857851,-0.865907,-1.782533,-1.194187,-0.846114,0.119654,0.527679,1.000768,0.998219,0.99639,...,0.516188,-3.676323,0.643458,9345103.0,0.015614,-0.064001,35.924138,65.890276,-0.00533,1
2014-02-11,0.297694,0.857851,-0.865907,-1.782533,-1.194187,-0.846114,0.755266,1.003538,1.001106,0.999411,...,0.494253,-3.505033,0.645363,10179520.0,-0.014983,0.131105,39.579557,72.270852,13.978239,1
2014-02-12,1.714227,0.297694,0.857851,-0.865907,-1.782533,-1.194187,0.945689,1.019608,1.016982,1.015136,...,0.478357,-1.01335,0.650089,11212880.0,0.0444,0.146417,60.691392,78.106857,46.94164,1
2014-02-13,0.363696,1.714227,0.297694,0.857851,-0.865907,-1.782533,0.898741,1.022081,1.019256,1.017249,...,0.467917,0.547385,0.643616,12175160.0,0.035046,0.23095,64.715457,78.258919,77.96618,1
2014-02-14,0.352185,0.363696,1.714227,0.297694,0.857851,-0.865907,0.739489,1.02436,1.021303,1.01911,...,0.46226,2.732742,0.637289,12927240.0,0.010264,0.040263,68.12331,77.794244,79.577302,1


In [None]:
stock_symbols

['VIC', 'VCB', 'VNM', 'GAS', 'HVN', 'MWG', 'FPT', 'HPG', 'MSN', 'SAB']

## Logistic Regression - Prediction & Valuation Model

#### VIC

In [None]:
X = df[stock_symbols[0]].loc[:, df[stock_symbols[0]].columns != ('trend', stock_symbols[0])]
y = df[stock_symbols[0]][('trend', stock_symbols[0])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1])

In [None]:
y_test.values[:21]

array([0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.69


#### VCB

In [None]:
X = df[stock_symbols[1]].loc[:, df[stock_symbols[1]].columns != ('trend', stock_symbols[1])]
y = df[stock_symbols[1]][('trend', stock_symbols[1])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1])

In [None]:
y_test.values[:21]

array([0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.66


#### VNM

In [None]:
X = df[stock_symbols[2]].loc[:, df[stock_symbols[2]].columns != ('trend', stock_symbols[2])]
y = df[stock_symbols[2]][('trend', stock_symbols[2])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0])

In [None]:
y_test.values[:21]

array([1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.73


#### GAS

In [None]:
X = df[stock_symbols[3]].loc[:, df[stock_symbols[3]].columns != ('trend', stock_symbols[3])]
y = df[stock_symbols[3]][('trend', stock_symbols[3])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0])

In [None]:
y_test.values[:21]

array([1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.78


#### HVN

In [None]:
X = df[stock_symbols[4]].loc[:, df[stock_symbols[4]].columns != ('trend', stock_symbols[4])]
y = df[stock_symbols[4]][('trend', stock_symbols[4])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
y_test.values[:21]

array([0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.73


#### MWG

In [None]:
X = df[stock_symbols[5]].loc[:, df[stock_symbols[5]].columns != ('trend', stock_symbols[5])]
y = df[stock_symbols[5]][('trend', stock_symbols[5])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1])

In [None]:
y_test.values[:21]

array([1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.66


#### FPT

In [None]:
X = df[stock_symbols[6]].loc[:, df[stock_symbols[6]].columns != ('trend', stock_symbols[6])]
y = df[stock_symbols[6]][('trend', stock_symbols[6])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0])

In [None]:
y_test.values[:21]

array([1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.65


#### HPG

In [None]:
X = df[stock_symbols[7]].loc[:, df[stock_symbols[7]].columns != ('trend', stock_symbols[7])]
y = df[stock_symbols[7]][('trend', stock_symbols[7])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1])

In [None]:
y_test.values[:21]

array([0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.72


#### MSN

In [None]:
X = df[stock_symbols[8]].loc[:, df[stock_symbols[8]].columns != ('trend', stock_symbols[8])]
y = df[stock_symbols[8]][('trend', stock_symbols[8])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1])

In [None]:
y_test.values[:21]

array([1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.70


#### SAB

In [None]:
X = df[stock_symbols[9]].loc[:, df[stock_symbols[9]].columns != ('trend', stock_symbols[9])]
y = df[stock_symbols[9]][('trend', stock_symbols[9])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0])

In [None]:
y_test.values[:21]

array([0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.69


## Random Forest - Prediction & Valuation Model

#### VIC

In [None]:
X = df[stock_symbols[0]][[('volume', stock_symbols[0]), ('ema50', stock_symbols[0]), ('ema21', stock_symbols[0]),
                          ('ema14', stock_symbols[0]), ('ema5', stock_symbols[0]), ('rsi', stock_symbols[0]),
                           ('macd', stock_symbols[0]), ('roc', stock_symbols[0]), ('obv', stock_symbols[0]),
                            ('atr', stock_symbols[0]), ('cmf', stock_symbols[0]), ('emv', stock_symbols[0]),
                             ('stoch', stock_symbols[0]), ('cci', stock_symbols[0]), ('mfi', stock_symbols[0])]]
y = df[stock_symbols[0]][('trend', stock_symbols[0])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1])

In [None]:
y_test.values[:21]

array([1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.79


#### VCB

In [None]:
X = df[stock_symbols[1]][[('volume', stock_symbols[1]), ('ema50', stock_symbols[1]), ('ema21', stock_symbols[1]),
                          ('ema14', stock_symbols[1]), ('ema5', stock_symbols[1]), ('rsi', stock_symbols[1]),
                           ('macd', stock_symbols[1]), ('roc', stock_symbols[1]), ('obv', stock_symbols[1]),
                            ('atr', stock_symbols[1]), ('cmf', stock_symbols[1]), ('emv', stock_symbols[1]),
                             ('stoch', stock_symbols[1]), ('cci', stock_symbols[1]), ('mfi', stock_symbols[1])]]
y = df[stock_symbols[1]][('trend', stock_symbols[1])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1])

In [None]:
y_test.values[:21]

array([0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.76


#### VNM

In [None]:
X = df[stock_symbols[2]][[('volume', stock_symbols[2]), ('ema50', stock_symbols[2]), ('ema21', stock_symbols[2]),
                          ('ema14', stock_symbols[2]), ('ema5', stock_symbols[2]), ('rsi', stock_symbols[2]),
                           ('macd', stock_symbols[2]), ('roc', stock_symbols[2]), ('obv', stock_symbols[2]),
                            ('atr', stock_symbols[2]), ('cmf', stock_symbols[2]), ('emv', stock_symbols[2]),
                             ('stoch', stock_symbols[2]), ('cci', stock_symbols[2]), ('mfi', stock_symbols[2])]]
y = df[stock_symbols[2]][('trend', stock_symbols[2])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0])

In [None]:
y_test.values[:21]

array([1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.79


#### GAS

In [None]:
X = df[stock_symbols[3]][[('volume', stock_symbols[3]), ('ema50', stock_symbols[3]), ('ema21', stock_symbols[3]),
                          ('ema14', stock_symbols[3]), ('ema5', stock_symbols[3]), ('rsi', stock_symbols[3]),
                           ('macd', stock_symbols[3]), ('roc', stock_symbols[3]), ('obv', stock_symbols[3]),
                            ('atr', stock_symbols[3]), ('cmf', stock_symbols[3]), ('emv', stock_symbols[3]),
                             ('stoch', stock_symbols[3]), ('cci', stock_symbols[3]), ('mfi', stock_symbols[3])]]
y = df[stock_symbols[3]][('trend', stock_symbols[3])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1])

In [None]:
y_test.values[:21]

array([1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.77


#### HVN

In [None]:
X = df[stock_symbols[4]][[('volume', stock_symbols[4]), ('ema50', stock_symbols[4]), ('ema21', stock_symbols[4]),
                          ('ema14', stock_symbols[4]), ('ema5', stock_symbols[4]), ('rsi', stock_symbols[4]),
                           ('macd', stock_symbols[4]), ('roc', stock_symbols[4]), ('obv', stock_symbols[4]),
                            ('atr', stock_symbols[4]), ('cmf', stock_symbols[4]), ('emv', stock_symbols[4]),
                             ('stoch', stock_symbols[4]), ('cci', stock_symbols[4]), ('mfi', stock_symbols[4])]]
y = df[stock_symbols[4]][('trend', stock_symbols[4])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0])

In [None]:
y_test.values[:21]

array([0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.78


#### MWG

In [None]:
X = df[stock_symbols[5]][[('volume', stock_symbols[5]), ('ema50', stock_symbols[5]), ('ema21', stock_symbols[5]),
                          ('ema14', stock_symbols[5]), ('ema5', stock_symbols[5]), ('rsi', stock_symbols[5]),
                           ('macd', stock_symbols[5]), ('roc', stock_symbols[5]), ('obv', stock_symbols[5]),
                            ('atr', stock_symbols[5]), ('cmf', stock_symbols[5]), ('emv', stock_symbols[5]),
                             ('stoch', stock_symbols[5]), ('cci', stock_symbols[5]), ('mfi', stock_symbols[5])]]
y = df[stock_symbols[5]][('trend', stock_symbols[5])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0])

In [None]:
y_test.values[:21]

array([1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.80


#### FPT

In [None]:
X = df[stock_symbols[6]][[('volume', stock_symbols[6]), ('ema50', stock_symbols[6]), ('ema21', stock_symbols[6]),
                          ('ema14', stock_symbols[6]), ('ema5', stock_symbols[6]), ('rsi', stock_symbols[6]),
                           ('macd', stock_symbols[6]), ('roc', stock_symbols[6]), ('obv', stock_symbols[6]),
                            ('atr', stock_symbols[6]), ('cmf', stock_symbols[6]), ('emv', stock_symbols[6]),
                             ('stoch', stock_symbols[6]), ('cci', stock_symbols[6]), ('mfi', stock_symbols[6])]]
y = df[stock_symbols[6]][('trend', stock_symbols[6])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [None]:
y_test.values[:21]

array([0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.77


#### HPG

In [None]:
X = df[stock_symbols[7]][[('volume', stock_symbols[7]), ('ema50', stock_symbols[7]), ('ema21', stock_symbols[7]),
                          ('ema14', stock_symbols[7]), ('ema5', stock_symbols[7]), ('rsi', stock_symbols[7]),
                           ('macd', stock_symbols[7]), ('roc', stock_symbols[7]), ('obv', stock_symbols[7]),
                            ('atr', stock_symbols[7]), ('cmf', stock_symbols[7]), ('emv', stock_symbols[7]),
                             ('stoch', stock_symbols[7]), ('cci', stock_symbols[7]), ('mfi', stock_symbols[7])]]
y = df[stock_symbols[7]][('trend', stock_symbols[7])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0])

In [None]:
y_test.values[:21]

array([1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.78


#### MSN

In [None]:
X = df[stock_symbols[8]][[('volume', stock_symbols[8]), ('ema50', stock_symbols[8]), ('ema21', stock_symbols[8]),
                          ('ema14', stock_symbols[8]), ('ema5', stock_symbols[8]), ('rsi', stock_symbols[8]),
                           ('macd', stock_symbols[8]), ('roc', stock_symbols[8]), ('obv', stock_symbols[8]),
                            ('atr', stock_symbols[8]), ('cmf', stock_symbols[8]), ('emv', stock_symbols[8]),
                             ('stoch', stock_symbols[8]), ('cci', stock_symbols[8]), ('mfi', stock_symbols[8])]]
y = df[stock_symbols[8]][('trend', stock_symbols[8])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0])

In [None]:
y_test.values[:21]

array([0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.78


#### SAB

In [None]:
X = df[stock_symbols[9]][[('volume', stock_symbols[9]), ('ema50', stock_symbols[9]), ('ema21', stock_symbols[9]),
                          ('ema14', stock_symbols[9]), ('ema5', stock_symbols[9]), ('rsi', stock_symbols[9]),
                           ('macd', stock_symbols[9]), ('roc', stock_symbols[9]), ('obv', stock_symbols[9]),
                            ('atr', stock_symbols[9]), ('cmf', stock_symbols[9]), ('emv', stock_symbols[9]),
                             ('stoch', stock_symbols[9]), ('cci', stock_symbols[9]), ('mfi', stock_symbols[9])]]
y = df[stock_symbols[9]][('trend', stock_symbols[9])]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0])

In [None]:
y_test.values[:21]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.79


## Predict with new data

In [None]:
new_stock = {}
for i in range(len(stock_symbols)):
  data = dt.DataLoader(symbols=stock_symbols[i],
                        start="2024-01-01",
                        end="2024-03-01",
                        data_source="VND")
  new_stock[stock_symbols[i]] = data.download()

In [None]:
new_stock[stock_symbols[0]].head()

Attributes,high,low,open,close,avg,volume
Symbols,VIC,VIC,VIC,VIC,VIC,VIC
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2024-01-02,44.95,44.0,44.95,44.0,44.35,2324300.0
2024-01-03,44.15,43.5,43.5,44.15,43.73,2347100.0
2024-01-04,44.4,43.8,44.15,44.15,44.14,2380800.0
2024-01-05,44.2,43.9,44.15,44.1,44.03,1553600.0
2024-01-08,44.75,44.1,44.45,44.35,44.43,2577400.0


In [None]:
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]] = new_stock[stock_symbols[i]].ewm(alpha=0.65).mean()

new_stock[stock_symbols[1]].tail()

Attributes,high,low,open,close,avg,volume
Symbols,VCB,VCB,VCB,VCB,VCB,VCB
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2024-02-26,90.461559,88.999486,89.266926,89.431198,89.896155,1110129.0
2024-02-27,90.941546,89.06482,89.483424,90.515919,90.152154,1492830.0
2024-02-28,95.139541,89.932687,90.469198,94.990572,93.413754,3339525.0
2024-02-29,98.623839,94.72144,95.689219,96.4917,96.817314,2462984.0
2024-03-01,98.348344,94.837504,97.321227,97.017095,96.58506,2437384.0


In [None]:
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]]['today', str(stock_symbols[i])] = new_stock[stock_symbols[i]]['close'].pct_change() * 100

new_stock[stock_symbols[2]].head()

Attributes,high,low,open,close,avg,volume,today
Symbols,VNM,VNM,VNM,VNM,VNM,VNM,VNM
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2024-01-02,68.5,67.9,68.2,68.3,68.16,2142800.0,
2024-01-03,68.87037,67.974074,68.348148,68.818519,68.352593,2022059.0,0.759178
2024-01-04,69.297963,68.670798,68.858744,68.941766,68.907708,2958861.0,0.17909
2024-01-05,69.299307,68.558088,69.14993,68.848214,68.790438,2178090.0,-0.135697
2024-01-08,69.169073,68.324102,69.051961,68.42465,68.607191,2320741.0,-0.615214


In [None]:
for i in range(len(stock_symbols)):
  for j in range(1, 6):
    new_stock[stock_symbols[i]][f'previous{str(j)}', str(stock_symbols[i])] = new_stock[stock_symbols[i]]['today'].shift(j)

new_stock[stock_symbols[3]].tail()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5
Symbols,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS,GAS
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2024-02-26,77.379891,76.146502,77.071826,76.456802,76.686388,1292453.0,-0.619908,-1.271988,-0.059347,-0.641783,0.297105,2.575017
2024-02-27,77.652962,76.181276,76.700139,77.199881,76.903236,1202653.0,0.971893,-0.619908,-1.271988,-0.059347,-0.641783,0.297105
2024-02-28,78.658537,77.168447,77.545049,77.719958,77.817633,1441819.0,0.673677,0.971893,-0.619908,-1.271988,-0.059347,-0.641783
2024-02-29,78.555488,77.188956,78.165767,77.511985,77.780171,1341057.0,-0.267593,0.673677,0.971893,-0.619908,-1.271988,-0.059347
2024-03-01,78.064421,77.261135,77.928018,77.569195,77.57206,1113520.0,0.073807,-0.267593,0.673677,0.971893,-0.619908,-1.271988


In [None]:
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]]['ema50', str(stock_symbols[i])] = new_stock[stock_symbols[i]]['close'] / new_stock[stock_symbols[i]]['close'].ewm(50).mean()
  new_stock[stock_symbols[i]]['ema21', str(stock_symbols[i])] = new_stock[stock_symbols[i]]['close'] / new_stock[stock_symbols[i]]['close'].ewm(21).mean()
  new_stock[stock_symbols[i]]['ema14', str(stock_symbols[i])] = new_stock[stock_symbols[i]]['close'] / new_stock[stock_symbols[i]]['close'].ewm(14).mean()
  new_stock[stock_symbols[i]]['ema5', str(stock_symbols[i])] = new_stock[stock_symbols[i]]['close'] / new_stock[stock_symbols[i]]['close'].ewm(5).mean()

new_stock[stock_symbols[4]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5
Symbols,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
2024-01-02,12.45,11.4,12.25,12.25,12.28,1788600.0,,,,,,,1.0,1.0,1.0,1.0
2024-01-03,12.931481,12.02963,12.435185,12.87963,12.872593,3876748.0,5.139834,,,,,,1.024801,1.024458,1.02417,1.022726
2024-01-04,13.317572,12.654669,12.886672,12.995331,13.040611,3153795.0,0.898329,5.139834,,,,,1.02219,1.021661,1.021219,1.019048
2024-01-05,13.338971,12.684583,12.994453,12.833436,12.88843,2758517.0,-1.245791,0.898329,5.139834,,,,1.007004,1.006526,1.006134,1.004299
2024-01-08,12.98679,12.563971,12.834719,12.64823,12.706495,2232166.0,-1.443155,-1.245791,0.898329,5.139834,,,0.994028,0.993741,0.993517,0.992625


In [None]:
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]]['rsi', str(stock_symbols[i])] = rsi(new_stock[stock_symbols[i]]['close'])
  new_stock[stock_symbols[i]]['macd', str(stock_symbols[i])] = macd(new_stock[stock_symbols[i]]['close'])
  new_stock[stock_symbols[i]]['obv', str(stock_symbols[i])] = obv(new_stock[stock_symbols[i]])
  new_stock[stock_symbols[i]]['cmf', str(stock_symbols[i])] = cmf(new_stock[stock_symbols[i]])
  new_stock[stock_symbols[i]]['emv', str(stock_symbols[i])] = emv(new_stock[stock_symbols[i]])
  new_stock[stock_symbols[i]]['mfi', str(stock_symbols[i])] = mfi(new_stock[stock_symbols[i]])

In [None]:
new_stock[stock_symbols[5]].tail()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,ema50,ema21,ema14,ema5,rsi,macd,obv,cmf,emv,mfi
Symbols,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,...,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2024-02-26,44.980788,43.552753,44.596237,44.430552,44.103479,15613640.0,-0.335567,-2.359779,-1.433564,-0.880683,...,0.985973,0.98049,0.977077,0.973063,44.039311,0.790197,15503480.0,-0.048079,-0.08353,48.395176
2024-02-27,45.123276,44.103464,44.663683,44.638193,44.517218,13126780.0,0.467339,-0.335567,-2.359779,-1.433564,...,0.990939,0.985895,0.982956,0.981277,46.648207,0.700408,28630260.0,-0.025943,0.026927,49.936363
2024-02-28,45.595647,44.523712,44.882289,45.068368,45.000026,15337510.0,0.963692,0.467339,-0.335567,-2.359779,...,1.000471,0.99565,0.992972,0.992268,43.841812,0.616391,43967770.0,-0.044101,0.031192,51.518675
2024-02-29,46.508476,45.483299,45.868801,45.803929,45.903509,15550900.0,1.632101,0.963692,0.467339,-0.335567,...,1.016166,1.01124,1.008513,1.007041,40.809297,0.551804,59518670.0,-0.066682,0.061719,52.622352
2024-03-01,47.022967,45.884155,46.31158,46.321375,46.486228,15958840.0,1.129699,1.632101,0.963692,0.467339,...,1.026612,1.021407,1.018456,1.015299,42.378139,0.510628,75477510.0,-0.100122,0.032659,54.668892


In [None]:
window = 6
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]]['roc', str(stock_symbols[i])] = ((new_stock[stock_symbols[i]]['close'] - new_stock[stock_symbols[i]]['close'].shift(window)) / new_stock[stock_symbols[i]]['close'].shift(window)) * 100

new_stock[stock_symbols[6]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,ema21,ema14,ema5,rsi,macd,obv,cmf,emv,mfi,roc
Symbols,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,...,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2024-01-02,96.9,95.7,96.9,95.9,96.15,2217776.0,,,,,...,1.0,1.0,1.0,,0.0,0.0,,,0.0,
2024-01-03,96.603704,95.477778,95.937037,96.196296,95.898148,1729720.0,0.308964,,,,...,1.001507,1.001489,1.001402,100.0,0.005166,1729720.0,,-0.16876,0.0,
2024-01-04,97.823599,95.968251,96.183531,96.877929,96.918081,2592796.0,0.708585,0.308964,,,...,1.005584,1.005507,1.005119,100.0,0.025211,4322516.0,,0.611951,39.830944,
2024-01-05,97.610055,96.319154,96.920292,97.222445,96.985338,1863078.0,0.355619,0.708585,0.308964,,...,1.006692,1.006556,1.005878,100.0,0.059604,6185594.0,,0.047587,53.237097,
2024-01-08,98.060887,96.829384,97.233749,97.273122,97.413115,1660725.0,0.052125,0.355619,0.708585,0.308964,...,1.005627,1.005454,1.00461,100.0,0.102095,7846319.0,,0.356336,61.00812,


In [None]:
window = 16
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]]['high-low', str(stock_symbols[i])] = new_stock[stock_symbols[i]]['high', str(stock_symbols[i])] - new_stock[stock_symbols[i]]['low', str(stock_symbols[i])]
  new_stock[stock_symbols[i]]['high-preclose', str(stock_symbols[i])] = abs(new_stock[stock_symbols[i]]['high', str(stock_symbols[i])] - new_stock[stock_symbols[i]]['close', str(stock_symbols[i])].shift(1))
  new_stock[stock_symbols[i]]['low-preclose', str(stock_symbols[i])] = abs(new_stock[stock_symbols[i]]['low', str(stock_symbols[i])] - new_stock[stock_symbols[i]]['close', str(stock_symbols[i])].shift(1))

  new_stock[stock_symbols[i]]['tr', str(stock_symbols[i])] = new_stock[stock_symbols[i]][[('high-low', str(stock_symbols[i])), ('high-preclose', str(stock_symbols[i])), ('low-preclose', str(stock_symbols[i]))]].max(axis=1)

  new_stock[stock_symbols[i]]['atr', str(stock_symbols[i])] = new_stock[stock_symbols[i]]['tr', str(stock_symbols[i])].rolling(window=window).mean()

In [None]:
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]] = new_stock[stock_symbols[i]].drop([('high-low', str(stock_symbols[i])), ('high-preclose', str(stock_symbols[i])), ('low-preclose', str(stock_symbols[i])), ('tr', str(stock_symbols[i]))], axis=1)

new_stock[stock_symbols[7]].tail()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,ema14,ema5,rsi,macd,obv,cmf,emv,mfi,roc,atr
Symbols,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,...,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG,HPG
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2024-02-26,28.828467,28.361904,28.486474,28.693923,28.569501,22865820.0,0.691301,-0.627688,-0.81528,-0.718351,...,1.013996,1.004724,64.359038,0.27771,195005800.0,-0.214059,-0.001712,57.613158,0.461987,0.401109
2024-02-27,29.882463,28.646666,28.755266,29.770373,29.466825,64917750.0,3.75149,0.691301,-0.627688,-0.81528,...,1.048069,1.035088,80.268574,0.301918,259923600.0,-0.021082,0.012743,68.896447,2.739041,0.455758
2024-02-28,30.576362,29.623833,29.824343,30.309631,30.092889,46771210.0,1.81139,3.75149,0.691301,-0.627688,...,1.061906,1.044455,83.151796,0.343613,306694800.0,0.037019,0.017016,75.239648,4.078689,0.497229
2024-02-29,30.949227,30.193342,30.42602,30.758371,30.546011,38519000.0,1.48052,1.81139,3.75149,0.691301,...,1.071647,1.049428,84.852613,0.401382,345213800.0,0.117034,0.009246,80.216489,6.383803,0.523931
2024-03-01,31.112229,30.49017,30.929107,30.81793,30.756604,31316800.0,0.193635,1.48052,1.81139,3.75149,...,1.068091,1.042511,84.790923,0.466617,376530600.0,0.1296,0.004567,81.025355,7.465947,0.539371


In [None]:
window = 16
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]]['minimum_low', str(stock_symbols[i])] = new_stock[stock_symbols[i]]['low', str(stock_symbols[i])].rolling(window=window).min()
  new_stock[stock_symbols[i]]['maximum_high', str(stock_symbols[i])] = new_stock[stock_symbols[i]]['high', str(stock_symbols[i])].rolling(window=window).max()

  new_stock[stock_symbols[i]]['stoch', str(stock_symbols[i])] = ((new_stock[stock_symbols[i]]['close', str(stock_symbols[i])] - new_stock[stock_symbols[i]]['minimum_low', str(stock_symbols[i])]) / (new_stock[stock_symbols[i]]['maximum_high', str(stock_symbols[i])] - new_stock[stock_symbols[i]]['minimum_low', str(stock_symbols[i])])) * 100

In [None]:
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]] = new_stock[stock_symbols[i]].drop([('minimum_low', str(stock_symbols[i])), ('maximum_high', str(stock_symbols[i]))], axis=1)

new_stock[stock_symbols[8]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,ema5,rsi,macd,obv,cmf,emv,mfi,roc,atr,stoch
Symbols,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,...,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN,MSN
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2024-01-02,69.3,67.3,67.7,68.4,68.35,3624100.0,,,,,...,1.0,,0.0,0.0,,,0.0,,,
2024-01-03,69.003704,67.596296,67.922222,68.77037,68.357407,5457656.0,0.541477,,,,...,1.002454,100.0,0.006458,5457656.0,,0.0,60.137999,,,
2024-01-04,69.0691,67.93837,68.65416,68.31511,68.528947,3466718.0,-0.662,0.541477,,,...,0.997468,44.859101,0.00872,1990938.0,,0.066452,43.518049,,,
2024-01-05,68.561569,67.055176,68.354458,68.041178,67.830145,3426254.0,-0.400984,-0.662,0.541477,,...,0.995562,33.683421,0.003423,-1435317.0,,-0.305724,34.241286,,,
2024-01-08,68.194622,66.757749,68.122844,67.099466,67.379182,4299923.0,-1.384032,-0.400984,-0.662,0.541477,...,0.986792,18.144075,-0.022809,-5735239.0,,-0.111004,27.059279,,,


In [None]:
window = 21
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]]['combine_price', str(stock_symbols[i])] = (new_stock[stock_symbols[i]]['high', str(stock_symbols[i])] + new_stock[stock_symbols[i]]['low', str(stock_symbols[i])] + new_stock[stock_symbols[i]]['close', str(stock_symbols[i])]) / 3

  new_stock[stock_symbols[i]]['sma_combine_price', str(stock_symbols[i])] = new_stock[stock_symbols[i]]['combine_price', str(stock_symbols[i])].rolling(window=window).mean()
  new_stock[stock_symbols[i]]['mean_deviation', str(stock_symbols[i])] = new_stock[stock_symbols[i]]['combine_price', str(stock_symbols[i])].rolling(window).apply(lambda x: x.mad())

  new_stock[stock_symbols[i]]['cci', str(stock_symbols[i])] = (new_stock[stock_symbols[i]]['combine_price', str(stock_symbols[i])] - new_stock[stock_symbols[i]]['sma_combine_price', str(stock_symbols[i])]) / (0.015 * new_stock[stock_symbols[i]]['mean_deviation', str(stock_symbols[i])])

In [None]:
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]] = new_stock[stock_symbols[i]].drop([('combine_price', str(stock_symbols[i])), ('sma_combine_price', str(stock_symbols[i])), ('mean_deviation', str(stock_symbols[i]))], axis=1)

new_stock[stock_symbols[9]].tail()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,rsi,macd,obv,cmf,emv,mfi,roc,atr,stoch,cci
Symbols,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,...,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB,SAB
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2024-02-26,57.740243,56.695764,57.519138,56.870873,57.146922,1054533.0,-0.876793,-1.195883,-0.21813,-0.303183,...,43.66569,-1.199624,-15670370.0,-0.312092,-0.584947,45.39654,-2.015982,1.058603,14.130744,-86.552537
2024-02-27,56.999085,56.113518,56.921698,56.499806,56.433923,1715951.0,-0.652474,-0.876793,-1.195883,-0.21813,...,47.584047,-1.182763,-17386320.0,-0.285172,-0.341491,43.670572,-2.50138,1.004261,12.776422,-127.764795
2024-02-28,57.45468,56.364731,56.647594,57.279932,57.003373,1341778.0,1.380759,-0.652474,-0.876793,-1.195883,...,50.639238,-1.160353,-16044550.0,-0.191751,0.287076,45.063528,-1.871328,1.001491,38.578985,-81.779989
2024-02-29,58.589138,57.362656,57.526658,58.072976,58.093181,2289882.0,1.384506,1.380759,-0.652474,-0.876793,...,59.61299,-1.119354,-13754660.0,-0.107852,0.571062,49.855555,-0.210185,0.993505,64.808806,30.133605
2024-03-01,58.791198,57.84193,58.35433,58.415542,58.396613,1384769.0,0.589888,1.384506,1.380759,-0.652474,...,62.69353,-1.060403,-12369890.0,-0.095072,0.23353,56.224926,0.597897,0.968587,76.139108,88.785058


In [None]:
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]]['volume'] = new_stock[stock_symbols[i]]['volume'] / new_stock[stock_symbols[i]]['volume'].ewm(5).mean()

new_stock[stock_symbols[6]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,rsi,macd,obv,cmf,emv,mfi,roc,atr,stoch,cci
Symbols,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,...,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT,FPT
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2024-01-02,96.9,95.7,96.9,95.9,96.15,1.0,,,,,...,,0.0,0.0,,,0.0,,,,
2024-01-03,96.603704,95.477778,95.937037,96.196296,95.898148,0.886325,0.308964,,,,...,100.0,0.005166,1729720.0,,-0.16876,0.0,,,,
2024-01-04,97.823599,95.968251,96.183531,96.877929,96.918081,1.175744,0.708585,0.308964,,,...,100.0,0.025211,4322516.0,,0.611951,39.830944,,,,
2024-01-05,97.610055,96.319154,96.920292,97.222445,96.985338,0.889258,0.355619,0.708585,0.308964,,...,100.0,0.059604,6185594.0,,0.047587,53.237097,,,,
2024-01-08,98.060887,96.829384,97.233749,97.273122,97.413115,0.841275,0.052125,0.355619,0.708585,0.308964,...,100.0,0.102095,7846319.0,,0.356336,61.00812,,,,


In [None]:
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]].replace(0, np.nan, inplace=True)
  new_stock[stock_symbols[i]] = new_stock[stock_symbols[i]].dropna()

new_stock[stock_symbols[5]].tail()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,rsi,macd,obv,cmf,emv,mfi,roc,atr,stoch,cci
Symbols,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,...,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG,MWG
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2024-02-26,44.980788,43.552753,44.596237,44.430552,44.103479,1.445761,-0.335567,-2.359779,-1.433564,-0.880683,...,44.039311,0.790197,15503480.0,-0.048079,-0.08353,48.395176,-3.943468,1.028053,22.432805,-139.13727
2024-02-27,45.123276,44.103464,44.663683,44.638193,44.517218,1.17329,0.467339,-0.335567,-2.359779,-1.433564,...,46.648207,0.700408,28630260.0,-0.025943,0.026927,49.936363,-4.228599,1.046381,27.739237,-98.36551
2024-02-28,45.595647,44.523712,44.882289,45.068368,45.000026,1.290992,0.963692,0.467339,-0.335567,-2.359779,...,43.841812,0.616391,43967770.0,-0.044101,0.031192,51.518675,-3.562392,1.05889,38.732673,-55.464263
2024-02-29,46.508476,45.483299,45.868801,45.803929,45.903509,1.244793,1.632101,0.963692,0.467339,-0.335567,...,40.809297,0.551804,59518670.0,-0.066682,0.061719,52.622352,-1.117592,1.079045,57.530494,14.520018
2024-03-01,47.022967,45.884155,46.31158,46.321375,46.486228,1.220943,1.129699,1.632101,0.963692,0.467339,...,42.378139,0.510628,75477510.0,-0.100122,0.032659,54.668892,1.453888,1.061724,70.754229,51.024309


In [None]:
for i in range(len(stock_symbols)):
  new_stock[stock_symbols[i]]['trend', str(stock_symbols[i])] = (new_stock[stock_symbols[i]]['today'].iloc[:] > 0).astype(int)

new_stock[stock_symbols[4]].head()

Attributes,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,macd,obv,cmf,emv,mfi,roc,atr,stoch,cci,trend
Symbols,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,...,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN,HVN
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2024-01-30,12.581514,12.365041,12.4678,12.521239,12.507331,0.93409,0.428412,-0.264811,0.734356,-0.146396,...,0.045515,-2407897.0,-0.058245,0.000421,49.258463,-0.114886,0.242958,66.375376,-23.035099,1
2024-01-31,12.59353,12.420265,12.52123,12.507434,12.522066,0.799458,-0.110254,0.428412,-0.264811,0.734356,...,0.042412,-3758621.0,-0.045733,0.004313,47.863387,0.220028,0.234394,68.028784,-29.299912,0
2024-02-01,12.565235,12.374593,12.50743,12.470102,12.501223,0.962968,-0.298477,-0.110254,0.428412,-0.264811,...,0.039171,-5373425.0,-0.033118,-0.004366,48.388841,0.338838,0.229367,62.651209,-48.104247,0
2024-02-02,12.555332,12.293607,12.470101,12.359536,12.402928,1.102211,-0.88665,-0.298477,-0.110254,0.428412,...,0.033999,-7260901.0,-0.071073,-0.006301,48.526676,-0.405012,0.229639,50.410963,-92.71348,0
2024-02-05,12.454366,12.167763,12.229535,12.320837,12.342525,1.058082,-0.313104,-0.88665,-0.298477,-0.110254,...,0.027121,-9094378.0,-0.10517,-0.017727,41.388589,-1.440624,0.207389,29.964676,-178.618225,0


In [None]:
df_new_stock = {}
for i in range(len(stock_symbols)):
  df_new_stock[stock_symbols[i]] = new_stock[stock_symbols[i]][['today', 'previous1', 'previous2', 'previous3', 'previous4', 'previous5', 'volume',
                                                  'ema50', 'ema21', 'ema14', 'ema5', 'rsi', 'macd', 'roc', 'atr', 'obv', 'cmf', 'emv', 'stoch', 'mfi', 'cci', 'trend']]

In [None]:
df_new_stock[stock_symbols[2]]

Attributes,today,previous1,previous2,previous3,previous4,previous5,volume,ema50,ema21,ema14,...,macd,roc,atr,obv,cmf,emv,stoch,mfi,cci,trend
Symbols,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,...,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM,VNM
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2024-01-30,-0.109669,-0.036195,-0.103309,-0.56866,-0.521327,-0.393008,0.864337,0.988845,0.989955,0.990823,...,-0.266775,-1.721066,0.701796,-13093520.0,-0.510432,-0.009869,5.822601,36.863402,-103.352428,0
2024-01-31,0.058676,-0.109669,-0.036195,-0.103309,-0.56866,-0.521327,1.448967,0.990006,0.991201,0.992133,...,-0.285272,-1.275405,0.725008,-9531352.0,-0.530875,0.077006,8.647386,39.077231,-70.410333,1
2024-02-01,0.505752,0.058676,-0.109669,-0.036195,-0.103309,-0.56866,1.146033,0.995279,0.996475,0.997389,...,-0.295389,-0.256111,0.736441,-6627549.0,-0.508912,0.016324,28.152368,47.698535,-44.885058,1
2024-02-02,0.079565,0.505752,0.058676,-0.109669,-0.036195,-0.103309,1.072018,0.996274,0.997452,0.998332,...,-0.298163,0.394152,0.725672,-3870973.0,-0.487646,-0.019397,31.236427,47.409823,-44.382158,1
2024-02-05,0.606708,0.079565,0.505752,0.058676,-0.109669,-0.036195,1.219773,1.002202,1.003271,1.004031,...,-0.28842,1.107705,0.725965,-588335.9,-0.413876,0.05515,54.771956,55.338522,10.090878,1
2024-02-06,-0.172527,0.606708,0.079565,0.505752,0.058676,-0.109669,1.018233,1.00045,1.001441,1.002115,...,-0.272706,0.969813,0.73013,-3338729.0,-0.389629,0.051366,48.038652,56.25531,42.051606,0
2024-02-07,-0.060489,-0.172527,0.606708,0.079565,0.505752,0.058676,1.01232,0.999852,1.000782,1.001389,...,-0.254163,1.019525,0.731588,-6079961.0,-0.410724,-0.040277,43.178,48.724429,26.597008,0
2024-02-15,-0.021184,-0.060489,-0.172527,0.606708,0.079565,0.505752,1.269362,0.999657,1.000534,1.001085,...,-0.234493,0.938898,0.728036,-9714468.0,-0.375464,-0.004755,42.315146,40.054901,26.640693,0
2024-02-16,2.300013,-0.021184,-0.060489,-0.172527,0.606708,0.079565,2.23188,1.02161,1.022069,1.022211,...,-0.187506,2.740891,0.82023,-1217900.0,-0.180395,0.20571,87.2284,48.90684,265.034257,1
2024-02-19,2.57255,2.300013,-0.021184,-0.060489,-0.172527,0.606708,1.713877,1.0457,1.045308,1.044641,...,-0.094214,5.30017,0.904945,6399823.0,-0.097673,0.536878,87.613776,59.492309,428.959122,1


## Logistic Regression - Prediction & Valuation Model

#### VIC

In [None]:
X_train = df[stock_symbols[0]].loc[:, df[stock_symbols[0]].columns != ('trend', stock_symbols[0])]
y_train = df[stock_symbols[0]][('trend', stock_symbols[0])]
X_test = df_new_stock[stock_symbols[0]].loc[:, df_new_stock[stock_symbols[0]].columns != ('trend', stock_symbols[0])]
y_test = df_new_stock[stock_symbols[0]][('trend', stock_symbols[0])]

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.2, random_state=42)
lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions

array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0])

In [None]:
y_test.values

array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.74


#### VCB

In [None]:
X_train = df[stock_symbols[1]].loc[:, df[stock_symbols[1]].columns != ('trend', stock_symbols[1])]
y_train = df[stock_symbols[1]][('trend', stock_symbols[1])]
X_test = df_new_stock[stock_symbols[1]].loc[:, df_new_stock[stock_symbols[1]].columns != ('trend', stock_symbols[1])]
y_test = df_new_stock[stock_symbols[1]][('trend', stock_symbols[1])]

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.2, random_state=42)
lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1])

In [None]:
y_test.values

array([0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.68


#### VNM

In [None]:
X_train = df[stock_symbols[2]].loc[:, df[stock_symbols[2]].columns != ('trend', stock_symbols[2])]
y_train = df[stock_symbols[2]][('trend', stock_symbols[2])]
X_test = df_new_stock[stock_symbols[2]].loc[:, df_new_stock[stock_symbols[2]].columns != ('trend', stock_symbols[2])]
y_test = df_new_stock[stock_symbols[2]][('trend', stock_symbols[2])]

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.5, random_state=42)
lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions

array([0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1])

In [None]:
y_test.values

array([0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.68


#### GAS

In [None]:
X_train = df[stock_symbols[3]].loc[:, df[stock_symbols[3]].columns != ('trend', stock_symbols[3])]
y_train = df[stock_symbols[3]][('trend', stock_symbols[3])]
X_test = df_new_stock[stock_symbols[3]].loc[:, df_new_stock[stock_symbols[3]].columns != ('trend', stock_symbols[3])]
y_test = df_new_stock[stock_symbols[3]][('trend', stock_symbols[3])]

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.2, random_state=42)
lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions

array([1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1])

In [None]:
y_test.values

array([1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.74


#### HVN

In [None]:
X_train = df[stock_symbols[4]].loc[:, df[stock_symbols[4]].columns != ('trend', stock_symbols[4])]
y_train = df[stock_symbols[4]][('trend', stock_symbols[4])]
X_test = df_new_stock[stock_symbols[4]].loc[:, df_new_stock[stock_symbols[4]].columns != ('trend', stock_symbols[4])]
y_test = df_new_stock[stock_symbols[4]][('trend', stock_symbols[4])]

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.5, random_state=42)
lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions

array([1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1])

In [None]:
y_test.values

array([1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.74


#### MWG

In [None]:
X_train = df[stock_symbols[5]].loc[:, df[stock_symbols[5]].columns != ('trend', stock_symbols[5])]
y_train = df[stock_symbols[5]][('trend', stock_symbols[5])]
X_test = df_new_stock[stock_symbols[5]].loc[:, df_new_stock[stock_symbols[5]].columns != ('trend', stock_symbols[5])]
y_test = df_new_stock[stock_symbols[5]][('trend', stock_symbols[5])]

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)
lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1])

In [None]:
y_test.values

array([1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.63


#### FPT

In [None]:
X_train = df[stock_symbols[6]].loc[:, df[stock_symbols[6]].columns != ('trend', stock_symbols[6])]
y_train = df[stock_symbols[6]][('trend', stock_symbols[6])]
X_test = df_new_stock[stock_symbols[6]].loc[:, df_new_stock[stock_symbols[6]].columns != ('trend', stock_symbols[6])]
y_test = df_new_stock[stock_symbols[6]][('trend', stock_symbols[6])]

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)
lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions

array([0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [None]:
y_test.values

array([0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.74


#### HPG

In [None]:
X_train = df[stock_symbols[7]].loc[:, df[stock_symbols[7]].columns != ('trend', stock_symbols[7])]
y_train = df[stock_symbols[7]][('trend', stock_symbols[7])]
X_test = df_new_stock[stock_symbols[7]].loc[:, df_new_stock[stock_symbols[7]].columns != ('trend', stock_symbols[7])]
y_test = df_new_stock[stock_symbols[7]][('trend', stock_symbols[7])]

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)
lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions

array([1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1])

In [None]:
y_test.values

array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.79


#### MSN

In [None]:
X_train = df[stock_symbols[8]].loc[:, df[stock_symbols[8]].columns != ('trend', stock_symbols[8])]
y_train = df[stock_symbols[8]][('trend', stock_symbols[8])]
X_test = df_new_stock[stock_symbols[8]].loc[:, df_new_stock[stock_symbols[8]].columns != ('trend', stock_symbols[8])]
y_test = df_new_stock[stock_symbols[8]][('trend', stock_symbols[8])]

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.5, random_state=42)
lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1])

In [None]:
y_test.values

array([1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.63


#### SAB

In [None]:
X_train = df[stock_symbols[9]].loc[:, df[stock_symbols[9]].columns != ('trend', stock_symbols[9])]
y_train = df[stock_symbols[9]][('trend', stock_symbols[9])]
X_test = df_new_stock[stock_symbols[9]].loc[:, df_new_stock[stock_symbols[9]].columns != ('trend', stock_symbols[9])]
y_test = df_new_stock[stock_symbols[9]][('trend', stock_symbols[9])]

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)
lr.fit(X_train_scaled, y_train.values)

In [None]:
predictions = lr.predict(X_test_scaled)
predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1])

In [None]:
y_test.values

array([0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.68


## Random Forest - Prediction & Valuation Model

#### VIC

In [None]:
X_train = df[stock_symbols[0]][[('volume', stock_symbols[0]), ('ema50', stock_symbols[0]), ('ema21', stock_symbols[0]),
                          ('ema14', stock_symbols[0]), ('ema5', stock_symbols[0]), ('rsi', stock_symbols[0]),
                           ('macd', stock_symbols[0]), ('roc', stock_symbols[0]), ('obv', stock_symbols[0]),
                            ('atr', stock_symbols[0]), ('cmf', stock_symbols[0]), ('emv', stock_symbols[0]),
                             ('stoch', stock_symbols[0]), ('cci', stock_symbols[0]), ('mfi', stock_symbols[0])]]
y_train = df[stock_symbols[0]][('trend', stock_symbols[0])]

X_test = df_new_stock[stock_symbols[0]][[('volume', stock_symbols[0]), ('ema50', stock_symbols[0]), ('ema21', stock_symbols[0]),
                          ('ema14', stock_symbols[0]), ('ema5', stock_symbols[0]), ('rsi', stock_symbols[0]),
                           ('macd', stock_symbols[0]), ('roc', stock_symbols[0]), ('obv', stock_symbols[0]),
                            ('atr', stock_symbols[0]), ('cmf', stock_symbols[0]), ('emv', stock_symbols[0]),
                             ('stoch', stock_symbols[0]), ('cci', stock_symbols[0]), ('mfi', stock_symbols[0])]]
y_test = df_new_stock[stock_symbols[0]][('trend', stock_symbols[0])]


rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred

array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1])

In [None]:
y_test.values

array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.79


#### VCB

In [None]:
X_train = df[stock_symbols[1]][[('volume', stock_symbols[1]), ('ema50', stock_symbols[1]), ('ema21', stock_symbols[1]),
                          ('ema14', stock_symbols[1]), ('ema5', stock_symbols[1]), ('rsi', stock_symbols[1]),
                           ('macd', stock_symbols[1]), ('roc', stock_symbols[1]), ('obv', stock_symbols[1]),
                            ('atr', stock_symbols[1]), ('cmf', stock_symbols[1]), ('emv', stock_symbols[1]),
                             ('stoch', stock_symbols[1]), ('cci', stock_symbols[1]), ('mfi', stock_symbols[1])]]
y_train = df[stock_symbols[1]][('trend', stock_symbols[1])]

X_test = df_new_stock[stock_symbols[1]][[('volume', stock_symbols[1]), ('ema50', stock_symbols[1]), ('ema21', stock_symbols[1]),
                          ('ema14', stock_symbols[1]), ('ema5', stock_symbols[1]), ('rsi', stock_symbols[1]),
                           ('macd', stock_symbols[1]), ('roc', stock_symbols[1]), ('obv', stock_symbols[1]),
                            ('atr', stock_symbols[1]), ('cmf', stock_symbols[1]), ('emv', stock_symbols[1]),
                             ('stoch', stock_symbols[1]), ('cci', stock_symbols[1]), ('mfi', stock_symbols[1])]]
y_test = df_new_stock[stock_symbols[1]][('trend', stock_symbols[1])]


rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred

array([0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0])

In [None]:
y_test.values

array([0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.68


#### VNM

In [None]:
X_train = df[stock_symbols[2]][[('volume', stock_symbols[2]), ('ema50', stock_symbols[2]), ('ema21', stock_symbols[2]),
                          ('ema14', stock_symbols[2]), ('ema5', stock_symbols[2]), ('rsi', stock_symbols[2]),
                           ('macd', stock_symbols[2]), ('roc', stock_symbols[2]), ('obv', stock_symbols[2]),
                            ('atr', stock_symbols[2]), ('cmf', stock_symbols[2]), ('emv', stock_symbols[2]),
                             ('stoch', stock_symbols[2]), ('cci', stock_symbols[2]), ('mfi', stock_symbols[2])]]
y_train = df[stock_symbols[2]][('trend', stock_symbols[2])]

X_test = df_new_stock[stock_symbols[2]][[('volume', stock_symbols[2]), ('ema50', stock_symbols[2]), ('ema21', stock_symbols[2]),
                          ('ema14', stock_symbols[2]), ('ema5', stock_symbols[2]), ('rsi', stock_symbols[2]),
                           ('macd', stock_symbols[2]), ('roc', stock_symbols[2]), ('obv', stock_symbols[2]),
                            ('atr', stock_symbols[2]), ('cmf', stock_symbols[2]), ('emv', stock_symbols[2]),
                             ('stoch', stock_symbols[2]), ('cci', stock_symbols[2]), ('mfi', stock_symbols[2])]]
y_test = df_new_stock[stock_symbols[2]][('trend', stock_symbols[2])]


rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred

array([0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0])

In [None]:
y_test.values

array([0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.68


#### GAS

In [None]:
X_train = df[stock_symbols[3]][[('volume', stock_symbols[3]), ('ema50', stock_symbols[3]), ('ema21', stock_symbols[3]),
                          ('ema14', stock_symbols[3]), ('ema5', stock_symbols[3]), ('rsi', stock_symbols[3]),
                           ('macd', stock_symbols[3]), ('roc', stock_symbols[3]), ('obv', stock_symbols[3]),
                            ('atr', stock_symbols[3]), ('cmf', stock_symbols[3]), ('emv', stock_symbols[3]),
                             ('stoch', stock_symbols[3]), ('cci', stock_symbols[3]), ('mfi', stock_symbols[3])]]
y_train = df[stock_symbols[3]][('trend', stock_symbols[3])]

X_test = df_new_stock[stock_symbols[3]][[('volume', stock_symbols[3]), ('ema50', stock_symbols[3]), ('ema21', stock_symbols[3]),
                          ('ema14', stock_symbols[3]), ('ema5', stock_symbols[3]), ('rsi', stock_symbols[3]),
                           ('macd', stock_symbols[3]), ('roc', stock_symbols[3]), ('obv', stock_symbols[3]),
                            ('atr', stock_symbols[3]), ('cmf', stock_symbols[3]), ('emv', stock_symbols[3]),
                             ('stoch', stock_symbols[3]), ('cci', stock_symbols[3]), ('mfi', stock_symbols[3])]]
y_test = df_new_stock[stock_symbols[3]][('trend', stock_symbols[3])]


rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred

array([1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0])

In [None]:
y_test.values

array([1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.95


#### HVN

In [None]:
X_train = df[stock_symbols[4]][[('volume', stock_symbols[4]), ('ema50', stock_symbols[4]), ('ema21', stock_symbols[4]),
                          ('ema14', stock_symbols[4]), ('ema5', stock_symbols[4]), ('rsi', stock_symbols[4]),
                           ('macd', stock_symbols[4]), ('roc', stock_symbols[4]), ('obv', stock_symbols[4]),
                            ('atr', stock_symbols[4]), ('cmf', stock_symbols[4]), ('emv', stock_symbols[4]),
                             ('stoch', stock_symbols[4]), ('cci', stock_symbols[4]), ('mfi', stock_symbols[4])]]
y_train = df[stock_symbols[4]][('trend', stock_symbols[4])]

X_test = df_new_stock[stock_symbols[4]][[('volume', stock_symbols[4]), ('ema50', stock_symbols[4]), ('ema21', stock_symbols[4]),
                          ('ema14', stock_symbols[4]), ('ema5', stock_symbols[4]), ('rsi', stock_symbols[4]),
                           ('macd', stock_symbols[4]), ('roc', stock_symbols[4]), ('obv', stock_symbols[4]),
                            ('atr', stock_symbols[4]), ('cmf', stock_symbols[4]), ('emv', stock_symbols[4]),
                             ('stoch', stock_symbols[4]), ('cci', stock_symbols[4]), ('mfi', stock_symbols[4])]]
y_test = df_new_stock[stock_symbols[4]][('trend', stock_symbols[4])]


rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred

array([1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0])

In [None]:
y_test.values

array([1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.63


#### MWG

In [None]:
X_train = df[stock_symbols[5]][[('volume', stock_symbols[5]), ('ema50', stock_symbols[5]), ('ema21', stock_symbols[5]),
                          ('ema14', stock_symbols[5]), ('ema5', stock_symbols[5]), ('rsi', stock_symbols[5]),
                           ('macd', stock_symbols[5]), ('roc', stock_symbols[5]), ('obv', stock_symbols[5]),
                            ('atr', stock_symbols[5]), ('cmf', stock_symbols[5]), ('emv', stock_symbols[5]),
                             ('stoch', stock_symbols[5]), ('cci', stock_symbols[5]), ('mfi', stock_symbols[5])]]
y_train = df[stock_symbols[5]][('trend', stock_symbols[5])]

X_test = df_new_stock[stock_symbols[5]][[('volume', stock_symbols[5]), ('ema50', stock_symbols[5]), ('ema21', stock_symbols[5]),
                          ('ema14', stock_symbols[5]), ('ema5', stock_symbols[5]), ('rsi', stock_symbols[5]),
                           ('macd', stock_symbols[5]), ('roc', stock_symbols[5]), ('obv', stock_symbols[5]),
                            ('atr', stock_symbols[5]), ('cmf', stock_symbols[5]), ('emv', stock_symbols[5]),
                             ('stoch', stock_symbols[5]), ('cci', stock_symbols[5]), ('mfi', stock_symbols[5])]]
y_test = df_new_stock[stock_symbols[5]][('trend', stock_symbols[5])]


rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred

array([1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1])

In [None]:
y_test.values

array([1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.84


#### FPT

In [None]:
X_train = df[stock_symbols[6]][[('volume', stock_symbols[6]), ('ema50', stock_symbols[6]), ('ema21', stock_symbols[6]),
                          ('ema14', stock_symbols[6]), ('ema5', stock_symbols[6]), ('rsi', stock_symbols[6]),
                           ('macd', stock_symbols[6]), ('roc', stock_symbols[6]), ('obv', stock_symbols[6]),
                            ('atr', stock_symbols[6]), ('cmf', stock_symbols[6]), ('emv', stock_symbols[6]),
                             ('stoch', stock_symbols[6]), ('cci', stock_symbols[6]), ('mfi', stock_symbols[6])]]
y_train = df[stock_symbols[6]][('trend', stock_symbols[6])]

X_test = df_new_stock[stock_symbols[6]][[('volume', stock_symbols[6]), ('ema50', stock_symbols[6]), ('ema21', stock_symbols[6]),
                          ('ema14', stock_symbols[6]), ('ema5', stock_symbols[6]), ('rsi', stock_symbols[6]),
                           ('macd', stock_symbols[6]), ('roc', stock_symbols[6]), ('obv', stock_symbols[6]),
                            ('atr', stock_symbols[6]), ('cmf', stock_symbols[6]), ('emv', stock_symbols[6]),
                             ('stoch', stock_symbols[6]), ('cci', stock_symbols[6]), ('mfi', stock_symbols[6])]]
y_test = df_new_stock[stock_symbols[6]][('trend', stock_symbols[6])]


rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred

array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1])

In [None]:
y_test.values

array([0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.84


#### HPG

In [None]:
X_train = df[stock_symbols[7]][[('volume', stock_symbols[7]), ('ema50', stock_symbols[7]), ('ema21', stock_symbols[7]),
                          ('ema14', stock_symbols[7]), ('ema5', stock_symbols[7]), ('rsi', stock_symbols[7]),
                           ('macd', stock_symbols[7]), ('roc', stock_symbols[7]), ('obv', stock_symbols[7]),
                            ('atr', stock_symbols[7]), ('cmf', stock_symbols[7]), ('emv', stock_symbols[7]),
                             ('stoch', stock_symbols[7]), ('cci', stock_symbols[7]), ('mfi', stock_symbols[7])]]
y_train = df[stock_symbols[7]][('trend', stock_symbols[7])]

X_test = df_new_stock[stock_symbols[7]][[('volume', stock_symbols[7]), ('ema50', stock_symbols[7]), ('ema21', stock_symbols[7]),
                          ('ema14', stock_symbols[7]), ('ema5', stock_symbols[7]), ('rsi', stock_symbols[7]),
                           ('macd', stock_symbols[7]), ('roc', stock_symbols[7]), ('obv', stock_symbols[7]),
                            ('atr', stock_symbols[7]), ('cmf', stock_symbols[7]), ('emv', stock_symbols[7]),
                             ('stoch', stock_symbols[7]), ('cci', stock_symbols[7]), ('mfi', stock_symbols[7])]]
y_test = df_new_stock[stock_symbols[7]][('trend', stock_symbols[7])]


rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred

array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1])

In [None]:
y_test.values

array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.95


#### MSN

In [None]:
X_train = df[stock_symbols[8]][[('volume', stock_symbols[8]), ('ema50', stock_symbols[8]), ('ema21', stock_symbols[8]),
                          ('ema14', stock_symbols[8]), ('ema5', stock_symbols[8]), ('rsi', stock_symbols[8]),
                           ('macd', stock_symbols[8]), ('roc', stock_symbols[8]), ('obv', stock_symbols[8]),
                            ('atr', stock_symbols[8]), ('cmf', stock_symbols[8]), ('emv', stock_symbols[8]),
                             ('stoch', stock_symbols[8]), ('cci', stock_symbols[8]), ('mfi', stock_symbols[8])]]
y_train = df[stock_symbols[8]][('trend', stock_symbols[8])]

X_test = df_new_stock[stock_symbols[8]][[('volume', stock_symbols[8]), ('ema50', stock_symbols[8]), ('ema21', stock_symbols[8]),
                          ('ema14', stock_symbols[8]), ('ema5', stock_symbols[8]), ('rsi', stock_symbols[8]),
                           ('macd', stock_symbols[8]), ('roc', stock_symbols[8]), ('obv', stock_symbols[8]),
                            ('atr', stock_symbols[8]), ('cmf', stock_symbols[8]), ('emv', stock_symbols[8]),
                             ('stoch', stock_symbols[8]), ('cci', stock_symbols[8]), ('mfi', stock_symbols[8])]]
y_test = df_new_stock[stock_symbols[8]][('trend', stock_symbols[8])]


rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred

array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1])

In [None]:
y_test.values

array([1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.68


#### SAB

In [None]:
X_train = df[stock_symbols[9]][[('volume', stock_symbols[9]), ('ema50', stock_symbols[9]), ('ema21', stock_symbols[9]),
                          ('ema14', stock_symbols[9]), ('ema5', stock_symbols[9]), ('rsi', stock_symbols[9]),
                           ('macd', stock_symbols[9]), ('roc', stock_symbols[9]), ('obv', stock_symbols[9]),
                            ('atr', stock_symbols[9]), ('cmf', stock_symbols[9]), ('emv', stock_symbols[9]),
                             ('stoch', stock_symbols[9]), ('cci', stock_symbols[9]), ('mfi', stock_symbols[9])]]
y_train = df[stock_symbols[9]][('trend', stock_symbols[9])]

X_test = df_new_stock[stock_symbols[9]][[('volume', stock_symbols[9]), ('ema50', stock_symbols[9]), ('ema21', stock_symbols[9]),
                          ('ema14', stock_symbols[9]), ('ema5', stock_symbols[9]), ('rsi', stock_symbols[9]),
                           ('macd', stock_symbols[9]), ('roc', stock_symbols[9]), ('obv', stock_symbols[9]),
                            ('atr', stock_symbols[9]), ('cmf', stock_symbols[9]), ('emv', stock_symbols[9]),
                             ('stoch', stock_symbols[9]), ('cci', stock_symbols[9]), ('mfi', stock_symbols[9])]]
y_test = df_new_stock[stock_symbols[9]][('trend', stock_symbols[9])]


rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [None]:
y_pred = rf.predict(X_test.values)
y_pred

array([0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1])

In [None]:
y_test.values

array([0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1])

In [None]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.68


## THE END