## Install needed libraries

In [None]:
import os
from google.colab import drive
drive.mount('/content/gdrive')

path = "/content/gdrive/MyDrive/Chuyên đề nghiên cứu 1/vnquant_package"
%cd {path}
!ls

In [None]:
!git clone https://github.com/phamdinhkhanh/vnquant
%cd vnquant
!python setup.py install

In [None]:
%cd ..
!rm -rf vnquant
!ls

In [4]:
!pip freeze | grep vnquant

vnquant==0.1.1


In [5]:
import vnquant
vnquant.__version__

'0.1.1'

## Import libraries

In [6]:
import vnquant.data as dt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

## We choose 10 codes from biggest companies on stock exchange in Vietnam

In [7]:
stock_symbols = ['VIC', 'VCB', 'VNM', 'GAS', 'HVN', 'MWG', 'FPT', 'HPG', 'MSN', 'SAB']

## Prepare and process the dataset

In [8]:
stock = {}
for i in range(len(stock_symbols)):
  data = dt.DataLoader(symbols=stock_symbols[i],
                        start="2014-01-01",
                        end="2024-01-01",
                        data_source="VND")
  stock[stock_symbols[i]] = data.download()

In [9]:
stock[stock_symbols[0]]

Attributes,high,low,open,close,avg,volume
Symbols,VIC,VIC,VIC,VIC,VIC,VIC
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2014-01-02,70.50,69.50,70.00,70.00,69.92,74950.0
2014-01-03,70.50,69.50,70.00,70.50,70.00,91920.0
2014-01-06,70.50,69.50,70.50,70.50,70.11,111760.0
2014-01-07,70.50,70.00,70.50,70.50,70.47,294120.0
2014-01-08,70.50,70.00,70.00,70.00,70.02,109890.0
...,...,...,...,...,...,...
2023-12-25,43.55,43.00,43.10,43.40,43.34,3364500.0
2023-12-26,43.75,43.35,43.40,43.55,43.55,1806700.0
2023-12-27,43.95,43.60,43.65,43.60,43.76,1920500.0
2023-12-28,44.60,43.60,43.60,44.45,44.32,4359700.0


In [10]:
for i in range(len(stock_symbols)):
  print(f"Length of {stock_symbols[i]} with index {i}: ", len(stock[stock_symbols[i]]))

Length of VIC with index 0:  2494
Length of VCB with index 1:  2494
Length of VNM with index 2:  2494
Length of GAS with index 3:  2494
Length of HVN with index 4:  1743
Length of MWG with index 5:  2368
Length of FPT with index 6:  2494
Length of HPG with index 7:  2494
Length of MSN with index 8:  2494
Length of SAB with index 9:  1767


In [11]:
changed_start_index5 = len(stock[stock_symbols[5]]) - len(stock[stock_symbols[4]])
stock[stock_symbols[5]] = stock[stock_symbols[5]][changed_start_index5:]

In [12]:
changed_start_index9 = len(stock[stock_symbols[9]]) - len(stock[stock_symbols[4]])
stock[stock_symbols[9]] = stock[stock_symbols[9]][changed_start_index9:]

In [13]:
changed_start_other_index = len(stock[stock_symbols[0]]) - len(stock[stock_symbols[4]])
for i in range(len(stock_symbols)):
  if ((i == 4) or (i == 5) or (i == 9)):
    pass
  else:
    stock[stock_symbols[i]] = stock[stock_symbols[i]][changed_start_other_index:]

In [14]:
combined_high = (stock[stock_symbols[0]]['high'].values + stock[stock_symbols[1]]['high'].values + stock[stock_symbols[2]]['high'].values +
                 stock[stock_symbols[3]]['high'].values + stock[stock_symbols[4]]['high'].values + stock[stock_symbols[5]]['high'].values +
                 stock[stock_symbols[6]]['high'].values + stock[stock_symbols[7]]['high'].values + stock[stock_symbols[8]]['high'].values +
                 stock[stock_symbols[9]]['high'].values) / len(stock_symbols)
combined_high

array([[82.885],
       [83.95 ],
       [85.11 ],
       ...,
       [58.545],
       [58.44 ],
       [58.635]])

In [15]:
combined_low = (stock[stock_symbols[0]]['low'].values + stock[stock_symbols[1]]['low'].values + stock[stock_symbols[2]]['low'].values +
                 stock[stock_symbols[3]]['low'].values + stock[stock_symbols[4]]['low'].values + stock[stock_symbols[5]]['low'].values +
                 stock[stock_symbols[6]]['low'].values + stock[stock_symbols[7]]['low'].values + stock[stock_symbols[8]]['low'].values +
                 stock[stock_symbols[9]]['low'].values) / len(stock_symbols)
combined_low

array([[81.455],
       [82.445],
       [83.565],
       ...,
       [57.755],
       [57.685],
       [57.625]])

In [16]:
combined_open = (stock[stock_symbols[0]]['open'].values + stock[stock_symbols[1]]['open'].values + stock[stock_symbols[2]]['open'].values +
                 stock[stock_symbols[3]]['open'].values + stock[stock_symbols[4]]['open'].values + stock[stock_symbols[5]]['open'].values +
                 stock[stock_symbols[6]]['open'].values + stock[stock_symbols[7]]['open'].values + stock[stock_symbols[8]]['open'].values +
                 stock[stock_symbols[9]]['open'].values) / len(stock_symbols)
combined_open

array([[82.255],
       [82.795],
       [84.76 ],
       ...,
       [58.055],
       [58.01 ],
       [58.325]])

In [17]:
combined_close = (stock[stock_symbols[0]]['close'].values + stock[stock_symbols[1]]['close'].values + stock[stock_symbols[2]]['close'].values +
                 stock[stock_symbols[3]]['close'].values + stock[stock_symbols[4]]['close'].values + stock[stock_symbols[5]]['close'].values +
                 stock[stock_symbols[6]]['close'].values + stock[stock_symbols[7]]['close'].values + stock[stock_symbols[8]]['close'].values +
                 stock[stock_symbols[9]]['close'].values) / len(stock_symbols)
combined_close

array([[82.185],
       [83.725],
       [84.06 ],
       ...,
       [57.93 ],
       [58.09 ],
       [57.71 ]])

In [18]:
combined_avg = (stock[stock_symbols[0]]['avg'].values + stock[stock_symbols[1]]['avg'].values + stock[stock_symbols[2]]['avg'].values +
                 stock[stock_symbols[3]]['avg'].values + stock[stock_symbols[4]]['avg'].values + stock[stock_symbols[5]]['avg'].values +
                 stock[stock_symbols[6]]['avg'].values + stock[stock_symbols[7]]['avg'].values + stock[stock_symbols[8]]['avg'].values +
                 stock[stock_symbols[9]]['avg'].values) / len(stock_symbols)
combined_avg

array([[82.107 ],
       [83.28  ],
       [84.3326],
       ...,
       [58.148 ],
       [58.099 ],
       [58.183 ]])

In [19]:
combined_volume = (stock[stock_symbols[0]]['volume'].values + stock[stock_symbols[1]]['volume'].values + stock[stock_symbols[2]]['volume'].values +
                 stock[stock_symbols[3]]['volume'].values + stock[stock_symbols[4]]['volume'].values + stock[stock_symbols[5]]['volume'].values +
                 stock[stock_symbols[6]]['volume'].values + stock[stock_symbols[7]]['volume'].values + stock[stock_symbols[8]]['volume'].values +
                 stock[stock_symbols[9]]['volume'].values) / len(stock_symbols)
combined_volume

array([[ 747272. ],
       [ 605305.8],
       [ 643985. ],
       ...,
       [4714230. ],
       [5987070. ],
       [5748880.1]])

In [20]:
df = pd.DataFrame(combined_high)
df.head()

Unnamed: 0,0
0,82.885
1,83.95
2,85.11
3,84.75
4,83.62


In [21]:
df = df.rename(columns={0: 'high'})

df['low'] = combined_low
df['open'] = combined_open
df['close'] = combined_close
df['avg'] = combined_avg
df['volume'] = combined_volume

df.head()

Unnamed: 0,high,low,open,close,avg,volume
0,82.885,81.455,82.255,82.185,82.107,747272.0
1,83.95,82.445,82.795,83.725,83.28,605305.8
2,85.11,83.565,84.76,84.06,84.3326,643985.0
3,84.75,83.06,84.335,83.335,83.6911,727448.6
4,83.62,82.0,83.465,82.27,82.5046,722580.3


In [22]:
df = df.ewm(alpha=0.65).mean()
df.head()

Unnamed: 0,high,low,open,close,avg,volume
0,82.885,81.455,82.255,82.185,82.107,747272.0
1,83.673889,82.188333,82.655,83.325741,82.975889,642111.851852
2,84.649177,83.123251,84.084542,83.824389,83.897255,643383.938879
3,84.71571,83.081512,84.24982,83.50144,83.761213,698858.432731
4,83.999738,82.374817,83.736993,82.696778,82.940102,714359.05865


In [23]:
df['percentage_change'] = df['close'].pct_change() * 100
df.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change
0,82.885,81.455,82.255,82.185,82.107,747272.0,
1,83.673889,82.188333,82.655,83.325741,82.975889,642111.851852,1.388016
2,84.649177,83.123251,84.084542,83.824389,83.897255,643383.938879,0.598432
3,84.71571,83.081512,84.24982,83.50144,83.761213,698858.432731,-0.385269
4,83.999738,82.374817,83.736993,82.696778,82.940102,714359.05865,-0.963651


In [24]:
for i in range(1, 6):
  df[f'previous{str(i)}'] = df['percentage_change'].shift(i)

df.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,previous4,previous5
0,82.885,81.455,82.255,82.185,82.107,747272.0,,,,,,
1,83.673889,82.188333,82.655,83.325741,82.975889,642111.851852,1.388016,,,,,
2,84.649177,83.123251,84.084542,83.824389,83.897255,643383.938879,0.598432,1.388016,,,,
3,84.71571,83.081512,84.24982,83.50144,83.761213,698858.432731,-0.385269,0.598432,1.388016,,,
4,83.999738,82.374817,83.736993,82.696778,82.940102,714359.05865,-0.963651,-0.385269,0.598432,1.388016,,


In [25]:
df['ema50'] = df['close'] / df['close'].ewm(50).mean()
df['ema21'] = df['close'] / df['close'].ewm(21).mean()
df['ema14'] = df['close'] / df['close'].ewm(14).mean()
df['ema5'] = df['close'] / df['close'].ewm(5).mean()

df.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5
0,82.885,81.455,82.255,82.185,82.107,747272.0,,,,,,,1.0,1.0,1.0,1.0
1,83.673889,82.188333,82.655,83.325741,82.975889,642111.851852,1.388016,,,,,,1.006824,1.006731,1.006653,1.006262
2,84.649177,83.123251,84.084542,83.824389,83.897255,643383.938879,0.598432,1.388016,,,,,1.008444,1.008268,1.00812,1.007388
3,84.71571,83.081512,84.24982,83.50144,83.761213,698858.432731,-0.385269,0.598432,1.388016,,,,1.003381,1.003206,1.003061,1.002375
4,83.999738,82.374817,83.736993,82.696778,82.940102,714359.05865,-0.963651,-0.385269,0.598432,1.388016,,,0.995013,0.994947,0.994897,0.994735


In [26]:
def rsi(X, window=14):
  delta = X.diff(1)

  gains = delta.where(delta > 0, 0)
  losses = -delta.where(delta < 0, 0)

  avg_gains = gains.rolling(window=window, min_periods=1).mean()
  avg_losses = losses.rolling(window=window, min_periods=1).mean()

  rs = avg_gains / avg_losses
  rsi = 100 - (100 / (1 + rs))

  return rsi

In [27]:
df['rsi'] = rsi(df['close'])
df.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5,rsi
0,82.885,81.455,82.255,82.185,82.107,747272.0,,,,,,,1.0,1.0,1.0,1.0,
1,83.673889,82.188333,82.655,83.325741,82.975889,642111.851852,1.388016,,,,,,1.006824,1.006731,1.006653,1.006262,100.0
2,84.649177,83.123251,84.084542,83.824389,83.897255,643383.938879,0.598432,1.388016,,,,,1.008444,1.008268,1.00812,1.007388,100.0
3,84.71571,83.081512,84.24982,83.50144,83.761213,698858.432731,-0.385269,0.598432,1.388016,,,,1.003381,1.003206,1.003061,1.002375,83.542641
4,83.999738,82.374817,83.736993,82.696778,82.940102,714359.05865,-0.963651,-0.385269,0.598432,1.388016,,,0.995013,0.994947,0.994897,0.994735,59.247881


In [28]:
def macd(X, short_window=12, long_window=29, signal=9):
  short_ema = X.ewm(span=short_window, adjust=False).mean()

  long_ema = X.ewm(span=long_window, adjust=False).mean()

  macd = short_ema - long_ema

  signal = macd.ewm(span=signal, adjust=False).mean()

  return signal

In [29]:
df['macd'] = macd(df['close'])
df.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5,rsi,macd
0,82.885,81.455,82.255,82.185,82.107,747272.0,,,,,,,1.0,1.0,1.0,1.0,,0.0
1,83.673889,82.188333,82.655,83.325741,82.975889,642111.851852,1.388016,,,,,,1.006824,1.006731,1.006653,1.006262,100.0,0.01989
2,84.649177,83.123251,84.084542,83.824389,83.897255,643383.938879,0.598432,1.388016,,,,,1.008444,1.008268,1.00812,1.007388,100.0,0.06
3,84.71571,83.081512,84.24982,83.50144,83.761213,698858.432731,-0.385269,0.598432,1.388016,,,,1.003381,1.003206,1.003061,1.002375,83.542641,0.105115
4,83.999738,82.374817,83.736993,82.696778,82.940102,714359.05865,-0.963651,-0.385269,0.598432,1.388016,,,0.995013,0.994947,0.994897,0.994735,59.247881,0.13688


In [37]:
window = 6
df['roc'] = ((df['close'] - df['close'].shift(window)) / df['close'].shift(window)) * 100

df.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,...,ema50,ema21,ema14,ema5,rsi,macd,obv,emv,mfi,roc
0,82.885,81.455,82.255,82.185,82.107,747272.0,,,,,...,1.0,1.0,1.0,1.0,,0.0,0.0,,0.0,
1,83.673889,82.188333,82.655,83.325741,82.975889,642111.851852,1.388016,,,,...,1.006824,1.006731,1.006653,1.006262,100.0,0.01989,642111.9,1.760866,46.482753,
2,84.649177,83.123251,84.084542,83.824389,83.897255,643383.938879,0.598432,1.388016,,,...,1.008444,1.008268,1.00812,1.007388,100.0,0.06,1285496.0,2.265235,63.599917,
3,84.71571,83.081512,84.24982,83.50144,83.761213,698858.432731,-0.385269,0.598432,1.388016,,...,1.003381,1.003206,1.003061,1.002375,83.542641,0.105115,586637.4,0.028989,47.215621,
4,83.999738,82.374817,83.736993,82.696778,82.940102,714359.05865,-0.963651,-0.385269,0.598432,1.388016,...,0.995013,0.994947,0.994897,0.994735,59.247881,0.13688,-127721.7,-1.618038,37.443147,


In [38]:
window = 16

df['high-low'] = df['high'] - df['low']
df['high-preclose'] = abs(df['high'] - df['close'].shift(1))
df['low-preclose'] = abs(df['low'] - df['close'].shift(1))
df['tr'] = df[[('high-low'), ('high-preclose'), ('low-preclose')]].max(axis=1)
df['atr'] = df['tr'].rolling(window=window).mean()

In [39]:
df = df.drop(['high-low', 'high-preclose', 'low-preclose', 'tr'], axis=1)

In [30]:
def obv(X):
  obv = pd.Series(index=X.index)
  obv.iloc[0] = 0

  for i in range(1, len(X)):
    if (X['close'].iloc[i] > X['close'].iloc[i-1]):
      obv.iloc[i] = obv.iloc[i-1] + X['volume'].iloc[i]
    elif (X['close'].iloc[i] < X['close'].iloc[i-1]):
      obv.iloc[i] = obv.iloc[i-1] - X['volume'].iloc[i]
    else:
      obv.iloc[i] = obv.iloc[i-1]

  return obv

In [31]:
df['obv'] = obv(df)
df.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5,rsi,macd,obv
0,82.885,81.455,82.255,82.185,82.107,747272.0,,,,,,,1.0,1.0,1.0,1.0,,0.0,0.0
1,83.673889,82.188333,82.655,83.325741,82.975889,642111.851852,1.388016,,,,,,1.006824,1.006731,1.006653,1.006262,100.0,0.01989,642111.9
2,84.649177,83.123251,84.084542,83.824389,83.897255,643383.938879,0.598432,1.388016,,,,,1.008444,1.008268,1.00812,1.007388,100.0,0.06,1285496.0
3,84.71571,83.081512,84.24982,83.50144,83.761213,698858.432731,-0.385269,0.598432,1.388016,,,,1.003381,1.003206,1.003061,1.002375,83.542641,0.105115,586637.4
4,83.999738,82.374817,83.736993,82.696778,82.940102,714359.05865,-0.963651,-0.385269,0.598432,1.388016,,,0.995013,0.994947,0.994897,0.994735,59.247881,0.13688,-127721.7


In [40]:
def cmf(X, window=16):
  money_flow_multiplier = ((X['close'] - X['low']) - (X['high'] - X['close']))/(X['high']-X['low'])

  money_flow_volume = money_flow_multiplier * X['volume']

  cmf = money_flow_volume.rolling(window=window).sum() / X['volume'].rolling(window=window).sum()

  return cmf

In [41]:
df['cmf'] = cmf(df)
df.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,...,ema14,ema5,rsi,macd,obv,emv,mfi,roc,atr,cmf
0,82.885,81.455,82.255,82.185,82.107,747272.0,,,,,...,1.0,1.0,,0.0,0.0,,0.0,,,
1,83.673889,82.188333,82.655,83.325741,82.975889,642111.851852,1.388016,,,,...,1.006653,1.006262,100.0,0.01989,642111.9,1.760866,46.482753,,,
2,84.649177,83.123251,84.084542,83.824389,83.897255,643383.938879,0.598432,1.388016,,,...,1.00812,1.007388,100.0,0.06,1285496.0,2.265235,63.599917,,,
3,84.71571,83.081512,84.24982,83.50144,83.761213,698858.432731,-0.385269,0.598432,1.388016,,...,1.003061,1.002375,83.542641,0.105115,586637.4,0.028989,47.215621,,,
4,83.999738,82.374817,83.736993,82.696778,82.940102,714359.05865,-0.963651,-0.385269,0.598432,1.388016,...,0.994897,0.994735,59.247881,0.13688,-127721.7,-1.618038,37.443147,,,


In [32]:
def emv(X):
  emv = pd.Series(index=X.index)
  emv.iloc[0] = np.nan

  for i in range(1, len(X)):
    dm = 0.5 * ((X['high'].iloc[i] + X['low'].iloc[i]) - (X['high'].iloc[i-1] + X['low'].iloc[i-1]))
    br = X['volume'].iloc[i] / (1000000 * (X['high'].iloc[i] - X['low'].iloc[i]))
    emv.iloc[i] = dm / br if br != 0 else 0

  return emv

In [33]:
df['emv'] = emv(df)

In [34]:
df.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5,rsi,macd,obv,emv
0,82.885,81.455,82.255,82.185,82.107,747272.0,,,,,,,1.0,1.0,1.0,1.0,,0.0,0.0,
1,83.673889,82.188333,82.655,83.325741,82.975889,642111.851852,1.388016,,,,,,1.006824,1.006731,1.006653,1.006262,100.0,0.01989,642111.9,1.760866
2,84.649177,83.123251,84.084542,83.824389,83.897255,643383.938879,0.598432,1.388016,,,,,1.008444,1.008268,1.00812,1.007388,100.0,0.06,1285496.0,2.265235
3,84.71571,83.081512,84.24982,83.50144,83.761213,698858.432731,-0.385269,0.598432,1.388016,,,,1.003381,1.003206,1.003061,1.002375,83.542641,0.105115,586637.4,0.028989
4,83.999738,82.374817,83.736993,82.696778,82.940102,714359.05865,-0.963651,-0.385269,0.598432,1.388016,,,0.995013,0.994947,0.994897,0.994735,59.247881,0.13688,-127721.7,-1.618038


In [42]:
window = 16

df['minimum_low'] = df['low'].rolling(window=window).min()
df['maximum_high'] = df['high'].rolling(window=window).max()
df['stoch'] = ((df['close'] - df['minimum_low']) / (df['maximum_high'] - df['minimum_low'])) * 100

In [43]:
df = df.drop(['minimum_low', 'maximum_high'], axis=1)
df.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,...,ema5,rsi,macd,obv,emv,mfi,roc,atr,cmf,stoch
0,82.885,81.455,82.255,82.185,82.107,747272.0,,,,,...,1.0,,0.0,0.0,,0.0,,,,
1,83.673889,82.188333,82.655,83.325741,82.975889,642111.851852,1.388016,,,,...,1.006262,100.0,0.01989,642111.9,1.760866,46.482753,,,,
2,84.649177,83.123251,84.084542,83.824389,83.897255,643383.938879,0.598432,1.388016,,,...,1.007388,100.0,0.06,1285496.0,2.265235,63.599917,,,,
3,84.71571,83.081512,84.24982,83.50144,83.761213,698858.432731,-0.385269,0.598432,1.388016,,...,1.002375,83.542641,0.105115,586637.4,0.028989,47.215621,,,,
4,83.999738,82.374817,83.736993,82.696778,82.940102,714359.05865,-0.963651,-0.385269,0.598432,1.388016,...,0.994735,59.247881,0.13688,-127721.7,-1.618038,37.443147,,,,


In [35]:
def mfi(X, window=14):
  combine_price = (X['high'] + X['low'] + X['close']) / 3

  raw_money_flow = combine_price * X['volume']

  flow_direction = (combine_price.diff() > 0).astype(int)

  positive_money_flow = flow_direction * raw_money_flow
  negative_money_flow = (1 - flow_direction) * raw_money_flow

  positive = positive_money_flow.rolling(window=window, min_periods=1).sum()
  negative = negative_money_flow.rolling(window=window, min_periods=1).sum()

  mf = positive / negative
  mfi = 100 - (100 / (1 + mf))

  return mfi

In [36]:
df['mfi'] = mfi(df)
df.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,...,previous5,ema50,ema21,ema14,ema5,rsi,macd,obv,emv,mfi
0,82.885,81.455,82.255,82.185,82.107,747272.0,,,,,...,,1.0,1.0,1.0,1.0,,0.0,0.0,,0.0
1,83.673889,82.188333,82.655,83.325741,82.975889,642111.851852,1.388016,,,,...,,1.006824,1.006731,1.006653,1.006262,100.0,0.01989,642111.9,1.760866,46.482753
2,84.649177,83.123251,84.084542,83.824389,83.897255,643383.938879,0.598432,1.388016,,,...,,1.008444,1.008268,1.00812,1.007388,100.0,0.06,1285496.0,2.265235,63.599917
3,84.71571,83.081512,84.24982,83.50144,83.761213,698858.432731,-0.385269,0.598432,1.388016,,...,,1.003381,1.003206,1.003061,1.002375,83.542641,0.105115,586637.4,0.028989,47.215621
4,83.999738,82.374817,83.736993,82.696778,82.940102,714359.05865,-0.963651,-0.385269,0.598432,1.388016,...,,0.995013,0.994947,0.994897,0.994735,59.247881,0.13688,-127721.7,-1.618038,37.443147


In [44]:
window = 21

df['combine_price'] = (df['high'] + df['low'] + df['close']) / 3
df['sma_combine_price'] = df['combine_price'].rolling(window=window).mean()
df['mean_deviation'] = df['combine_price'].rolling(window).apply(lambda x: x.mad())
df['cci'] = (df['combine_price'] - df['sma_combine_price']) / (0.015 * df['mean_deviation'])

In [45]:
df = df.drop(['combine_price', 'sma_combine_price', 'mean_deviation'], axis=1)
df.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,...,rsi,macd,obv,emv,mfi,roc,atr,cmf,stoch,cci
0,82.885,81.455,82.255,82.185,82.107,747272.0,,,,,...,,0.0,0.0,,0.0,,,,,
1,83.673889,82.188333,82.655,83.325741,82.975889,642111.851852,1.388016,,,,...,100.0,0.01989,642111.9,1.760866,46.482753,,,,,
2,84.649177,83.123251,84.084542,83.824389,83.897255,643383.938879,0.598432,1.388016,,,...,100.0,0.06,1285496.0,2.265235,63.599917,,,,,
3,84.71571,83.081512,84.24982,83.50144,83.761213,698858.432731,-0.385269,0.598432,1.388016,,...,83.542641,0.105115,586637.4,0.028989,47.215621,,,,,
4,83.999738,82.374817,83.736993,82.696778,82.940102,714359.05865,-0.963651,-0.385269,0.598432,1.388016,...,59.247881,0.13688,-127721.7,-1.618038,37.443147,,,,,


In [46]:
df['volume'] = df['volume'] / df['volume'].ewm(5).mean()
df.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,...,rsi,macd,obv,emv,mfi,roc,atr,cmf,stoch,cci
0,82.885,81.455,82.255,82.185,82.107,1.0,,,,,...,,0.0,0.0,,0.0,,,,,
1,83.673889,82.188333,82.655,83.325741,82.975889,0.930716,1.388016,,,,...,100.0,0.01989,642111.9,1.760866,46.482753,,,,,
2,84.649177,83.123251,84.084542,83.824389,83.897255,0.958122,0.598432,1.388016,,,...,100.0,0.06,1285496.0,2.265235,63.599917,,,,,
3,84.71571,83.081512,84.24982,83.50144,83.761213,1.027264,-0.385269,0.598432,1.388016,,...,83.542641,0.105115,586637.4,0.028989,47.215621,,,,,
4,83.999738,82.374817,83.736993,82.696778,82.940102,1.035606,-0.963651,-0.385269,0.598432,1.388016,...,59.247881,0.13688,-127721.7,-1.618038,37.443147,,,,,


In [47]:
df = df.rename(columns={'percentage_change': 'today'})
df.replace(0, np.nan, inplace=True)
df = df.dropna()
df.head()

Unnamed: 0,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,rsi,macd,obv,emv,mfi,roc,atr,cmf,stoch,cci
20,83.65832,82.418728,82.96783,83.123208,83.001338,1.103148,0.272851,0.411613,0.006285,-0.574878,...,46.452205,0.126914,4768241.0,0.417704,50.9216,0.332011,1.565383,0.076565,62.700835,27.843369
21,83.847912,82.562555,83.160991,83.329123,83.176553,1.143207,0.247723,0.272851,0.411613,0.006285,...,48.777489,0.132547,5835992.0,0.200685,52.854204,0.284046,1.522179,0.082589,69.289508,50.969611
22,84.281519,82.833894,83.358597,83.306943,83.497619,1.102787,-0.026617,0.247723,0.272851,0.411613,...,45.487265,0.142015,4783966.0,0.485016,54.614634,0.334349,1.498126,0.040306,68.579821,82.153863
23,84.127782,82.564863,83.294509,83.24393,83.310992,1.106205,-0.075639,-0.026617,0.247723,0.272851,...,65.064461,0.15204,3705421.0,-0.306317,52.384776,0.838154,1.510839,0.025584,66.563593,63.952689
24,84.031724,82.275702,83.060828,83.176376,83.117427,1.447191,-0.081152,-0.075639,-0.026617,0.247723,...,72.918482,0.160423,2153880.0,-0.217994,48.33721,0.749988,1.54068,0.022985,64.554787,46.214056


In [48]:
df['trend'] = (df['today'].iloc[:] > 0).astype(int)
df.head()

Unnamed: 0,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,macd,obv,emv,mfi,roc,atr,cmf,stoch,cci,trend
20,83.65832,82.418728,82.96783,83.123208,83.001338,1.103148,0.272851,0.411613,0.006285,-0.574878,...,0.126914,4768241.0,0.417704,50.9216,0.332011,1.565383,0.076565,62.700835,27.843369,1
21,83.847912,82.562555,83.160991,83.329123,83.176553,1.143207,0.247723,0.272851,0.411613,0.006285,...,0.132547,5835992.0,0.200685,52.854204,0.284046,1.522179,0.082589,69.289508,50.969611,1
22,84.281519,82.833894,83.358597,83.306943,83.497619,1.102787,-0.026617,0.247723,0.272851,0.411613,...,0.142015,4783966.0,0.485016,54.614634,0.334349,1.498126,0.040306,68.579821,82.153863,0
23,84.127782,82.564863,83.294509,83.24393,83.310992,1.106205,-0.075639,-0.026617,0.247723,0.272851,...,0.15204,3705421.0,-0.306317,52.384776,0.838154,1.510839,0.025584,66.563593,63.952689,0
24,84.031724,82.275702,83.060828,83.176376,83.117427,1.447191,-0.081152,-0.075639,-0.026617,0.247723,...,0.160423,2153880.0,-0.217994,48.33721,0.749988,1.54068,0.022985,64.554787,46.214056,0


In [49]:
df_stock = {}
df_stock = df[['volume', 'today', 'previous1', 'previous2', 'previous3', 'previous4', 'previous5',
          'ema50', 'ema21', 'ema14', 'ema5', 'rsi', 'macd', 'obv', 'emv', 'mfi', 'roc', 'atr', 'cmf', 'stoch', 'cci', 'trend']]
df_stock

Unnamed: 0,volume,today,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,...,macd,obv,emv,mfi,roc,atr,cmf,stoch,cci,trend
20,1.103148,0.272851,0.411613,0.006285,-0.574878,-0.076739,0.295670,1.002751,1.002948,1.003085,...,0.126914,4.768241e+06,0.417704,50.921600,0.332011,1.565383,0.076565,62.700835,27.843369,1
21,1.143207,0.247723,0.272851,0.411613,0.006285,-0.574878,-0.076739,1.004943,1.005045,1.005092,...,0.132547,5.835992e+06,0.200685,52.854204,0.284046,1.522179,0.082589,69.289508,50.969611,1
22,1.102787,-0.026617,0.247723,0.272851,0.411613,0.006285,-0.574878,1.004424,1.004445,1.004418,...,0.142015,4.783966e+06,0.485016,54.614634,0.334349,1.498126,0.040306,68.579821,82.153863,0
23,1.106205,-0.075639,-0.026617,0.247723,0.272851,0.411613,0.006285,1.003473,1.003436,1.003356,...,0.152040,3.705421e+06,-0.306317,52.384776,0.838154,1.510839,0.025584,66.563593,63.952689,0
24,1.447191,-0.081152,-0.075639,-0.026617,0.247723,0.272851,0.411613,1.002525,1.002448,1.002335,...,0.160423,2.153880e+06,-0.217994,48.337210,0.749988,1.540680,0.022985,64.554787,46.214056,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1738,0.871701,1.067019,-0.228837,0.098490,0.447074,-0.267450,-1.208497,0.921946,0.982766,0.995525,...,-0.395080,3.074456e+08,0.094754,57.626185,-0.106823,1.034749,-0.048672,43.672537,-14.611007,1
1739,1.028413,0.829263,1.067019,-0.228837,0.098490,0.447074,-0.267450,0.930876,0.991325,1.003528,...,-0.377402,3.129503e+08,0.101055,58.125553,1.953660,1.024553,-0.051005,59.117881,42.238120,1
1740,0.943041,0.220304,0.829263,1.067019,-0.228837,0.098490,0.447074,0.934156,0.993802,1.005354,...,-0.348065,3.179412e+08,0.058046,57.059636,2.452277,1.013875,-0.055124,63.255158,67.619603,1
1741,1.053903,0.256676,0.220304,0.829263,1.067019,-0.228837,0.098490,0.937720,0.996518,1.007402,...,-0.309535,3.235796e+08,0.008608,55.659520,2.258079,1.007903,-0.073406,68.086120,72.243002,1


## Logistic Regression - Prediction & Valuation Model

In [51]:
X = df_stock.loc[:, df_stock.columns != 'trend']
y = df_stock['trend']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [52]:
predictions = lr.predict(X_test_scaled)
predictions[:21]

array([1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1])

In [53]:
y_test.values[:21]

array([1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0])

In [54]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.72


## Random Forest - Prediction & Valuation Model

In [55]:
X = df_stock[['volume', 'ema50', 'ema21', 'ema14', 'ema5', 'rsi', 'macd', 'obv', 'emv', 'mfi', 'roc', 'atr', 'cmf', 'stoch', 'cci']]
y = df_stock['trend']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [56]:
y_pred = rf.predict(X_test.values)
y_pred[:21]

array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1])

In [57]:
y_test.values[:21]

array([0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1])

In [58]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.76


## Predict with new data

In [59]:
new_stock = {}
for i in range(len(stock_symbols)):
  data = dt.DataLoader(symbols=stock_symbols[i],
                        start="2024-01-01",
                        end="2024-03-01",
                        data_source="VND")
  new_stock[stock_symbols[i]] = data.download()

In [61]:
new_stock[stock_symbols[0]]

Attributes,high,low,open,close,avg,volume
Symbols,VIC,VIC,VIC,VIC,VIC,VIC
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2024-01-02,44.95,44.0,44.95,44.0,44.35,2324300.0
2024-01-03,44.15,43.5,43.5,44.15,43.73,2347100.0
2024-01-04,44.4,43.8,44.15,44.15,44.14,2380800.0
2024-01-05,44.2,43.9,44.15,44.1,44.03,1553600.0
2024-01-08,44.75,44.1,44.45,44.35,44.43,2577400.0
2024-01-09,44.4,43.9,44.3,43.9,44.08,1716800.0
2024-01-10,44.05,43.2,43.9,43.6,43.65,2718700.0
2024-01-11,44.0,43.4,43.6,43.65,43.77,1758100.0
2024-01-12,43.5,42.7,43.5,43.0,43.03,3009600.0
2024-01-15,43.5,43.05,43.5,43.05,43.21,1528300.0


In [62]:
for i in range(len(stock_symbols)):
  print(f"Length of {stock_symbols[i]} with index {i}: ", len(new_stock[stock_symbols[i]]))

Length of VIC with index 0:  39
Length of VCB with index 1:  39
Length of VNM with index 2:  39
Length of GAS with index 3:  39
Length of HVN with index 4:  39
Length of MWG with index 5:  39
Length of FPT with index 6:  39
Length of HPG with index 7:  39
Length of MSN with index 8:  39
Length of SAB with index 9:  39


In [64]:
combined_new_high = (new_stock[stock_symbols[0]]['high'].values + new_stock[stock_symbols[1]]['high'].values + new_stock[stock_symbols[2]]['high'].values +
                 new_stock[stock_symbols[3]]['high'].values + new_stock[stock_symbols[4]]['high'].values + new_stock[stock_symbols[5]]['high'].values +
                 new_stock[stock_symbols[6]]['high'].values + new_stock[stock_symbols[7]]['high'].values + new_stock[stock_symbols[8]]['high'].values +
                 new_stock[stock_symbols[9]]['high'].values) / len(stock_symbols)
combined_new_high[:5]

array([[58.775],
       [58.71 ],
       [59.3  ],
       [59.065],
       [59.24 ]])

In [65]:
combined_new_low = (new_stock[stock_symbols[0]]['low'].values + new_stock[stock_symbols[1]]['low'].values + new_stock[stock_symbols[2]]['low'].values +
                 new_stock[stock_symbols[3]]['low'].values + new_stock[stock_symbols[4]]['low'].values + new_stock[stock_symbols[5]]['low'].values +
                 new_stock[stock_symbols[6]]['low'].values + new_stock[stock_symbols[7]]['low'].values + new_stock[stock_symbols[8]]['low'].values +
                 new_stock[stock_symbols[9]]['low'].values) / len(stock_symbols)
combined_new_low[:5]

array([[57.69 ],
       [57.765],
       [58.295],
       [58.305],
       [58.425]])

In [66]:
combined_new_open = (new_stock[stock_symbols[0]]['open'].values + new_stock[stock_symbols[1]]['open'].values + new_stock[stock_symbols[2]]['open'].values +
                 new_stock[stock_symbols[3]]['open'].values + new_stock[stock_symbols[4]]['open'].values + new_stock[stock_symbols[5]]['open'].values +
                 new_stock[stock_symbols[6]]['open'].values + new_stock[stock_symbols[7]]['open'].values + new_stock[stock_symbols[8]]['open'].values +
                 new_stock[stock_symbols[9]]['open'].values) / len(stock_symbols)
combined_new_open[:5]

array([[58.38 ],
       [58.05 ],
       [58.535],
       [58.825],
       [58.935]])

In [67]:
combined_new_close = (new_stock[stock_symbols[0]]['close'].values + new_stock[stock_symbols[1]]['close'].values + new_stock[stock_symbols[2]]['close'].values +
                 new_stock[stock_symbols[3]]['close'].values + new_stock[stock_symbols[4]]['close'].values + new_stock[stock_symbols[5]]['close'].values +
                 new_stock[stock_symbols[6]]['close'].values + new_stock[stock_symbols[7]]['close'].values + new_stock[stock_symbols[8]]['close'].values +
                 new_stock[stock_symbols[9]]['close'].values) / len(stock_symbols)
combined_new_close[:5]

array([[58.135],
       [58.645],
       [58.745],
       [58.79 ],
       [58.575]])

In [68]:
combined_new_avg = (new_stock[stock_symbols[0]]['avg'].values + new_stock[stock_symbols[1]]['avg'].values + new_stock[stock_symbols[2]]['avg'].values +
                 new_stock[stock_symbols[3]]['avg'].values + new_stock[stock_symbols[4]]['avg'].values + new_stock[stock_symbols[5]]['avg'].values +
                 new_stock[stock_symbols[6]]['avg'].values + new_stock[stock_symbols[7]]['avg'].values + new_stock[stock_symbols[8]]['avg'].values +
                 new_stock[stock_symbols[9]]['avg'].values) / len(stock_symbols)
combined_new_avg[:5]

array([[58.192],
       [58.293],
       [58.806],
       [58.646],
       [58.766]])

In [69]:
combined_new_volume = (new_stock[stock_symbols[0]]['volume'].values + new_stock[stock_symbols[1]]['volume'].values + new_stock[stock_symbols[2]]['volume'].values +
                 new_stock[stock_symbols[3]]['volume'].values + new_stock[stock_symbols[4]]['volume'].values + new_stock[stock_symbols[5]]['volume'].values +
                 new_stock[stock_symbols[6]]['volume'].values + new_stock[stock_symbols[7]]['volume'].values + new_stock[stock_symbols[8]]['volume'].values +
                 new_stock[stock_symbols[9]]['volume'].values) / len(stock_symbols)
combined_new_volume[:5]

array([[4590397.6],
       [4504060. ],
       [6850260. ],
       [4142200. ],
       [5685750. ]])

In [70]:
df_new_stock = pd.DataFrame(combined_new_high)
df_new_stock.head()

Unnamed: 0,0
0,58.775
1,58.71
2,59.3
3,59.065
4,59.24


In [71]:
df_new_stock = df_new_stock.rename(columns={0: 'high'})

df_new_stock['low'] = combined_new_low
df_new_stock['open'] = combined_new_open
df_new_stock['close'] = combined_new_close
df_new_stock['avg'] = combined_new_avg
df_new_stock['volume'] = combined_new_volume

df_new_stock.head()

Unnamed: 0,high,low,open,close,avg,volume
0,58.775,57.69,58.38,58.135,58.192,4590397.6
1,58.71,57.765,58.05,58.645,58.293,4504060.0
2,59.3,58.295,58.535,58.745,58.806,6850260.0
3,59.065,58.305,58.825,58.79,58.646,4142200.0
4,59.24,58.425,58.935,58.575,58.766,5685750.0


In [72]:
df_new_stock = df_new_stock.ewm(alpha=0.65).mean()
df_new_stock.head()

Unnamed: 0,high,low,open,close,avg,volume
0,58.775,57.69,58.38,58.135,58.192,4590398.0
1,58.726852,57.745556,58.135556,58.512778,58.266815,4526444.0
2,59.116087,58.118693,58.406825,58.670484,58.632985,6104587.0
3,59.082374,58.241637,58.68278,58.749353,58.641574,4809603.0
4,59.185372,58.361452,58.847589,58.635425,58.722878,5382105.0


In [73]:
df_new_stock['percentage_change'] = df_new_stock['close'].pct_change() * 100
df_new_stock.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change
0,58.775,57.69,58.38,58.135,58.192,4590398.0,
1,58.726852,57.745556,58.135556,58.512778,58.266815,4526444.0,0.649828
2,59.116087,58.118693,58.406825,58.670484,58.632985,6104587.0,0.269524
3,59.082374,58.241637,58.68278,58.749353,58.641574,4809603.0,0.134427
4,59.185372,58.361452,58.847589,58.635425,58.722878,5382105.0,-0.193922


In [74]:
for i in range(1, 6):
  df_new_stock[f'previous{str(i)}'] = df_new_stock['percentage_change'].shift(i)

df_new_stock.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,previous4,previous5
0,58.775,57.69,58.38,58.135,58.192,4590398.0,,,,,,
1,58.726852,57.745556,58.135556,58.512778,58.266815,4526444.0,0.649828,,,,,
2,59.116087,58.118693,58.406825,58.670484,58.632985,6104587.0,0.269524,0.649828,,,,
3,59.082374,58.241637,58.68278,58.749353,58.641574,4809603.0,0.134427,0.269524,0.649828,,,
4,59.185372,58.361452,58.847589,58.635425,58.722878,5382105.0,-0.193922,0.134427,0.269524,0.649828,,


In [75]:
df_new_stock['ema50'] = df_new_stock['close'] / df_new_stock['close'].ewm(50).mean()
df_new_stock['ema21'] = df_new_stock['close'] / df_new_stock['close'].ewm(21).mean()
df_new_stock['ema14'] = df_new_stock['close'] / df_new_stock['close'].ewm(14).mean()
df_new_stock['ema5'] = df_new_stock['close'] / df_new_stock['close'].ewm(5).mean()

df_new_stock.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,ema5
0,58.775,57.69,58.38,58.135,58.192,4590398.0,,,,,,,1.0,1.0,1.0,1.0
1,58.726852,57.745556,58.135556,58.512778,58.266815,4526444.0,0.649828,,,,,,1.003206,1.003163,1.003127,1.002943
2,59.116087,58.118693,58.406825,58.670484,58.632985,6104587.0,0.269524,0.649828,,,,,1.003893,1.003812,1.003744,1.003405
3,59.082374,58.241637,58.68278,58.749353,58.641574,4809603.0,0.134427,0.269524,0.649828,,,,1.003888,1.003774,1.00368,1.003219
4,59.185372,58.361452,58.847589,58.635425,58.722878,5382105.0,-0.193922,0.134427,0.269524,0.649828,,,1.001537,1.001427,1.001337,1.000918


In [76]:
df_new_stock['rsi'] = rsi(df_new_stock['close'])
df_new_stock['macd'] = macd(df_new_stock['close'])
df_new_stock['obv'] = obv(df_new_stock)
df_new_stock['cmf'] = cmf(df_new_stock)
df_new_stock['emv'] = emv(df_new_stock)
df_new_stock['mfi'] = mfi(df_new_stock)

In [77]:
df_new_stock.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,...,ema50,ema21,ema14,ema5,rsi,macd,obv,cmf,emv,mfi
0,58.775,57.69,58.38,58.135,58.192,4590398.0,,,,,...,1.0,1.0,1.0,1.0,,0.0,0.0,,,0.0
1,58.726852,57.745556,58.135556,58.512778,58.266815,4526444.0,0.649828,,,,...,1.003206,1.003163,1.003127,1.002943,100.0,0.006587,4526444.0,,0.000803,49.704344
2,59.116087,58.118693,58.406825,58.670484,58.632985,6104587.0,0.269524,0.649828,,,...,1.003893,1.003812,1.003744,1.003405,100.0,0.019741,10631030.0,,0.06228,69.952331
3,59.082374,58.241637,58.68278,58.749353,58.641574,4809603.0,0.134427,0.269524,0.649828,,...,1.003888,1.003774,1.00368,1.003219,100.0,0.037717,15440630.0,,0.007799,77.193109
4,59.185372,58.361452,58.847589,58.635425,58.722878,5382105.0,-0.193922,0.134427,0.269524,0.649828,...,1.001537,1.001427,1.001337,1.000918,84.356615,0.055772,10058530.0,,0.017055,82.039376


In [78]:
window = 6
df_new_stock['roc'] = ((df_new_stock['close'] - df_new_stock['close'].shift(window)) / df_new_stock['close'].shift(window)) * 100

df_new_stock.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,...,ema21,ema14,ema5,rsi,macd,obv,cmf,emv,mfi,roc
0,58.775,57.69,58.38,58.135,58.192,4590398.0,,,,,...,1.0,1.0,1.0,,0.0,0.0,,,0.0,
1,58.726852,57.745556,58.135556,58.512778,58.266815,4526444.0,0.649828,,,,...,1.003163,1.003127,1.002943,100.0,0.006587,4526444.0,,0.000803,49.704344,
2,59.116087,58.118693,58.406825,58.670484,58.632985,6104587.0,0.269524,0.649828,,,...,1.003812,1.003744,1.003405,100.0,0.019741,10631030.0,,0.06228,69.952331,
3,59.082374,58.241637,58.68278,58.749353,58.641574,4809603.0,0.134427,0.269524,0.649828,,...,1.003774,1.00368,1.003219,100.0,0.037717,15440630.0,,0.007799,77.193109,
4,59.185372,58.361452,58.847589,58.635425,58.722878,5382105.0,-0.193922,0.134427,0.269524,0.649828,...,1.001427,1.001337,1.000918,84.356615,0.055772,10058530.0,,0.017055,82.039376,


In [79]:
window = 16

df_new_stock['high-low'] = df_new_stock['high'] - df_new_stock['low']
df_new_stock['high-preclose'] = abs(df_new_stock['high'] - df_new_stock['close'].shift(1))
df_new_stock['low-preclose'] = abs(df_new_stock['low'] - df_new_stock['close'].shift(1))
df_new_stock['tr'] = df_new_stock[[('high-low'), ('high-preclose'), ('low-preclose')]].max(axis=1)
df_new_stock['atr'] = df_new_stock['tr'].rolling(window=window).mean()

In [80]:
df_new_stock = df_new_stock.drop(['high-low', 'high-preclose', 'low-preclose', 'tr'], axis=1)

df_new_stock.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,...,ema14,ema5,rsi,macd,obv,cmf,emv,mfi,roc,atr
0,58.775,57.69,58.38,58.135,58.192,4590398.0,,,,,...,1.0,1.0,,0.0,0.0,,,0.0,,
1,58.726852,57.745556,58.135556,58.512778,58.266815,4526444.0,0.649828,,,,...,1.003127,1.002943,100.0,0.006587,4526444.0,,0.000803,49.704344,,
2,59.116087,58.118693,58.406825,58.670484,58.632985,6104587.0,0.269524,0.649828,,,...,1.003744,1.003405,100.0,0.019741,10631030.0,,0.06228,69.952331,,
3,59.082374,58.241637,58.68278,58.749353,58.641574,4809603.0,0.134427,0.269524,0.649828,,...,1.00368,1.003219,100.0,0.037717,15440630.0,,0.007799,77.193109,,
4,59.185372,58.361452,58.847589,58.635425,58.722878,5382105.0,-0.193922,0.134427,0.269524,0.649828,...,1.001337,1.000918,84.356615,0.055772,10058530.0,,0.017055,82.039376,,


In [81]:
window = 16

df_new_stock['minimum_low'] = df_new_stock['low'].rolling(window=window).min()
df_new_stock['maximum_high'] = df_new_stock['high'].rolling(window=window).max()
df_new_stock['stoch'] = ((df_new_stock['close'] - df_new_stock['minimum_low']) / (df_new_stock['maximum_high'] - df_new_stock['minimum_low'])) * 100

In [82]:
df_new_stock = df_new_stock.drop(['minimum_low', 'maximum_high'], axis=1)
df_new_stock.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,...,ema5,rsi,macd,obv,cmf,emv,mfi,roc,atr,stoch
0,58.775,57.69,58.38,58.135,58.192,4590398.0,,,,,...,1.0,,0.0,0.0,,,0.0,,,
1,58.726852,57.745556,58.135556,58.512778,58.266815,4526444.0,0.649828,,,,...,1.002943,100.0,0.006587,4526444.0,,0.000803,49.704344,,,
2,59.116087,58.118693,58.406825,58.670484,58.632985,6104587.0,0.269524,0.649828,,,...,1.003405,100.0,0.019741,10631030.0,,0.06228,69.952331,,,
3,59.082374,58.241637,58.68278,58.749353,58.641574,4809603.0,0.134427,0.269524,0.649828,,...,1.003219,100.0,0.037717,15440630.0,,0.007799,77.193109,,,
4,59.185372,58.361452,58.847589,58.635425,58.722878,5382105.0,-0.193922,0.134427,0.269524,0.649828,...,1.000918,84.356615,0.055772,10058530.0,,0.017055,82.039376,,,


In [83]:
window = 21

df_new_stock['combine_price'] = (df_new_stock['high'] + df_new_stock['low'] + df_new_stock['close']) / 3
df_new_stock['sma_combine_price'] = df_new_stock['combine_price'].rolling(window=window).mean()
df_new_stock['mean_deviation'] = df_new_stock['combine_price'].rolling(window).apply(lambda x: x.mad())
df_new_stock['cci'] = (df_new_stock['combine_price'] - df_new_stock['sma_combine_price']) / (0.015 * df_new_stock['mean_deviation'])

In [84]:
df_new_stock = df_new_stock.drop(['combine_price', 'sma_combine_price', 'mean_deviation'], axis=1)
df_new_stock.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,...,rsi,macd,obv,cmf,emv,mfi,roc,atr,stoch,cci
0,58.775,57.69,58.38,58.135,58.192,4590398.0,,,,,...,,0.0,0.0,,,0.0,,,,
1,58.726852,57.745556,58.135556,58.512778,58.266815,4526444.0,0.649828,,,,...,100.0,0.006587,4526444.0,,0.000803,49.704344,,,,
2,59.116087,58.118693,58.406825,58.670484,58.632985,6104587.0,0.269524,0.649828,,,...,100.0,0.019741,10631030.0,,0.06228,69.952331,,,,
3,59.082374,58.241637,58.68278,58.749353,58.641574,4809603.0,0.134427,0.269524,0.649828,,...,100.0,0.037717,15440630.0,,0.007799,77.193109,,,,
4,59.185372,58.361452,58.847589,58.635425,58.722878,5382105.0,-0.193922,0.134427,0.269524,0.649828,...,84.356615,0.055772,10058530.0,,0.017055,82.039376,,,,


In [85]:
df_new_stock['volume'] = df_new_stock['volume'] / df_new_stock['volume'].ewm(5).mean()
df_new_stock.head()

Unnamed: 0,high,low,open,close,avg,volume,percentage_change,previous1,previous2,previous3,...,rsi,macd,obv,cmf,emv,mfi,roc,atr,stoch,cci
0,58.775,57.69,58.38,58.135,58.192,1.0,,,,,...,,0.0,0.0,,,0.0,,,,
1,58.726852,57.745556,58.135556,58.512778,58.266815,0.993619,0.649828,,,,...,100.0,0.006587,4526444.0,,0.000803,49.704344,,,,
2,59.116087,58.118693,58.406825,58.670484,58.632985,1.181152,0.269524,0.649828,,,...,100.0,0.019741,10631030.0,,0.06228,69.952331,,,,
3,59.082374,58.241637,58.68278,58.749353,58.641574,0.951858,0.134427,0.269524,0.649828,,...,100.0,0.037717,15440630.0,,0.007799,77.193109,,,,
4,59.185372,58.361452,58.847589,58.635425,58.722878,1.046166,-0.193922,0.134427,0.269524,0.649828,...,84.356615,0.055772,10058530.0,,0.017055,82.039376,,,,


In [86]:
df_new_stock = df_new_stock.rename(columns={'percentage_change': 'today'})
df_new_stock.replace(0, np.nan, inplace=True)
df_new_stock = df_new_stock.dropna()
df_new_stock.head()

Unnamed: 0,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,rsi,macd,obv,cmf,emv,mfi,roc,atr,stoch,cci
20,58.542464,57.871097,58.225648,58.19378,58.18362,0.896113,-0.028011,-0.064038,-0.214389,-0.340944,...,51.058511,0.090517,-84946.94,-0.137349,-0.016232,44.424334,-1.560265,0.8342,35.444452,-60.312614
21,58.527862,57.561634,58.247977,57.746073,58.004467,1.043712,-0.769338,-0.028011,-0.064038,-0.214389,...,42.941962,0.069902,-4985003.0,-0.147509,-0.031951,43.940832,-1.976856,0.845696,14.542379,-117.48586
22,58.617002,57.463072,57.768292,58.180876,58.129613,0.937985,0.752956,-0.769338,-0.028011,-0.064038,...,55.946389,0.051474,-636248.5,-0.109529,-0.00125,50.937147,-0.668154,0.854789,34.841994,-77.05636
23,58.888701,57.909575,58.237402,58.518306,58.427565,0.957534,0.579969,0.752956,-0.769338,-0.028011,...,59.149198,0.041131,3765186.0,-0.082901,0.079884,51.239443,0.249735,0.862848,50.59562,7.833178
24,59.110545,58.215351,58.544591,58.733907,58.674198,0.924709,0.368433,0.579969,0.752956,-0.769338,...,57.852822,0.040092,7951890.0,-0.059108,0.056407,50.890086,0.835268,0.85734,60.131143,68.044556


In [87]:
df_new_stock['trend'] = (df_new_stock['today'].iloc[:] > 0).astype(int)
df_new_stock

Unnamed: 0,high,low,open,close,avg,volume,today,previous1,previous2,previous3,...,macd,obv,cmf,emv,mfi,roc,atr,stoch,cci,trend
20,58.542464,57.871097,58.225648,58.19378,58.18362,0.896113,-0.028011,-0.064038,-0.214389,-0.340944,...,0.090517,-84946.94,-0.137349,-0.016232,44.424334,-1.560265,0.8342,35.444452,-60.312614,0
21,58.527862,57.561634,58.247977,57.746073,58.004467,1.043712,-0.769338,-0.028011,-0.064038,-0.214389,...,0.069902,-4985003.0,-0.147509,-0.031951,43.940832,-1.976856,0.845696,14.542379,-117.48586,0
22,58.617002,57.463072,57.768292,58.180876,58.129613,0.937985,0.752956,-0.769338,-0.028011,-0.064038,...,0.051474,-636248.5,-0.109529,-0.00125,50.937147,-0.668154,0.854789,34.841994,-77.05636,1
23,58.888701,57.909575,58.237402,58.518306,58.427565,0.957534,0.579969,0.752956,-0.769338,-0.028011,...,0.041131,3765186.0,-0.082901,0.079884,51.239443,0.249735,0.862848,50.59562,7.833178,1
24,59.110545,58.215351,58.544591,58.733907,58.674198,0.924709,0.368433,0.579969,0.752956,-0.769338,...,0.040092,7951890.0,-0.059108,0.056407,50.890086,0.835268,0.85734,60.131143,68.044556,1
25,59.240191,58.432873,58.840607,58.838618,58.784569,0.91483,0.178279,0.368433,0.579969,0.752956,...,0.046727,12023770.0,-0.047955,0.034416,50.230276,1.079766,0.857241,65.08564,101.326029,1
26,59.285567,58.580506,58.957212,58.979266,58.911599,0.833329,0.239041,0.178279,0.368433,0.579969,...,0.060232,15612220.0,-0.073763,0.018961,49.934407,1.349777,0.846847,71.740602,114.408419,1
27,59.515948,58.684177,59.098774,59.002493,59.03211,1.201855,0.039382,0.239041,0.178279,0.368433,...,0.077682,21006990.0,-0.098112,0.025752,50.800286,2.175767,0.844428,72.839617,121.396299,1
28,59.846832,58.821212,59.122321,59.468873,59.351838,1.115363,0.79044,0.039382,0.239041,0.178279,...,0.104718,26132500.0,-0.086334,0.046815,49.243286,2.213781,0.865518,84.144405,154.808103,1
29,60.836891,59.398924,59.709062,60.402355,60.190443,1.580469,1.5697,0.79044,0.039382,0.239041,...,0.152514,34354760.0,-0.056902,0.137092,59.939262,3.219589,0.894838,87.12036,248.476395,1


In [88]:
df_new_stock = df_new_stock[['volume', 'today', 'previous1', 'previous2', 'previous3', 'previous4', 'previous5',
          'ema50', 'ema21', 'ema14', 'ema5', 'rsi', 'macd', 'obv', 'emv', 'mfi', 'roc', 'atr', 'cmf', 'stoch', 'cci', 'trend']]
df_new_stock

Unnamed: 0,volume,today,previous1,previous2,previous3,previous4,previous5,ema50,ema21,ema14,...,macd,obv,emv,mfi,roc,atr,cmf,stoch,cci,trend
20,0.896113,-0.028011,-0.064038,-0.214389,-0.340944,-0.57447,-0.347615,0.996134,0.996103,0.996093,...,0.090517,-84946.94,-0.016232,44.424334,-1.560265,0.8342,-0.137349,35.444452,-60.312614,0
21,1.043712,-0.769338,-0.028011,-0.064038,-0.214389,-0.340944,-0.57447,0.989103,0.989251,0.989407,...,0.069902,-4985003.0,-0.031951,43.940832,-1.976856,0.845696,-0.147509,14.542379,-117.48586,0
22,0.937985,0.752956,-0.769338,-0.028011,-0.064038,-0.214389,-0.340944,0.996735,0.996927,0.997119,...,0.051474,-636248.5,-0.00125,50.937147,-0.668154,0.854789,-0.109529,34.841994,-77.05636,1
23,0.957534,0.579969,0.752956,-0.769338,-0.028011,-0.064038,-0.214389,1.002385,1.002526,1.002663,...,0.041131,3765186.0,0.079884,51.239443,0.249735,0.862848,-0.082901,50.59562,7.833178,1
24,0.924709,0.368433,0.579969,0.752956,-0.769338,-0.028011,-0.064038,1.005771,1.005806,1.005838,...,0.040092,7951890.0,0.056407,50.890086,0.835268,0.85734,-0.059108,60.131143,68.044556,1
25,0.91483,0.178279,0.368433,0.579969,0.752956,-0.769338,-0.028011,1.007193,1.007103,1.007017,...,0.046727,12023770.0,0.034416,50.230276,1.079766,0.857241,-0.047955,65.08564,101.326029,1
26,0.833329,0.239041,0.178279,0.368433,0.579969,0.752956,-0.769338,1.009142,1.008901,1.008674,...,0.060232,15612220.0,0.018961,49.934407,1.349777,0.846847,-0.073763,71.740602,114.408419,1
27,1.201855,0.039382,0.239041,0.178279,0.368433,0.579969,0.752956,1.009096,1.008713,1.008358,...,0.077682,21006990.0,0.025752,50.800286,2.175767,0.844428,-0.098112,72.839617,121.396299,1
28,1.115363,0.79044,0.039382,0.239041,0.178279,0.368433,0.579969,1.016294,1.015646,1.015051,...,0.104718,26132500.0,0.046815,49.243286,2.213781,0.865518,-0.086334,84.144405,154.808103,1
29,1.580469,1.5697,0.79044,0.039382,0.239041,0.178279,0.368433,1.030791,1.029623,1.028552,...,0.152514,34354760.0,0.137092,59.939262,3.219589,0.894838,-0.056902,87.12036,248.476395,1


## Logistic Regression - Prediction & Valuation Model

In [93]:
X_train = df_stock.loc[:, df_stock.columns != 'trend']
y_train = df_stock['trend']
X_test = df_new_stock.loc[:, df_new_stock.columns != 'trend']
y_test = df_new_stock['trend']

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.transform(X_test.values)

lr = LogisticRegression(penalty='l2', C=0.01, random_state=42)

lr.fit(X_train_scaled, y_train.values)

In [94]:
predictions = lr.predict(X_test_scaled)
predictions

array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [95]:
y_test.values

array([0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1])

In [96]:
accuracy = accuracy_score(y_test.values, predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.79


## Random Forest - Prediction & Valuation Model

In [97]:
X_train = df_stock[['volume', 'ema50', 'ema21', 'ema14', 'ema5', 'rsi', 'macd', 'obv', 'emv', 'mfi', 'roc', 'atr', 'cmf', 'stoch', 'cci']]
y_train = df_stock['trend']
X_test = df_new_stock[['volume', 'ema50', 'ema21', 'ema14', 'ema5', 'rsi', 'macd', 'obv', 'emv', 'mfi', 'roc', 'atr', 'cmf', 'stoch', 'cci']]
y_test = df_new_stock['trend']

rf = RandomForestClassifier(n_estimators=110, random_state=21)
rf.fit(X_train.values, y_train.values)

In [98]:
y_pred = rf.predict(X_test.values)
y_pred

array([0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1])

In [99]:
y_test.values

array([0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1])

In [100]:
accuracy = accuracy_score(y_test.values, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.74


## THE END