In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from vnstock import *
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import yfinance as yf
import os
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
import math

In [2]:
data_ticker = pd.read_excel("C:\\Users\\Chi Bao\\OneDrive\\Desktop\\Đồ án tốt nghiệp\\data_ticker.xlsx")

FI = ['GC=F','SI=F','CL=F','^GSPC','^DJI','^IXIC','^RUT','^FTSE','^N225','^NYA']
df = pd.DataFrame()
for symbol in FI:
    data = yf.download(symbol, start='2012-03-20', end='2023-09-06')
    df[symbol] = data['Adj Close']

column_mapping = {
    'GC=F': 'Gold',
    'SI=F': 'Silver',
    'CL=F': 'Crude Oil',
    '^GSPC': 'S&P 500',
    '^DJI': 'Dow Jones',
    '^IXIC': 'NASDAQ',
    '^RUT': 'Russell 2000',
    '^FTSE': 'FTSE 100',
    '^N225': 'Nikkei 225',
    '^NYA': 'NYSE'
}

df = df.rename(columns=column_mapping)
df.dropna()

returns_df = df.pct_change()*100
returns_df.columns = [col + '_return' for col in returns_df.columns]
returns_df = returns_df.dropna()

###

FI = ['VNINDEX','HNX','VN30','HNX30','UPCOM']
data = stock_historical_data('VNINDEX', '2012-03-19', '2023-09-05', "1D", "index")
d_new = data.set_index('time')
d_new['VNINDEX'] = data['close']
df1 = pd.DataFrame()
for symbol in FI:
    data = stock_historical_data(symbol, '2012-03-19', '2023-09-05', "1D", "index")
    data = data.set_index('time')
    df1[symbol] = data['close']
df1 = df1.dropna()

returns_df1 = df1.pct_change()*100
returns_df1.columns = [col + '_return' for col in returns_df1.columns]
returns_df1 = returns_df1.dropna()
returns_df1.head(42069)

###

lagreturn = pd.DataFrame()

for column in returns_df.columns:
    lagreturn[f'{column}_Lag1'] = returns_df[column].shift(1)
for column in returns_df.columns:
    lagreturn[f'{column}_Lag2'] = returns_df[column].shift(2)
lagreturn = lagreturn.dropna()

lagreturn1 = pd.DataFrame()
for column in returns_df1.columns:
    lagreturn1[f'{column}_Lag1'] = returns_df1[column].shift(1)
for column in returns_df1.columns:
    lagreturn1[f'{column}_Lag2'] = returns_df1[column].shift(2)
lagreturn1 = lagreturn1.dropna()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [3]:
def do_tre_vnstock(symbol, start_date, end_date, lags=10): 
    ts = stock_historical_data(symbol,start_date, end_date)
    ts = ts.dropna(subset=['close'])


    tsret = pd.DataFrame(index=ts.index)
    tsret['time']=ts['time']
    tsret["Today"] = ts["close"].pct_change()*100

    for i,x in enumerate(tsret["Today"]):
        if (abs(x) < 0.0001):
            tsret["Today"][i] = 0.0001

    for i in range(0, lags):
        tsret["Lag%s" % str(i+1)] = tsret["Today"].shift(i+1)

    tsret["Direction"] = np.sign(tsret["Today"])
    tsret = tsret.dropna()
    tsret = tsret.set_index('time')
    bpt = pd.merge(tsret, lagreturn, left_index=True, right_index=True)
    bpt1 = pd.merge(bpt, lagreturn1, left_index=True, right_index=True)
    bpt1.fillna(0, inplace=True)
    bpt1 = bpt1[(bpt1 != 0).all(axis=1)]
    return bpt1

In [4]:
#MODEL
list_i = []
list_return_price = []
list_rmse = []
list_mae =[]
list_mse = []
list_r2_train = []
list_r2_test = []

scaler = StandardScaler()

for i in data_ticker['ticker']:
  try:
    data = do_tre_vnstock(i, '2012-03-19', '2023-09-05', lags=10)

    X = data.loc[:, ~data.columns.isin(['Today'])]
    y = data["Today"]

    g = round(math.log(X.shape[1],2)) #fang (2009)

    model = MLPRegressor(hidden_layer_sizes=(g,), max_iter=100, random_state=12) 
    X_train, X_test,y_train, y_test=train_test_split(X,y,test_size=0.2,random_state=12, shuffle=False)
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    model.fit(X_train,y_train)
    yhat_test = model.predict(X_test)
    r2_train = model.score(X_train, y_train)
    r2_test = model.score(X_test, y_test)
    mse = mean_squared_error(y_test, yhat_test, squared=True)
    rmse = mean_squared_error(y_test, yhat_test, squared=False)
    mae = mean_absolute_error(y_test, yhat_test)

    X_predict = [X.iloc[-1].values.tolist()]
    Y_predict = model.predict(X_predict)

    list_i.append(i)
    list_return_price.append(Y_predict)
    list_r2_train.append(r2_train)
    list_r2_test.append(r2_test)
    list_rmse.append(rmse)
    list_mse.append(mse)
    list_mae.append(mae)

    print('Stock:',i,f' Predict: {Y_predict.item():.2f}',f' R2_train: {r2_train:.2f}',f' R2_test: {r2_test:.2f}',
          f' RMSE: {mse:.2f}',f' RMSE: {mse:.2f}', f'MAE: {mae:.2f}')
  except Exception as e:
    print(f"Stock: {i} -Unpredictable!\n" f'{e}')
    continue

Stock: SSI  Predict: -0.88  R2_train: 0.38  R2_test: 0.34  RMSE: 3.09  RMSE: 3.09 MAE: 1.22
Stock: BCM  Predict: -0.29  R2_train: 0.34  R2_test: 0.16  RMSE: 0.98  RMSE: 0.98 MAE: 0.77
Stock: VHM  Predict: 2.70  R2_train: 0.37  R2_test: 0.32  RMSE: 3.39  RMSE: 3.39 MAE: 1.29
Stock: VIC  Predict: 1.26  R2_train: 0.38  R2_test: 0.20  RMSE: 3.38  RMSE: 3.38 MAE: 1.10
Stock: VRE  Predict: 0.47  R2_train: 0.41  R2_test: 0.40  RMSE: 1.91  RMSE: 1.91 MAE: 1.02
Stock: BVH  Predict: 1.44  R2_train: 0.44  R2_test: 0.42  RMSE: 0.80  RMSE: 0.80 MAE: 0.68
Stock: POW  Predict: 1.55  R2_train: 0.43  R2_test: 0.41  RMSE: 1.63  RMSE: 1.63 MAE: 0.91
Stock: GAS  Predict: 0.98  R2_train: 0.40  R2_test: 0.53  RMSE: 0.54  RMSE: 0.54 MAE: 0.58
Stock: ACB  Predict: 1.38  R2_train: 0.40  R2_test: 0.25  RMSE: 1.24  RMSE: 1.24 MAE: 0.81
Stock: BID  Predict: -0.16  R2_train: 0.39  R2_test: 0.40  RMSE: 1.67  RMSE: 1.67 MAE: 0.85
Stock: CTG  Predict: -0.00  R2_train: 0.44  R2_test: 0.39  RMSE: 1.51  RMSE: 1.51 MAE: 

In [5]:
KQ = pd.DataFrame({'Stock': list_i, 'Predict': list_return_price, 'R2_train': list_r2_train,'R2_test': list_r2_test, 'RMSE': list_mse,'MSE': list_rmse, 'MAE': list_mae,})
KQ.to_excel("Kết quả 20% test size.xlsx")
KQ.head(42069)

Unnamed: 0,Stock,Predict,R2_train,R2_test,RMSE,MSE,MAE
0,SSI,[-0.8818198175755233],0.378382,0.342038,3.091158,1.758169,1.218988
1,BCM,[-0.2921164067129217],0.341859,0.162026,0.983463,0.991697,0.772789
2,VHM,[2.6993394624184033],0.374507,0.315008,3.394859,1.842514,1.292776
3,VIC,[1.2622379725297868],0.378302,0.196117,3.377935,1.837916,1.103340
4,VRE,[0.4739567551842395],0.409304,0.404943,1.911886,1.382710,1.016034
...,...,...,...,...,...,...,...
389,TDP,[-1.1738394269525918],0.294100,0.352686,3.846751,1.961314,1.435224
390,NO1,[-1.474238420918646],0.364571,0.152351,9.901272,3.146629,2.491319
391,L10,[-5.426578413895436],0.303282,0.152294,30.714453,5.542062,4.631149
392,REE,[0.2703102811522944],0.397717,0.381676,1.124088,1.060230,0.837564
