In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from vnstock import *
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import yfinance as yf
import os
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
import math

In [2]:
FI = ['GC=F','SI=F','CL=F','^GSPC','^DJI','^IXIC','^RUT','^FTSE','^N225','^NYA']
df = pd.DataFrame()
for symbol in FI:
    data = yf.download(symbol, start='2012-03-20', end='2023-09-06')
    df[symbol] = data['Adj Close']

column_mapping = {
    'GC=F': 'Gold',
    'SI=F': 'Silver',
    'CL=F': 'Crude Oil',
    '^GSPC': 'S&P 500',
    '^DJI': 'Dow Jones',
    '^IXIC': 'NASDAQ',
    '^RUT': 'Russell 2000',
    '^FTSE': 'FTSE 100',
    '^N225': 'Nikkei 225',
    '^NYA': 'NYSE'
}

df = df.rename(columns=column_mapping)
df.dropna()

returns_df = df.pct_change()*100
returns_df.columns = [col + '_return' for col in returns_df.columns]
returns_df = returns_df.dropna()

###

FI = ['VNINDEX','HNX','VN30','HNX30','UPCOM']
data = stock_historical_data('VNINDEX', '2012-03-19', '2023-09-05', "1D", "index")
d_new = data.set_index('time')
d_new['VNINDEX'] = data['close']
df1 = pd.DataFrame()
for symbol in FI:
    data = stock_historical_data(symbol, '2012-03-19', '2023-09-05', "1D", "index")
    data = data.set_index('time')
    df1[symbol] = data['close']
df1 = df1.dropna()

returns_df1 = df1.pct_change()*100
returns_df1.columns = [col + '_return' for col in returns_df1.columns]
returns_df1 = returns_df1.dropna()
returns_df1.head(42069)

###

lagreturn = pd.DataFrame()

for column in returns_df.columns:
    lagreturn[f'{column}_Lag1'] = returns_df[column].shift(1)
for column in returns_df.columns:
    lagreturn[f'{column}_Lag2'] = returns_df[column].shift(2)
lagreturn = lagreturn.dropna()

lagreturn1 = pd.DataFrame()
for column in returns_df1.columns:
    lagreturn1[f'{column}_Lag1'] = returns_df1[column].shift(1)
for column in returns_df1.columns:
    lagreturn1[f'{column}_Lag2'] = returns_df1[column].shift(2)
lagreturn1 = lagreturn1.dropna()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [3]:
def do_tre_vnstock(symbol, start_date, end_date, lags=10): 
    ts = stock_historical_data(symbol,start_date, end_date)
    ts = ts.dropna(subset=['close'])


    tsret = pd.DataFrame(index=ts.index)
    tsret['time']=ts['time']
    tsret["Today"] = ts["close"].pct_change()*100

    for i,x in enumerate(tsret["Today"]):
        if (abs(x) < 0.0001):
            tsret["Today"][i] = 0.0001

    for i in range(0, lags):
        tsret["Lag%s" % str(i+1)] = tsret["Today"].shift(i+1)

    tsret["Direction"] = np.sign(tsret["Today"])
    tsret = tsret.dropna()
    tsret = tsret.set_index('time')
    bpt = pd.merge(tsret, lagreturn, left_index=True, right_index=True)
    bpt1 = pd.merge(bpt, lagreturn1, left_index=True, right_index=True)
    bpt1.fillna(0, inplace=True)
    bpt1 = bpt1[(bpt1 != 0).all(axis=1)]
    return bpt1

In [4]:
file_path = r"C:\Users\Chi Bao\OneDrive\Desktop\Đồ án tốt nghiệp\Code\ANN\data_ticker.xlsx"
result_file_path = r"C:\Users\Chi Bao\OneDrive\Desktop\Đồ án tốt nghiệp\Code\ANN\Hiệu suất mô hình MLP thuộc ANN sau cải thiện.xlsx"

In [5]:
#MODEL
list_i = []
list_k = []
list_return_price = []
list_rmse, list_mse, list_mae = [], [], []
list_neurons = []
list_r2_train = []
list_r2_test = []

scaler = StandardScaler()

original_data = pd.read_excel(file_path)

if os.path.exists(result_file_path):
    result_df = pd.read_excel(result_file_path)
else:
    result_df = pd.DataFrame(columns=['Stock', 'Predict','R2_train','R2_test', 
                'RMSE','MSE','MAE','test size','neurons'])

try:
    for i in original_data['ticker']:
        try:
            data = do_tre_vnstock(i, '2012-03-19', '2023-09-05', lags=10)

            X = data.loc[:, ~data.columns.isin(['Today'])]
            y = data["Today"]            

            list_k1 = []
            list_score = []
            list_log_used = []

            log_fang = round(math.log(X.shape[1],2))
            log_yao = round(math.log(X.shape[1]))
            list_log = [log_fang, log_yao]
            
            for g in list_log:
                for k in np.arange(0.1, 0.5, 0.01):
                    model = MLPRegressor(hidden_layer_sizes=(g,), max_iter=100, random_state=12)                     
                    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=k, random_state=12, shuffle=False)
                    X_train = scaler.fit_transform(X_train)
                    X_test = scaler.transform(X_test)
                    model.fit(X_train, y_train)
                    yhat_test = model.predict(X_test)
                    score = model.score(X_test, y_test)
                    list_k1.append(k)
                    list_score  .append(score)
                    list_log_used.append(g)

            score_max = max(list_score)
            a = list_score.index(score_max)
            k = list_k1[a]
            g = list_log_used[a]            

            model = MLPRegressor(hidden_layer_sizes=(g,), max_iter=100, random_state=12) 
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=k, random_state=12, shuffle=False)
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)
            model.fit(X_train, y_train)
            yhat_test = model.predict(X_test)
            r2_train = model.score(X_train, y_train)
            r2_test = model.score(X_test, y_test)
            mse = mean_squared_error(y_test, yhat_test, squared=True)
            rmse = mean_squared_error(y_test, yhat_test, squared=False)
            mae = mean_absolute_error(y_test, yhat_test)

            X_predict = [X.iloc[-1].values.tolist()]
            X_predict = scaler.transform(X_predict)
            Y_predict = model.predict(X_predict)

            list_i.append(i)
            list_k.append(k)
            
            list_return_price.append(Y_predict)
            list_r2_train.append(r2_train)
            list_r2_test.append(r2_test)
            list_mae.append(mae)
            list_rmse.append(rmse)
            list_mse.append(mse)

            if g == log_fang:
                g = f'{g} (FANG)'
                list_neurons.append(g)
            if g == log_yao:
                g = f"{g} (YAO)"
                list_neurons.append(g)

            print('Stock:',i,f' Predict: {Y_predict.item():.2f}',f' R2_train: {r2_train:.2f}',f' R2_test: {r2_test:.2f}', 
                f' RMSE: {rmse:.2f}',f' MSE: {mse:.2f}', f' MAE: {mae:.2f}',f' test size: {k:.2f}',f' neurons: {g}')
            result_df = pd.concat([result_df, pd.DataFrame({
                'Stock': [i],
                'Predict': [Y_predict.item()],
                'R2_train': [r2_train],
                'R2_test': [r2_test],
                'RMSE': [rmse],
                'MSE': [mse],
                'MAE': [mae],
                'test size': [k],
                'neurons': [g]
            })], ignore_index=True)

            original_data = original_data[original_data['ticker'] != i]
            result_df.to_excel(result_file_path, index=False)         

        except Exception as e:
            print(f"Stock: {i} -Unpredictable!\n" f'{e}')
            result_df.to_excel(result_file_path, index=False)
            original_data.to_excel(file_path, index=False)
            continue     

except KeyboardInterrupt:
    print("Dừng vòng for. Kết quả lưu trong file:", result_file_path)
    result_df.to_excel(result_file_path, index=False)
    original_data.to_excel(file_path, index=False)

print("Đang tạm dừng!")

Stock: SSI  Predict: -0.81  R2_train: 0.38  R2_test: 0.37  RMSE: 1.67  MSE: 2.80  MAE: 1.17  test size: 0.19  neurons: 5 (FANG)
Stock: BCM  Predict: 0.02  R2_train: 0.34  R2_test: 0.38  RMSE: 1.12  MSE: 1.25  MAE: 0.82  test size: 0.10  neurons: 5 (FANG)
Stock: VHM  Predict: 1.80  R2_train: 0.39  R2_test: 0.40  RMSE: 1.57  MSE: 2.48  MAE: 1.08  test size: 0.14  neurons: 5 (FANG)
Stock: VIC  Predict: 1.48  R2_train: 0.38  R2_test: 0.23  RMSE: 2.03  MSE: 4.14  MAE: 1.29  test size: 0.25  neurons: 5 (FANG)
Stock: VRE  Predict: 0.11  R2_train: 0.42  R2_test: 0.51  RMSE: 1.10  MSE: 1.20  MAE: 0.85  test size: 0.12  neurons: 5 (FANG)
Stock: BVH  Predict: 1.30  R2_train: 0.44  R2_test: 0.48  RMSE: 0.88  MSE: 0.77  MAE: 0.66  test size: 0.10  neurons: 5 (FANG)
Stock: POW  Predict: 1.17  R2_train: 0.43  R2_test: 0.54  RMSE: 0.95  MSE: 0.90  MAE: 0.69  test size: 0.12  neurons: 5 (FANG)
Stock: GAS  Predict: 1.08  R2_train: 0.40  R2_test: 0.56  RMSE: 0.72  MSE: 0.52  MAE: 0.58  test size: 0.13  n