In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

In [2]:
df = pd.read_csv('PNJ.csv')
df.head()

Unnamed: 0,Ticker,Date/Time,Open,High,Low,Close,Volume,Open Interest
0,PNJ,2/28/2018 9:15,78.14,78.99,78.14,78.99,270,0
1,PNJ,2/28/2018 9:16,78.94,78.94,78.94,78.94,10,0
2,PNJ,2/28/2018 9:19,78.14,78.14,78.14,78.14,283,0
3,PNJ,2/28/2018 9:20,78.14,78.14,78.14,78.14,480,0
4,PNJ,2/28/2018 9:21,78.14,78.14,78.14,78.14,146,0


In [3]:
df['Date/Time'] = pd.to_datetime(df['Date/Time'])
df.sort_values('Date/Time', inplace=True)

In [4]:
# dự đoán biến động giá cổ phiếu sau 1 phút
N = 1
df['Future Price'] = df['Close'].shift(-N)
df['Price Variation'] = df['Future Price'] - df['Close']
df.head()

Unnamed: 0,Ticker,Date/Time,Open,High,Low,Close,Volume,Open Interest,Future Price,Price Variation
0,PNJ,2018-02-28 09:15:00,78.14,78.99,78.14,78.99,270,0,78.94,-0.05
1,PNJ,2018-02-28 09:16:00,78.94,78.94,78.94,78.94,10,0,78.14,-0.8
2,PNJ,2018-02-28 09:19:00,78.14,78.14,78.14,78.14,283,0,78.14,0.0
3,PNJ,2018-02-28 09:20:00,78.14,78.14,78.14,78.14,480,0,78.14,0.0
4,PNJ,2018-02-28 09:21:00,78.14,78.14,78.14,78.14,146,0,78.09,-0.05


In [5]:
df.dropna(inplace=True)

In [6]:
features = ['Open', 'High', 'Low', 'Close', 'Volume']

# Chia dữ liệu thành tập huấn luyện và tập kiểm tra
X = df[features]
y = df['Price Variation']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2023)
#chuẩn hóa dữ liệu
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# tạo mô hình Linear Regression
model = LinearRegression()
model.fit(X_train, y_train)

In [8]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mse

0.37307784193232923

In [9]:
# tạo model neural network với keras

import tensorflow as tf
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=15, batch_size=32, verbose=1)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x1f5ed3a2700>

In [10]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mse

0.37323232519684063

In [11]:
# sử dụng randomforest
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()
model.fit(X_train, y_train)

In [12]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mse

0.39987995999925324