In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

In [17]:
df = pd.read_csv('MSN.csv')
df.head()

Unnamed: 0,Ticker,Date/Time,Open,High,Low,Close,Volume,Open Interest
0,MSN,12/25/2017 9:15,73.1,73.1,73.1,73.1,4210,0
1,MSN,12/25/2017 9:16,73.0,73.0,73.0,73.0,5000,0
2,MSN,12/25/2017 9:18,73.5,73.5,73.5,73.5,210,0
3,MSN,12/25/2017 9:20,73.2,73.5,73.1,73.1,2050,0
4,MSN,12/25/2017 9:21,73.0,73.0,73.0,73.0,1380,0


In [18]:
df['Date/Time'] = pd.to_datetime(df['Date/Time'])
df.sort_values('Date/Time', inplace=True)

In [20]:
df.head()

Unnamed: 0,Ticker,Date/Time,Open,High,Low,Close,Volume,Open Interest
0,MSN,2017-12-25 09:15:00,73.1,73.1,73.1,73.1,4210,0
1,MSN,2017-12-25 09:16:00,73.0,73.0,73.0,73.0,5000,0
2,MSN,2017-12-25 09:18:00,73.5,73.5,73.5,73.5,210,0
3,MSN,2017-12-25 09:20:00,73.2,73.5,73.1,73.1,2050,0
4,MSN,2017-12-25 09:21:00,73.0,73.0,73.0,73.0,1380,0


In [21]:
# dự đoán biến động giá cổ phiếu sau 1 phút
N = 1
df['Future Price'] = df['Close'].shift(-N)
df['Price Variation'] = df['Future Price'] - df['Close']
df.head()

Unnamed: 0,Ticker,Date/Time,Open,High,Low,Close,Volume,Open Interest,Future Price,Price Variation
0,MSN,2017-12-25 09:15:00,73.1,73.1,73.1,73.1,4210,0,73.0,-0.1
1,MSN,2017-12-25 09:16:00,73.0,73.0,73.0,73.0,5000,0,73.5,0.5
2,MSN,2017-12-25 09:18:00,73.5,73.5,73.5,73.5,210,0,73.1,-0.4
3,MSN,2017-12-25 09:20:00,73.2,73.5,73.1,73.1,2050,0,73.0,-0.1
4,MSN,2017-12-25 09:21:00,73.0,73.0,73.0,73.0,1380,0,73.1,0.1


In [22]:
df.dropna(inplace=True)

In [23]:
features = ['Open', 'High', 'Low', 'Close', 'Volume']

# Chia dữ liệu thành tập huấn luyện và tập kiểm tra
X = df[features]
y = df['Price Variation']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2023)
#chuẩn hóa dữ liệu
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [24]:
# tạo mô hình Linear Regression
model = LinearRegression()
model.fit(X_train, y_train)

In [25]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mse

0.04599983844868949

In [26]:
# tạo model neural network với keras

import tensorflow as tf
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=15, batch_size=32, verbose=1)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x156ca9a2f70>

In [27]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mse

0.05173488530457273

In [28]:
# sử dụng randomforest
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()
model.fit(X_train, y_train)

In [29]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mse

0.05261427770072671