# Task 0: Import libraries and datasets

In [None]:
import pandas as pd

In [None]:
# Load the FPT ticker data from CSV and preview the first rows.
fpt_df = pd.read_csv('data/FPT.csv')
fpt_df.head()

In [None]:
# Load the MSN ticker data from CSV and preview the first rows.
msn_df = pd.read_csv('data/MSN.csv')
msn_df.head()

In [None]:
# Load the PNJ ticker data from CSV and preview the first rows.
pnj_df = pd.read_csv('data/PNJ.csv')
pnj_df.head()

In [None]:
# Load the VIC ticker data from CSV and preview the first rows.
vic_df = pd.read_csv('data/VIC.csv')
vic_df.head()

# Task 1. Data preprocessing

In [None]:
# Summary statistics of the FPT data (only this frame is inspected here).
fpt_df.describe()

In [None]:
# Print the column dtypes and non-null counts of the FPT data.
fpt_df.info()

Check null values:

In [None]:
# Boolean mask with True wherever an FPT cell is missing.
nan_values = fpt_df.isna()
# Collapse the mask per column: True means the column has at least one NaN.
# Only the FPT frame is checked; the other tickers are not covered here.
nan_columns = nan_values.any()
nan_columns

Good news: the FPT dataset does not contain any null values.

# Task 2. Data visualisation

In [None]:
import matplotlib.pyplot as plt

# Overlay the 'Open' column of all four tickers on a single axis.
# Each series is plotted against its own index; no explicit x values are given.
fig, ax = plt.subplots(figsize=(15, 10))
for ticker, frame in (("FPT", fpt_df), ("MSN", msn_df), ("PNJ", pnj_df), ("VIC", vic_df)):
    ax.plot(frame['Open'], label=ticker)
ax.set_title('Open')
ax.set_xlabel('Date/Time')
plt.legend()

In [None]:
import matplotlib.pyplot as plt

# Overlay the 'Close' column of all four tickers on a single axis.
# Each series is plotted against its own index; no explicit x values are given.
fig, ax = plt.subplots(figsize=(15, 10))
for ticker, frame in (("FPT", fpt_df), ("MSN", msn_df), ("PNJ", pnj_df), ("VIC", vic_df)):
    ax.plot(frame['Close'], label=ticker)
ax.set_title('Close')
ax.set_xlabel('Date/Time')
plt.legend()

In [None]:
import matplotlib.pyplot as plt

# Overlay the 'Volume' column of all four tickers on a single axis.
# Each series is plotted against its own index; no explicit x values are given.
fig, ax = plt.subplots(figsize=(15, 10))
for ticker, frame in (("FPT", fpt_df), ("MSN", msn_df), ("PNJ", pnj_df), ("VIC", vic_df)):
    ax.plot(frame['Volume'], label=ticker)
ax.set_title('Volume')
ax.set_xlabel('Date/Time')
plt.legend()

Based on the charts above, we are going to use the MSN stock data to predict the stock price.

# Task 3. Predict stock price based on the MSN stock data.

In [None]:
# Display the MSN frame that the prediction task is built on.
msn_df

In [None]:
# Extract the column at position 2 as a NumPy array. The 2:3 slice (rather
# than a bare 2) keeps the column axis, so the result is 2-D: (n_rows, 1).
# NOTE(review): position 2 is presumably the 'Open' column -- confirm.
train = msn_df.iloc[:, 2:3].values
train

In [None]:
# Check the array shape; expected to be (n_rows, 1) given the 2:3 slice.
train.shape

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale the prices into [0, 1]. fit_transform learns the min/max from every
# row of `train`, and the same fitted scaler is reused later for the test
# inputs and for mapping predictions back to prices.
scaler = MinMaxScaler(feature_range=(0,1))
scaled = scaler.fit_transform(train)

In [None]:
# Inspect the scaled values.
scaled

In [None]:
import numpy as np

# Number of consecutive past values that form one input sample.
WINDOW = 60

# Exclusive upper row bound for the training samples. The value 108283 was
# hard-coded in the original loop; it is clamped to the data length so that a
# shorter dataset no longer fails with IndexError at `scaled[i, 0]`.
# NOTE(review): 108283 is presumably a train/test split point -- confirm.
train_end = min(108283, len(scaled))
if train_end <= WINDOW:
    raise ValueError(
        f"Need more than {WINDOW} rows to build training windows, got {len(scaled)}"
    )

# Sample k holds rows [i - WINDOW, i) and its target is row i, so the model
# learns to predict the next value from the previous WINDOW values.
X_train = np.array([scaled[i - WINDOW:i, 0] for i in range(WINDOW, train_end)])
y_train = np.array([scaled[i, 0] for i in range(WINDOW, train_end)])

In [None]:
# Shape of the input windows: (n_samples, 60) at this point.
X_train.shape

In [None]:
# Shape of the targets: one scalar per sample, (n_samples,).
y_train.shape

In [None]:
# Append a trailing axis of size 1 so the array becomes
# (n_samples, n_steps, 1), matching the input_shape given to the first LSTM.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_train.shape

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [None]:
model = Sequential()

# Four stacked LSTM layers, each followed by 20% dropout. The first three
# return the full sequence so the next LSTM receives one vector per time step.
model.add(LSTM(units=512, return_sequences=True, input_shape=(X_train.shape[1],1)))
model.add(Dropout(0.2))

model.add(LSTM(units=512, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(units=512, return_sequences=True))
model.add(Dropout(0.2))

# The last LSTM returns only its final state, giving one vector per sample.
model.add(LSTM(units=128, return_sequences=False))
model.add(Dropout(0.2))

# Output layer: a single unit, because each target in y_train is one scalar
# (the next scaled price). The previous 512-unit output produced 512 values
# per sample, which does not match the target or the one-feature scaler used
# to invert the predictions.
model.add(Dense(units=1))

In [None]:
# Configure training with the Adam optimizer and mean-squared-error loss.
model.compile(optimizer='adam', loss='mean_squared_error')
# Train for 5 passes over the windows, in mini-batches of 42 samples.
model.fit(X_train, y_train, epochs=5, batch_size=42)

In [None]:
# Unscaled MSN values from column position 2, kept 2-D as (n_rows, 1); used
# as the reference line in the final plot.
actual = msn_df.iloc[:, 2:3].values

In [None]:
# Build the windows that are fed to the trained model for prediction.
#
# `train` is a NumPy array (it was created with `.values`), so indexing it with
# 'Open' raises IndexError; the MSN 'Open' column is read from `msn_df` instead.
# NOTE(review): the PNJ and VIC prices are appended after the MSN prices and
# only the last len(pnj_df) + 60 rows are kept, so these windows are built from
# non-MSN data while the scaler was fitted on MSN and the final plot compares
# against MSN -- confirm that this is the intended test set.
dataset_total = pd.concat((msn_df['Open'], pnj_df['Open'], vic_df['Open']), axis=0)
raw_inputs = dataset_total[len(dataset_total) - len(pnj_df) - 60:].values

# The scaler expects a 2-D (n_rows, 1) array. The result is stored under a
# name that does not shadow the builtin `input`.
raw_inputs = raw_inputs.reshape(-1, 1)
scaled_inputs = scaler.transform(raw_inputs)

# Keep the original upper bound but clamp it to the available rows: slicing
# past the end would silently yield shorter windows and a ragged array.
test_end = min(108283, len(scaled_inputs))
X_test = np.array([scaled_inputs[i - 60:i, 0] for i in range(60, test_end)])
# Add the trailing feature axis: (n_samples, 60, 1).
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [None]:
# Run the trained model on the test windows.
predict = model.predict(X_test)
# Map the scaled outputs back to the original value range using the same
# scaler that was fitted on the training column.
predict = scaler.inverse_transform(predict)
predict

In [None]:
# Compare the real values with the model output. The two lines get different
# colours and labels (both were red and labelled 'Actual' before), and the
# legend is drawn so the labels are actually shown.
plt.plot(actual, color='red', label='Actual')
plt.plot(predict, color='blue', label='Predicted')
plt.title('Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()