# Finpros Online Testing PNJ

Author: Duong Huu Khoi\
Date: 24/02/2024\
Version: 5.0

## Import Libraries and Get Dataset 

In [None]:
# Import Libraries
import math
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential 
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [None]:
# Load the PNJ data from its CSV export and preview the first rows
csv_path = 'PNJ.csv'
df = pd.read_csv(csv_path)
df.head()

## Preparing Dataset

In [None]:
#Report the dataframe dimensions and whether any cell is missing
has_nulls = df.isnull().values.any()
print('Dataframe shape:',df.shape)
print('Null values:',has_nulls)

In [None]:
#Use the 'Date/Time' column as the dataframe index so rows are addressed by timestamp
df = df.set_index('Date/Time')

In [None]:
#Display the index right after moving 'Date/Time' into it
df.index

In [None]:
#Cast the index to datetime type instead of object
#The time-based resample() call later in the notebook relies on a datetime index
df.index = pd.to_datetime(df.index)
#Display the index again to confirm the conversion
df.index

In [None]:
#Plot the Close price against the Date/Time index
#The title names PNJ, the ticker this notebook loads from PNJ.csv
df['Close'].plot(style = '.', figsize =  (30,10), title = 'PNJ Stock')
#Call show() -- the bare name `plt.show` only references the function without running it
plt.show()

As we can see, there are gaps in the graph even though the null check above found no missing values. We can assume these gaps fall outside trading hours, so each price stays the same as at the previous timestamp and the trading volume is 0.

In [None]:
#Reindex onto a regular one-minute grid; timestamps absent from the data become NaN rows
minute_bins = df.resample('min')
df_resample = minute_bins.asfreq()
print('Dataframe resample shape:',df_resample.shape)

In [None]:
#Repeat the null check on the resampled dataframe
print('Null values:',df_resample.isnull().values.any())

In [None]:
#Forward-fill Open, Low, High, Close and Ticker from the previous timestamp
cols = ['Open', 'Low','High','Close','Ticker']
#ffill() copies the last known value of each column into the NaN rows that follow it
df_resample.loc[:,cols] = df_resample.loc[:,cols].ffill()

In [None]:
#Display the resampled dataframe to inspect the rows after forward-filling
df_resample

In [None]:
#No trades happen in the inserted rows, so their Volume and Open Interest are set to 0
for zero_col in ('Volume', 'Open Interest'):
    df_resample[zero_col] = df_resample[zero_col].fillna(0)

In [None]:
#Count the remaining NaN values in each column
df_resample.isna().sum()

## Build Model

In [None]:
#Keep only the Close column as the series to model
data = df_resample.filter(items=['Close'])
#Pull out the underlying numpy array
dataset = data.to_numpy()
#The first 80% of the rows (rounded up) form the training set
training_data_len = math.ceil(dataset.shape[0] * .8)
training_data_len

In [None]:
#Scale the data
#Fit the scaler on the training rows only, so the min/max of the held-out
#period does not leak into the features the model is trained on
scaler = MinMaxScaler(feature_range = (0,1))
scaler.fit(dataset[:training_data_len])
#Apply the training-derived scaling to the whole series; held-out values
#may therefore fall slightly outside the (0,1) range
scaled_data = scaler.transform(dataset)
scaled_data

In [None]:
#Take the training portion of the scaled series
train_data = scaled_data[:training_data_len, :]
#Each sample pairs the 60 preceding values (x) with the value that follows them (y)
window_ends = range(60, len(train_data))
x_train = [train_data[end - 60:end, 0] for end in window_ends]
y_train = [train_data[end, 0] for end in window_ends]

In [None]:
#Turn the lists of windows and targets into numpy arrays
x_train = np.array(x_train)
y_train = np.array(y_train)

In [None]:
#Add a trailing feature axis of size 1 so x_train becomes (samples, timesteps, 1)
train_rows, train_steps = x_train.shape[:2]
x_train = x_train.reshape((train_rows, train_steps, 1))
x_train.shape

In [None]:
#Build the LSTM model: two stacked 50-unit LSTM layers followed by two dense layers
lstm_input_shape = (x_train.shape[1], 1)
model = Sequential([
    LSTM(50, return_sequences = True, input_shape = lstm_input_shape),
    LSTM(50, return_sequences = False),
    Dense(25),
    Dense(1),
])

In [None]:
#Compile the model with the Adam optimizer and mean squared error as the training loss
model.compile(optimizer = 'adam', loss = 'mean_squared_error')

In [None]:
#Train the model on the training windows for 2 epochs in batches of 200 samples
model.fit(x_train, y_train, batch_size = 200, epochs =2)

In [None]:
#Start 60 rows before the split so the first test target has a full input window
test_data = scaled_data[training_data_len - 60:, :]
#Targets are the unscaled values after the split
y_test = dataset[training_data_len:, :]
#One 60-value window of scaled data per target
x_test = [test_data[end - 60:end, 0] for end in range(60, len(test_data))]

In [None]:
#Convert the list of test windows to a numpy array
x_test = np.array(x_test)
#Display the resulting shape
x_test.shape

In [None]:
#Add a trailing feature axis of size 1 so x_test becomes (samples, timesteps, 1)
test_rows, test_steps = x_test.shape[:2]
x_test = x_test.reshape((test_rows, test_steps, 1))
x_test.shape

In [None]:
#Run the model on the test windows, then map its scaled outputs back to price units
scaled_predictions = model.predict(x_test)
predictions = scaler.inverse_transform(scaled_predictions)

In [None]:
#Evaluate the model
#Get RMSE: square each residual first, then average, then take the square root.
#Squaring after np.mean would let positive and negative errors cancel out and
#report |mean error| instead of the root mean squared error.
rmse = np.sqrt(np.mean((predictions - y_test) ** 2))
print('R2 Score: ',r2_score(y_test, predictions))
print('RMSE score:',rmse)

In [None]:
#Plot the data
#Rows before the split were used for training; rows after it are the hold-out set
train = data[:training_data_len]
#Copy the slice so adding a column does not write into a view of `data`
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions
#Visualize the data
plt.figure(figsize = (30,10))
plt.title('Model')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.plot(train['Close'])
plt.plot(valid[['Close','Predictions']])
plt.legend(['Train' , 'Val', 'Predictions'], loc = 'lower right')
#Call show() -- the bare name `plt.show` only references the function without running it
plt.show()

In [None]:
def sliding_window(dataset, time, n_steps=60):
    """Forecast `time` steps ahead and print the resulting price change.

    The forecast is recursive: the model predicts one step from the last
    `n_steps` values, the prediction is appended to the window, the oldest
    value is dropped, and the process repeats (stride 1).

    Relies on the module-level `model` (for `predict`) and `scaler`
    (for `inverse_transform`).

    Args:
        dataset: scaled values, indexed as a 2-D array with the series in
            column 0 (assumed to be shaped (rows, 1), as the caller in this
            file passes it -- confirm for other callers).
        time: number of future steps to predict; must be positive.
        n_steps: length of the input window fed to the model. Must match the
            window length the model was trained with (60 in this notebook).

    Raises:
        ValueError: if `time` or `n_steps` is not positive, or `dataset` has
            fewer than `n_steps` rows.
    """
    if n_steps < 1:
        raise ValueError("n_steps must be at least 1")
    if time <= 0:
        raise ValueError("time must be a positive number of steps")
    if len(dataset) < n_steps:
        raise ValueError("dataset must contain at least n_steps rows")

    #Seed the window with the latest n_steps real values (all of them, not
    #just the first one), so every prediction sees a full, up-to-date window
    window = [float(value) for value in np.asarray(dataset)[-n_steps:, 0]]

    #Predict one step at a time, sliding the window forward by one each round
    lst_output = []
    while len(lst_output) < time:
        future_inp = np.array(window).reshape((1, n_steps, 1))
        yhat = model.predict(future_inp, verbose=0)
        #Assumes the model emits a single value per sample -- TODO confirm
        #if the output layer ever changes
        next_value = float(yhat[0][0])
        #Slide: append the new prediction and drop the oldest value
        window.append(next_value)
        del window[0]
        lst_output.append([next_value])

    #Map the scaled forecasts back to price units before comparing
    lst_output = scaler.inverse_transform(lst_output)
    print("The change in stock price after", time,"minutes is:",lst_output[-1]-scaler.inverse_transform(dataset)[-1])

In [None]:
#Forecast 120 steps ahead from the end of the scaled test data and print the price change
sliding_window(test_data,120)