# Importing the Libraries

In [239]:
import lightningchart as lc
import random

lc.set_license('my-license-key')

import numpy as np
import pandas as pd
import time

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error
import tensorflow as tf
from keras import Model
from keras.layers import Input, Dense, Dropout
from keras.layers import LSTM

# Reading the Dataset

In [240]:
df = pd.read_csv('./gold_data.csv')

# Dataset Overview

In [241]:
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,12/30/2022,1826.20,1821.80,1832.40,1819.80,107.50K,0.01%
1,12/29/2022,1826.00,1812.30,1827.30,1811.20,105.99K,0.56%
2,12/28/2022,1815.80,1822.40,1822.80,1804.20,118.08K,-0.40%
3,12/27/2022,1823.10,1808.20,1841.90,1808.00,159.62K,0.74%
4,12/26/2022,1809.70,1805.80,1811.95,1805.55,,0.30%
...,...,...,...,...,...,...,...
2578,01/08/2013,1663.20,1651.50,1662.60,1648.80,0.13K,0.97%
2579,01/07/2013,1647.20,1657.30,1663.80,1645.30,0.09K,-0.16%
2580,01/04/2013,1649.90,1664.40,1664.40,1630.00,0.31K,-1.53%
2581,01/03/2013,1675.60,1688.00,1689.30,1664.30,0.19K,-0.85%


__As you can see, the data set includes daily gold price information including daily Open, High and Low prices and the final price of each day (Price) along with the volume of transactions and price changes in each day.__

__Dataset Basic Information:__

In [242]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2583 entries, 0 to 2582
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Date      2583 non-null   object
 1   Price     2583 non-null   object
 2   Open      2583 non-null   object
 3   High      2583 non-null   object
 4   Low       2583 non-null   object
 5   Vol.      2578 non-null   object
 6   Change %  2583 non-null   object
dtypes: object(7)
memory usage: 141.4+ KB


__All variables are stored as object.__

# Data Preparation

## Feature the Subset Selection
__Since we will not use Vol. and Change % features to predict Price, we will drop these two features:__

In [243]:
df.drop(['Vol.', 'Change %'], axis=1, inplace=True)

## Transforming the Data

__Date feature is stored as object in the data frame. To increase the speed of calculations, we convert it's data type to datetime and then sort this feature in ascending order:__

In [244]:
df['Date'] = pd.to_datetime(df['Date'])
df.sort_values(by='Date', ascending=True, inplace=True)
df.reset_index(drop=True, inplace=True)

__The " , " sign is redundant in the dataset. First, we remove it from the entire dataset and then change the data type of the numerical variables to float:__

In [245]:
NumCols = df.columns.drop(['Date'])
df[NumCols] = df[NumCols].replace({',': ''}, regex=True)
df[NumCols] = df[NumCols].astype('float64')

__Result:__

In [246]:
df.head()

Unnamed: 0,Date,Price,Open,High,Low
0,2013-01-02,1689.9,1675.8,1695.0,1672.1
1,2013-01-03,1675.6,1688.0,1689.3,1664.3
2,2013-01-04,1649.9,1664.4,1664.4,1630.0
3,2013-01-07,1647.2,1657.3,1663.8,1645.3
4,2013-01-08,1663.2,1651.5,1662.6,1648.8


## Checking Duplicates
__There are no duplicate samples in Date feature:__

In [9]:
df.duplicated().sum()

0

## Checking Missing Values
__There are no missing values in the dataset:__

In [10]:
df.isnull().sum().sum()

0

# Visualizing the Data Using LightningChart

In [290]:
# Using current time as origin to manage large timestamp values better
time_origin = time.time() * 1000  # current time in milliseconds

# Extracting x and y values
x_values = (df['Date'].astype('int64') / 10**6 - time_origin).tolist()  # milliseconds since the epoch, adjusted by origin
y_values = df['Price'].values.tolist()

# Creating a chart
chart = lc.ChartXY(
    theme=lc.Themes.CyberSpace,
    title='Gold Price History Data'
)

# Adding and configuring the x-axis for datetime display
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime', time_origin=time_origin)

# Adding a line series
series = chart.add_line_series()
series.add(x=x_values, y=y_values)

# Customizing axes titles
x_axis.set_title('Date')
chart.get_default_y_axis().set_title('Scaled Price')

# Opening the chart
chart.open()

127.0.0.1 - - [30/Jul/2024 10:13:16] "GET / HTTP/1.1" 200 -


In [289]:
# Initialize a 2D chart
chart = lc.ChartXY(
    theme=lc.Themes.CyberSpace, 
    title='Gold Price Analysis'
    )

# Dispose the default x-axis and create a new one suited for datetime
chart.get_default_x_axis().dispose()
axis_x = chart.add_x_axis(axis_type='linear-highPrecision')
axis_x.set_tick_strategy('DateTime')
axis_x.set_interval(start=df['Date'].min().timestamp() * 1000, end=df['Date'].max().timestamp() * 1000)

# Adding legend to the chart
legend = chart.add_legend()

# Add series for each stock parameter
params = ['Price', 'Open', 'High', 'Low']
for param in params:
    series = chart.add_line_series()
    series.set_line_thickness(1)
    x_values = df['Date'].apply(lambda x: x.timestamp() * 1000).tolist()
    y_values = df[param].tolist()
    series.add(x_values, y_values)
    # Hypothetical method to set series name
    if hasattr(series, 'set_name'):
        series.set_name(param)
    legend.add(series)

# Open the chart
chart.open()


127.0.0.1 - - [30/Jul/2024 10:13:09] "GET / HTTP/1.1" 200 -


In [151]:
# Initialize chart
chart = lc.ChartXY(
    theme=lc.Themes.CyberSpace, 
    title='Open and Price Comparison'
    )

# Extract data
x_values = df['Date'].apply(lambda x: x.timestamp() * 1000).tolist()
open_prices = df['Open'].tolist()
close_prices = df['Price'].tolist()

# Prepare series data
series_data = [open_prices, close_prices]
legend_labels = ['Open', 'Price']

# Add legend
legend = chart.add_legend()

# Add series
for i, data in enumerate(series_data):
    series = chart.add_area_series()
    series.add(x_values, data)
    series.set_name(legend_labels[i])
    legend.add(series)

# Axis configuration
axis_x = chart.get_default_x_axis()
axis_x.set_tick_strategy('DateTime')
axis_x.set_interval(start=min(x_values), end=max(x_values))

axis_y = chart.get_default_y_axis()
max_y_value = max([max(data) for data in series_data if data])
axis_y.set_interval(0, max_y_value + 10)

# Display the chart
chart.open()

127.0.0.1 - - [26/Jul/2024 11:51:28] "GET / HTTP/1.1" 200 -


In [214]:
# High and Low prices comparison in 2022
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month_name()

# Filter data for the year 2022
df_2022 = df[df['Year'] == 2022]

# Group by Month and get max and min values for 2022
monthly_stats = df_2022.groupby('Month').agg({
    'High': 'max',
    'Low': 'min'
}).reset_index()


# Initialize the bar chart with the title including the year range
chart = lc.BarChart(
    vertical=True,
    theme=lc.Themes.CyberSpace,
    title='High and Low Prices in 2022'
)

# Prepare data for the chart
months = monthly_stats['Month'].tolist()
high_prices = monthly_stats['High'].tolist()
low_prices = monthly_stats['Low'].tolist()

# Set grouped data
chart.set_data_grouped(
    months,
    [
        {'subCategory': 'High Price', 'values': high_prices},
        {'subCategory': 'Low Price', 'values': low_prices}
    ]
)

chart.set_sorting('descending')

# Adding legend for High Price and Low Price
legend = chart.add_legend()
legend.add(chart)

# Open the chart
chart.open()


127.0.0.1 - - [26/Jul/2024 13:24:36] "GET / HTTP/1.1" 200 -


In [255]:

df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month_name()

# Filter data for the years 2021 and 2022
df_2022 = df[df['Year'] == 2022]

monthly_stats_2022 = df_2022.groupby('Month').agg({
    'High': 'max',
    'Low': 'min'
}).reset_index()

# Define the correct order of months
months_order = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]

# Add a column to the DataFrames to use for sorting
monthly_stats_2022['MonthOrder'] = monthly_stats_2022['Month'].apply(lambda x: months_order.index(x))

# Sort the DataFrames based on the new column
monthly_stats_2022.sort_values('MonthOrder', inplace=True)

# Prepare data for the chart
months = months_order  # Using the full ordered list of months
high_prices_2022 = monthly_stats_2022['High'].tolist()
low_prices_2022 = monthly_stats_2022['Low'].tolist()

# Initialize the stacked bar chart with the title including the year range
chart = lc.BarChart(
    vertical=True,
    theme=lc.Themes.CyberSpace,
    title='High and Low Prices Comparison in 2022'
)

# Prepare the data for stacked bar chart
chart.set_data_stacked(
    months,
    [
        {'subCategory': '2022 High Price', 'values': high_prices_2022},
        {'subCategory': '2022 Low Price', 'values': low_prices_2022},
    ]
)

chart.set_sorting('descending')

# Adding legend for better readability
legend = chart.add_legend()
legend.add(chart)

# Open the chart
chart.open()


127.0.0.1 - - [26/Jul/2024 13:51:21] "GET / HTTP/1.1" 200 -


In [256]:

df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month_name()

# Filter data for the years 2021 and 2022
df_2021 = df[df['Year'] == 2021]
df_2022 = df[df['Year'] == 2022]

# Group by Month and get max and min values for 2021 and 2022
monthly_stats_2021 = df_2021.groupby('Month').agg({
    'High': 'max',
    'Low': 'min'
}).reset_index()

monthly_stats_2022 = df_2022.groupby('Month').agg({
    'High': 'max',
    'Low': 'min'
}).reset_index()

# Define the correct order of months
months_order = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]

# Add a column to the DataFrames to use for sorting
monthly_stats_2021['MonthOrder'] = monthly_stats_2021['Month'].apply(lambda x: months_order.index(x))
monthly_stats_2022['MonthOrder'] = monthly_stats_2022['Month'].apply(lambda x: months_order.index(x))

# Sort the DataFrames based on the new column
monthly_stats_2021.sort_values('MonthOrder', inplace=True)
monthly_stats_2022.sort_values('MonthOrder', inplace=True)

# Prepare data for the chart
months = months_order  # Using the full ordered list of months
high_prices_2021 = monthly_stats_2021['High'].tolist()
low_prices_2021 = monthly_stats_2021['Low'].tolist()
high_prices_2022 = monthly_stats_2022['High'].tolist()
low_prices_2022 = monthly_stats_2022['Low'].tolist()

# Initialize the bar chart with the title including the year range
chart = lc.BarChart(
    vertical=True,
    theme=lc.Themes.CyberSpace,
    title='High and Low Prices Comparison: 2021 vs 2022'
)

# Prepare the data for grouped bar chart
chart.set_data_grouped(
    months,
    [
        {'subCategory': '2021 High Price', 'values': high_prices_2021},
        {'subCategory': '2021 Low Price', 'values': low_prices_2021},
        {'subCategory': '2022 High Price', 'values': high_prices_2022},
        {'subCategory': '2022 Low Price', 'values': low_prices_2022},
    ]
)

chart.set_sorting('alphabetical')

# Adding legend for better readability
legend = chart.add_legend()
legend.add(chart)

# Open the chart
chart.open()


127.0.0.1 - - [26/Jul/2024 13:52:08] "GET / HTTP/1.1" 200 -


In [267]:

df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month_name()

# Filter data for the years 2013 to 2022
df_2013 = df[df['Year'] == 2013]
df_2014 = df[df['Year'] == 2014]
df_2015 = df[df['Year'] == 2015]
df_2016 = df[df['Year'] == 2016]
df_2017 = df[df['Year'] == 2017]
df_2018 = df[df['Year'] == 2018]
df_2019 = df[df['Year'] == 2019]
df_2020 = df[df['Year'] == 2020]
df_2021 = df[df['Year'] == 2021]
df_2022 = df[df['Year'] == 2022]

# Group by Month and get max and min values for each year
def get_monthly_stats(df_year):
    monthly_stats = df_year.groupby('Month').agg({
        'High': 'max',
        'Low': 'min'
    }).reset_index()
    # Ensure all months are present
    for month in months_order:
        if month not in monthly_stats['Month'].values:
            monthly_stats = monthly_stats.append({'Month': month, 'High': None, 'Low': None}, ignore_index=True)
    # Add MonthOrder for sorting
    monthly_stats['MonthOrder'] = monthly_stats['Month'].apply(lambda x: months_order.index(x))
    # Sort by MonthOrder
    monthly_stats.sort_values('MonthOrder', inplace=True)
    return monthly_stats

months_order = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]

monthly_stats_2013 = get_monthly_stats(df_2013)
monthly_stats_2014 = get_monthly_stats(df_2014)
monthly_stats_2015 = get_monthly_stats(df_2015)
monthly_stats_2016 = get_monthly_stats(df_2016)
monthly_stats_2017 = get_monthly_stats(df_2017)
monthly_stats_2018 = get_monthly_stats(df_2018)
monthly_stats_2019 = get_monthly_stats(df_2019)
monthly_stats_2020 = get_monthly_stats(df_2020)
monthly_stats_2021 = get_monthly_stats(df_2021)
monthly_stats_2022 = get_monthly_stats(df_2022)

# Prepare data for the chart
months = months_order  # Using the full ordered list of months
high_prices_2013 = monthly_stats_2013['High'].tolist()
low_prices_2013 = monthly_stats_2013['Low'].tolist()
high_prices_2014 = monthly_stats_2014['High'].tolist()
low_prices_2014 = monthly_stats_2014['Low'].tolist()
high_prices_2015 = monthly_stats_2015['High'].tolist()
low_prices_2015 = monthly_stats_2015['Low'].tolist()
high_prices_2016 = monthly_stats_2016['High'].tolist()
low_prices_2016 = monthly_stats_2016['Low'].tolist()
high_prices_2017 = monthly_stats_2017['High'].tolist()
low_prices_2017 = monthly_stats_2017['Low'].tolist()
high_prices_2018 = monthly_stats_2018['High'].tolist()
low_prices_2018 = monthly_stats_2018['Low'].tolist()
high_prices_2019 = monthly_stats_2019['High'].tolist()
low_prices_2019 = monthly_stats_2019['Low'].tolist()
high_prices_2020 = monthly_stats_2020['High'].tolist()
low_prices_2020 = monthly_stats_2020['Low'].tolist()
high_prices_2021 = monthly_stats_2021['High'].tolist()
low_prices_2021 = monthly_stats_2021['Low'].tolist()
high_prices_2022 = monthly_stats_2022['High'].tolist()
low_prices_2022 = monthly_stats_2022['Low'].tolist()

# Handle NaN values if any
def handle_nan_values(prices):
    return [0 if pd.isna(x) else x for x in prices]

high_prices_2013 = handle_nan_values(high_prices_2013)
low_prices_2013 = handle_nan_values(low_prices_2013)
high_prices_2014 = handle_nan_values(high_prices_2014)
low_prices_2014 = handle_nan_values(low_prices_2014)
high_prices_2015 = handle_nan_values(high_prices_2015)
low_prices_2015 = handle_nan_values(low_prices_2015)
high_prices_2016 = handle_nan_values(high_prices_2016)
low_prices_2016 = handle_nan_values(low_prices_2016)
high_prices_2017 = handle_nan_values(high_prices_2017)
low_prices_2017 = handle_nan_values(low_prices_2017)
high_prices_2018 = handle_nan_values(high_prices_2018)
low_prices_2018 = handle_nan_values(low_prices_2018)
high_prices_2019 = handle_nan_values(high_prices_2019)
low_prices_2019 = handle_nan_values(low_prices_2019)
high_prices_2020 = handle_nan_values(high_prices_2020)
low_prices_2020 = handle_nan_values(low_prices_2020)
high_prices_2021 = handle_nan_values(high_prices_2021)
low_prices_2021 = handle_nan_values(low_prices_2021)
high_prices_2022 = handle_nan_values(high_prices_2022)
low_prices_2022 = handle_nan_values(low_prices_2022)

# Initialize the stacked bar chart with the title including the year range
chart = lc.BarChart(
    vertical=True,
    theme=lc.Themes.CyberSpace,
    title='High and Low Prices Comparison: 2013 to 2022'
)

# Prepare the data for stacked bar chart
chart.set_data_stacked(
    months,
    [
        {'subCategory': '2013 High Price', 'values': high_prices_2013},
        {'subCategory': '2013 Low Price', 'values': low_prices_2013},
        {'subCategory': '2014 High Price', 'values': high_prices_2014},
        {'subCategory': '2014 Low Price', 'values': low_prices_2014},
        {'subCategory': '2015 High Price', 'values': high_prices_2015},
        {'subCategory': '2015 Low Price', 'values': low_prices_2015},
        {'subCategory': '2016 High Price', 'values': high_prices_2016},
        {'subCategory': '2016 Low Price', 'values': low_prices_2016},
        {'subCategory': '2017 High Price', 'values': high_prices_2017},
        {'subCategory': '2017 Low Price', 'values': low_prices_2017},
        {'subCategory': '2018 High Price', 'values': high_prices_2018},
        {'subCategory': '2018 Low Price', 'values': low_prices_2018},
        {'subCategory': '2019 High Price', 'values': high_prices_2019},
        {'subCategory': '2019 Low Price', 'values': low_prices_2019},
        {'subCategory': '2020 High Price', 'values': high_prices_2020},
        {'subCategory': '2020 Low Price', 'values': low_prices_2020},
        {'subCategory': '2021 High Price', 'values': high_prices_2021},
        {'subCategory': '2021 Low Price', 'values': low_prices_2021},
        {'subCategory': '2022 High Price', 'values': high_prices_2022},
        {'subCategory': '2022 Low Price', 'values': low_prices_2022},
    ]
)

chart.set_value_label_display_mode('hidden')
chart.set_sorting('disabled')

# Adding legend for better readability
legend = chart.add_legend()
legend.add(chart)

# Open the chart
chart.open()


127.0.0.1 - - [26/Jul/2024 14:08:14] "GET / HTTP/1.1" 200 -


In [285]:
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month_name()
df['Month_Number'] = df['Date'].dt.month

# Group by Year and Month, and get max and min values
monthly_stats = df.groupby(['Year', 'Month', 'Month_Number']).agg({
    'High': 'max',
    'Low': 'min'
}).reset_index().sort_values(by=['Year', 'Month_Number'])

# Initialize the bar chart
chart = lc.BarChart(
    vertical=True,
    theme=lc.Themes.CyberSpace,
    title='High and Low Prices by Month'
)

# Prepare month labels with year information
month_labels = [f"{row['Month']} {row['Year']}" for index, row in monthly_stats.iterrows()]

# Set grouped data
chart.set_data_grouped(
    month_labels,
    [
        {'subCategory': 'High Price', 'values': monthly_stats['High'].tolist()},
        {'subCategory': 'Low Price', 'values': monthly_stats['Low'].tolist()}
    ]
)

chart.set_sorting('descending')

# Adding legend
legend = chart.add_legend()
legend.add(chart)

# Open the chart
chart.open()


127.0.0.1 - - [26/Jul/2024 14:18:36] "GET / HTTP/1.1" 200 -


# Splitting Data to Training & Test Sets
__Since we cannot train on future data in time series data, we should not divide the time series data randomly. In time series splitting, testing set is always later than training set. We consider the last year for testing and everything else for training:__

In [12]:
test_size = df[df.Date.dt.year==2022].shape[0]
test_size

260

__Gold Price Training and Test Sets Plot:__

In [288]:
# Assuming df is already preprocessed and sorted by date
test_size = df[df.Date.dt.year == 2022].shape[0]

# Calculate the timestamp origin for datetime conversion
time_origin = time.time() * 1000  # current time in milliseconds

# Convert dates to timestamps and adjust by the origin
x_values = (df['Date'].astype('int64') / 10**6 - time_origin).tolist()
y_values = df['Price'].values.tolist()

# Creating a chart
chart = lc.ChartXY(
    theme=lc.Themes.CyberSpace,
    title='Gold Price Training and Test Sets'
)

# Adding and configuring the x-axis for datetime display
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime', time_origin=time_origin)  # Configuring datetime ticks
x_axis.set_title('Date')
y_axis = chart.get_default_y_axis()
y_axis.set_title('Price')

# Add line series for training set
train_series = chart.add_line_series()
train_series.add(x=x_values[:-test_size], y=y_values[:-test_size])
train_series.set_name('Training Set')

# Add line series for test set
test_series = chart.add_line_series()
test_series.add(x=x_values[-test_size:], y=y_values[-test_size:])
test_series.set_name('Test Set')

# Add legend to the chart
legend = chart.add_legend()
legend.add(train_series)
legend.add(test_series)

# Open the chart
chart.open()


127.0.0.1 - - [30/Jul/2024 10:12:51] "GET / HTTP/1.1" 200 -


# Data Scaling
__Since we aim to predict Price only based on its historical data, we scale Price using MinMaxScaler to avoid intensive computations:__

In [35]:
scaler = MinMaxScaler()
scaler.fit(df.Price.values.reshape(-1,1))

# Restructure Data & Create Sliding Window
__The use of prior time steps to predict the next time step is called sliding window. In this way, time series data can be expressed as supervised learning. We can do this by using previous time steps as input variables and use the next time step as the output variable. The number of previous time steps is called the window width. Here we set window width to 60. Therefore, X_train and X_test will be nested lists containing lists of 60 time-stamp prices. y_train and y_test are also lists of gold prices containing the next day's gold price corresponds to each list in X_train and X_test respectively:__

In [36]:
window_size = 60

__Training Set:__

In [37]:
train_data = df.Price[:-test_size]
train_data = scaler.transform(train_data.values.reshape(-1,1))

In [38]:
X_train = []
y_train = []

for i in range(window_size, len(train_data)):
    X_train.append(train_data[i-60:i, 0])
    y_train.append(train_data[i, 0])

__Test Set:__

In [39]:
test_data = df.Price[-test_size-60:]
test_data = scaler.transform(test_data.values.reshape(-1,1))

In [40]:
X_test = []
y_test = []

for i in range(window_size, len(test_data)):
    X_test.append(test_data[i-60:i, 0])
    y_test.append(test_data[i, 0])

# Converting Data to Numpy Arrays

__Now X_train and X_test are nested lists (two-dimensional lists) and y_train is a one-dimensional list. We need to convert them to numpy arrays with a higher dimension, which is the data format accepted by TensorFlow when training the neural network:__

In [41]:
X_train = np.array(X_train)
X_test  = np.array(X_test)
y_train = np.array(y_train)
y_test  = np.array(y_test)

In [43]:
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test  = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
y_train = np.reshape(y_train, (-1,1))
y_test  = np.reshape(y_test, (-1,1))

In [45]:
print('X_train Shape: ', X_train.shape)
print('y_train Shape: ', y_train.shape)
print('X_test Shape:  ', X_test.shape)
print('y_test Shape:  ', y_test.shape)

X_train Shape:  (2263, 60, 1)
y_train Shape:  (2263, 1)
X_test Shape:   (260, 60, 1)
y_test Shape:   (260, 1)


# Creating an LSTM Network 
__We build an LSTM network, which is a type of Recurrent Neural Networks designed to solve vanishing gradient problem:__

__Model Definition:__

In [46]:
def define_model():
    input1 = Input(shape=(window_size,1))
    x = LSTM(units = 64, return_sequences=True)(input1)  
    x = Dropout(0.2)(x)
    x = LSTM(units = 64, return_sequences=True)(x)
    x = Dropout(0.2)(x)
    x = LSTM(units = 64)(x)
    x = Dropout(0.2)(x)
    x = Dense(32, activation='softmax')(x)
    dnn_output = Dense(1)(x)

    model = Model(inputs=input1, outputs=[dnn_output])
    model.compile(loss='mean_squared_error', optimizer='Nadam')
    model.summary()
    
    return model

__Model Training:__

In [47]:
model = define_model()
history = model.fit(X_train, y_train, epochs=150, batch_size=32, validation_split=0.1, verbose=1)

Epoch 1/150
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 64ms/step - loss: 0.0746 - val_loss: 0.0712
Epoch 2/150
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 56ms/step - loss: 0.0127 - val_loss: 0.0311
Epoch 3/150
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 56ms/step - loss: 0.0081 - val_loss: 0.0131
Epoch 4/150
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 56ms/step - loss: 0.0047 - val_loss: 0.0054
Epoch 5/150
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 56ms/step - loss: 0.0035 - val_loss: 0.0024
Epoch 6/150
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 57ms/step - loss: 0.0027 - val_loss: 0.0023
Epoch 7/150
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 58ms/step - loss: 0.0020 - val_loss: 0.0031
Epoch 8/150
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 57ms/step - loss: 0.0015 - val_loss: 0.0053
Epoch 9/150
[1m64/64[0m [32m━━━━━━━━

# Model Evaluation
__Next, we evaluate our time series forecast using MAPE (Mean Absolute Percentage Error) metric:__

In [48]:
result = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test) 

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 8.9724e-04
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 93ms/step


In [49]:
MAPE = mean_absolute_percentage_error(y_test, y_pred)
Accuracy = 1 - MAPE

In [50]:
print("Test Loss:", result)
print("Test MAPE:", MAPE)
print("Test Accuracy:", Accuracy)

Test Loss: 0.0011404815595597029
Test MAPE: 0.03790873037787999
Test Accuracy: 0.96209126962212


# Visualizing the Final Result Using LightningChart

__Returning the actual and predicted Price values to their primary scale:__

In [52]:
y_test_true = scaler.inverse_transform(y_test)
y_test_pred = scaler.inverse_transform(y_pred)

__Investigating the closeness of the prices predicted by the model to the actual prices:__

In [287]:
from datetime import datetime

# Setup chart
chart = lc.ChartXY(
    theme=lc.Themes.CyberSpace,
    title='Model Performance on Gold Price Prediction'
)

# Create x-axis with DateTime formatting
time_origin = datetime.now().timestamp() * 1000  # Using current time as origin
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime', time_origin=time_origin)
x_axis.set_title('Date')

# Create y-axis
y_axis = chart.get_default_y_axis()
y_axis.set_title('Price')

# Convert dates to timestamps for plotting
date_timestamps = df['Date'].apply(lambda x: x.timestamp() * 1000 - time_origin).tolist()

# Inverse transform the data
train_prices = scaler.inverse_transform(train_data)
y_test_true_prices = scaler.inverse_transform(y_test)
y_test_pred_prices = scaler.inverse_transform(y_pred)

# Add line series for training data
train_series = chart.add_line_series()
train_series.add(x=date_timestamps[:-test_size], y=train_prices.flatten().tolist())
train_series.set_name('Training Data')

# Add line series for actual test data
test_true_series = chart.add_line_series()
test_true_series.add(x=date_timestamps[-test_size:], y=y_test_true_prices.flatten().tolist())
test_true_series.set_name('Actual Test Data')

# Add line series for predicted test data
test_pred_series = chart.add_line_series()
test_pred_series.add(x=date_timestamps[-test_size:], y=y_test_pred_prices.flatten().tolist())
test_pred_series.set_name('Predicted Test Data')

# Customize the appearance
chart.add_legend().add(train_series).add(test_true_series).add(test_pred_series)

# Open the chart
chart.open()


127.0.0.1 - - [30/Jul/2024 10:12:38] "GET / HTTP/1.1" 200 -


____
# Conclusion:
### As can be seen, the price predicted by the LSTM model follows the actual prices greatly! The value of Loss and Accuracy (1-MAPE) obtained on the test data also confirm the great performance of the model:

## 🏆 Loss: 0.001
## 🏆 Accuracy: 96%