In [1]:
import lightningchart as lc
import random
lc.set_license('my-license-key')

import numpy as np
import pandas as pd

import math
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM


In [2]:
# Import dataset
df_googl = pd.read_csv('./Alphabet Inc - Class A (GOOGL).csv')
df_googl.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2004-08-19,2.508,2.61,2.406,2.516,44659000
1,2004-08-20,2.533,2.735,2.52,2.716,22834300
2,2004-08-23,2.778,2.846,2.735,2.743,18256100
3,2004-08-24,2.79,2.799,2.597,2.63,15247300
4,2004-08-25,2.627,2.708,2.605,2.658,9188600


In [3]:
# Rename columns
df_googl.rename(columns={"Date":"date","Open":"open","High":"high","Low":"low","Close":"close"}, inplace= True)
df_googl.head()

Unnamed: 0,date,open,high,low,close,Volume
0,2004-08-19,2.508,2.61,2.406,2.516,44659000
1,2004-08-20,2.533,2.735,2.52,2.716,22834300
2,2004-08-23,2.778,2.846,2.735,2.743,18256100
3,2004-08-24,2.79,2.799,2.597,2.63,15247300
4,2004-08-25,2.627,2.708,2.605,2.658,9188600


In [4]:
# convert date field from string to Date format and make it index
df_googl['date'] = pd.to_datetime(df_googl.date)
df_googl.head()

Unnamed: 0,date,open,high,low,close,Volume
0,2004-08-19,2.508,2.61,2.406,2.516,44659000
1,2004-08-20,2.533,2.735,2.52,2.716,22834300
2,2004-08-23,2.778,2.846,2.735,2.743,18256100
3,2004-08-24,2.79,2.799,2.597,2.63,15247300
4,2004-08-25,2.627,2.708,2.605,2.658,9188600


In [5]:
# Sorting dataset by date format
df_googl.sort_values(by='date', inplace=True)
df_googl.head()

Unnamed: 0,date,open,high,low,close,Volume
0,2004-08-19,2.508,2.61,2.406,2.516,44659000
1,2004-08-20,2.533,2.735,2.52,2.716,22834300
2,2004-08-23,2.778,2.846,2.735,2.743,18256100
3,2004-08-24,2.79,2.799,2.597,2.63,15247300
4,2004-08-25,2.627,2.708,2.605,2.658,9188600


In [6]:
df_googl.shape

(4968, 6)

In [7]:
# Get the duration of dataset
# Setting specific start and end dates
specified_start_date = pd.to_datetime('2020-01-01')
specified_end_date = pd.to_datetime('2024-05-14')

# Filter the DataFrame to include only data within the specified date range
filtered_df = df_googl[(df_googl['date'] >= specified_start_date) & (df_googl['date'] <= specified_end_date)]

# Print the actual start and end date from the filtered DataFrame
actual_start_date = filtered_df['date'].iloc[0]
actual_end_date = filtered_df['date'].iloc[-1]

print("Actual starting date: ", actual_start_date)
print("Actual ending date: ", actual_end_date)
print("Duration: ", actual_end_date - actual_start_date)


Actual starting date:  2020-01-02 00:00:00
Actual ending date:  2024-05-14 00:00:00
Duration:  1594 days 00:00:00


In [8]:
# Monthvise comparision between open and close price
monthvise= filtered_df.groupby(filtered_df['date'].dt.strftime('%B'))[['open','close']].mean().sort_values(by='close')
monthvise.head()

Unnamed: 0_level_0,open,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1
June,106.549814,106.650419
March,106.775658,106.912838
January,106.929069,107.117842
July,109.155,109.40641
February,110.76851,110.740146


In [9]:
# Group the data by year and month and calculate the average open and close prices
filtered_df.loc[:, 'year_month'] = filtered_df['date'].dt.to_period('M')
monthwise = filtered_df.groupby('year_month')[['open', 'close']].mean().reset_index()

# Extract year-month names, open prices, and close prices
months = monthwise['year_month'].dt.strftime('%Y-%m').tolist()
opens = monthwise['open'].tolist()
closes = monthwise['close'].tolist()

# Initialize the bar chart
chart = lc.BarChart(
    vertical=True,
    theme=lc.Themes.White,
    title='Stacked Bar Chart: Monthwise Comparison of Open and Close Prices'
)

# Set data for the chart using the aggregated values
chart.set_data_stacked(
    months,
    [
        {'subCategory': 'Open Price', 'values': opens},
        {'subCategory': 'Close Price', 'values': closes}
    ]
)

chart.set_value_label_display_mode('hidden')  # Hides value labels
chart.add_legend().add(chart)

# Open the chart
chart.open()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.loc[:, 'year_month'] = filtered_df['date'].dt.to_period('M')


In [10]:

# Sample DataFrame assuming 'filtered_df' with 'date', 'open', and 'close' columns
filtered_df = pd.DataFrame({
    'date': pd.date_range(start='2020-01-01', periods=2000, freq='D'),
    'open': np.random.rand(2000) * 100,
    'close': np.random.rand(2000) * 100
})

# Define the date range
start_date = '2020-01-01'
end_date = '2024-05-14'

# Filter the DataFrame to include only data within the specified date range
filtered_df = filtered_df[(filtered_df['date'] >= start_date) & (filtered_df['date'] <= end_date)]

# Convert dates to timestamps for x-axis
filtered_df['Timestamp'] = filtered_df['date'].apply(lambda x: x.timestamp() * 1000)

# Initialize and configure the chart
chart = lc.ChartXY(
    theme=lc.Themes.White,
    title='Area Chart: Open and Close Prices'
)

# Define the x values (timestamps) and y values (open and close prices)
x_values = filtered_df['Timestamp'].tolist()
open_prices = filtered_df['open'].tolist()
close_prices = filtered_df['close'].tolist()

# Prepare series data for open and close prices
series_data = [open_prices, close_prices]

legend_labels = ['Open Prices', 'Close Prices']  # Labels for the legend

# Add the legend
legend = chart.add_legend()

for i, data in enumerate(series_data):
    series = chart.add_area_series()
    if i > 0:
        # Offset the subsequent series by the previous series
        data = [data[j] + series_data[i-1][j] for j in range(len(data))]
    series.add(x_values, data)
    series.set_name(legend_labels[i])  # Set the name for each series in the legend
    legend.add(series)  # Add series to the legend

# Configure the x-axis to display datetime
axis_x = chart.get_default_x_axis()
axis_x.set_tick_strategy('DateTime')

# Configure the y-axis to start from 0
max_y_value = max(max(open_prices), max(close_prices))  # Calculate the maximum y value from the data
axis_y = chart.get_default_y_axis()
axis_y.set_interval(0, max_y_value + 10)  # Setting minimum to 0 and slightly above the max value for padding

# Open the chart
chart.open()


127.0.0.1 - - [04/Jun/2024 13:37:29] "GET / HTTP/1.1" 200 -


In [11]:

# Filter the DataFrame to include only data within the specified date range
filtered_df = df_googl[(df_googl['date'] >= start_date) & (df_googl['date'] <= end_date)]

# Group the data by year and month and calculate the average high and low prices
filtered_df.loc[:, 'year_month'] = filtered_df['date'].dt.to_period('M')
monthwise_high = filtered_df.groupby('year_month')['high'].mean().reset_index()
monthwise_low = filtered_df.groupby('year_month')['low'].mean().reset_index()

# Merge high and low price DataFrames
monthwise = pd.merge(monthwise_high, monthwise_low, on='year_month')
monthwise.columns = ['year_month', 'high', 'low']

# Extract year-month names, high prices, and low prices
months = monthwise['year_month'].dt.strftime('%Y-%m').tolist()
highs = monthwise['high'].tolist()
lows = monthwise['low'].tolist()

# Initialize the bar chart
chart = lc.BarChart(
    vertical=True,  # Vertical bars
    theme=lc.Themes.White,  # White theme
    title='Grouped Bar Chart: Monthwise High and Low Prices'  # Title of the chart
)

# Group the data for high and low prices
chart.set_data_grouped(
    months,
    [
        {'subCategory': 'High Price', 'values': highs},
        {'subCategory': 'Low Price', 'values': lows}
    ]
)

# Set sorting to alphabetical, which will sort by month names
chart.set_sorting('alphabetical')

# Add a legend to the chart to differentiate between high and low prices
chart.add_legend().add(chart)

# Open the chart to display it
chart.open()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.loc[:, 'year_month'] = filtered_df['date'].dt.to_period('M')


127.0.0.1 - - [04/Jun/2024 13:37:29] "GET / HTTP/1.1" 200 -


In [12]:
# Line Series
# Assuming df_google is already set up and filtered
df_googl['date'] = pd.to_datetime(df_googl['date'])
specified_start_date = pd.to_datetime('2020-01-01')
specified_end_date = pd.to_datetime('2024-05-14')
filtered_df = df_googl[(df_googl['date'] >= specified_start_date) & (df_googl['date'] <= specified_end_date)]

# Initialize a 2D chart
chart = lc.ChartXY(
    theme=lc.Themes.Black,
    title='Line Chart: Stock Analysis Chart'
)

# Dispose the default x-axis and create a new one suited for datetime
chart.get_default_x_axis().dispose()
axis_x = chart.add_x_axis(axis_type='linear-highPrecision')
axis_x.set_tick_strategy('DateTime')

# Configure the x-axis to display dates
axis_x.set_interval(
    start=filtered_df['date'].min().timestamp() * 1000, 
    end=filtered_df['date'].max().timestamp() * 1000
)

# Adding legend to the chart
legend = chart.add_legend()

# Add series for each stock parameter
params = ['open', 'close', 'high', 'low']
for param in params:
    series = chart.add_line_series()
    series.set_line_thickness(2)
    x_values = filtered_df['date'].apply(lambda x: x.timestamp() * 1000).tolist()
    y_values = filtered_df[param].tolist()
    series.add(x_values, y_values)
    # Hypothetical method to set series name
    if hasattr(series, 'set_name'):
        series.set_name(param)
    legend.add(series)

# Open the chart
chart.open()

127.0.0.1 - - [04/Jun/2024 13:37:29] "GET / HTTP/1.1" 200 -


In [13]:
# Point Line Series
import lightningchart as lc
import pandas as pd

# Assuming df_google is already set up and filtered
df_googl['date'] = pd.to_datetime(df_googl['date'])
specified_start_date = pd.to_datetime('2020-01-01')
specified_end_date = pd.to_datetime('2024-05-14')
filtered_df = df_googl[(df_googl['date'] >= specified_start_date) & (df_googl['date'] <= specified_end_date)]

# Initialize the chart
chart = lc.ChartXY(
    theme=lc.Themes.White,
    title='Point Line Series: Stock Analysis Chart'
)

# Dispose the default x-axis and create a high precision datetime axis
chart.get_default_x_axis().dispose()
axis_x = chart.add_x_axis(axis_type='linear-highPrecision')
axis_x.set_tick_strategy('DateTime')
axis_x.set_interval(start=filtered_df['date'].min().timestamp() * 1000, end=filtered_df['date'].max().timestamp() * 1000)

# Adding legend to the chart
legend = chart.add_legend()

# Create a list of series, each with a different point shape
shapes = ['circle', 'triangle', 'square', 'diamond']
params = ['open', 'close', 'high', 'low']
colors = [lc.Color(255, 0, 0), lc.Color(0, 255, 0), lc.Color(0, 0, 255), lc.Color(255, 255, 0)]

series_list = []
for shape, param, color in zip(shapes, params, colors):
    series = chart.add_point_line_series()
    series.set_point_shape(shape)
    series.set_point_size(10)
    series.set_point_color(color)
    series.set_line_thickness(2)
    # Assuming a method exists to set the name
    series.set_name(param)
    legend.add(series)
    series_list.append(series)

# Populate each series with data
for series, param in zip(series_list, params):
    x_values = filtered_df['date'].apply(lambda x: x.timestamp() * 1000).tolist()
    y_values = filtered_df[param].tolist()
    series.add(x=x_values, y=y_values)

# Open the chart to display it
chart.open()




127.0.0.1 - - [04/Jun/2024 13:37:29] "GET / HTTP/1.1" 200 -


In [14]:
# 3D Line Chart
# Assuming df_google is your DataFrame and includes the 'date', 'open', 'close', 'high', and 'low' columns
df_googl['date'] = pd.to_datetime(df_googl['date'])

# Set the date range and filter the DataFrame
specified_start_date = pd.to_datetime('2020-01-01')
specified_end_date = pd.to_datetime('2024-05-14')
filtered_df = df_googl[(df_googl['date'] >= specified_start_date) & (df_googl['date'] <= specified_end_date)]

# Initialize the 3D chart
chart = lc.Chart3D(
    theme=lc.Themes.White,
    title='Stock Line Series 3D'
)

# Parameters to plot
params = ['open', 'close', 'high', 'low']
series_list = []

# Adding legend to the chart
legend = chart.add_legend()

for idx, param in enumerate(params):
    series = chart.add_line_series()
    series.set_line_thickness(2)
    series_list.append(series)
    # Assuming an API call to set a name or description if it exists:
    if hasattr(series, 'set_name'):
        series.set_name(param)
    legend.add(series)  # Adding each series to the legend

# Generate data for each series
for idx, param in enumerate(params):
    x_values = filtered_df['date'].apply(lambda x: x.timestamp() * 1000).tolist()  # Convert to milliseconds
    y_values = filtered_df[param].tolist()
    z_values = [idx] * len(x_values)  # Use index as Z-value to separate lines vertically
    for x, y, z in zip(x_values, y_values, z_values):
        series_list[idx].add(x, y, z)

# Configure the x-axis to handle datetime values correctly
try:
    axis_x = chart.get_default_x_axis()
    axis_x.set_tick_strategy('DateTime')
    axis_x.set_interval(
        start=filtered_df['date'].min().timestamp() * 1000, 
        end=filtered_df['date'].max().timestamp() * 1000
    )
except AttributeError:
    print("Custom tick labels are not directly supported in this context.")

# Open the chart to display it
chart.open()

127.0.0.1 - - [04/Jun/2024 13:37:29] "GET / HTTP/1.1" 200 -


In [15]:
# Make separate dataframe with close price
closedf = filtered_df[['date','close']]
print("Shape of close dataframe:", closedf.shape)

Shape of close dataframe: (1099, 2)


In [16]:
# Line chart
# Assuming df_google is already set up and filtered
df_googl['date'] = pd.to_datetime(df_googl['date'])
specified_start_date = pd.to_datetime('2020-01-01')
specified_end_date = pd.to_datetime('2024-05-14')
filtered_df = df_googl[(df_googl['date'] >= specified_start_date) & (df_googl['date'] <= specified_end_date)]

# Convert dates to timestamps in milliseconds
filtered_df['timestamp'] = filtered_df['date'].apply(lambda x: x.timestamp() * 1000)

# Initialize a 2D chart with high precision datetime axis
chart = lc.ChartXY(
    theme=lc.Themes.White,
    title='Line Chart: Stock Analysis Chart'
)

# Dispose the default x-axis and create a new one suited for datetime
chart.get_default_x_axis().dispose()
axis_x = chart.add_x_axis(axis_type='linear-highPrecision')
axis_x.set_tick_strategy('DateTime')
axis_x.set_interval(
    start=filtered_df['timestamp'].min(), 
    end=filtered_df['timestamp'].max()
)

# Add a line series for 'close' prices
close_series = chart.add_line_series()
x_values = filtered_df['timestamp'].tolist()
y_values = filtered_df['close'].tolist()
close_series.add(x_values, y_values)
close_series.set_line_thickness(2)

# Name the series to correctly address the legend
close_series.set_name('Close Prices')

# Add a legend to the chart
legend = chart.add_legend()
legend.add(close_series)  # Attach the series to the legend with the correct label

# Open the chart
chart.open()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['timestamp'] = filtered_df['date'].apply(lambda x: x.timestamp() * 1000)


127.0.0.1 - - [04/Jun/2024 13:26:33] "GET / HTTP/1.1" 200 -


In [17]:
# Normalizing / scaling close value between 0 to 1
# Assuming df_google is already set up and filtered
df_googl['date'] = pd.to_datetime(df_googl['date'])
specified_start_date = pd.to_datetime('2020-01-01')
specified_end_date = pd.to_datetime('2024-05-14')
filtered_df = df_googl[(df_googl['date'] >= specified_start_date) & (df_googl['date'] <= specified_end_date)]

# Extract 'close' column and create a copy for normalization
closedf = filtered_df[['date', 'close']].copy()
close_stock = closedf.copy()

# Remove the 'date' column for normalization
close_stock_values = close_stock['close'].values.reshape(-1, 1)

# Normalize/scale the close values between 0 and 1
scaler = MinMaxScaler(feature_range=(0, 1))
normalized_close_values = scaler.fit_transform(close_stock_values)

# Update the DataFrame with normalized values
normalized_close_df = pd.DataFrame(normalized_close_values, columns=['normalized_close'])
normalized_close_df['date'] = closedf['date'].values

# Print the shape of the normalized DataFrame
print(normalized_close_df.shape)

# Optionally, you can inspect the first few rows to verify
print(normalized_close_df.head())

(1099, 2)
   normalized_close       date
0          0.131890 2020-01-02
1          0.128888 2020-01-03
2          0.144109 2020-01-06
3          0.142977 2020-01-07
4          0.147137 2020-01-08


In [18]:
# 2D chart for Normalization
chart = lc.ChartXY(
    theme=lc.Themes.White,
    title='Normalized Close Prices'
)

# Dispose the default x-axis and create a new one suited for datetime
chart.get_default_x_axis().dispose()
axis_x = chart.add_x_axis(axis_type='linear-highPrecision')
axis_x.set_tick_strategy('DateTime')
axis_x.set_interval(
    start=normalized_close_df['date'].min().timestamp() * 1000, 
    end=normalized_close_df['date'].max().timestamp() * 1000
)

# Add a line series for normalized 'close' prices
normalized_series = chart.add_line_series()
x_values = normalized_close_df['date'].apply(lambda x: x.timestamp() * 1000).tolist()
y_values = normalized_close_df['normalized_close'].tolist()
normalized_series.add(x_values, y_values)
normalized_series.set_line_thickness(2)
normalized_series.set_name('Normalized Close Prices')

# Add a legend to the chart
legend = chart.add_legend()
legend.add(normalized_series)

# Open the chart
chart.open()


127.0.0.1 - - [04/Jun/2024 13:37:29] "GET / HTTP/1.1" 200 -


In [19]:
# Split the data into training and testing sets
training_size = int(len(normalized_close_df) * 0.65)
test_size = len(normalized_close_df) - training_size
train_data, test_data = normalized_close_df.iloc[0:training_size, :], normalized_close_df.iloc[training_size:len(normalized_close_df), :]

# Print the shapes of the training and testing datasets
print("train_data: ", train_data.shape)
print("test_data: ", test_data.shape)

# Optionally, you can inspect the first few rows to verify
print(train_data.head())
print(test_data.head())


train_data:  (714, 2)
test_data:  (385, 2)
   normalized_close       date
0          0.131890 2020-01-02
1          0.128888 2020-01-03
2          0.144109 2020-01-06
3          0.142977 2020-01-07
4          0.147137 2020-01-08
     normalized_close       date
714          0.316689 2022-11-01
715          0.287338 2022-11-02
716          0.257650 2022-11-03
717          0.284067 2022-11-04
718          0.300085 2022-11-07


In [20]:
# Function to create dataset matrix for time-series prediction
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), 0]   
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
    return np.array(dataX), np.array(dataY)

# Reshape the data into X=t,t+1,t+2,t+3 and Y=t+4
time_step = 15
X_train, y_train = create_dataset(train_data[['normalized_close']].values, time_step)
X_test, y_test = create_dataset(test_data[['normalized_close']].values, time_step)

# Print the shapes of the datasets
print("X_train: ", X_train.shape)
print("y_train: ", y_train.shape)
print("X_test: ", X_test.shape)
print("y_test", y_test.shape)


X_train:  (698, 15)
y_train:  (698,)
X_test:  (369, 15)
y_test (369,)


In [21]:
# Visualization of Training Data
# Initialize a new 2D chart for training data visualization
chart_train = lc.ChartXY(
    theme=lc.Themes.White,
    title='Training Data: Normalized Close Prices'
)

# Dispose the default x-axis and create a new one suited for datetime
chart_train.get_default_x_axis().dispose()
axis_x_train = chart_train.add_x_axis(axis_type='linear-highPrecision')
axis_x_train.set_tick_strategy('DateTime')
axis_x_train.set_interval(
    start=train_data['date'].min().timestamp() * 1000, 
    end=train_data['date'].max().timestamp() * 1000
)

# Add a line series for training 'close' prices
train_series = chart_train.add_line_series()
x_values_train = train_data['date'].apply(lambda x: x.timestamp() * 1000).tolist()
y_values_train = train_data['normalized_close'].tolist()
train_series.add(x_values_train, y_values_train)
train_series.set_line_thickness(2)
train_series.set_name('Training Close Prices')

# Add a legend to the chart
legend_train = chart_train.add_legend()
legend_train.add(train_series)

# Open the training data chart
chart_train.open()


127.0.0.1 - - [04/Jun/2024 13:37:29] "GET / HTTP/1.1" 200 -


In [22]:
# Visualization of Testing Data
# Initialize a new 2D chart for testing data visualization
chart_test = lc.ChartXY(
    theme=lc.Themes.White,
    title='Testing Data: Normalized Close Prices'
)

# Dispose the default x-axis and create a new one suited for datetime
chart_test.get_default_x_axis().dispose()
axis_x_test = chart_test.add_x_axis(axis_type='linear-highPrecision')
axis_x_test.set_tick_strategy('DateTime')
axis_x_test.set_interval(
    start=test_data['date'].min().timestamp() * 1000, 
    end=test_data['date'].max().timestamp() * 1000
)

# Add a line series for testing 'close' prices
test_series = chart_test.add_line_series()
x_values_test = test_data['date'].apply(lambda x: x.timestamp() * 1000).tolist()
y_values_test = test_data['normalized_close'].tolist()
test_series.add(x_values_test, y_values_test)
test_series.set_line_thickness(2)
test_series.set_name('Testing Close Prices')

# Add a legend to the chart
legend_test = chart_test.add_legend()
legend_test.add(test_series)

# Open the testing data chart
chart_test.open()


127.0.0.1 - - [04/Jun/2024 13:37:29] "GET / HTTP/1.1" 200 -


**LSTM Model**

In [23]:
# Preparing the Data
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

print("X_train: ", X_train.shape)
print("X_test: ", X_test.shape)

X_train:  (698, 15, 1)
X_test:  (369, 15, 1)


In [24]:

# Define the input shape based on your training data
input_shape = (X_train.shape[1], 1)  # Replace 1 with the number of features in your data

# Create the Sequential model
model = keras.Sequential()

# Use Input layer to define the input shape
model.add(keras.Input(shape=input_shape))

# Add LSTM layers with return_sequences for the first layer only
model.add(LSTM(50, return_sequences=True))
model.add(LSTM(50, return_sequences=False))

# Add Dense layers for prediction
model.add(Dense(25))
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

model.summary()


127.0.0.1 - - [04/Jun/2024 13:26:03] "GET / HTTP/1.1" 200 -


In [25]:
# Training the Model
model.fit(X_train, y_train, batch_size=64, epochs=100, validation_data=(X_test, y_test), verbose=2)

Epoch 1/100


11/11 - 3s - 294ms/step - loss: 0.0741 - val_loss: 0.0249
Epoch 2/100
11/11 - 0s - 17ms/step - loss: 0.0131 - val_loss: 0.0230
Epoch 3/100
11/11 - 0s - 17ms/step - loss: 0.0067 - val_loss: 0.0032
Epoch 4/100
11/11 - 0s - 19ms/step - loss: 0.0033 - val_loss: 0.0049
Epoch 5/100
11/11 - 0s - 17ms/step - loss: 0.0018 - val_loss: 0.0016
Epoch 6/100
11/11 - 0s - 17ms/step - loss: 0.0014 - val_loss: 0.0017
Epoch 7/100
11/11 - 0s - 20ms/step - loss: 0.0013 - val_loss: 0.0019
Epoch 8/100
11/11 - 0s - 17ms/step - loss: 0.0013 - val_loss: 0.0016
Epoch 9/100
11/11 - 0s - 18ms/step - loss: 0.0012 - val_loss: 0.0017
Epoch 10/100
11/11 - 0s - 18ms/step - loss: 0.0013 - val_loss: 0.0022
Epoch 11/100
11/11 - 0s - 19ms/step - loss: 0.0012 - val_loss: 0.0017
Epoch 12/100
11/11 - 0s - 19ms/step - loss: 0.0012 - val_loss: 0.0017
Epoch 13/100
11/11 - 0s - 17ms/step - loss: 0.0012 - val_loss: 0.0017
Epoch 14/100
11/11 - 0s - 18ms/step - loss: 0.0012 - val_loss: 0.0016
Epoch 15/100
11/11 - 0s - 19ms/step - lo

<keras.src.callbacks.history.History at 0x259dcaa9d00>

In [26]:
# Evaluating the Model
# Making predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Inverting predictions back to original scale
train_predict = scaler.inverse_transform(train_predict)
y_train_inv = scaler.inverse_transform(y_train.reshape(-1, 1))
test_predict = scaler.inverse_transform(test_predict)
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))

# Calculate RMSE, MSE, and MAE for training data
train_rmse = math.sqrt(mean_squared_error(y_train_inv, train_predict))
train_mse = mean_squared_error(y_train_inv, train_predict)
train_mae = mean_absolute_error(y_train_inv, train_predict)

# Calculate RMSE, MSE, and MAE for testing data
test_rmse = math.sqrt(mean_squared_error(y_test_inv, test_predict))
test_mse = mean_squared_error(y_test_inv, test_predict)
test_mae = mean_absolute_error(y_test_inv, test_predict)

# Print training and testing metrics
print("----Training Data Metrics----")
print("Train RMSE: ", train_rmse)
print("Train MSE: ", train_mse)
print("Train MAE: ", train_mae)
print("-------------------------------")

print("----Testing Data Metrics----")
print("Test RMSE: ", test_rmse)
print("Test MSE: ", test_mse)
print("Test MAE: ", test_mae)

[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
----Training Data Metrics----
Train RMSE:  2.9984303439480646
Train MSE:  8.990584527508508
Train MAE:  2.2894742145702276
-------------------------------
----Testing Data Metrics----
Test RMSE:  3.516342319495535
Test MSE:  12.36466330787524
Test MAE:  2.714201904958503


In [27]:
# R2 score for regression
train_r2 = r2_score(y_train_inv, train_predict)
test_r2 = r2_score(y_test_inv, test_predict)

# Display the metrics
print("Train R2 Score: ", train_r2)
print("Test R2 Score: ", test_r2)


Train R2 Score:  0.987583614648427
Test R2 Score:  0.9721360902179305


In [28]:
# Comparison of Actual vs Predicted Close Prices
chart = lc.ChartXY(title='Comparison of Actual vs Predicted Close Prices')

# Dispose the default x-axis and create a high precision datetime axis
chart.get_default_x_axis().dispose()
axis_x = chart.add_x_axis(axis_type='linear-highPrecision')
axis_x.set_tick_strategy('DateTime')

# Convert datetime to timestamps for plotting
date_timestamps = [x.timestamp() * 1000 for x in filtered_df['date']]

# Convert predictions, ensuring they are flat and adjusted for visualization
trainPredictPlot_adjusted = [float(x[0]) if not np.isnan(x[0]) else np.nan for x in train_predict]
testPredictPlot_adjusted = [float(x[0]) if not np.isnan(x[0]) else np.nan for x in test_predict]

# Plot actual prices
series_actual = chart.add_line_series()
series_actual.add(x=date_timestamps, y=filtered_df['close'].tolist())
series_actual.set_name('Actual Prices')  # Assuming set_name() or similar method is available

# Plot train predicted prices if valid data exists
if not np.all(np.isnan(trainPredictPlot_adjusted)):
    series_train_predicted = chart.add_line_series()
    series_train_predicted.add(x=date_timestamps[:len(trainPredictPlot_adjusted)], y=trainPredictPlot_adjusted)
    series_train_predicted.set_name('Train Predictions')  # Assuming set_name() or similar method is available

# Plot test predicted prices if valid data exists
if not np.all(np.isnan(testPredictPlot_adjusted)):
    series_test_predicted = chart.add_line_series()
    series_test_predicted.add(x=date_timestamps[-len(testPredictPlot_adjusted):], y=testPredictPlot_adjusted)
    series_test_predicted.set_name('Test Predictions')  # Assuming set_name() or similar method is available

# Add a legend to the chart
legend = chart.add_legend()
legend.add(series_actual)  # Attach the actual series to the legend
legend.add(series_train_predicted)  # Attach the train prediction series to the legend
legend.add(series_test_predicted)  # Attach the test prediction series to the legend

# Open the chart
chart.open()


127.0.0.1 - - [04/Jun/2024 13:37:29] "GET / HTTP/1.1" 200 -


In [29]:

filtered_df = df_googl[(df_googl['date'] >= specified_start_date) & (df_googl['date'] <= specified_end_date)]

# Normalize/scale the close values between 0 and 1
close_stock_values = filtered_df['close'].values.reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
normalized_close_values = scaler.fit_transform(close_stock_values)

# Prepare the input data for prediction
time_step = 15  # Assuming time_step from previous context
test_data = normalized_close_values[-time_step:]
x_input = test_data.reshape(1, -1)
temp_input = list(x_input)
temp_input = temp_input[0].tolist()

# Predict next 10 days
lst_output = []
n_steps = time_step
pred_days = 10

for i in range(pred_days):
    if len(temp_input) > time_step:
        x_input = np.array(temp_input[1:])
        x_input = x_input.reshape(1, -1)
        x_input = x_input.reshape((1, n_steps, 1))
        yhat = model.predict(x_input, verbose=0)
        temp_input.extend(yhat[0].tolist())
        temp_input = temp_input[1:]
        lst_output.extend(yhat.tolist())
    else:
        x_input = x_input.reshape((1, n_steps, 1))
        yhat = model.predict(x_input, verbose=0)
        temp_input.extend(yhat[0].tolist())
        lst_output.extend(yhat.tolist())

# Invert the predictions back to original scale
predicted_values = scaler.inverse_transform(np.array(lst_output).reshape(-1, 1))

# Create a DataFrame to display the results
future_dates = pd.date_range(start=filtered_df['date'].iloc[-1], periods=pred_days + 1, inclusive='right')
prediction_df = pd.DataFrame({'date': future_dates, 'predicted_close': predicted_values.flatten()})

# Display the prediction DataFrame
print(prediction_df)

# Prepare data for LC chart
actual_dates = filtered_df['date'].tolist()
actual_close = filtered_df['close'].tolist()
predicted_dates = prediction_df['date'].tolist()
predicted_close = prediction_df['predicted_close'].tolist()

# Initialize LightningChart and set the license key
chart = lc.ChartXY(title='Actual vs Predicted Close Prices')

# Dispose the default x-axis and create a high precision datetime axis
chart.get_default_x_axis().dispose()
axis_x = chart.add_x_axis(axis_type='linear-highPrecision')
axis_x.set_tick_strategy('DateTime')

# Convert datetime to timestamps for plotting
actual_date_timestamps = [x.timestamp() * 1000 for x in actual_dates]
predicted_date_timestamps = [x.timestamp() * 1000 for x in predicted_dates]

# Plot actual prices
series_actual = chart.add_line_series()
series_actual.add(x=actual_date_timestamps, y=actual_close)
series_actual.set_name('Actual Prices')

# Plot predicted prices
series_predicted = chart.add_line_series()
series_predicted.add(x=predicted_date_timestamps, y=predicted_close)
series_predicted.set_name('Predicted Prices')

# Add a legend to the chart
legend = chart.add_legend()
legend.add(series_actual)
legend.add(series_predicted)

# Open the chart
chart.open()


        date  predicted_close
0 2024-05-15       167.756357
1 2024-05-16       167.388284
2 2024-05-17       166.830364
3 2024-05-18       166.305530
4 2024-05-19       165.699770
5 2024-05-20       165.047876
6 2024-05-21       164.388690
7 2024-05-22       163.739560
8 2024-05-23       163.090843
9 2024-05-24       162.448594


127.0.0.1 - - [04/Jun/2024 13:37:29] "GET / HTTP/1.1" 200 -


In [30]:
# Final Prediction
# Convert predictions, ensuring they are flat and adjusted for visualization
trainPredictPlot_adjusted = [float(x[0]) if not np.isnan(x[0]) else np.nan for x in train_predict]
testPredictPlot_adjusted = [float(x[0]) if not np.isnan(x[0]) else np.nan for x in test_predict]

# Plot actual prices
series_actual = chart.add_line_series()
series_actual.add(x=date_timestamps, y=filtered_df['close'].tolist())
series_actual.set_name('Actual Prices')  # Assuming set_name() or similar method is available

# Plot train predicted prices if valid data exists
if not np.all(np.isnan(trainPredictPlot_adjusted)):
    series_train_predicted = chart.add_line_series()
    series_train_predicted.add(x=date_timestamps[:len(trainPredictPlot_adjusted)], y=trainPredictPlot_adjusted)
    series_train_predicted.set_name('Train Predictions')  # Assuming set_name() or similar method is available

# Plot test predicted prices if valid data exists
if not np.all(np.isnan(testPredictPlot_adjusted)):
    series_test_predicted = chart.add_line_series()
    series_test_predicted.add(x=date_timestamps[-len(testPredictPlot_adjusted):], y=testPredictPlot_adjusted)
    series_test_predicted.set_name('Test Predictions')  # Assuming set_name() or similar method is available

# Plot future predicted prices
series_future_predicted = chart.add_line_series()
series_future_predicted.add(x=predicted_date_timestamps, y=predicted_close)
series_future_predicted.set_name('Future Predictions')

# Add a legend to the chart
legend = chart.add_legend()
legend.add(series_actual)  # Attach the actual series to the legend
legend.add(series_train_predicted)  # Attach the train prediction series to the legend
legend.add(series_test_predicted)  # Attach the test prediction series to the legend
legend.add(series_future_predicted)

# Add text boxes to display metrics
textbox1 = chart.add_textbox()
textbox1.set_text(f'Train RMSE: {train_rmse:.2f}')
textbox1.set_position(0.05, 0.95)

textbox2 = chart.add_textbox()
textbox2.set_text(f'Train MSE: {train_mse:.2f}')
textbox2.set_position(0.05, 0.90)

textbox3 = chart.add_textbox()
textbox3.set_text(f'Train MAE: {train_mae:.2f}')
textbox3.set_position(0.05, 0.85)

textbox4 = chart.add_textbox()
textbox4.set_text(f'Test RMSE: {test_rmse:.2f}')
textbox4.set_position(0.05, 0.80)

textbox5 = chart.add_textbox()
textbox5.set_text(f'Test MSE: {test_mse:.2f}')
textbox5.set_position(0.05, 0.75)

textbox6 = chart.add_textbox()
textbox6.set_text(f'Test MAE: {test_mae:.2f}')
textbox6.set_position(0.05, 0.70)

textbox7 = chart.add_textbox()
textbox7.set_text(f'Train R²: {train_r2:.2f}')
textbox7.set_position(0.05, 0.65)

textbox8 = chart.add_textbox()
textbox8.set_text(f'Test R²: {test_r2:.2f}')
textbox8.set_position(0.05, 0.60)

# Open the chart
chart.open()

127.0.0.1 - - [04/Jun/2024 13:26:34] "GET / HTTP/1.1" 200 -
