In [None]:
# Import the modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mplfinance as mpf
import plotly.express as px
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics


In [None]:
# Load the gold price dataset from the Resources folder.
# The path is kept in its own variable so it can be inspected or reused.
file_path = Path("Resources/gld_price_data.csv")
df_gld = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows as a sanity check on the load
df_gld.head(n=5)

In [None]:
# Split the dataset into the regression target (y) and the predictors (x)

# Target: the "GLD" column
y = df_gld.loc[:, "GLD"]

# Predictors: every column except "Date" and the target itself.
# `columns=` already selects the column axis, so no `axis` argument is needed.
x = df_gld.drop(columns=["Date", "GLD"])

In [None]:
# Review the y variable Series.
# Show a short preview as the cell's output (same style as the feature
# preview) instead of printing the whole Series.
y.head()

In [None]:
# Preview the first five rows of the feature DataFrame
x.head(n=5)

In [None]:
# Summarise the feature columns: dtypes and non-null counts
x.info()

In [None]:
# Check the target values.
# The target is used for regression, so summarise its distribution
# (count, mean, std, min/max, quartiles) rather than counting how often each
# distinct value occurs.
y.describe()

In [None]:
# Hold out 20% of the rows as a test set.
# random_state=2 fixes the shuffle so the split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

In [None]:
# Display the target Series again for reference
y

In [None]:
# Instantiate the Random Forest Regressor model.
# random_state is fixed (same seed as the train/test split) so the fitted
# forest, its predictions and the reported score are reproducible across
# kernel restarts; without it every run gives slightly different results.
regressor = RandomForestRegressor(n_estimators=100, random_state=2)

In [None]:
# Train the random forest on the training features and targets
regressor.fit(X=x_train, y=y_train)

In [None]:
# Predict the target for the held-out test features
test_data_prediction = regressor.predict(X=x_test)

In [None]:
# Calculate the R-squared (coefficient of determination) of the predictions
# against the actual test targets.
# R-squared is a goodness-of-fit score (higher is better), not an error, so
# the printed label says "score". The variable keeps its original name so any
# other cell that refers to `error_score` continues to work.
error_score = metrics.r2_score(y_test, test_data_prediction)
print("R squared score : ", error_score)

In [None]:
# Turn y_test into a plain list of its values (in their existing order),
# discarding the Series index it carried over from the original DataFrame.
y_test = [actual for actual in y_test]

In [None]:
# Overlay the actual test-set values and the model's predictions on a single
# Axes, using matplotlib's explicit figure/axes interface.
price_fig, price_ax = plt.subplots(figsize=(10, 6))
price_ax.plot(y_test, color='blue', label='Actual Value')
price_ax.plot(test_data_prediction, color='green', label='Predicted Value')
price_ax.set_title('Actual Price vs Predicted Price')
price_ax.set_xlabel('Number of values')
price_ax.set_ylabel('Gold Price')
price_ax.legend()
plt.show()

In [None]:
# Data Visualization - Interactive Time Series using plotly
# Plot GLD against the "Date" column rather than the positional row index, so
# the axis titled "Date" really shows calendar dates.
# The dates are parsed on a copy (`assign`) so `df_gld` itself is not mutated.
# NOTE(review): assumes "Date" holds values pd.to_datetime can parse - confirm
# against the CSV.
gld_by_date = df_gld.assign(Date=pd.to_datetime(df_gld["Date"]))
fig = px.line(gld_by_date, x="Date", y="GLD", title="GLD Price Time Series")
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="GLD Price")
fig.show()

In [None]:
# Data Visualization - Correlation Heatmap
# Correlate numeric columns only: the DataFrame still contains the "Date"
# column, and DataFrame.corr() raises on non-numeric columns in pandas >= 2.0
# (older versions silently dropped them). select_dtypes gives the same result
# on every pandas version.
correlation_matrix = df_gld.select_dtypes(include="number").corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f")
plt.title('Correlation Heatmap')
plt.show()