In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

## Importing Data

In [4]:
# Read the gold price CSV (path is relative to the notebook's working
# directory) and preview the final ten rows to see how the file ends.
gold_data = pd.read_csv('gold_price_data_2008_2024.csv')
gold_data.tail(n=10)

Unnamed: 0,Date,Gold Price
5657,08-05-2024,7118
5658,07-05-2024,7122
5659,06-05-2024,7124
5660,05-05-2024,7068
5661,04-05-2024,7068
5662,03-05-2024,7039
5663,02-05-2024,7069
5664,01-05-2024,7047
5665,02-06-2024,7205
5666,01-06-2024,7205


## Data Cleaning

In [5]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
gold_data.shape

(5667, 2)

In [6]:
# Column dtypes straight after loading; 'Date' is still an object (string)
# column here and is converted to datetime further down.
gold_data.dtypes

Date          object
Gold Price     int64
dtype: object

In [7]:
# Number of missing values in each column.
gold_data.isna().sum()

Date          0
Gold Price    0
dtype: int64

In [8]:
# Parse the day-first date strings (e.g. '08-05-2024' is 8 May 2024) once,
# then derive the calendar parts from that single parsed series.
parsed_dates = pd.to_datetime(gold_data['Date'], format='%d-%m-%Y')

# Build the new columns in one step instead of mutating the frame column by
# column. Column names and their order are unchanged: 'Dates' holds the day of
# the month and keeps that name because the modelling cell selects it by name.
gold_data = gold_data.assign(
    Date=parsed_dates,
    Dates=parsed_dates.dt.day,
    Month=parsed_dates.dt.month,
    Year=parsed_dates.dt.year,
)

# A bare last expression renders the (truncated) frame with the notebook's
# rich display rather than as plain printed text.
gold_data

           Date  Gold Price  Dates  Month  Year
0    2008-08-31        1190     31      8  2008
1    2008-08-30        1188     30      8  2008
2    2008-08-29        1186     29      8  2008
3    2008-09-30        1350     30      9  2008
4    2008-09-29        1317     29      9  2008
...         ...         ...    ...    ...   ...
5662 2024-05-03        7039      3      5  2024
5663 2024-05-02        7069      2      5  2024
5664 2024-05-01        7047      1      5  2024
5665 2024-06-02        7205      2      6  2024
5666 2024-06-01        7205      1      6  2024

[5667 rows x 5 columns]


In [9]:
# Re-check dtypes after the date conversion and the added day/month/year columns.
gold_data.dtypes

Date          datetime64[ns]
Gold Price             int64
Dates                  int32
Month                  int32
Year                   int32
dtype: object

## Train the Model

In [10]:
# Predictors are the calendar parts of each date; the target is the price.
# Both are kept as DataFrames (double-bracket selection).
feature_columns = ['Year', 'Month', 'Dates']
x = gold_data[feature_columns]
y = gold_data[['Gold Price']]

In [11]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
# NOTE(review): train_test_split shuffles rows by default, so the held-out
# dates are interleaved with the training dates rather than forming a later
# period. Confirm that in-range interpolation (not forecasting) is what this
# evaluation is meant to measure.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Ordinary least-squares fit of price on (Year, Month, Dates).
linear_model = LinearRegression()
linear_model.fit(x_train, y_train)

In [12]:
# Predictions for the held-out test features; these feed the error metrics
# computed in the following cell.
y_pred=linear_model.predict(x_test)

In [13]:
# Error metrics on the held-out split.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # same units as the price, unlike MSE
r2 = r2_score(y_test, y_pred)

# Adjusted R² discounts R² for the number of predictors:
#   1 - (1 - R²) * (n - 1) / (n - p - 1)
# with n = number of test rows and p = number of feature columns.
n, p = x_test.shape
adj_r_square = 1 - (1 - r2) * (n - 1) / (n - p - 1)


print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R²): {r2}")
print(f"Adjusted R-squared: {adj_r_square}")

Mean Absolute Error (MAE): 426.58526437841425
Mean Squared Error (MSE): 282641.38550092146
Root Mean Squared Error (RMSE): 531.640278290614
R-squared (R²): 0.8398670776748802
Adjusted R-squared: 0.8394419460226896


# THE END