In [5]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the daily gold price table.
# A forward slash is used as the separator: the original backslash formed the
# invalid escape sequence "\D" (a warning on recent Python versions) and only
# resolved as a path on Windows. Forward slashes work on Windows and POSIX.
gold_data_india = pd.read_csv('Daily_Gold_Price_on_World.csv/Daily_Gold_Price_on_World.csv')

# Fill gaps with the previous row's value. DataFrame.ffill replaces
# fillna(method='ffill'), which is deprecated in pandas 2.x.
gold_data_india.ffill(inplace=True)

# Use the parsed dates as the index so plots and rolling windows are keyed by date.
# NOTE(review): the rolling windows below assume rows are already in ascending
# date order - confirm against the CSV.
gold_data_india['Date'] = pd.to_datetime(gold_data_india['Date'])
gold_data_india.set_index('Date', inplace=True)

# 50-row and 200-row moving averages of the USD price; these become the model features.
gold_data_india['MA50'] = gold_data_india['US dollar (USD)'].rolling(window=50).mean()
gold_data_india['MA200'] = gold_data_india['US dollar (USD)'].rolling(window=200).mean()

# A rolling mean is NaN until its window is full, so this drops the first 199
# rows (and any other row that still contains a missing value in any column).
gold_data_india.dropna(inplace=True)

# Target is the USD price; the features are its two moving averages.
target_india = gold_data_india['US dollar (USD)']
features_india = gold_data_india[['MA50', 'MA200']]

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility).
# NOTE(review): train_test_split shuffles by default, so test rows are
# interleaved in time with training rows, and both features are rolling means
# that include the same day's price. The scores below are therefore likely
# optimistic for real forecasting; consider a chronological split
# (shuffle=False) and lagged features.
split_india = train_test_split(
    features_india,
    target_india,
    test_size=0.2,
    random_state=42,
)
X_train_india, X_test_india, y_train_india, y_test_india = split_india

# Ordinary least-squares fit on the training rows (fit() returns the estimator).
model_india = LinearRegression().fit(X_train_india, y_train_india)

# Predict the held-out rows and score them.
y_pred_india = model_india.predict(X_test_india)
mse_india = mean_squared_error(y_test_india, y_pred_india)
r2_india = r2_score(y_test_india, y_pred_india)

print(f'Mean Squared Error for India: {mse_india}')
print(f'R^2 Score for India: {r2_india}')

# Full price history with the held-out actual and predicted prices overlaid.
# NOTE(review): the titles/labels say India/INR, but the plotted column is
# 'US dollar (USD)' - confirm which one is intended.
fig = px.line(gold_data_india, x=gold_data_india.index, y='US dollar (USD)', title='Gold Prices in India Over Time')

# The test rows come from a shuffled split, so their index is not in date
# order. Line traces connect points in the order given, which made the
# original traces zigzag across the chart. Order the points by date for
# plotting only; y_test_india and y_pred_india themselves are left untouched.
test_order = y_test_india.index.argsort()
test_dates_sorted = y_test_india.index[test_order]

fig.add_scatter(x=test_dates_sorted, y=y_test_india.iloc[test_order], mode='lines', name='Actual Prices (INR)')
fig.add_scatter(x=test_dates_sorted, y=y_pred_india[test_order], mode='lines', name='Predicted Prices (INR)')
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Gold Price (INR)')
fig.show()

# Chart the two moving-average columns against the date index.
# NOTE(review): labels say INR, but both averages are computed from the
# 'US dollar (USD)' column earlier in the file - confirm the intended unit.
fig_ma = go.Figure()
for ma_column in ('MA50', 'MA200'):
    fig_ma.add_trace(
        go.Scatter(
            x=gold_data_india.index,
            y=gold_data_india[ma_column],
            mode='lines',
            name=f'{ma_column} (INR)',
        )
    )
fig_ma.update_layout(
    title='50-day and 200-day Moving Averages of Gold Prices in India',
    xaxis_title='Date',
    yaxis_title='Gold Price (INR)',
)
fig_ma.show()

# Residual = actual minus predicted price for each held-out row.
residuals = y_test_india.sub(y_pred_india)

# Scatter the residuals against the predictions to inspect the error spread.
residual_axis_labels = {'x': 'Predicted Prices (INR)', 'y': 'Residuals (INR)'}
fig_res = px.scatter(
    x=y_pred_india,
    y=residuals,
    labels=residual_axis_labels,
    title='Residual Plot of Predicted Prices in India',
)
fig_res.update_layout(showlegend=False)
fig_res.show()

# Forecast the 30 days that follow the last historical row.
# date_range includes its start value, so start one day after the last
# observation; starting on the last date itself (as before) would re-use a day
# that already has data and cover only 29 new days.
# Assumes the index is in ascending date order, so index[-1] is the latest date.
last_observed_date = gold_data_india.index[-1]
future_dates = pd.date_range(start=last_observed_date + pd.Timedelta(days=1), periods=30, freq='D')

# Reuse the latest values of the existing moving-average columns instead of
# recomputing the rolling means, and read them with .iloc[-1]: Series[-1] on a
# date-indexed Series is deprecated positional indexing.
# Every future row gets the same two feature values, so the linear model
# returns the same prediction for all 30 days (the forecast is a flat line).
future_features = pd.DataFrame(
    {
        'MA50': gold_data_india['MA50'].iloc[-1],
        'MA200': gold_data_india['MA200'].iloc[-1],
    },
    index=future_dates,
)

future_predictions = model_india.predict(future_features)

# Historical prices followed by the forecast.
# NOTE(review): labels say INR, but the plotted column is 'US dollar (USD)' -
# confirm the intended unit.
fig_forecast = go.Figure()
fig_forecast.add_trace(go.Scatter(x=gold_data_india.index, y=gold_data_india['US dollar (USD)'], mode='lines', name='Historical Prices (INR)'))
fig_forecast.add_trace(go.Scatter(x=future_dates, y=future_predictions, mode='lines', name='Forecasted Prices (INR)'))
fig_forecast.update_layout(title='Forecasting Gold Prices in India for the Next 30 Days',
                           xaxis_title='Date',
                           yaxis_title='Gold Price (INR)')
fig_forecast.show()


Mean Squared Error for India: 1161.124301579812
R^2 Score for India: 0.9952758080380611
