In [100]:
import yfinance as yf
import pandas as pd

# Define tickers
tickers = {'SPX': 'SPY', 'USO': 'USO', 'SLV': 'SLV', 'EUR/USD': 'EURUSD=X', 'GLD': 'GLD'}

# Download data for 2026
data = yf.download(list(tickers.values()), start="2010-01-01", end="2026-01-03")
df_new = data['Close'].rename(columns={v: k for k, v in tickers.items()})

# Save to the CURRENT folder (the one you are working in right now)
df_new.dropna().to_csv('gold_price_data_2026.csv')
print("SUCCESS: File 'gold_price_data_2026.csv' created in your current folder.")


[*********************100%***********************]  5 of 5 completed

SUCCESS: File 'gold_price_data_2026.csv' created in your current folder.





In [101]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
import joblib

# 1. Load Data (Rerun your yfinance script first if the file was corrupted)
# Use the full path where the file is actually located
#file_path = 'c:/Users/MAHIR/prise/gold_prise.pred/data/gold_price_data_2026.csv' 
# Assuming you saved it in the 'data' subfolder from your yfinance script

df = pd.read_csv('gold_price_data_2026.csv')
if 'Unnamed: 0' in df.columns:
    df = df.drop(columns=['Unnamed: 0'])


df = df.round({
    'SPX': 2, 
    'USO': 2, 
    'SLV': 2, 
    'GLD': 2, 
    'EUR/USD': 4
})


In [102]:
column_order = ['Date', 'SPX', 'USO', 'SLV', 'EUR/USD', 'GLD']
df = df[column_order]

In [103]:
df['Date'] = pd.to_datetime(df['Date']).dt.date


In [104]:
df.to_csv('gold_price_data_2026_CLEAN.csv', index=False)

print("SUCCESS: Dataset transformed to clean table format.")
print(df.head()) # This will now look like a clean grid

SUCCESS: Dataset transformed to clean table format.
         Date    SPX     USO    SLV  EUR/USD     GLD
0  2010-01-04  85.03  322.16  17.23   1.4424  109.80
1  2010-01-05  85.25  323.28  17.51   1.4366  109.70
2  2010-01-06  85.31  327.76  17.86   1.4404  111.51
3  2010-01-07  85.67  325.76  17.89   1.4318  110.82
4  2010-01-08  85.96  327.44  18.15   1.4411  111.37


In [105]:
x = df.drop(['GLD'], axis=1)
if 'Date' in x.columns:
    x = x.drop(['Date'], axis=1)
y = df['GLD']

# 2. Train Model
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(x_train, y_train)

# 3. Save Model Correctly (Use .pkl or .joblib)
joblib.dump(model, 'gold_model_2026.pkl')

# 4. Evaluation
predictions = model.predict(x_test)


mae = metrics.mean_absolute_error(y_test, predictions)
mse = metrics.mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
r2_score = metrics.r2_score(y_test, predictions)

 #5. Output Results
print("--- Random Forest Model Summary ---")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared Score: {r2_score:.4f}")

#print(f"R-squared Score: {metrics.r2_score(y_test, predictions):.4f}")

--- Random Forest Model Summary ---
Mean Absolute Error (MAE): 1.6017
Mean Squared Error (MSE): 9.2984
Root Mean Squared Error (RMSE): 3.0493
R-squared Score: 0.9967


In [106]:
import pandas as pd

# Use the column names from your specific model instance
correct_columns = model.feature_names_in_

# Create a dictionary with your values
# Ensure the keys match the names in 'correct_columns' exactly
data_dict = {
    'SPX': 6932.05,
    'USO': 70.20,
    'SLV': 65.22,
    'EUR/USD': 1.1796
}

# Create the DataFrame and REORDER it to match the training data
new_data = pd.DataFrame([data_dict])
new_data = new_data[correct_columns]

predicted_gld_price = model.predict(new_data)
print(f"Predicted Gold Price (GLD): {predicted_gld_price[0]:.2f}")



Predicted Gold Price (GLD): 410.44


In [107]:
df.head

<bound method NDFrame.head of             Date     SPX     USO    SLV  EUR/USD     GLD
0     2010-01-04   85.03  322.16  17.23   1.4424  109.80
1     2010-01-05   85.25  323.28  17.51   1.4366  109.70
2     2010-01-06   85.31  327.76  17.86   1.4404  111.51
3     2010-01-07   85.67  325.76  17.89   1.4318  110.82
4     2010-01-08   85.96  327.44  18.15   1.4411  111.37
...          ...     ...     ...    ...      ...     ...
4014  2025-12-26  690.31   68.48  71.12   1.1785  416.74
4015  2025-12-29  687.85   69.61  66.01   1.1773  398.60
4016  2025-12-30  687.01   69.74  68.98   1.1773  398.89
4017  2025-12-31  681.92   69.16  64.42   1.1747  396.31
4018  2026-01-02  683.17   68.96  65.75   1.1750  398.28

[4019 rows x 6 columns]>

In [108]:
import pandas as pd
import streamlit as st

# Load the dataset
df = pd.read_csv('gold_price_data_2026.csv')

# Clean and Reorder: Move GLD to the end and ensure features come first
# This makes it readable and matching your model's expectation
correct_order = ['Date', 'SPX', 'USO', 'SLV', 'EUR/USD', 'GLD']
df = df[correct_order]

# Display the "Dataform" in Streamlit
st.subheader("Historical Gold Price Data (2026 Updated)")

# Option A: Interactive Dataframe (Scrollable and Sortable)
st.dataframe(df.head(10), use_container_width=True) 

# Option B: Static Table (Cleaner for small subsets)
# st.table(df.head(5))


2026-01-03 12:47:56.808 Please replace `use_container_width` with `width`.

`use_container_width` will be removed after 2025-12-31.

For `use_container_width=True`, use `width='stretch'`. For `use_container_width=False`, use `width='content'`.


DeltaGenerator()

In [109]:
df.head

<bound method NDFrame.head of             Date         SPX         USO        SLV   EUR/USD         GLD
0     2010-01-04   85.027954  322.160004  17.230000  1.442398  109.800003
1     2010-01-05   85.253029  323.279999  17.510000  1.436596  109.699997
2     2010-01-06   85.313049  327.760010  17.860001  1.440403  111.510002
3     2010-01-07   85.673180  325.760010  17.889999  1.431803  110.820000
4     2010-01-08   85.958305  327.440002  18.150000  1.441109  111.370003
...          ...         ...         ...        ...       ...         ...
4014  2025-12-26  690.309998   68.480003  71.120003  1.178536  416.739990
4015  2025-12-29  687.849976   69.610001  66.010002  1.177274  398.600006
4016  2025-12-30  687.010010   69.739998  68.980003  1.177288  398.890015
4017  2025-12-31  681.919983   69.160004  64.419998  1.174729  396.309998
4018  2026-01-02  683.169983   68.959999  65.750000  1.175047  398.279999

[4019 rows x 6 columns]>

In [110]:
import pandas as pd
import streamlit as st

# Use read_csv to actually open the file
df = pd.read_csv("gold_price_data_2026.csv")

# Optional: Clean up the display for 2026 standards
# Remove the 'Unnamed: 0' index column if it exists
if 'Unnamed: 0' in df.columns:
    df = df.drop(columns=['Unnamed: 0'])

# Display the "Dataform" in your Streamlit App
st.subheader("Readable Dataset View")
st.dataframe(df.head(10)) 




DeltaGenerator()

In [111]:
df.head

<bound method NDFrame.head of             Date   EUR/USD         GLD        SLV         SPX         USO
0     2010-01-04  1.442398  109.800003  17.230000   85.027954  322.160004
1     2010-01-05  1.436596  109.699997  17.510000   85.253029  323.279999
2     2010-01-06  1.440403  111.510002  17.860001   85.313049  327.760010
3     2010-01-07  1.431803  110.820000  17.889999   85.673180  325.760010
4     2010-01-08  1.441109  111.370003  18.150000   85.958305  327.440002
...          ...       ...         ...        ...         ...         ...
4014  2025-12-26  1.178536  416.739990  71.120003  690.309998   68.480003
4015  2025-12-29  1.177274  398.600006  66.010002  687.849976   69.610001
4016  2025-12-30  1.177288  398.890015  68.980003  687.010010   69.739998
4017  2025-12-31  1.174729  396.309998  64.419998  681.919983   69.160004
4018  2026-01-02  1.175047  398.279999  65.750000  683.169983   68.959999

[4019 rows x 6 columns]>