# How is BTC price related to commodities?

We want to find out if there is a correlation between the price of BTCUSD and commodities such as gold, silver, oil and wheat

### Imports

In [19]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.optimize import minimize
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

### Styling

In [20]:
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")


### Init data (grab only the closing price of the product)

In [21]:
INCLUDE_COVID = False

tickers = {
    'BTC-USD': 'Bitcoin',
    'GC=F': 'Gold Futures',
    'SI=F': 'Silver Futures',
    'CL=F': 'Crude Oil',
    'HE=F': 'Lean Hogs (Pigs)',
    'HG=F': 'Copper',
    'ZW=F' : 'Wheat (Chicago)',
    'OJ=F' : 'Orange Juice'
}


data = yf.download(list(tickers.keys()), start='2018-01-01', end='2025-01-01')

data = pd.DataFrame(data)['Close']

#rename columns to their tickers' full names
data.columns = [
    (tickers[col[1]] if col[1] in tickers else col[1]) if isinstance(col, tuple) else (tickers[col] if col in tickers else col)
    for col in data.columns
]


[*********************100%***********************]  8 of 8 completed


### Describe the data

In [22]:
print(f"Data shape: {data.shape}")
print(f"Date range: {data.index[0].date()} to {data.index[-1].date()}")
missing_counts = data.isnull().sum()
missing_pct = data.isnull().mean() * 100
print("Missing values (count):")
print(missing_counts)
print("\nMissing values (percent):")
print(missing_pct.round(2).astype(str) + '%')


Data shape: (2557, 8)
Date range: 2018-01-01 to 2024-12-31
Missing values (count):
Bitcoin               0
Crude Oil           796
Gold Futures        797
Lean Hogs (Pigs)    796
Copper              796
Orange Juice        795
Silver Futures      797
Wheat (Chicago)     796
dtype: int64

Missing values (percent):
Bitcoin               0.0%
Crude Oil           31.13%
Gold Futures        31.17%
Lean Hogs (Pigs)    31.13%
Copper              31.13%
Orange Juice        31.09%
Silver Futures      31.17%
Wheat (Chicago)     31.13%
dtype: object


### Impute the data with forward fill, dropna

In [23]:
data = data.ffill().dropna() #this will only affect the first value since forward fill grabs the previous value 

### Data preview

In [24]:
data.head(10)


Unnamed: 0_level_0,Bitcoin,Crude Oil,Gold Futures,Lean Hogs (Pigs),Copper,Orange Juice,Silver Futures,Wheat (Chicago)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-02,14982.099609,60.369999,1313.699951,70.724998,3.256,135.699997,17.121,433.5
2018-01-03,15201.0,61.630001,1316.199951,71.050003,3.237,139.5,17.184999,436.0
2018-01-04,15599.200195,62.009998,1319.400024,71.375,3.2425,139.5,17.188999,434.0
2018-01-05,17429.5,61.439999,1320.300049,71.425003,3.207,138.850006,17.205,430.75
2018-01-06,17527.0,61.439999,1320.300049,71.425003,3.207,138.850006,17.205,430.75
2018-01-07,16477.599609,61.439999,1320.300049,71.425003,3.207,138.850006,17.205,430.75
2018-01-08,15170.099609,61.73,1318.599976,72.974998,3.201,138.0,17.063999,427.75
2018-01-09,14595.400391,62.959999,1311.699951,73.175003,3.1945,135.399994,16.936001,432.25
2018-01-10,14973.299805,63.57,1317.400024,72.525002,3.215,134.850006,16.962999,434.25
2018-01-11,13405.799805,63.799999,1320.599976,70.974998,3.2135,136.5,16.893999,433.25


### Normalize data $[0, 100]$

In [25]:
normalized = (data / data.iloc[0]) * 100

### Plot normalized prices

In [26]:
fig = px.line(normalized, x=normalized.index, y=normalized.columns,
              title='Normalized Price Movements (Base 100)',
              labels={'value': 'Normalized Price', 'variable': 'Asset', 'index': 'Date'})
fig.update_layout(height=600, hovermode='x unified')
fig.show()


### Summary statistics

In [27]:
print("Summary Statistics (% Total Return):")
returns_summary = ((data.iloc[-1] / data.iloc[0] - 1) * 100).sort_values(ascending=False)
print(returns_summary.to_string())

returns = data.pct_change().dropna()

print("Returns Statistics:\n")
print(returns.describe().round(4))


Summary Statistics (% Total Return):
Bitcoin             523.605540
Orange Juice        266.617547
Gold Futures        100.137021
Silver Futures       69.032183
Wheat (Chicago)      27.220300
Copper               22.420148
Crude Oil            18.800733
Lean Hogs (Pigs)     14.952287
Returns Statistics:

         Bitcoin  Crude Oil  Gold Futures  Lean Hogs (Pigs)     Copper  \
count  2555.0000  2555.0000     2555.0000         2555.0000  2555.0000   
mean      0.0013    -0.0011        0.0003            0.0003     0.0001   
std       0.0352     0.0703        0.0078            0.0236     0.0118   
min      -0.3717    -3.0597       -0.0498           -0.2352    -0.0669   
25%      -0.0138    -0.0048       -0.0016           -0.0044    -0.0042   
50%       0.0008     0.0000        0.0000            0.0000     0.0000   
75%       0.0162     0.0083        0.0030            0.0043     0.0043   
max       0.1875     0.3766        0.0595            0.2666     0.0746   

       Orange Juice  Silver

### Return distributions

In [28]:
fig = make_subplots(rows=3, cols=3, subplot_titles=returns.columns.tolist())

for idx, col in enumerate(returns.columns):
    row = idx // 3 + 1
    col_pos = idx % 3 + 1
    
    fig.add_trace(
        go.Histogram(x=returns[col], name=col, nbinsx=100, showlegend=False),
        row=row, col=col_pos
    )
    
    # Add mean line
    fig.add_vline(x=0, line_dash="dash", line_color="red", 
                  row=row, col=col_pos)

fig.update_layout(height=1000, title_text="Daily Return Distributions", showlegend=False)
fig.update_xaxes(title_text="Daily Return")
fig.update_yaxes(title_text="Frequency")
fig.show()


### Correlation matrix

In [29]:
corr_matrix = returns.corr()

fig = px.imshow(corr_matrix, 
                text_auto='.3f',
                aspect='auto',
                color_continuous_scale='RdBu_r',
                zmin=-1, zmax=1,
                title='Correlation Matrix: Daily Returns (Full Period)')
fig.update_layout(height=700)
fig.show()

# Bitcoin correlations specifically
btc_corr = corr_matrix['Bitcoin'].drop('Bitcoin').sort_values(ascending=False)
print("\nBitcoin Correlations:\n")
print(btc_corr.to_string())

# Visualize BTC correlations
fig = px.bar(btc_corr, orientation='h',
             title='Bitcoin Correlation with Assets',
             labels={'value': 'Correlation Coefficient', 'index': 'Asset'},
             color=btc_corr.values,
             color_continuous_scale='RdYlGn',
             color_continuous_midpoint=0)
fig.update_layout(height=500, showlegend=False)
fig.show()



Bitcoin Correlations:

Silver Futures      0.135385
Gold Futures        0.105935
Copper              0.096507
Crude Oil           0.050636
Wheat (Chicago)     0.029020
Orange Juice        0.021110
Lean Hogs (Pigs)    0.003947


### Rolling 90-day correlations

In [30]:
window = 90

# Calculate rolling correlations for the 4 key commodities
rolling_corr_gold = returns['Bitcoin'].rolling(window).corr(returns['Gold Futures'])
rolling_corr_silver = returns['Bitcoin'].rolling(window).corr(returns['Silver Futures'])
rolling_corr_oil = returns['Bitcoin'].rolling(window).corr(returns['Crude Oil'])
rolling_corr_pigs = returns['Bitcoin'].rolling(window).corr(returns['Lean Hogs (Pigs)'])

# Create dataframe for plotting
rolling_corr_df = pd.DataFrame({
    'BTC vs Gold': rolling_corr_gold,
    'BTC vs Silver': rolling_corr_silver,
    'BTC vs Crude Oil': rolling_corr_oil,
    'BTC vs Pigs': rolling_corr_pigs
})

# Main rolling correlation plot
fig = px.line(rolling_corr_df, 
              x=rolling_corr_df.index, 
              y=rolling_corr_df.columns,
              title=f'{window}-Day Rolling Correlation: Bitcoin vs Key Commodities',
              labels={'value': 'Correlation Coefficient', 'variable': 'Comparison', 'index': 'Date'})

# Add zero line
fig.add_hline(y=0, line_dash="dash", line_color="black", opacity=0.5)

# Add event markers
events = {
    '2020-03-15': 'COVID Crash',
    '2021-11-10': 'BTC ATH',
    '2022-11-10': 'FTX Collapse',
    '2022-03-01': 'Ukraine War',
    '2024-03-01': '2024 Bull Run'
}

for date, event in events.items():
    try:
        fig.add_vline(x=pd.to_datetime(date), line_dash="dot", 
                      line_color="red", opacity=0.5,
                      annotation_text=event, 
                      annotation_position="top",
                      annotation_textangle=-90)
    except:
        pass

fig.update_layout(height=600, hovermode='x unified')
fig.show()


### Statistical summary

In [31]:
print("\nRolling Correlation Statistics:\n")
stats_df = pd.DataFrame({
    'Mean': rolling_corr_df.mean(),
    'Std': rolling_corr_df.std(),
    'Min': rolling_corr_df.min(),
    'Max': rolling_corr_df.max(),
    'Current': rolling_corr_df.iloc[-1]
}).round(3)
print(stats_df)



Rolling Correlation Statistics:

                   Mean    Std    Min    Max  Current
BTC vs Gold       0.095  0.163 -0.291  0.544    0.048
BTC vs Silver     0.124  0.150 -0.198  0.522   -0.003
BTC vs Crude Oil  0.044  0.145 -0.272  0.379   -0.217
BTC vs Pigs       0.009  0.126 -0.325  0.314    0.059


### Individual compariston plots

In [32]:
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=['BTC vs Gold', 'BTC vs Silver', 'BTC vs Crude Oil', 'BTC vs Pigs'],
    vertical_spacing=0.12,
    horizontal_spacing=0.1
)

comparisons = [
    ('BTC vs Gold', 1, 1, 'blue'),
    ('BTC vs Silver', 1, 2, 'gray'),
    ('BTC vs Crude Oil', 2, 1, 'green'),
    ('BTC vs Pigs', 2, 2, 'red')
]

for comp, row, col, color in comparisons:
    fig.add_trace(
        go.Scatter(x=rolling_corr_df.index, 
                   y=rolling_corr_df[comp],
                   name=comp,
                   line=dict(color=color, width=2),
                   showlegend=False),
        row=row, col=col
    )
    
    # Add zero line for each subplot
    fig.add_hline(y=0, line_dash="dash", line_color="black", 
                  opacity=0.3, row=row, col=col)
    
    # Add shaded regions for positive/negative correlation
    fig.add_hrect(y0=0, y1=1, fillcolor="green", opacity=0.1, 
                  layer="below", line_width=0, row=row, col=col)
    fig.add_hrect(y0=-1, y1=0, fillcolor="red", opacity=0.1, 
                  layer="below", line_width=0, row=row, col=col)

fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Correlation", range=[-1, 1])
fig.update_layout(height=800, title_text=f"{window}-Day Rolling Correlations (Individual Views)")
fig.show()


### Compare current vs historical correlation

In [33]:
current_vs_historical = pd.DataFrame({
    'Current (90d)': rolling_corr_df.iloc[-1],
    'Historical Avg': rolling_corr_df.mean(),
    'Difference': rolling_corr_df.iloc[-1] - rolling_corr_df.mean()
}).round(3)

print("\nCurrent vs Historical Average Correlation:\n")
print(current_vs_historical)

fig = go.Figure()

fig.add_trace(go.Bar(
    name='Current (90d)',
    x=current_vs_historical.index,
    y=current_vs_historical['Current (90d)'],
    marker_color='lightblue'
))

fig.add_trace(go.Bar(
    name='Historical Avg',
    x=current_vs_historical.index,
    y=current_vs_historical['Historical Avg'],
    marker_color='navy'
))

fig.update_layout(
    title='Current vs Historical Average Correlation with Bitcoin',
    xaxis_title='Asset',
    yaxis_title='Correlation Coefficient',
    barmode='group',
    height=500
)
fig.add_hline(y=0, line_dash="dash", line_color="black")
fig.show()



Current vs Historical Average Correlation:

                  Current (90d)  Historical Avg  Difference
BTC vs Gold               0.048           0.095      -0.047
BTC vs Silver            -0.003           0.124      -0.127
BTC vs Crude Oil         -0.217           0.044      -0.261
BTC vs Pigs               0.059           0.009       0.050


In [34]:
quarterly_corr = pd.DataFrame()

for asset in ['Gold Futures', 'Silver Futures', 'Crude Oil', 'Lean Hogs (Pigs)']:
    quarterly_corr[asset] = returns['Bitcoin'].rolling(window).corr(returns[asset])

# Resample to quarters
quarterly_corr_resampled = quarterly_corr.resample('Q').mean()

fig = px.imshow(quarterly_corr_resampled.T,
                labels=dict(x="Quarter", y="Asset", color="Correlation"),
                x=quarterly_corr_resampled.index.strftime('%Y-Q%q'),
                y=quarterly_corr_resampled.columns,
                color_continuous_scale='RdBu_r',
                zmin=-0.5, zmax=0.5,
                aspect='auto',
                title='Quarterly Average Correlation with Bitcoin')
fig.update_layout(height=400)
fig.show()


In [35]:
print("\n" + "="*80)
print("üê∑ FUN FACTS ABOUT BITCOIN vs PIGS CORRELATION üê∑")
print("="*80)

pigs_positive_pct = (rolling_corr_pigs > 0).sum() / len(rolling_corr_pigs.dropna()) * 100
avg_corr_pigs = rolling_corr_pigs.mean()
max_corr_pigs = rolling_corr_pigs.max()
min_corr_pigs = rolling_corr_pigs.min()

print(f"\nüìä Pigs were POSITIVELY correlated with BTC: {pigs_positive_pct:.1f}% of the time")
print(f"üìä Average correlation: {avg_corr_pigs:.3f}")
print(f"üìä Highest correlation: {max_corr_pigs:.3f} on {rolling_corr_pigs.idxmax().date()}")
print(f"üìä Lowest correlation: {min_corr_pigs:.3f} on {rolling_corr_pigs.idxmin().date()}")

if abs(avg_corr_pigs) < 0.1:
    print("\nü§î Bitcoin and Pigs are basically UNCORRELATED!")
    print("   This makes sense - pork belly futures are driven by")
    print("   agricultural supply/demand, not monetary policy or tech sentiment.")
elif avg_corr_pigs > 0.3:
    print("\nüöÄ Surprisingly, Bitcoin and Pigs show POSITIVE correlation!")
elif avg_corr_pigs < -0.3:
    print("\nüìâ Bitcoin and Pigs show NEGATIVE correlation!")

# Compare pigs to other commodities
print("\nüìä How do PIGS compare to other commodities?")
avg_corrs = {
    'Gold': rolling_corr_gold.mean(),
    'Silver': rolling_corr_silver.mean(),
    'Oil': rolling_corr_oil.mean(),
    'Pigs': rolling_corr_pigs.mean()
}

sorted_corrs = sorted(avg_corrs.items(), key=lambda x: abs(x[1]), reverse=True)
print("\nRanked by absolute correlation strength:")
for i, (asset, corr) in enumerate(sorted_corrs, 1):
    emoji = "ü•á" if i == 1 else "ü•à" if i == 2 else "ü•â" if i == 3 else "üê∑" if asset == "Pigs" else "üìä"
    print(f"  {emoji} {i}. {asset}: {corr:.3f}")

print("\n" + "="*80)

# %% [markdown]
# ## 6. Volatility Analysis

# %% Calculate annualized volatility
volatility = returns.std() * np.sqrt(252) * 100
avg_returns = returns.mean() * 252 * 100
sharpe_ratio = avg_returns / volatility

risk_return = pd.DataFrame({
    'Annual Return (%)': avg_returns,
    'Annual Volatility (%)': volatility,
    'Sharpe Ratio': sharpe_ratio
}).sort_values('Sharpe Ratio', ascending=False)

print("üìä Risk-Return Profile:\n")
print(risk_return.round(2))

# Visualize
fig = px.scatter(risk_return, 
                 x='Annual Volatility (%)', 
                 y='Annual Return (%)',
                 text=risk_return.index,
                 title='Risk-Return Profile',
                 color='Sharpe Ratio',
                 color_continuous_scale='Viridis',
                 size=[300]*len(risk_return))

fig.update_traces(textposition='top center', marker=dict(line=dict(width=2, color='black')))
fig.update_layout(height=600)
fig.add_hline(y=0, line_dash="dash", line_color="black", opacity=0.5)
fig.show()

# %% [markdown]
# ## 7. Summary & Key Findings

# %% Generate summary
print("\n" + "="*80)
print(" "*20 + "KEY FINDINGS: BTC vs COMMODITIES")
print("="*80)

print("\n1Ô∏è‚É£  PERFORMANCE (Full Period)")
print("-" * 50)
total_returns = ((data.iloc[-1] / data.iloc[0] - 1) * 100).sort_values(ascending=False)
for asset, ret in total_returns.items():
    print(f"   {asset:25s}: {ret:>8.1f}%")

print("\n2Ô∏è‚É£  CORRELATION WITH BITCOIN (Average)")
print("-" * 50)
print(f"   Gold:        {rolling_corr_gold.mean():>6.3f}")
print(f"   Silver:      {rolling_corr_silver.mean():>6.3f}")
print(f"   Crude Oil:   {rolling_corr_oil.mean():>6.3f}")
print(f"   Pigs üê∑:     {rolling_corr_pigs.mean():>6.3f}")

print("\n3Ô∏è‚É£  CURRENT CORRELATION (Last 90 days)")
print("-" * 50)
print(f"   Gold:        {rolling_corr_gold.iloc[-1]:>6.3f}")
print(f"   Silver:      {rolling_corr_silver.iloc[-1]:>6.3f}")
print(f"   Crude Oil:   {rolling_corr_oil.iloc[-1]:>6.3f}")
print(f"   Pigs üê∑:     {rolling_corr_pigs.iloc[-1]:>6.3f}")

print("\n4Ô∏è‚É£  VOLATILITY RANKING")
print("-" * 50)
for asset, vol in volatility.sort_values(ascending=False).items():
    print(f"   {asset:25s}: {vol:>6.2f}%")

print("\n" + "="*80)
print("\n‚úÖ Analysis complete!")



üê∑ FUN FACTS ABOUT BITCOIN vs PIGS CORRELATION üê∑

üìä Pigs were POSITIVELY correlated with BTC: 51.9% of the time
üìä Average correlation: 0.009
üìä Highest correlation: 0.314 on 2023-02-16
üìä Lowest correlation: -0.325 on 2018-04-11

ü§î Bitcoin and Pigs are basically UNCORRELATED!
   This makes sense - pork belly futures are driven by
   agricultural supply/demand, not monetary policy or tech sentiment.

üìä How do PIGS compare to other commodities?

Ranked by absolute correlation strength:
  ü•á 1. Silver: 0.124
  ü•à 2. Gold: 0.095
  ü•â 3. Oil: 0.044
  üê∑ 4. Pigs: 0.009

üìä Risk-Return Profile:

                  Annual Return (%)  Annual Volatility (%)  Sharpe Ratio
Gold Futures                   7.62                  12.42          0.61
Bitcoin                       33.90                  55.91          0.61
Orange Juice                  17.11                  29.24          0.59
Silver Futures                 8.36                  25.19          0.33
Lean Ho


                    KEY FINDINGS: BTC vs COMMODITIES

1Ô∏è‚É£  PERFORMANCE (Full Period)
--------------------------------------------------
   Bitcoin                  :    523.6%
   Orange Juice             :    266.6%
   Gold Futures             :    100.1%
   Silver Futures           :     69.0%
   Wheat (Chicago)          :     27.2%
   Copper                   :     22.4%
   Crude Oil                :     18.8%
   Lean Hogs (Pigs)         :     15.0%

2Ô∏è‚É£  CORRELATION WITH BITCOIN (Average)
--------------------------------------------------
   Gold:         0.095
   Silver:       0.124
   Crude Oil:    0.044
   Pigs üê∑:      0.009

3Ô∏è‚É£  CURRENT CORRELATION (Last 90 days)
--------------------------------------------------
   Gold:         0.048
   Silver:      -0.003
   Crude Oil:   -0.217
   Pigs üê∑:      0.059

4Ô∏è‚É£  VOLATILITY RANKING
--------------------------------------------------
   Crude Oil                : 111.64%
   Bitcoin                  :  55.91%
   