# Correlation and Diversification #

George is a trading enthusiast. For several months now, he has been spending a considerable amount of time and money on the stock markets. He enjoys investing in stocks, but he admits he is not an expert. He has made very good money on some investments, but he has also lost money on others. He understands that some of the gains have offset some of the losses... George has discovered the benefits of diversification.

In [2]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt

# Optimization
import statsmodels.api as sm 

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from functions import import_daily_financial_data

### Benefits of Diversification ###

In [3]:
# Import Data
# Every ticker is requested with the same date window and `returns=True`,
# so the shared keyword arguments are written once and reused per call.
data_request = dict(
    start_date='2015-01-01',
    end_date='2025-01-01',
    returns=True,
)

# Apple Data
df_1 = import_daily_financial_data(ticker='AAPL', **data_request)

# Amazon Data
df_2 = import_daily_financial_data(ticker='AMZN', **data_request)

# Meta Data
df_3 = import_daily_financial_data(ticker='META', **data_request)

# Microsoft Data
df_4 = import_daily_financial_data(ticker='MSFT', **data_request)

# Walmart Data
df_5 = import_daily_financial_data(ticker='WMT', **data_request)

In [4]:
# Create the joint dataframe
# Columns are assigned one at a time, in ticker order, to an initially
# empty frame; each one takes the 'returns' column of its source frame.

df_returns = pd.DataFrame()

for ticker, frame in (
    ('AAPL', df_1),
    ('AMZN', df_2),
    ('META', df_3),
    ('MSFT', df_4),
    ('WMT', df_5),
):
    df_returns[ticker] = frame['returns']

df_returns

In [5]:
# Expected returns: the mean of the daily returns is used as the estimate,
# then scaled by 252 to express it on an annual basis.

expected_returns = (df_returns.mean() * 252).rename('annualized_returns')

expected_returns

In [6]:
# The volatility is the standard deviation of the daily returns,
# annualized by multiplying by sqrt(252).
# Rows with a missing value in any column are dropped before computing it.

volatility = df_returns.dropna().std() * np.sqrt(252)
volatility.name = 'annualized_volatility'

volatility

In [7]:
# Covariance Matrix
# Computed on rows with no missing values and scaled by 252 (annualized).

cov_matrix = 252 * df_returns.dropna().cov()

cov_matrix

In [8]:
# Create the Equal-Weighted Portfolio
# One weight per column of df_returns, each equal to 1 / number of assets.

n_assets = df_returns.shape[1]
weights = np.full(n_assets, 1 / n_assets)

weights

In [9]:
# Portfolio expected return: weighted sum of the annualized asset returns
portfolio_return = np.dot(weights, expected_returns)

# Portfolio variance (w' * Cov * w) and volatility (its square root)
# NOTE: `portfolio_variance` is also the name of a function defined further
# down in this notebook; whichever cell runs last owns the name.
portfolio_variance = np.dot(weights.T, np.dot(cov_matrix, weights))
portfolio_volatility = np.sqrt(portfolio_variance)

# The first line reports the expected return, matching its label
print("\nPortfolio Expected Return:", portfolio_return)
print("Portfolio Volatility:", portfolio_volatility)

In [10]:
# Plot the Standard Deviations
# Bars show each asset's annualized volatility; the dashed horizontal line
# marks the volatility of the equal-weighted portfolio.

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(df_returns.columns, volatility, color='skyblue', label='Individual Asset Volatility')
ax.axhline(y=portfolio_volatility, color='red', linestyle='--', label='Portfolio Volatility')
ax.set_ylabel('Annualized Volatility')
ax.set_title('Diversification: Individual vs. Portfolio Volatility')
ax.legend()
plt.show()

In [11]:
# Efficiency Ratios
# Annualized return divided by annualized volatility, per asset and for
# the portfolio as a whole.

eff_ratios = expected_returns/volatility
portafolio_effciency = portfolio_return/portfolio_volatility

# Labels describe the plotted quantity (efficiency ratios, not volatility)
plt.figure(figsize=(12, 8))
plt.bar(df_returns.columns, eff_ratios, color='salmon', label='Individual Asset Efficiency Ratio')
plt.axhline(y=portafolio_effciency, color='black', linestyle='--', label='Portfolio Efficiency Ratio')
plt.ylabel('Return per Unit of Volatility')
plt.title('Diversification: Individual vs. Portfolio Efficiency Ratios')
plt.legend()
plt.show()

In [12]:
# Adjusted Returns
# Print the portfolio ratio first, then the per-stock ratios, one per line.

print(
    f"Portfolio Risk-Adjusted Returns: {portafolio_effciency}",
    f"Stocks Risk-Adjusted Returns:\n{eff_ratios}",
    sep="\n",
)

### The Correlation ###

In [13]:
# Correlation is covariance rescaled to the range [-1, 1]
# (Pearson is the default method; it is spelled out here for clarity).

pearson = df_returns.corr(method='pearson')

pearson

In [14]:
# Non-Linear Correlations
# Spearman rank correlation; `min_periods=1` is the library default.

spearman = df_returns.corr(method='spearman', min_periods=1)

spearman

In [15]:
# Let us get the Residual Returns (we show how to obtain them in Module 4)
# The path is assembled with os.path.join so it resolves on any operating
# system rather than only where backslash is the path separator.
res_returns_path = os.path.join('..', 'additional_data', 'capm_residual_returns.csv')
res_returns = pd.read_csv(res_returns_path)

# The first CSV column is read as 'Unnamed: 0'; rename it so it can be used
# as a datetime index.
res_returns = res_returns.rename(columns={'Unnamed: 0': 'Date'})

res_returns = res_returns.set_index('Date')
res_returns.index = pd.to_datetime(res_returns.index)

# Just the stocks we need
res_returns = res_returns[df_returns.columns]
res_returns

In [16]:
# Correlation matrix of the residual returns (Pearson, the default method)
res_returns.corr(method='pearson')

In [17]:
# Create Figure
fig, ax1 = plt.subplots(dpi=300)

# AMZN: cumulative sum of residual returns on the left axis
amzn_cumulative = res_returns['AMZN'].cumsum()
amzn_cumulative.plot(ax=ax1, color='blue')
ax1.set_xlabel('Date')
ax1.set_ylabel('AMZN Residual Returns', color='blue')

# WMT: cumulative sum of residual returns on a second y-axis sharing the x-axis
ax2 = ax1.twinx()

wmt_cumulative = res_returns['WMT'].cumsum()
wmt_cumulative.plot(ax=ax2, color='red')
ax2.set_ylabel('WMT Residual Returns', color='red')

plt.show()

In [18]:
def detrending_series(
        y: pd.Series,
        residuals=True
):
    """Detrend a series with the best of three polynomial time-trend fits.

    Missing values are dropped, then three OLS models are fitted against a
    time index 1..n: constant only, constant + linear trend, and
    constant + linear + quadratic trend. The fit with the lowest AIC wins.

    Parameters
    ----------
    y : pd.Series
        Series to detrend; NaN entries are removed before fitting.
    residuals : bool, default True
        If True return the residuals of the selected fit (the detrended
        series); otherwise return its fitted values (the estimated trend).

    Returns
    -------
    The ``resid`` or ``fittedvalues`` attribute of the selected fit.
    """
    Y = y.dropna()

    # Deterministic time index 1, 2, ..., n aligned with the cleaned series
    trend = pd.Series(
        np.arange(1, len(Y) + 1),
        index=Y.index
    )

    # Candidate designs, from a constant only up to a quadratic trend
    models = [
        sm.OLS(Y, sm.add_constant(np.ones_like(Y))),
        sm.OLS(Y, sm.add_constant(trend)),
        sm.OLS(Y, sm.add_constant(pd.DataFrame({"trend": trend, "trend_sq": trend ** 2}))),
    ]

    results = [model.fit() for model in models]
    aics = [result.aic for result in results]

    # Lowest AIC wins; ties resolve to the simpler (earlier) model
    best_model_index = np.argmin(aics)
    best_result = results[best_model_index]

    if residuals:
        return best_result.resid

    return best_result.fittedvalues

In [19]:
# Let us detrend
# Accumulate the residual returns first, then detrend each column separately.

cumulative_residuals = res_returns.cumsum()
cumsum_detrended = cumulative_residuals.apply(detrending_series)

cumsum_detrended

In [20]:
# Create Figure
fig, ax1 = plt.subplots(dpi=300)

# AMZN: detrended cumulative residual returns on the left axis
amzn_detrended = cumsum_detrended['AMZN']
amzn_detrended.plot(ax=ax1, color='blue')
ax1.set_xlabel('Date')
ax1.set_ylabel('AMZN Residual Returns', color='blue')

# WMT: detrended cumulative residual returns on a second y-axis
ax2 = ax1.twinx()

wmt_detrended = cumsum_detrended['WMT']
wmt_detrended.plot(ax=ax2, color='red')
ax2.set_ylabel('WMT Residual Returns', color='red')

plt.show()

In [31]:
# Split the detrended AMZN/WMT series once at the 2019/2020 boundary
pair = ['AMZN', 'WMT']

# Before 2020: all data up to 2019-12-31
pre_2020 = cumsum_detrended.loc[:'2019-12-31', pair]
corr_before_2020 = pre_2020['AMZN'].corr(pre_2020['WMT'])

# After 2020: all data from 2020-01-01 onwards
post_2020 = cumsum_detrended.loc['2020-01-01':, pair]
corr_after_2020 = post_2020['AMZN'].corr(post_2020['WMT'])

print(f"The correlation between AMZN and WMT before 2020: {corr_before_2020}")
print(f"The correlation between AMZN and WMT after 2020: {corr_after_2020}")

In [22]:
# Calculate Rolling Correlation
# 252-observation rolling correlation between AMZN and WMT, computed twice:
# on the detrended cumulative residuals and on the raw residual returns.

rolling_corr_252d_cumsum = (
    cumsum_detrended['AMZN']
    .rolling(252)
    .corr(cumsum_detrended['WMT'])
)
rolling_corr_252d = (
    res_returns['AMZN']
    .rolling(252)
    .corr(res_returns['WMT'])
)

In [23]:
# Plot the rolling correlation of the detrended cumulative residuals

# Create the Plot
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(rolling_corr_252d_cumsum, label='Correlation AMZN vs WMT', color='red', alpha=0.7)
ax.axhline(y=0, color='black', linestyle='dashed')  # zero-correlation reference

# Config
ax.set_title('Rolling Correlation Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Correlation')
ax.legend()
ax.grid()

# Show
plt.show()

In [24]:
# Plot the rolling correlation of the residual returns

# Create the plot
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(rolling_corr_252d, label='Correlation AMZN vs WMT', color='red', alpha=0.7)
ax.axhline(y=0, color='black', linestyle='dashed')  # zero-correlation reference

# Config
ax.set_title('Rolling Correlation Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Correlation')
ax.legend()
ax.grid()

# Show
plt.show()

In [25]:
def portfolio_variance(
    weights,
    returns
):
    """Return the portfolio variance w' * Cov * w.

    `weights` is converted to a NumPy array. `returns` holds one column per
    asset; a DataFrame is reduced to its underlying values before the
    sample covariance matrix is computed across columns.
    """
    w = np.array(weights)
    observations = returns.values if isinstance(returns, pd.DataFrame) else returns

    # np.cov expects variables in rows, hence the transpose
    covariance = np.cov(observations.T)
    return w.T @ covariance @ w

In [26]:
# Create the portfolio
# Two-asset, equally weighted portfolio of AMZN and WMT.
portfolio_df = df_returns[['AMZN', 'WMT']]
weights = [0.5, 0.5]

# Returns are multiplied by 100 before the variance is computed
scaled_returns = portfolio_df * 100
port_var = portfolio_variance(weights, scaled_returns)

print(f'Portfolio Variance: {port_var:.4f}')

In [27]:
# Define the function for the rolling portfolio variance
def rolling_portfolio_variance(returns_df, weights, window=252):
    """Compute the portfolio variance w' * Cov * w over a rolling window.

    Parameters
    ----------
    returns_df : pd.DataFrame
        One column of returns per asset, one row per observation.
    weights : array-like
        Portfolio weights, one per column of `returns_df`.
    window : int, default 252
        Number of consecutive rows used for each covariance estimate.

    Returns
    -------
    pd.Series
        Indexed like `returns_df`. The first ``window - 1`` entries are NaN;
        if the frame has fewer rows than `window`, every entry is NaN.

    Raises
    ------
    ValueError
        If `window` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    weights = np.array(weights)
    n_obs = len(returns_df)

    # Pre-fill with NaN so short inputs yield an all-NaN result of the
    # right length instead of a length mismatch with the index.
    values = np.full(n_obs, np.nan)

    for end in range(window, n_obs + 1):
        window_returns = returns_df.iloc[end - window:end]
        # np.cov expects variables in rows, hence the transpose
        cov_matrix = np.cov(window_returns.T)
        values[end - 1] = weights.T @ cov_matrix @ weights

    return pd.Series(values, index=returns_df.index)


In [28]:
# Calculate the Rolling Variance
# Default 252-row window; the result is multiplied by 100.
# NOTE(review): the static variance cell scales the returns (not the
# variance) by 100 - confirm the two are meant to be on different scales.

rolling_var = 100 * rolling_portfolio_variance(portfolio_df, weights)

rolling_var.dropna()

In [29]:
# Create Figure
fig, ax1 = plt.subplots(dpi=300)

# Portfolio Variance (left axis), leading NaN entries removed
variance_to_plot = rolling_var.dropna()
variance_to_plot.plot(ax=ax1, color='turquoise')
ax1.set_xlabel('Date')
ax1.set_ylabel('Portfolio Variance', color='turquoise')

# Rolling Correlation (second y-axis sharing the same x-axis)
ax2 = ax1.twinx()

rolling_corr_252d.plot(ax=ax2, color='orange')
ax2.set_ylabel('Rolling Correlation', color='orange')

plt.show()