# Correlation and Diversification #

In [430]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt

# Optimization
import statsmodels.api as sm 
from scipy.optimize import minimize

# Pretty Notation
from IPython.display import display, Math

### Benefits of Diversification ###

In [433]:
# Import Data

# One frame per ticker, fetched in this order:
# Apple, Amazon, Meta, Microsoft, Walmart.
df_1, df_2, df_3, df_4, df_5 = (
    import_financial_data(ticker)
    for ticker in ("AAPL", "AMZN", "META", "MSFT", "WMT")
)

In [434]:
# Create the joint dataframe

# Ticker -> source frame; the insertion order fixes the column order.
close_sources = {
    'AAPL': df_1,
    'AMZN': df_2,
    'META': df_3,
    'MSFT': df_4,
    'WMT': df_5,
}

# Copy each ticker's 'Close' column into the joint frame, one column at a time.
df_data = pd.DataFrame()
for ticker, frame in close_sources.items():
    df_data[ticker] = frame['Close']

# Keep only the rows where every ticker has a value.
df_data = df_data.dropna()

# One-period percentage changes; the leading NaN row is dropped.
df_returns = df_data.pct_change(1).dropna()

df_returns

In [435]:
# Use the historical average daily return as the expected return,
# scaled by 252 to annualize it.
expected_returns = 252 * df_returns.mean()

expected_returns

In [436]:
# Volatility: standard deviation of the daily returns, annualized with
# the square root of 252.
volat = np.sqrt(252) * df_returns.dropna().std()

volat

In [437]:
# Covariance matrix of the daily returns, scaled by 252 to annualize it.
cov_matrix = 252 * df_returns.dropna().cov()

cov_matrix

In [438]:
# Create the Equal Weighted Portfolio

# Every asset receives the same share, 1 / n_assets.
n_assets = 5
weights = np.full(n_assets, 1 / n_assets)

weights

In [439]:
# Portfolio expected return: weighted sum of the assets' expected returns
portfolio_return = np.dot(weights, expected_returns)

# Portfolio variance (w' * Cov * w) and volatility (its square root).
# The variance is kept in `equal_weight_variance` instead of
# `portfolio_variance`: a function called `portfolio_variance` is defined
# further down in this notebook, and reusing the name here would overwrite
# that function whenever this cell is re-run after it.
equal_weight_variance = np.dot(weights.T, np.dot(cov_matrix, weights))
portfolio_volatility = np.sqrt(equal_weight_variance)

print("\nPortfolio Expected Return:", portfolio_return)
print("Portfolio Volatility:", portfolio_volatility)

In [440]:
# Plot the Volatilities

# One bar per asset, with the portfolio volatility as a horizontal reference.
vol_fig, vol_ax = plt.subplots(figsize=(10, 6))
vol_ax.bar(df_data.columns, volat, label='Individual Asset Volatility', color='skyblue')
vol_ax.axhline(y=portfolio_volatility, label='Portfolio Volatility', color='red', linestyle='--')
vol_ax.set_ylabel('Annualized Volatility')
vol_ax.set_title('Diversification: Individual vs. Portfolio Volatility')
vol_ax.legend()
plt.show()

In [441]:
# Efficiency Ratios

# Expected return earned per unit of volatility, for each asset and for the
# portfolio (no risk-free rate is subtracted).
eff_ratios = expected_returns/volat
# NOTE: the misspelled name is kept on purpose; a later cell reads it.
portafolio_effciency = portfolio_return/portfolio_volatility

# The labels describe efficiency ratios; they previously repeated the
# volatility chart's wording, which mislabeled what is plotted here.
plt.figure(figsize=(12, 8))
plt.bar(df_data.columns, eff_ratios, color='salmon', label='Individual Asset Efficiency Ratio')
plt.axhline(y=portafolio_effciency, color='black', linestyle='--', label='Portfolio Efficiency Ratio')
plt.ylabel('Return per Unit of Volatility')
plt.title('Diversification: Individual vs. Portfolio Efficiency Ratio')
plt.legend()
plt.show()

In [442]:
# Adjusted Returns

# Report the return-per-volatility ratio of the portfolio, then of each stock.
portfolio_line = f"Portfolio Risk-Adjusted Returns: {portafolio_effciency}"
stocks_block = f"Stocks Risk-Adjusted Returns:\n{eff_ratios}"

print(portfolio_line)
print(stocks_block)

### The Correlation ###

In [444]:
# Pearson correlation: covariance rescaled by both standard deviations,
# so every entry lies in [-1, 1].
pearson = df_returns.corr(method='pearson')

pearson

In [445]:
# Non Linear Correlations
# Spearman correlation is computed on ranks, so it captures monotonic
# co-movement rather than only linear co-movement.

spearman = df_returns.corr(method='spearman')

spearman

In [446]:
# Let us get the CAPM residuals

# Forward slashes keep the relative path valid on Windows, Linux and macOS;
# a backslash-separated path is only resolved as a directory path on Windows.
capm_residuals = pd.read_csv('../additional_data/capm_residuals.csv')

# Index the rows by date, parsed into datetimes so date-based slicing works.
capm_residuals = capm_residuals.set_index('Date')
capm_residuals.index = pd.to_datetime(capm_residuals.index)

# Just the stocks we need
capm_residuals = capm_residuals[df_returns.columns]

capm_residuals

In [447]:
# Pearson correlation between the stocks' CAPM residuals
capm_residuals.corr(method='pearson')

In [448]:
# Create Figure
fig, ax1 = plt.subplots(dpi=300)

# Left axis: running sum of the AMZN residuals
amzn_cumulative = capm_residuals['AMZN'].cumsum()
amzn_cumulative.plot(ax=ax1, color='blue')
ax1.set_xlabel('Date')
ax1.set_ylabel('AMZN Residual Returns', color='blue')

# Right axis (sharing the x axis): running sum of the WMT residuals
ax2 = ax1.twinx()
wmt_cumulative = capm_residuals['WMT'].cumsum()
wmt_cumulative.plot(ax=ax2, color='red')
ax2.set_ylabel('WMT Residual Returns', color='red')

plt.show()

In [449]:
def detrending_series(
        y: pd.Series,
        residuals: bool = True
) -> pd.Series:
    """Remove a polynomial time trend from a series.

    Missing values are dropped, then three OLS models are fitted against a
    time index running 1..n: a constant only, a linear trend, and a
    quadratic trend. The model with the lowest AIC is selected.

    Parameters
    ----------
    y : pd.Series
        Series to detrend; NaN entries are ignored.
    residuals : bool, default True
        If True return the residuals of the selected model (the detrended
        series); otherwise return its fitted values (the estimated trend).

    Returns
    -------
    pd.Series
        Residuals or fitted values of the selected model, covering the
        non-missing observations of ``y``.
    """
    Y = y.dropna()

    # Time index 1..n aligned with the cleaned series.
    trend = pd.Series(
        np.arange(1, len(Y) + 1),
        index=Y.index
    )

    # Candidate specifications, in increasing polynomial order. Higher-order
    # (cubic, quartic) candidates are intentionally not included.
    models = [
        # The regressor is already a column of ones: a constant-only fit.
        sm.OLS(Y, sm.add_constant(np.ones_like(Y))),
        sm.OLS(Y, sm.add_constant(trend)),
        sm.OLS(Y, sm.add_constant(pd.DataFrame({"trend": trend, "trend_sq": trend ** 2}))),
    ]

    results = [model.fit() for model in models]
    aics = [result.aic for result in results]

    # Lowest AIC wins; on a tie np.argmin keeps the lowest-order model.
    best_result = results[np.argmin(aics)]

    if residuals:
        return best_result.resid

    return best_result.fittedvalues

In [450]:
# Let us detrend

# Accumulate the residuals per stock, then detrend each column separately.
cumulative_residuals = capm_residuals.cumsum()
cumsum_detrended = cumulative_residuals.apply(detrending_series)

cumsum_detrended

In [451]:
# Create Figure
fig, ax1 = plt.subplots(dpi=300)

# Left axis: detrended cumulative AMZN residuals
amzn_detrended_series = cumsum_detrended['AMZN']
amzn_detrended_series.plot(ax=ax1, color='blue')
ax1.set_xlabel('Date')
ax1.set_ylabel('AMZN Residual Returns', color='blue')

# Right axis (sharing the x axis): detrended cumulative WMT residuals
ax2 = ax1.twinx()
wmt_detrended_series = cumsum_detrended['WMT']
wmt_detrended_series.plot(ax=ax2, color='red')
ax2.set_ylabel('WMT Residual Returns', color='red')

plt.show()

In [452]:
# Split the detrended series at the turn of 2020 (label slices are inclusive).
pre_2020 = cumsum_detrended.loc[:'2019-12-31']    # everything up to 2019-12-31
post_2020 = cumsum_detrended.loc['2020-01-01':]   # 2020-01-01 onwards

# AMZN vs WMT correlation inside each sub-period
corr_before_2020 = pre_2020['AMZN'].corr(pre_2020['WMT'])
corr_after_2020 = post_2020['AMZN'].corr(post_2020['WMT'])

print(f"The correlation between AMZN and WMT before 2020: {corr_before_2020}")
print(f"The correlation between AMZN and WMT after 2020: {corr_after_2020}")

In [453]:
# Calculate Rolling Correlation

# 252-observation rolling AMZN/WMT correlation, first on the detrended
# cumulative residuals and then on the raw CAPM residuals.
amzn_level, wmt_level = cumsum_detrended['AMZN'], cumsum_detrended['WMT']
rolling_corr_252d_cumsum = amzn_level.rolling(window=252).corr(wmt_level)

amzn_resid, wmt_resid = capm_residuals['AMZN'], capm_residuals['WMT']
rolling_corr_252d = amzn_resid.rolling(window=252).corr(wmt_resid)

In [454]:
# Plot the rolling correlation of the detrended cumulative residuals

# Draw the rolling-correlation series with a dashed zero reference line
corr_fig, corr_ax = plt.subplots(figsize=(10, 6))
corr_ax.plot(rolling_corr_252d_cumsum, color='red', alpha=0.7, label='Correlation AMZN vs WMT')
corr_ax.axhline(y=0, linestyle='dashed', color='black')

# Title, axis labels, legend and grid
corr_ax.set_title('Rolling Correlation Time Series')
corr_ax.set_xlabel('Time')
corr_ax.set_ylabel('Correlation')
corr_ax.legend()
corr_ax.grid()

# Show the chart
plt.show()

In [455]:
# Plot the rolling correlation of the raw CAPM residuals

# Draw the rolling-correlation series with a dashed zero reference line
resid_fig, resid_ax = plt.subplots(figsize=(10, 6))
resid_ax.plot(rolling_corr_252d, color='red', alpha=0.7, label='Correlation AMZN vs WMT')
resid_ax.axhline(y=0, linestyle='dashed', color='black')

# Title, axis labels, legend and grid
resid_ax.set_title('Rolling Correlation Time Series')
resid_ax.set_xlabel('Time')
resid_ax.set_ylabel('Correlation')
resid_ax.legend()
resid_ax.grid()

# Show the chart
plt.show()

In [456]:
def portfolio_variance(
    weights,
    returns
):
    """Return the sample variance of a fixed-weight portfolio.

    Computes ``w' C w``, where ``C`` is the sample covariance matrix of the
    columns of ``returns`` as produced by ``np.cov`` (N - 1 normalisation).

    Parameters
    ----------
    weights : array-like
        One weight per asset, in the same order as the columns of ``returns``.
    returns : pandas.DataFrame or array-like
        Observations in rows and assets in columns. Nested lists are
        accepted, and so is a single asset (one column).

    Returns
    -------
    float
        The portfolio variance.

    Raises
    ------
    ValueError
        If the number of weights does not match the number of assets.
    """
    weights = np.asarray(weights)
    if isinstance(returns, pd.DataFrame):
        returns = returns.values
    # Accept any array-like (e.g. nested lists), not only ndarrays.
    returns = np.asarray(returns)

    # Variance-covariance matrix with assets as variables. np.cov collapses
    # the single-asset case to a 0-d array, so restore the matrix shape
    # before the matrix products below.
    cov_matrix = np.atleast_2d(np.cov(returns, rowvar=False))
    port_var = weights.T @ cov_matrix @ weights
    return port_var

In [457]:
# Create the portfolio

# Two-asset sub-portfolio (AMZN and WMT), half of the capital in each.
portfolio_df = df_returns.loc[:, ['AMZN', 'WMT']]
weights = [0.5, 0.5]

# Full-sample variance of that portfolio's returns
port_var = portfolio_variance(weights=weights, returns=portfolio_df)

port_var

In [458]:
def rolling_portfolio_variance(returns_df, weights, window=252):
    """Return the rolling sample variance of a fixed-weight portfolio.

    For every trailing window of ``window`` rows this yields ``w' C w``,
    where ``C`` is the sample covariance matrix (N - 1 normalisation) of the
    asset returns inside the window. That quantity is identical to the
    sample variance of the portfolio return series ``returns_df @ w`` over
    the same window, so it is computed as a single rolling variance instead
    of rebuilding a covariance matrix for each window.

    Parameters
    ----------
    returns_df : pandas.DataFrame
        Asset returns, one column per asset.
    weights : array-like
        One weight per column of ``returns_df``.
    window : int, default 252
        Number of consecutive rows in each window.

    Returns
    -------
    pandas.Series
        Rolling variance on the index of ``returns_df``. The first
        ``window - 1`` entries are NaN; when the frame has fewer than
        ``window`` rows every entry is NaN.

    Raises
    ------
    ValueError
        If the number of weights does not match the number of columns.
    """
    weights = np.array(weights)

    # Portfolio return per row: weighted sum across the asset columns.
    portfolio_returns = returns_df.dot(weights)

    # A full window is required (the default min_periods equals the window),
    # and ddof=1 matches the normalisation used by np.cov.
    return portfolio_returns.rolling(window=window).var(ddof=1)


In [493]:
# Compute the rolling portfolio variance and scale it by 100
rolling_var = 100 * rolling_portfolio_variance(portfolio_df, weights)

rolling_var.dropna()

In [498]:
# Create Figure
fig, ax1 = plt.subplots(dpi=300)

# Left axis: rolling portfolio variance (leading NaN values removed)
variance_series = rolling_var.dropna()
variance_series.plot(ax=ax1, color='turquoise')
ax1.set_xlabel('Date')
ax1.set_ylabel('Portfolio Variance', color='turquoise')

# Right axis (sharing the x axis): rolling AMZN/WMT correlation
ax2 = ax1.twinx()
rolling_corr_252d.plot(ax=ax2, color='orange')
ax2.set_ylabel('Rolling Correlation', color='orange')

plt.show()