<a href="https://colab.research.google.com/github/alexandreib/QuantDesign/blob/main/QD%20%7C%20SP500%20Portfolio%20Allocation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**Convex Sharpe Ratio Optimization, to Portfolio Optimization, on S&P 500 Past Returns**



## Imports

In [45]:
import pandas as pd
import numpy as np
import yfinance as yf
import cvxpy as cp
import scipy.optimize as opt

## Download SP500 Constituents over the last 10 years and refactor to get daily log returns

In [11]:
# Download the S&P 500 constituents from Wikipedia
try:
    table=pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    df = table[0]
    tickers = df['Symbol'].tolist()
except Exception as e:
    print(f"Error downloading S&P 500 tickers: {e}")
    tickers = [] # Handle the error gracefully, e.g., provide a default list

# Print or use the tickers list
print(tickers)

['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A', 'APD', 'ABNB', 'AKAM', 'ALB', 'ARE', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AMTM', 'AEE', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'AME', 'AMGN', 'APH', 'ADI', 'ANSS', 'AON', 'APA', 'AAPL', 'AMAT', 'APTV', 'ACGL', 'ADM', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'ADP', 'AZO', 'AVB', 'AVY', 'AXON', 'BKR', 'BALL', 'BAC', 'BAX', 'BDX', 'BRK.B', 'BBY', 'TECH', 'BIIB', 'BLK', 'BX', 'BK', 'BA', 'BKNG', 'BWA', 'BSX', 'BMY', 'AVGO', 'BR', 'BRO', 'BF.B', 'BLDR', 'BG', 'BXP', 'CHRW', 'CDNS', 'CZR', 'CPT', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CARR', 'CTLT', 'CAT', 'CBOE', 'CBRE', 'CDW', 'CE', 'COR', 'CNC', 'CNP', 'CF', 'CRL', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'CL', 'CMCSA', 'CAG', 'COP', 'ED', 'STZ', 'CEG', 'COO', 'CPRT', 'GLW', 'CPAY', 'CTVA', 'CSGP', 'COST', 'CTRA', 'CRWD', 'CCI', 'CSX', 'CMI', 'CVS', 'DHR', '

In [12]:
# Define the start and end dates for the data
end_date = pd.Timestamp.today()
start_date = end_date - pd.Timedelta(days=365 * 10)

# Download the data
df = yf.download(tickers, start=start_date, end=end_date)

# Print the data (optional)
df.head()

[*********************100%***********************]  503 of 503 completed
ERROR:yfinance:
2 Failed downloads:
ERROR:yfinance:['BF.B']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2014-10-31 02:34:17.216021 -> 2024-10-28 02:34:17.216021)')
ERROR:yfinance:['BRK.B']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


Price,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Ticker,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-10-31 00:00:00+00:00,36.393883,23.973495,41.956585,,36.026062,18.773333,68.768219,70.120003,40.179722,35.525318,...,479916,4841500,1527800,3632900,17493700,1161000,8379384,1530786,562800,2589500
2014-11-03 00:00:00+00:00,37.588356,24.284267,41.817745,,35.778122,18.799999,68.641083,69.910004,40.260708,35.714294,...,504566,3013800,1222900,3243800,13410000,781100,4277047,813082,833000,4071300
2014-11-04 00:00:00+00:00,36.980907,24.106682,41.427677,,36.042591,18.91,68.793655,71.07,40.042061,37.445213,...,440467,2763400,2088000,3709800,13498500,693800,3248402,1050703,1544000,9723700
2014-11-05 00:00:00+00:00,36.934875,24.1644,41.348331,,36.083912,18.906668,69.921135,71.370003,40.511719,38.027218,...,309965,3056200,2938200,4556000,12480000,823700,2750842,1231983,1475600,11878900
2014-11-06 00:00:00+00:00,38.076153,24.233509,41.460724,,36.108704,19.026667,70.565422,72.099998,40.665585,37.770222,...,398564,2924200,3081700,8182500,14731400,1218200,3727741,764054,816600,4457100


In [13]:
# Reshape the DataFrame
df = df['Adj Close'].reset_index()
df = pd.melt(df, id_vars='Date', value_vars=tickers, var_name='Ticker', value_name='Adj Close')
df['Date'] = pd.to_datetime(df['Date']).dt.date
df = df.rename(columns={'index': 'Ticker'})
# Print or use the transformed DataFrame
df.head()

Unnamed: 0,Date,Ticker,Adj Close
0,2014-10-31,MMM,91.776772
1,2014-11-03,MMM,91.496262
2,2014-11-04,MMM,92.540749
3,2014-11-05,MMM,92.922722
4,2014-11-06,MMM,93.137581


In [19]:
# Calculate daily log returns
df['Daily_Log_Return'] = np.log(df['Adj Close'] / df['Adj Close'].shift(1))

# Calculate quarterly log returns
df['Quarterly_Log_Return'] = df.groupby('Ticker')['Daily_Log_Return'].rolling(window=63, min_periods=1).sum().reset_index(0,drop=True)

In [88]:
def calculate_shrink_cov_matrix (df) :
    masked_arr = np.ma.array(df, mask=np.isnan(df))
    cov_numpy = np.ma.cov(masked_arr, rowvar=False, allow_masked=True, ddof=1).data
    n_samples, n_features = df.shape
    alpha = np.mean(cov_numpy**2)
    mu = np.trace(cov_numpy) / n_features
    mu_squared = mu**2
    num = alpha + mu_squared
    den = (n_samples + 1) * (alpha - mu_squared / n_features)
    shrinkage = 1.0 if den == 0 else min(num / den, 1.0)
    shrunk_cov = (1.0 - shrinkage) * cov_numpy
    shrunk_cov.flat[:: n_features + 1] += shrinkage * mu
    return shrunk_cov

returns_matrix = pd.pivot_table(df[['Date','Ticker', 'Quarterly_Log_Return']], index ='Date', columns = 'Ticker', aggfunc='mean')
cov_matrix = calculate_shrink_cov_matrix(returns_matrix)

In [89]:
def calculate_portfolio_variance(weights, cov_matrix):
    return np.dot(weights.T, np.dot(cov_matrix, weights))

def calculate_portfolio_returns(weights, returns):
    return np.dot(weights, returns)

### Using Negative Sharpe Ratio, as we will use scipy.optimize.minimize
def neg_sharpe_ratio_objective(weights, returns, cov_matrix, risk_free_rate = 0):
    portfolio_returns = np.squeeze(calculate_portfolio_returns(weights, returns))
    portfolio_variance = np.squeeze(calculate_portfolio_variance(weights, cov_matrix))
    return -((portfolio_returns - risk_free_rate)/np.sqrt(portfolio_returns))

In [90]:
returns = returns_matrix.iloc[-1].values
#returns_matrix.mean().values.T @ weights # Can you the mean returns (but for 10 years quaterly return, I don't think it makes sense)

init_guess = np.array([1/len(returns) for _ in range(len(returns))])

result = opt.minimize(fun=neg_sharpe_ratio_objective,
                        x0=init_guess,
                        args=(returns, cov_matrix),
                        method='SLSQP',
                        bounds=tuple((0,1) for _ in range(len(returns))),
                        constraints=({'type': 'eq', 'fun': lambda x: np.sum(x) - 1}))

print(result)

In [91]:
result

 message: Optimization terminated successfully
 success: True
  status: 0
     fun: -0.7462198715083406
       x: [ 2.752e-16  3.809e-16 ...  0.000e+00  0.000e+00]
     nit: 10
     jac: [ 3.495e-02 -4.004e-02 ... -4.821e-02  6.741e-03]
    nfev: 5020
    njev: 10