In [1]:
import os
import time

import numpy as np
import pandas as pd
import yfinance as yf



# Ticker returns

In [None]:

# Build weekly returns for the S&P 100 constituents and save them to data/returns.csv.
url = "https://en.wikipedia.org/wiki/S%26P_100"

# Read every HTML table on the page.
tables = pd.read_html(url)

# Pick the constituents table by its 'Symbol' column instead of a fixed
# position, so tables being added to or removed from the page do not silently
# select the wrong one.
constituents = next((table for table in tables if 'Symbol' in table.columns), None)
if constituents is None:
    raise ValueError(f"No table with a 'Symbol' column found at {url}")

# Yahoo Finance tickers use '-' where the Wikipedia table uses '.' (e.g. BRK.B -> BRK-B).
tickers = (
    constituents['Symbol']
    .dropna()
    .astype(str)
    .str.replace('.', '-', regex=False)
    .tolist()
)

# Download prices and keep the last observation of each week.
stock_prices = yf.download(tickers, start="2020-01-01", end="2023-12-31", auto_adjust=False)
stock_prices = stock_prices.resample('W').last()
stock_prices.index = stock_prices.index.tz_localize(None)  # change yf date format to match pdr
stock_prices = stock_prices.filter(like='Adj Close')

# Drop columns with too many missing values (e.g. due to IPOs):
# a ticker is kept only if at least 90% of its weekly prices are present.
stock_prices = stock_prices.dropna(axis=1, thresh=int(0.9 * len(stock_prices)))

# Select the 'Adj Close' level directly (one column per ticker), then compute
# week-over-week returns. dropna() removes every week in which at least one
# retained ticker still has a missing return.
returns = (
    stock_prices['Adj Close']
    .pct_change()
    .dropna()
    .rename_axis(columns=None)
)

# Make sure the output directory exists so a fresh checkout can run this cell.
os.makedirs("data", exist_ok=True)
returns.to_csv("data/returns.csv")


# SP100 Returns

In [6]:
# Build a market-cap-weighted benchmark return series over the out-of-sample window.

# The last 30% of the rows in `returns` form the out-of-sample period.
split_point = int(0.7 * len(returns))
out_sample = returns.iloc[split_point:].copy()
# Long format: one row per (Date, Ticker) pair.
out_sample_tall = out_sample.reset_index().melt(id_vars=["Date"], var_name="Ticker", value_name="Return")

# Look up a market cap for every ticker; failures fall back to 0 (zero weight).
# NOTE(review): `.info` presumably reports the current market cap, not the cap
# during the sample period - confirm this is the intended weighting.
market_caps = {}
selected_tickers = returns.columns
for ticker in selected_tickers:
    try:
        info = yf.Ticker(ticker).info
        # `or 0` also covers a 'marketCap' key that is present but None, which
        # would otherwise break the sum and the division below.
        market_caps[ticker] = info.get('marketCap') or 0
    except Exception as e:
        # Report the failure instead of hiding it; the ticker gets zero weight.
        print(f"Error retrieving {ticker}: {e}")
        market_caps[ticker] = 0
total_market_value = sum(market_caps.values())

# Fail with a clear message rather than a bare ZeroDivisionError when every
# lookup failed or returned no market cap.
if market_caps and total_market_value == 0:
    raise ValueError("No market capitalisation could be retrieved; cannot compute index weights.")

# Weight of each ticker = its market cap divided by the total market cap.
tickers = list(selected_tickers)
weights_list = [market_caps[ticker] / total_market_value for ticker in tickers]

sp_weights = pd.DataFrame({'Ticker': tickers, 'Weight': weights_list})
# NOTE(review): written to the working directory, unlike the other outputs under data/ - confirm.
sp_weights.to_csv("sp100weights.csv")

# Attach each ticker's weight to its returns and sum the weighted returns per date.
sp100 = pd.merge(out_sample_tall, sp_weights, on=['Ticker'], how='inner')
sp100['Weighted_Return'] = sp100['Return'] * sp100['Weight']
sp100_returns = sp100.groupby('Date')['Weighted_Return'].sum().reset_index()
sp100_returns = sp100_returns.rename(columns={'Weighted_Return': 'Portfolio_Return'})

sp100_returns.to_csv('data/sp100returns.csv')

# Attributes

In [None]:
# Build a table of binary size / sector / country attributes for every ticker
# and save it to data/ticker_attributes.csv.

# Every column of `returns` is a ticker (the dates live in the index), so no
# column may be skipped positionally. Filtering out a literal 'Date' column
# keeps this correct even if `returns` was reloaded from CSV without an index.
tickers = [column for column in returns.columns if column != 'Date']

data = []

for ticker in tickers:
    try:
        info = yf.Ticker(ticker).info
        row = {
            'Ticker': ticker,
            'MarketCap': info.get('marketCap', 0),
            'Sector': info.get('sector', 'Unknown'),
            'Country': info.get('country', 'Unknown')
        }
        data.append(row)
        time.sleep(1)  # pause after each successful lookup (presumably to avoid rate limiting)
    except Exception as e:
        # Keep the ticker in the table with placeholder values so the output
        # still has one row per ticker.
        print(f"Error retrieving {ticker}: {e}")
        data.append({'Ticker': ticker, 'MarketCap': 0, 'Sector': 'Unknown', 'Country': 'Unknown'})


df = pd.DataFrame(data)

# Size classification: < 2e9 small, [2e9, 1e10) mid, >= 1e10 large.
# NOTE(review): a failed lookup has MarketCap 0 and is therefore flagged SmallCap - confirm intended.
df['SmallCap'] = (df['MarketCap'] < 2e9).astype(int)
df['MidCap'] = ((df['MarketCap'] >= 2e9) & (df['MarketCap'] < 1e10)).astype(int)
df['LargeCap'] = (df['MarketCap'] >= 1e10).astype(int)

# Sector classification: flag name -> case-insensitive regex matched against 'Sector'.
sector_patterns = {
    'Tech': 'Technology',
    'Finance': 'Financial|Bank',
    'Healthcare': 'Health',
    'Consumer': 'Consumer',
    'Energy': 'Energy|Oil|Gas',
    'Industrial': 'Industrials',
    'Utilities': 'Utilities',
}
for flag, pattern in sector_patterns.items():
    df[flag] = df['Sector'].str.contains(pattern, case=False, na=False).astype(int)

# Country classification.
# NOTE(review): an 'Unknown' country counts as International - confirm intended.
df['International'] = (df['Country'] != 'United States').astype(int)
df['Domestic'] = (df['Country'] == 'United States').astype(int)

# Keep only the ticker and its binary indicator columns.
binary_df = df[['Ticker', 'SmallCap', 'MidCap', 'LargeCap',
                'Tech', 'Finance', 'Healthcare', 'Consumer',
                'Energy', 'Industrial', 'Utilities',
                'International', 'Domestic']]

binary_df.to_csv("data/ticker_attributes.csv", index=False)


# Get Index Results

In [6]:
from attribution import attribute_index
from max_corr import max_corr_index

# Parameter grids handed unchanged to both index-construction routines.
# NOTE(review): the meaning of q and m is defined in max_corr / attribution - confirm there.
qs = [2, 3, 5, 7, 10, 15, 20, 25, 50]
ms = [2]


def _corr_table(corr_by_qm):
    """Flatten a {(q, m): correlation} mapping into a DataFrame with one row per (q, m) pair."""
    rows = [(q, m, corr) for (q, m), corr in corr_by_qm.items()]
    return pd.DataFrame(rows, columns=['q', 'm', 'Correlation'])


# --- Max-correlation index ---
mc_corr, mc_returns = max_corr_index(qs, ms)

corrs_df = _corr_table(mc_corr)
corrs_df.to_csv('data/mc_corr.csv')
mc_returns.to_csv('data/mc_ret.csv')

# --- Attribute-based index ---
att_corrs, combined_returns = attribute_index(qs, ms)

corrs_df = _corr_table(att_corrs)
corrs_df.to_csv('data/att_corr.csv')
combined_returns.to_csv('data/att_ret.csv')

Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1:Gurobi 12.0.1: