In [1]:
import yfinance as yf
import pandas as pd
import numpy as np


# Build the S&P 100 ticker list from the Wikipedia constituents table.
# NOTE(review): pd.read_html fetches the live page, so the ticker list depends on
# the page's state at run time.
url = "https://en.wikipedia.org/wiki/S%26P_100"

# Read the tables on the page
tables = pd.read_html(url)

# Pick the constituents table by its 'Symbol' column instead of a fixed position:
# a positional index silently selects the wrong table if the page layout changes.
constituents = next((table for table in tables if 'Symbol' in table.columns), None)
if constituents is None:
    raise ValueError(f"No table with a 'Symbol' column was found at {url}")

tickers = constituents['Symbol'].tolist()
# Swap '.' for '-' in symbols — presumably to match Yahoo Finance's symbol
# format for share classes; TODO confirm.
tickers = [ticker.replace('.', '-') for ticker in tickers]


# Download daily prices, then keep the last observation of each calendar week
stock_prices = yf.download(tickers, start="2020-01-01", end="2023-12-31", auto_adjust=False)
stock_prices = stock_prices.resample('W').last()
stock_prices.index = stock_prices.index.tz_localize(None)      # change yf date format to match pdr
stock_prices = stock_prices.filter(like='Adj Close')

# Drop columns with too many missing values (e.g. due to IPOs):
# a column is kept only if at least 90% of its weekly rows are non-null.
stock_prices = stock_prices.dropna(axis=1, thresh=int(0.9 * len(stock_prices)))

# Weekly simple returns. Gaps are forward-filled explicitly before differencing;
# this reproduces what the implicit pct_change() default used to do, without
# relying on that (deprecated) default. Rows that still contain a NaN for any
# ticker (e.g. the first row, or weeks before a late starter began trading) are
# then dropped.
returns = (
    stock_prices
    .ffill()
    .pct_change(fill_method=None)
    .dropna()
    .rename(columns={"Adj Close": "Return"})
)
# Collapse the two-level column index down to plain ticker labels
returns = returns.xs('Return', axis=1, level=0)
returns.columns.name = None


returns.to_csv("returns.csv")


[*********************100%***********************]  101 of 101 completed


In [5]:
# Build a market-cap-weighted benchmark return series over the out-of-sample
# window (the last 30% of the rows of `returns`) and write it to CSV.
split_point = int(0.7 * len(returns))
out_sample = returns.iloc[split_point:].copy()
# Long format: one row per (Date, Ticker) pair.
# Relies on the index of `returns` being named "Date".
out_sample_tall = out_sample.reset_index().melt(id_vars=["Date"], var_name="Ticker", value_name="Return")

# Look up a market cap per ticker. This is best-effort: a failed lookup, or a
# missing/None 'marketCap' field, is recorded as 0 so that ticker simply gets
# zero weight.
# NOTE(review): `.info['marketCap']` is presumably the cap at the time this cell
# runs, not during the sample window, and the weights are held constant across
# all dates — confirm this is acceptable for the benchmark.
market_caps = {}
failed_lookups = []
selected_tickers = returns.columns
for ticker in selected_tickers:
    try:
        info = yf.Ticker(ticker).info
        # `or 0` also covers the key being present with a None value, which
        # would otherwise break the sum below.
        market_caps[ticker] = info.get('marketCap') or 0
    except Exception as exc:
        market_caps[ticker] = 0
        failed_lookups.append(f"{ticker} ({type(exc).__name__})")
if failed_lookups:
    # Surface the failures instead of silently zero-weighting those tickers
    print("Market cap lookup failed; weight set to 0 for: " + ", ".join(failed_lookups))

total_market_value = sum(market_caps.values())
if total_market_value == 0:
    raise ValueError(
        "Total market cap is 0 (no market cap could be retrieved); "
        "cannot compute benchmark weights."
    )

# Each weight is the ticker's share of the total retrieved market cap
tickers = list(selected_tickers)
weights_list = [market_caps[ticker] / total_market_value for ticker in tickers]

sp_weights = pd.DataFrame({'Ticker': tickers, 'Weight': weights_list})
sp100 = pd.merge(out_sample_tall, sp_weights, on=['Ticker'], how='inner')
sp100['Weighted_Return'] = sp100['Return'] * sp100['Weight']
# Sum the weighted returns across tickers to get one benchmark return per date
sp100_returns = sp100.groupby('Date')['Weighted_Return'].sum().reset_index()
sp100_returns = sp100_returns.rename(columns={'Weighted_Return': 'Portfolio_Return'})

sp100_returns.to_csv('sp100returns.csv')