In [1]:
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web
import yfinance as yf
import numpy as np

  _empty_series = pd.Series()


In [2]:
# Read the first table on the Wikipedia S&P 500 page and keep only the ticker column.
# 'Symbol' is selected by name rather than dropping columns 1-7 by position, so the
# cell keeps working if the other columns of the table are added, removed or reordered.
sp500_constituents = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
sp500_constituents = sp500_constituents[['Symbol']].copy()

In [3]:
# Ticker list used by the per-stock loops below.
# 'BRK.B' and 'BF.B' are excluded (presumably because the dotted symbols are not
# what the price source expects -- TODO confirm). Filtering instead of calling
# list.remove() means a symbol that is missing from the table no longer raises
# ValueError.
excluded_symbols = {'BRK.B', 'BF.B'}
sp500_list = [
    symbol
    for symbol in sp500_constituents['Symbol'].to_list()
    if symbol not in excluded_symbols
]

In [4]:
# Result table: one row per ticker, with the momentum score initialised to NaN.
final_df = pd.DataFrame(
    dict([
        ('Stock Code', sp500_list),
        ('Momentum Score (C)', np.nan),
    ])
)

In [5]:
# Sample window: 1 Jan 2019 through 31 Dec 2023 (both at midnight).
start = dt.datetime(year=2019, month=1, day=1)
end = dt.datetime(year=2023, month=12, day=31)

In [6]:
# Base frame for the S&P 500 history: a single 'Date' column holding every
# calendar day from `start` to `end` (inclusive).
sp500_histdata = pd.DataFrame(
    {'Date': pd.date_range(start=start, end=end, freq='D')}
)
#sp500_histdata[sp500_list] = np.nan

In [None]:
# Show the last five rows of the date frame.
sp500_histdata.tail(n=5)

In [None]:
# Data extraction using STOOQ
# Fetch one ticker, rename its 'Close' column to the ticker symbol and turn the
# index into a regular column.

name = 'AMZN'
df = (
    web.DataReader(name, 'stooq', start, end)
    .rename(columns={'Close': name})
    .reset_index()
)
#df.drop(df.columns[[1, 2, 3, 5]], axis=1, inplace=True)

df.head()

In [9]:
# Data extraction using YFINANCE
# Five years of AMZN history, reduced to the date and the closing price.

amazon = yf.Ticker('AMZN')
amazon_data = amazon.history(period='5y')
# Select the two columns by name; dropping columns 1, 2, 3, 5, 6, 7 by position
# silently keeps the wrong data if the provider adds or reorders a column.
amazon_data = amazon_data.reset_index()[['Date', 'Close']]

In [10]:
 # Download stock close prices of a list of stocks using yfinance

# For every ticker: fetch 5 years of history, keep the date and the closing price
# (renamed to the ticker symbol) and write them to '<ticker>.csv'.
# The CSV is written with its index on purpose: later cells rely on that layout.
failed_downloads = []
for stock in sp500_list:
    try:
        df = yf.Ticker(stock).history(period='5y')
        # select by name instead of dropping columns by position
        df = df.rename(columns={'Close': stock}).reset_index()[['Date', stock]]
        df.to_csv(stock + '.csv')
    except Exception as exc:
        # Still best-effort (one bad ticker must not stop the loop), but report the
        # failure instead of hiding it; KeyboardInterrupt is no longer swallowed.
        failed_downloads.append(stock)
        print('download failed for', stock, '-', repr(exc))

In [29]:
# Normalising the Date column of every per-stock CSV to 'YYYY-MM-DD' strings.
# The values are parsed as UTC timestamps and written back as plain date strings.
# The CSV is rewritten with its index, as before; later cells rely on that layout.

for stock in sp500_list:
    try:
        df = pd.read_csv(stock+'.csv')
        df['Date'] = pd.to_datetime(df['Date'], utc=True)
        df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')
        df.to_csv(stock+'.csv')
    except Exception as exc:
        # Say which ticker failed and why; a bare `except` also swallowed
        # KeyboardInterrupt and printed only 'error'.
        print('error', stock, '-', repr(exc))


In [83]:
# Historical data prepared by joining stock price data of all S&P 500 stocks.
#
# Every '<ticker>.csv' must contain a 'Date' column and a price column named after
# the ticker. Both are selected by name, so leftover index columns ('Unnamed: 0',
# ...) written by earlier to_csv calls are ignored however many there are.

price_series = []
for stock in sp500_list:
    stock_prices = pd.read_csv(stock + '.csv', usecols=['Date', stock])
    stock_prices['Date'] = pd.to_datetime(stock_prices['Date'])
    price_series.append(stock_prices.set_index('Date')[stock])

# Restart from the Date column only: re-running this cell then no longer merges the
# same tickers a second time (which produced suffixed '_x' / '_y' columns).
sp500_histdata = sp500_histdata[['Date']]
if price_series:
    # One column-wise concat plus a single outer merge replaces one merge per ticker.
    all_prices = pd.concat(price_series, axis=1).rename_axis('Date').reset_index()
    sp500_histdata = pd.merge(left=sp500_histdata,
                              right=all_prices,
                              on="Date",
                              how="outer")

sp500_histdata.to_csv('sp500_histdata.csv')
    

In [85]:
# Tidy the merged CSV: use 'Date' as the index, drop rows in which every stock is
# missing, and write the file back in place.
df = pd.read_csv('sp500_histdata.csv')
# Remove leftover index columns by name. Dropping "column 0" by position removed
# 'Date' itself on a second run of this cell and then failed with a KeyError.
df = df.loc[:, ~df.columns.str.startswith('Unnamed')]
df = df.set_index('Date').dropna(axis=0, how='all')

df.to_csv('sp500_histdata.csv')

In [None]:
# Reload the merged history: the 'Date' values become a datetime index and the
# first column of the file is discarded.
df = pd.read_csv('sp500_histdata.csv')
date_index = pd.to_datetime(pd.Index(df['Date']))
df = df.iloc[:, 1:]
df.index = date_index
df.head()

In [None]:
# Reference only: everything below sits inside a bare string literal, so none of it
# executes. The loop it contains would rename each '<ticker>_x' column back to
# '<ticker>' for every ticker in sp500_list.
'''
Renaming pandas columns

for stock in sp500_list:
    df.rename(columns={stock+'_x':stock}, inplace=True)

'''

In [126]:
# Down-sample the price history to one row per month (last observation of each month).
# 'Date' may be a regular column or only the index, depending on which loading cell
# ran last; both cases are handled so the cell neither raises KeyError nor fails when
# it is run a second time.
# NOTE(review): recent pandas releases prefer the alias 'ME' over 'M' -- confirm
# against the installed version before switching.
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.resample('M', on='Date').last()
else:
    df.index = pd.to_datetime(df.index)
    df = df.resample('M').last()

In [None]:
# Reload the merged history with the 'Date' values as index; the first column of
# the file is discarded.
df = pd.read_csv('sp500_histdata.csv')
date_index = pd.Index(df['Date'])
df = df.iloc[:, 1:]
df.index = date_index
In [None]:
### Calculating 6 month and 12 month returns

#df['MMM_t-6'] = df['MMM'].shift(6)
#df['MMM_t-12'] = df['MMM'].shift(12)

In [150]:
# Adding 6m and 12m lagged price columns to further calculate returns for the period.
#
# The per-stock CSVs hold one row per trading day, so `.shift(6)` / `.shift(12)`
# would look back 6 / 12 rows (days), not months. Instead, each row gets the last
# available price on or before the date 6 / 12 calendar months earlier.

for stock in sp500_list:
    df = pd.read_csv(stock+'.csv')
    dates = pd.DatetimeIndex(pd.to_datetime(df['Date']))
    prices = pd.Series(df[stock].to_numpy(), index=dates).sort_index()
    # one price per date, so the look-up below is unambiguous
    prices = prices[~prices.index.duplicated(keep='last')]
    for months in (6, 12):
        lookback_dates = dates - pd.DateOffset(months=months)
        # method='ffill' picks the most recent price at or before the look-back
        # date; look-back dates earlier than the first observation give NaN
        lagged = prices.reindex(lookback_dates, method='ffill')
        df[stock + '_' + str(months) + 'm'] = lagged.to_numpy()
    df.to_csv(stock+'.csv')

print('lets gooo!')

lets gooo!


In [None]:
# 6m and 12m returns; the 3-month US T-bill rate was 5.22% as of 5 Feb 2024.
#
# TODO: the T-bill return still needs to be deducted -- the loop below stores the
# raw price returns only.

for stock in sp500_list:
    df = pd.read_csv(stock+'.csv')
    for horizon in ('_6m', '_12m'):
        # return = current price / lagged price - 1
        df[stock+horizon+'_ret'] = df[stock].div(df[stock+horizon]).sub(1)
    df.to_csv(stock+'.csv')