In [1]:
# !pip install yahoo_fin
from yahoo_fin.stock_info import get_data
import pandas as pd

In [2]:
# write a function to loop through the list of tickers and get the data
def get_stock_data(ticker_list, start_date, end_date):
    """Download daily adjusted-close prices for several tickers into one frame.

    Each ticker is requested from ``get_data`` with a daily interval. Tickers
    whose download (or column selection) raises are collected instead of
    aborting the whole run.

    Parameters
    ----------
    ticker_list : iterable of str
        Ticker symbols to request, one ``get_data`` call per symbol.
    start_date, end_date :
        Passed through unchanged to ``get_data``.

    Returns
    -------
    data : pandas.DataFrame
        Columns ``date`` (converted with ``pd.to_datetime``), ``ticker`` and
        ``adjclose``; the per-ticker frames are stacked in request order and
        keep their original row labels. Empty (but with these three columns)
        when no ticker could be loaded.
    bad_tickers : list of str
        Tickers that raised while being loaded, in request order.
    """
    columns = ['date', 'ticker', 'adjclose']
    frames = []
    bad_tickers = []
    for ticker in ticker_list:
        try:
            df = get_data(ticker=ticker, start_date=start_date, end_date=end_date,
                          index_as_date=False, interval='1d')
            frames.append(df[columns])
        except Exception:
            # Best-effort download: remember the failing ticker and move on.
            # `Exception` (not a bare except) so Ctrl-C still interrupts the loop.
            bad_tickers.append(ticker)

    # Concatenate once at the end instead of re-copying the frame per ticker.
    if frames:
        data = pd.concat(frames, axis=0)
    else:
        # Nothing loaded: still hand back the expected schema so callers can
        # inspect `bad_tickers` instead of hitting a KeyError on 'date'.
        data = pd.DataFrame(columns=columns)

    data['date'] = pd.to_datetime(data['date'])
    return data, bad_tickers

# Example usage (the function returns a (data, bad_tickers) tuple):
# data, bad_tickers = get_stock_data(['amzn'], '04/29/2014', '04/28/2024')
# data

In [3]:
# Pull the S&P 500 index (^GSPC) price history from 01/01/2014 to 04/29/2024.
# The second return value lists any tickers that could not be downloaded.
stock_prices, bad_tickers = get_stock_data(
    ticker_list=['^GSPC'],
    start_date='01/01/2014',
    end_date='04/29/2024',
)
stock_prices.head()

Unnamed: 0,date,ticker,adjclose
0,2014-01-02,^GSPC,1831.97998
1,2014-01-03,^GSPC,1831.369995
2,2014-01-06,^GSPC,1826.77002
3,2014-01-07,^GSPC,1837.880005
4,2014-01-08,^GSPC,1837.48999


In [4]:
# Daily return = percentage change of the adjusted close versus the previous
# row, computed separately for each ticker (the first row per ticker is NaN).
adjclose_by_ticker = stock_prices.groupby('ticker')['adjclose']
stock_prices['daily_return'] = adjclose_by_ticker.pct_change()
stock_prices.head()

Unnamed: 0,date,ticker,adjclose,daily_return
0,2014-01-02,^GSPC,1831.97998,
1,2014-01-03,^GSPC,1831.369995,-0.000333
2,2014-01-06,^GSPC,1826.77002,-0.002512
3,2014-01-07,^GSPC,1837.880005,0.006082
4,2014-01-08,^GSPC,1837.48999,-0.000212


In [5]:
# calculate the cumulative returns
# Compound the daily growth factors (1 + r) within each ticker, matching the
# per-ticker grouping used for the daily returns; an ungrouped cumprod would
# carry one ticker's compounded growth into the next ticker's rows.
growth_factor = 1 + stock_prices['daily_return']
stock_prices['cum_return'] = growth_factor.groupby(stock_prices['ticker']).cumprod() - 1

In [6]:
# fill NaN values with 0
# Only the return columns are filled: their leading NaN (no previous price to
# compare against) is treated as a 0% return. Prices and dates are left alone
# so a genuinely missing value is not silently turned into 0.
return_cols = ['daily_return', 'cum_return']
stock_prices[return_cols] = stock_prices[return_cols].fillna(0)

In [7]:
# Persist the returns table twice, without the row index:
# once as plain CSV and once as a gzip-compressed copy.
csv_path = '../../data/outputs/s&p500_returns.csv'
gzip_path = '../../data/outputs/s&p500_returns.csv.gz'

stock_prices.to_csv(csv_path, index=False)
stock_prices.to_csv(gzip_path, index=False, compression='gzip')