In [1]:
import pandas as pd
import yfinance as yf

In [2]:
# Input files (relative paths).
snp_path = "../data/snp_allstocks_2015_2019.csv"
snp_info_path = "../data/snp_info.csv"

# Stock table indexed by its 'Date' column, parsed as YYYY-MM-DD dates.
# Its column labels are later used as ticker symbols.
snp = pd.read_csv(snp_path, date_format="%Y-%m-%d", parse_dates=True, index_col='Date')

# Company metadata: the file's first column is read as a throwaway index,
# which is then replaced by the "Symbol" column so rows can be looked up
# by ticker.
info = pd.read_csv(snp_info_path, index_col=0)
info = info.set_index("Symbol")

## Sort Stocks By Liquidity (2016-2019)

Rank the stocks in the S&P 500 index by their average daily trading volume over the period 2016-01-01 to 2019-01-01.

In [3]:
# Average trading volume per ticker over the liquidity window.
# Every ticker starts as NaN; tickers whose download fails stay NaN.
volumes = pd.Series(index=snp.columns, dtype=float)
for stock_ticker in snp.columns:
    stock_data = yf.download(
        stock_ticker,
        start='2016-01-02',
        end='2019-01-01',
        progress=False,  # one progress bar per ticker floods the cell output
    )
    if stock_data.empty:
        # Nothing came back (e.g. delisted symbol): keep the NaN placeholder.
        continue
    # Depending on the yfinance version, selecting 'Volume' yields either a
    # Series or a one-column DataFrame; wrapping it in a DataFrame and taking
    # the first column mean gives a scalar in both cases.
    volumes[stock_ticker] = pd.DataFrame(stock_data['Volume']).mean().iloc[0]

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['ATVI']: Exception('%ticker%: No timezone found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*******

In [4]:
# Discard tickers without a volume value, then order the rest from the
# highest to the lowest average volume.
volumes = volumes.dropna().sort_values(ascending=False)
volumes

AAPL    1.327973e+08
AMZN    8.857717e+07
BAC     8.509439e+07
AMD     6.012129e+07
NVDA    5.587807e+07
            ...     
ESS     3.988901e+05
HII     3.844816e+05
AZO     3.724928e+05
TFX     3.071094e+05
MTD     1.837668e+05
Length: 444, dtype: float64

## Select Most Liquid Stocks By Sector

Choose eight sectors:

- Communication Services
- Consumer Discretionary
- Consumer Staples
- Energy
- Financials
- Health Care
- Industrials
- Information Technology

Select the 10 most liquid stocks in each of these sectors for analysis.

In [5]:
# GICS sectors included in the analysis.
SECTORS_TO_ANALYZE = {
    "Communication Services",
    "Consumer Discretionary",
    "Consumer Staples",
    "Energy",
    "Financials",
    "Health Care",
    "Industrials",
    "Information Technology",
}
# Number of most liquid stocks to keep per sector.
STOCKS_PER_SECTOR = 10

# One (initially empty) ticker list for each analysed sector that actually
# occurs in `info`.
stocks_by_sector = {
    sector: []
    for sector in info["GICS Sector"].unique()
    if sector in SECTORS_TO_ANALYZE
}

# `volumes` is ordered from most to least liquid, so the first
# STOCKS_PER_SECTOR tickers encountered for a sector are its most liquid ones.
# The whole ranking is scanned rather than a fixed number of top entries, so a
# sector is never left short just because its stocks rank low overall.
for stock_ticker in volumes.index:
    if all(
        len(tickers) >= STOCKS_PER_SECTOR
        for tickers in stocks_by_sector.values()
    ):
        # Every sector is full; no need to look at less liquid tickers.
        break
    sector = info.loc[stock_ticker, "GICS Sector"]
    if (
        sector in stocks_by_sector
        and len(stocks_by_sector[sector]) < STOCKS_PER_SECTOR
    ):
        stocks_by_sector[sector].append(stock_ticker)

# One column per sector, rows ordered by liquidity rank within the sector.
# Going through pd.Series pads any sector that ended up with fewer than
# STOCKS_PER_SECTOR tickers with NaN instead of raising on unequal lengths.
stocks_by_sector_df = pd.DataFrame(
    {
        sector: pd.Series(tickers, dtype=object)
        for sector, tickers in stocks_by_sector.items()
    }
).sort_index(axis=1)
stocks_by_sector_df

Unnamed: 0,Communication Services,Consumer Discretionary,Consumer Staples,Energy,Financials,Health Care,Industrials,Information Technology
0,T,AMZN,WMT,MRO,BAC,PFE,CSX,AAPL
1,GOOGL,F,KO,KMI,WFC,MRK,GE,AMD
2,GOOG,GM,KR,XOM,C,GILD,DAL,NVDA
3,CMCSA,EBAY,PG,HAL,RF,BMY,AAL,MU
4,VZ,SBUX,MDLZ,WMB,JPM,BSX,CPRT,MSFT
5,NFLX,NKE,MO,COP,KEY,ABT,LUV,INTC
6,DIS,M,MNST,SLB,MS,CVS,FAST,CSCO
7,IPG,MGM,COTY,DVN,HBAN,ABBV,CAT,HPE
8,EA,TJX,WBA,CVX,SCHW,JNJ,JCI,ORCL
9,DISH,TGT,PM,MPC,SYF,MDT,UAL,AMAT


In [6]:
# Persist the per-sector selection; the positional row index is not written.
stocks_by_sector_df.to_csv(
    path_or_buf='../data/stocks_by_sector.csv',
    index=False,
)