In [1]:
import numpy as np
import pandas as pd
from datetime import datetime

import yfinance as yf

In [2]:
# Load the S&P 500 stock table (its columns are used as ticker symbols further
# down) and the constituent metadata table.
snp = pd.read_csv("../data/snp_allstocks_2015_2019.csv", index_col='Date')
# Parse the 'Date' index explicitly instead of passing `date_parser=` to
# read_csv: that keyword is deprecated since pandas 2.0, whereas to_datetime
# with an explicit format behaves the same on old and new pandas versions.
snp.index = pd.to_datetime(snp.index, format="%Y-%m-%d")
info = pd.read_csv('../data/snp_info.csv', index_col=0)

---------------
Sort Stocks By Liquidity (2016-2019)
---------

Sort stocks in S&P 500 Index based on average trading volume in the period 2016-01-01 to 2019-01-01.

In [3]:
# Mean of the 'Volume' column returned by yfinance for each ticker over the
# window below. Tickers whose download comes back empty keep their initial NaN.
volume = pd.Series(index=snp.columns, dtype=float)

for stock in snp.columns:
    # progress=False stops yfinance from printing one progress bar per ticker.
    data = yf.download(stock, start='2016-01-02', end='2019-01-01', progress=False)
    if data.empty:
        # Nothing was returned for this ticker; leave the NaN placeholder.
        continue
    # NOTE(review): some yfinance versions return MultiIndex columns even for a
    # single ticker, in which case data['Volume'] is a one-column frame rather
    # than a Series -- confirm against the installed version. Averaging the raw
    # values yields a scalar in both layouts; nanmean ignores missing values
    # just like pandas' default mean().
    volume[stock] = np.nanmean(data['Volume'].to_numpy(dtype=float))

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [4]:
# Discard tickers without a volume figure, then order the remainder from the
# highest to the lowest average volume.
volume = volume.dropna().sort_values(ascending=False)
volume

AAPL    1.327973e+08
BAC     8.509439e+07
AMD     6.012129e+07
GE      5.951927e+07
F       3.899485e+07
            ...     
HII     3.844816e+05
AZO     3.724928e+05
RE      3.454800e+05
TFX     3.071094e+05
MTD     1.837668e+05
Length: 476, dtype: float64

---------------
Select Most Liquid Stocks By Sector
---------

Choose eight sectors:
 - Communication Services
 - Consumer Discretionary
 - Consumer Staples
 - Energy
 - Financials
 - Health Care
 - Industrials
 - Information Technology
 

Select the 10 most liquid stocks in each of these sectors for analysis.

In [5]:
# Only this many of the highest-volume tickers are bucketed into sectors.
N_MOST_LIQUID = 160

# Symbol -> GICS sector lookup, built once instead of on every loop iteration.
sector_of = info.set_index('Symbol')['GICS Sector']

# One (initially empty) ticker list per sector present in the metadata.
stocks_by_sector = {sector: [] for sector in info['GICS Sector'].unique()}

# Tickers are appended in the order of volume.index, so each sector's list
# inherits that ordering.
for stock in volume.index[:N_MOST_LIQUID]:
    stocks_by_sector[sector_of.loc[stock]].append(stock)

In [6]:
# Sectors kept for the analysis, listed alphabetically; dict insertion order
# makes this the key order of `stocks`.
SECTORS = [
    'Communication Services',
    'Consumer Discretionary',
    'Consumer Staples',
    'Energy',
    'Financials',
    'Health Care',
    'Industrials',
    'Information Technology',
]

# Number of tickers kept per sector.
N_PER_SECTOR = 10

# Keep the first N_PER_SECTOR tickers of each selected sector. A sector that
# has fewer entries in stocks_by_sector simply yields a shorter list.
stocks = {sector: stocks_by_sector[sector][:N_PER_SECTOR] for sector in SECTORS}

In [7]:
# Tabulate the selection: one column per sector, row i holding the i-th ticker
# of every sector's list.
df = pd.DataFrame(data=stocks)
df

Unnamed: 0,Communication Services,Consumer Discretionary,Consumer Staples,Energy,Financials,Health Care,Industrials,Information Technology
0,T,F,KO,MRO,BAC,PFE,GE,AAPL
1,TWTR,GM,KR,KMI,WFC,MRK,DAL,AMD
2,FB,EBAY,PG,XOM,C,GILD,CSX,MU
3,CMCSA,SBUX,WMT,HAL,RF,BMY,AAL,MSFT
4,VZ,NKE,MDLZ,WMB,JPM,BSX,LUV,INTC
5,NFLX,M,MO,COP,KEY,ABT,FAST,CSCO
6,DIS,MGM,COTY,SLB,MS,CVS,CAT,HPE
7,ATVI,TJX,WBA,DVN,HBAN,ABBV,JCI,ORCL
8,IPG,TGT,PM,CVX,SCHW,JNJ,UAL,NVDA
9,DISCA,NWL,PEP,COG,SYF,MDT,UNP,AMAT


In [8]:
# Write the sector table to disk; index=False leaves the row labels out of the file.
df.to_csv(path_or_buf='stocks_by_sector.csv', index=False)