In [1]:
from pandas_datareader import data as pddr
import pandas as pd
import datetime
import requests
from bs4 import BeautifulSoup
import numpy as np

WIKI_URL = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

req = requests.get(WIKI_URL)
soup = BeautifulSoup(req.content, 'lxml')
table_classes = {"class": ["sortable", "plainrowheaders"]}
wikitables = soup.findAll("table", table_classes)

rows = wikitables[0].findAll('tr')
headers = [i.text for i in rows[0].findAll('th')]
table_data = map(lambda x:[i.text for i in x.findAll('td')], rows[1:])
sp = pd.DataFrame(table_data, columns = headers)
sp['Ticker symbol'] = sp['Ticker symbol'].astype(str)

In [2]:
dfs = np.array_split(sp, 3)
raw_data = []
for df in dfs:
    # Define the instruments to download. We would like to see Apple, Microsoft and the S&P500 index.
    tickers = list(df['Ticker symbol'].values)

    # Define which online source one should use
    data_source = 'yahoo'

    # We would like all available data from 01/01/2000 until 12/31/2016.
    # Google ignores start/end dates, gives you a year from end data
    start_date = '2017-9-20'
    end_date = '2017-9-22'

    # User pandas_reader.data.DataReader to load the desired data. As simple as that.
    panel_data = pddr.DataReader(tickers, data_source, start_date, end_date)
    raw_data.append(panel_data)



In [20]:
data = pd.concat(raw_data, axis=2)

In [21]:
today_str = datetime.date.today().strftime('%Y-%m-%d')

today_close = data.loc['Close'][today_str]
yst_close = data.loc['Close'].shift(-1)[today_str]
eod_delta = (today_close - yst_close)/yst_close * 100

In [26]:
eod_delta.loc[today_str].sort_values().round(2)

DVA     -6.15
INCY    -3.65
SCG     -3.43
WRK     -3.37
MHK     -3.32
MAT     -3.20
HCP     -2.45
TIF     -2.24
VTR     -2.19
PKG     -2.18
ARNC    -2.16
IP      -2.16
MGM     -2.06
CF      -2.03
KHC     -1.89
HCN     -1.72
SBAC    -1.65
NI      -1.65
ETR     -1.55
DLR     -1.48
TDG     -1.46
FTI     -1.40
SJM     -1.26
CMS     -1.25
XEL     -1.25
AEP     -1.23
ES      -1.19
AMT     -1.16
CPB     -1.16
GGP     -1.16
         ... 
LEG      1.78
GPS      1.79
EXPE     1.85
GPC      1.89
IVZ      1.91
VZ       1.96
WYN      1.98
HES      1.99
SYF      2.01
AAP      2.04
CSCO     2.05
HRB      2.06
IPG      2.06
ABT      2.10
KSS      2.51
ALK      2.52
TXN      2.58
LRCX     2.58
STX      2.60
PAYX     2.68
COO      3.17
HPE      3.41
ORLY     3.62
AZO      3.71
NAVI     5.35
EFX      6.91
LB       7.51
KMX      7.77
BF.B      NaN
BRK.B     NaN
Name: 2017-09-22 00:00:00, dtype: float64

to do:
- choose a scope of eod data to analyze
- analyze to pick a cutoff - eye towards highly volatile swings
- overall: how to pick emotionally driven quick downward swings that come back up within 6 months? ie. first solar
    - how fast was the first solar downward tumble?