In [1]:
#inspect_stock_data.ipynb
#
#by Joe Hahn
#jmh.datasciences@gmail.com
#23 January 2018
#
#this reads some NYSE data and makes some charts

In [2]:
#prep matplotlib
#render figures inline, directly beneath the cell that creates them
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
#apply seaborn's default styling, with fonts scaled up by 1.5x for readability
sns.set(font_scale=1.5)

In [3]:
#import numpy and pandas
import numpy as np
import pandas as pd
#None lifts the column limit so that wide dataframes are displayed without truncation
pd.set_option('display.max_columns', None)

In [4]:
#select date ranges, for reading the data, for plotting, and for models
#each range is an inclusive (start, end) pair of ISO-formatted date strings.
#data_date_range must span both plot_date_range and model_date_range, since the
#charts and the simulation below only see records that were actually read;
#previously the data ended on 2017-03-01, well before the plot/model windows close
data_date_range  = ('2015-10-01', '2018-01-12')
plot_date_range  = ('2016-03-01', '2018-01-12')
model_date_range = ('2017-01-12', '2018-01-12')

In [5]:
#read NYSE data
#import the helpers by name (rather than via *) so it is clear where they come from;
#random_purchases is used by the simulation cell further below
from helper_fns import read_nyse_data, random_purchases
path = 'private/eoddata/NYSE_*.zip'
drop_holidays=True
#read the records that fall within data_date_range from the files matching path;
#per the recorded output the result has columns ticker, date, open, high, low, close, vol
market = read_nyse_data(path, start_date=data_date_range[0], end_date=data_date_range[1], drop_holidays=drop_holidays)
#call form of print works under both python 2 and python 3
print(market.dtypes)
market.head()

reading file =  private/eoddata/NYSE_2013.zip
reading file =  private/eoddata/NYSE_2014.zip
reading file =  private/eoddata/NYSE_2015.zip
reading file =  private/eoddata/NYSE_2016.zip
reading file =  private/eoddata/NYSE_2017.zip
reading file =  private/eoddata/NYSE_2018.zip
ticker            object
date      datetime64[ns]
open             float64
high             float64
low              float64
close            float64
vol                int64
dtype: object


Unnamed: 0,ticker,date,open,high,low,close,vol
1729427,A,2015-10-01,34.32,34.48,33.48,33.74,3011400
1729428,AA,2015-10-01,29.13,29.55,27.57,27.78,36864900
1729429,AAC,2015-10-01,22.27,22.69,22.11,22.59,197400
1729430,AAN,2015-10-01,36.25,36.62,35.98,36.32,597500
1729431,AAP,2015-10-01,190.28,192.64,188.1,192.04,1586900


In [19]:
#pull out the records of a single ticker and index them by date
ticker = 'GE'
idx = market['ticker'].eq(ticker)
df = market.loc[idx].set_index('date', drop=True)
df.head()

Unnamed: 0_level_0,ticker,open,high,low,close,vol
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-10-01,GE,25.15,25.31,24.79,25.19,39723500
2015-10-02,GE,24.87,25.49,24.83,25.47,42676200
2015-10-05,GE,26.37,27.2,26.22,26.82,105423300
2015-10-06,GE,26.99,27.41,26.96,27.29,71909400
2015-10-07,GE,27.5,27.77,27.32,27.77,65845300


In [23]:
#first opening price of each week; 'W' is the documented weekly offset alias
#(the lowercase spelling is deprecated in recent pandas releases)
r = df['open'].resample('W').first()

In [26]:
#earlier attempt using pd.TimeGrouper, kept for reference
#r = df.groupby([pd.TimeGrouper('w'), 'ticker']).first()

In [32]:
#bin the single-ticker records into weekly groups on the date index and keep the
#first record of each week; 'W' is the documented weekly offset alias
r = df.groupby(pd.Grouper(level='date', freq='W')).first()
r.head()

Unnamed: 0_level_0,ticker,open,high,low,close,vol
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-10-04,GE,25.15,25.31,24.79,25.19,39723500
2015-10-11,GE,26.37,27.2,26.22,26.82,105423300
2015-10-18,GE,27.98,28.17,27.91,28.09,25239600
2015-10-25,GE,28.8,29.56,28.75,28.99,131036300
2015-11-01,GE,29.52,29.66,29.41,29.55,48757400


<pandas.core.groupby.DataFrameGroupBy object at 0x1a0a4d8c90>

In [7]:
#total the shares traded across all tickers on each date
volume_by_date = market.groupby('date')['vol']
daily_volume = volume_by_date.sum()
daily_volume.head(10)

date
2015-10-01    3590656500
2015-10-02    3961370900
2015-10-05    3841454200
2015-10-06    3776969500
2015-10-07    4162598800
2015-10-08    3432836300
2015-10-09    3332670600
2015-10-12    2512890600
2015-10-13    3037652600
2015-10-14    3306149600
Name: vol, dtype: int64

In [None]:
#chart the daily share volume versus date, with volume divided by 1e9 (the G in the axis label)
fig, ax = plt.subplots(figsize=(16, 6))
xp, yp = daily_volume.index, daily_volume.values/1.0e9
p = ax.plot(xp, yp, linestyle='-')
#title, labels and axis limits are applied in a single call
ax.set(title='NYSE daily share volume', xlabel='date', ylabel='shares traded    (G)',
       ylim=(0, 8), xlim=plot_date_range)
plt.savefig('figs/volume.png')

In [None]:
#compute volume-weighted share price versus date
def volume_weighted_share_price(record):
    """Return the volume-weighted mean closing price of the rows in record.

    record must provide 'close' and 'vol' columns. The result is
    sum(close*vol)/sum(vol), or None when the summed volume is not positive.
    """
    dollars_traded = (record['close']*record['vol']).sum()
    shares_traded = record['vol'].sum()
    #guard: no shares traded means the weighted mean is undefined
    if not (shares_traded > 0.0):
        return None
    return dollars_traded/shares_traded
#evaluate the volume-weighted share price separately for each date
records_by_date = market.groupby('date')
vwsp = records_by_date.apply(volume_weighted_share_price)

In [None]:
#chart the volume-weighted mean share price versus date
fig, ax = plt.subplots(figsize=(16, 6))
xp, yp = vwsp.index, vwsp.values
p = ax.plot(xp, yp, linestyle='-')
#title, labels and axis limits are applied in a single call
ax.set(title='NYSE volume-weighted mean share price', xlabel='date',
       ylabel='volume-weighted share price', ylim=(31, 45), xlim=plot_date_range)
plt.savefig('figs/vwsp.png')

In [None]:
#chart the fractional change in volume-weighted share price versus date,
#measured relative to the first date in vwsp
fig, ax = plt.subplots(figsize=(16, 6))
xp = vwsp.index
first_price = vwsp.values[0]
yp = (vwsp.values - first_price)/first_price
p = ax.plot(xp, yp, linestyle='-')
ax.set(title='fractional change in NYSE volume-weighted mean share price',
       xlabel='date', ylabel='fractional gain')
p = ax.set_xlim(plot_date_range)
plt.savefig('figs/fractional_vwsp.png')

In [None]:
#simulate mean outcome for 25 people, each trading $10K worth of randomly selected tickers daily
N_buyers, N_tickers = 25, 5
initial_dollars = 10000.0
#keep only the records dated within model_date_range (both endpoints included)
idx = market['date'].between(model_date_range[0], model_date_range[1])
market_data = market[idx]
ran_purchases = random_purchases(market_data, N_buyers, N_tickers, initial_dollars)

In [None]:
#chart the mean fractional gain of the random purchasers versus date;
#error bars are the dollars_std column scaled by the initial mean dollars
fig, ax = plt.subplots(figsize=(16, 6))
xp = ran_purchases.index.values
mean_dollars = ran_purchases['dollars_mean'].values
err = ran_purchases['dollars_std'].values/mean_dollars[0]
yp = (mean_dollars - mean_dollars[0])/mean_dollars[0]
p = ax.plot(xp, yp, linestyle='-')
p = ax.errorbar(xp, yp, err, linestyle='none', alpha=0.5)
ax.set(title='simulated outcome for daily random purchases', xlabel='date',
       ylabel='fractional gain', xlim=model_date_range)
plt.savefig('figs/random_purchases.png')