In [9]:
!pip install alpha_vantage pandas

Collecting alpha_vantage
  Downloading https://files.pythonhosted.org/packages/ba/b4/d95f9e0eccea6732bab5a079772d453a4f0b68a9f63740d9cf320f92beaa/alpha_vantage-2.3.1-py3-none-any.whl
Collecting aiohttp (from alpha_vantage)
  Downloading https://files.pythonhosted.org/packages/85/8f/e4fb9574630ae3fe385549cd29174f57e2a0b6808b9109be9bdf6d5e8533/aiohttp-3.7.4.post0-cp36-cp36m-win_amd64.whl (630kB)
Collecting idna-ssl>=1.0; python_version < "3.7" (from aiohttp->alpha_vantage)
  Downloading https://files.pythonhosted.org/packages/46/03/07c4894aae38b0de52b52586b24bf189bb83e4ddabfe2e2c8f2419eec6f4/idna-ssl-1.1.0.tar.gz
Collecting yarl<2.0,>=1.0 (from aiohttp->alpha_vantage)
  Downloading https://files.pythonhosted.org/packages/69/c9/eb6492cf3b1d41665aaf2350e611e44dcadf21c3fbdaaf1b8a28f60b00c9/yarl-1.6.3-cp36-cp36m-win_amd64.whl (124kB)
Collecting multidict<7.0,>=4.5 (from aiohttp->alpha_vantage)
  Downloading https://files.pythonhosted.org/packages/ab/9f/9663fa1f11d29fc7b87ebc3c461735a04275392

In [2]:
# Stock data source: Alpha Vantage, https://www.alphavantage.co/
import os
import pandas as pd
# Alpha Vantage API key, read from the "alphakey" environment variable
# (None when that variable is not set).
alphakey = os.getenv("alphakey")

The ETF SPY is used as a proxy for the large-cap market. The fund holds the stocks in the S&P 500 index, with an equivalent $\beta = 1$.

In [3]:
# API call: download the daily price history for SPY from Alpha Vantage.
from alpha_vantage.timeseries import TimeSeries
from pprint import pprint

# One client is created here and reused by the later download cells;
# output_format='pandas' asks it to return DataFrames.
ts = TimeSeries(key=alphakey, output_format='pandas')

# get_daily returns a (prices, metadata) pair. outputsize='full' requests the
# full-length series (the recorded output reaches back to 1999).
data1, meta_data = ts.get_daily(symbol='SPY', outputsize='full')

# End the cell with the frame itself so the notebook renders it with its rich
# DataFrame display instead of printing the plain-text repr.
data1

             1. open   2. high    3. low  4. close    5. volume
date                                                           
2021-05-07  419.8900  422.8150  419.1600  422.1200   67733790.0
2021-05-06  415.8300  419.2100  413.6750  419.0700   74321368.0
2021-05-05  417.3800  417.6300  415.1500  415.7500   39960661.0
2021-05-04  416.0700  416.6000  411.6700  415.6200  101591166.0
2021-05-03  419.4300  419.8400  417.6650  418.2000   68128289.0
...              ...       ...       ...       ...          ...
1999-11-05  138.6250  139.1093  136.7812  137.8750    7431500.0
1999-11-04  136.7500  137.3593  135.7656  136.5312    7907500.0
1999-11-03  136.0000  136.3750  135.1250  135.5000    7222300.0
1999-11-02  135.9687  137.2500  134.5937  134.5937    6516900.0
1999-11-01  136.5000  137.0000  135.5625  135.5625    4006500.0

[5414 rows x 5 columns]


In [4]:
# Intraday fractional move for each row: (close - open) / open.
# Uses the raw Alpha Vantage column names, so this cell has to run before the
# columns are renamed.
data1['change'] = (
    data1['4. close']
    .sub(data1['1. open'])
    .div(data1['1. open'])
)

In [5]:
# Drop the "N. " prefixes from the Alpha Vantage column names (in place).
data1.rename(
    columns={
        "1. open": "open",
        "2. high": "high",
        "3. low": "low",
        "4. close": "close",
        "5. volume": "volume",
    },
    inplace=True,
)
# Reorder the rows by 'date' ascending (oldest first), also in place.
data1.sort_values(by='date', inplace=True)
data1

Unnamed: 0_level_0,open,high,low,close,volume,change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1999-11-01,136.5000,137.0000,135.5625,135.5625,4006500.0,-0.006868
1999-11-02,135.9687,137.2500,134.5937,134.5937,6516900.0,-0.010113
1999-11-03,136.0000,136.3750,135.1250,135.5000,7222300.0,-0.003676
1999-11-04,136.7500,137.3593,135.7656,136.5312,7907500.0,-0.001600
1999-11-05,138.6250,139.1093,136.7812,137.8750,7431500.0,-0.005410
...,...,...,...,...,...,...
2021-05-03,419.4300,419.8400,417.6650,418.2000,68128289.0,-0.002933
2021-05-04,416.0700,416.6000,411.6700,415.6200,101591166.0,-0.001082
2021-05-05,417.3800,417.6300,415.1500,415.7500,39960661.0,-0.003905
2021-05-06,415.8300,419.2100,413.6750,419.0700,74321368.0,0.007792


In [6]:
# Save the dataframe (date index included) to a CSV file for use as a label,
# or a modified label, in predictive machine-learning models.
# to_csv does not create missing folders, so make sure ./Data exists first;
# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs('./Data', exist_ok=True)
data1.to_csv('./Data/stock_spy.csv', index=True)

The ETF MDY is used as a proxy for the mid-cap market. The fund holds the stocks in the S&P MidCap 400 index, with an equivalent $\beta = 1$.

In [7]:
# Download the daily price history for MDY with the shared `ts` client.
# get_daily returns a (prices, metadata) pair; outputsize='full' requests the
# full-length series.
data2, meta_data = ts.get_daily(symbol='MDY', outputsize='full')

# End the cell with the frame itself so the notebook renders it with its rich
# DataFrame display instead of printing the plain-text repr.
data2

            1. open   2. high  3. low  4. close  5. volume
date                                                      
2021-05-07   498.54  505.2900  497.21    504.81   718340.0
2021-05-06   497.41  499.6100  492.12    499.54  1736103.0
2021-05-05   499.94  499.9400  495.78    496.97   774400.0
2021-05-04   496.48  498.1735  492.30    497.70  1103994.0
2021-05-03   501.73  502.7200  498.07    499.54   856729.0
...             ...       ...     ...       ...        ...
1999-11-05    79.00   79.1300   77.72     78.28   632500.0
1999-11-04    77.38   77.4700   76.69     77.25   419200.0
1999-11-03    76.50   76.7500   76.22     76.67   622700.0
1999-11-02    75.47   76.4700   75.41     75.73   823200.0
1999-11-01    75.25   75.9100   75.22     75.41   452100.0

[5414 rows x 5 columns]


In [8]:
# Intraday fractional move for each row: (close - open) / open.
# Uses the raw Alpha Vantage column names, so this cell has to run before the
# columns are renamed.
data2['change'] = (
    data2['4. close']
    .sub(data2['1. open'])
    .div(data2['1. open'])
)

In [9]:
# Drop the "N. " prefixes from the Alpha Vantage column names (in place).
data2.rename(
    columns={
        "1. open": "open",
        "2. high": "high",
        "3. low": "low",
        "4. close": "close",
        "5. volume": "volume",
    },
    inplace=True,
)
# Reorder the rows by 'date' ascending (oldest first), also in place.
data2.sort_values(by='date', inplace=True)
data2

Unnamed: 0_level_0,open,high,low,close,volume,change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1999-11-01,75.25,75.9100,75.22,75.41,452100.0,0.002126
1999-11-02,75.47,76.4700,75.41,75.73,823200.0,0.003445
1999-11-03,76.50,76.7500,76.22,76.67,622700.0,0.002222
1999-11-04,77.38,77.4700,76.69,77.25,419200.0,-0.001680
1999-11-05,79.00,79.1300,77.72,78.28,632500.0,-0.009114
...,...,...,...,...,...,...
2021-05-03,501.73,502.7200,498.07,499.54,856729.0,-0.004365
2021-05-04,496.48,498.1735,492.30,497.70,1103994.0,0.002457
2021-05-05,499.94,499.9400,495.78,496.97,774400.0,-0.005941
2021-05-06,497.41,499.6100,492.12,499.54,1736103.0,0.004282


In [10]:
# Save the dataframe (date index included) to a CSV file for use as a label,
# or a modified label, in predictive machine-learning models.
# to_csv does not create missing folders, so make sure ./Data exists first;
# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs('./Data', exist_ok=True)
data2.to_csv('./Data/stock_mdy.csv', index=True)

The ETF IJR is used as a proxy for the small-cap market. The fund holds the stocks in the S&P SmallCap 600 index, with an equivalent $\beta = 1$.

In [11]:
# Download the daily price history for IJR with the shared `ts` client.
# get_daily returns a (prices, metadata) pair; outputsize='full' requests the
# full-length series.
data3, meta_data = ts.get_daily(symbol='IJR', outputsize='full')

# End the cell with the frame itself so the notebook renders it with its rich
# DataFrame display instead of printing the plain-text repr.
data3

            1. open  2. high  3. low  4. close  5. volume
date                                                     
2021-05-07   111.69   112.94  111.24    112.81  3262527.0
2021-05-06   111.27   111.97  109.84    111.97  4351412.0
2021-05-05   111.80   111.80  110.82    111.23  2225700.0
2021-05-04   111.11   111.45  109.89    111.28  3804786.0
2021-05-03   111.60   112.35  111.00    111.79  3144098.0
...             ...      ...     ...       ...        ...
2000-06-02   103.50   104.70  103.50    104.70   258700.0
2000-06-01   100.20   101.80  100.20    101.80   150900.0
2000-05-31    98.75    99.88   98.75     99.88     2400.0
2000-05-30    97.03    97.59   97.03     97.59      300.0
2000-05-26    96.09    96.25   95.16     95.69   139700.0

[5270 rows x 5 columns]


In [13]:
# Intraday fractional move for each row: (close - open) / open.
# Uses the raw Alpha Vantage column names, so this cell has to run before the
# columns are renamed.
data3['change'] = (
    data3['4. close']
    .sub(data3['1. open'])
    .div(data3['1. open'])
)

In [14]:
# Drop the "N. " prefixes from the Alpha Vantage column names (in place).
data3.rename(
    columns={
        "1. open": "open",
        "2. high": "high",
        "3. low": "low",
        "4. close": "close",
        "5. volume": "volume",
    },
    inplace=True,
)
# Reorder the rows by 'date' ascending (oldest first), also in place.
data3.sort_values(by='date', inplace=True)
data3

Unnamed: 0_level_0,open,high,low,close,volume,change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-05-26,96.09,96.25,95.16,95.69,139700.0,-0.004163
2000-05-30,97.03,97.59,97.03,97.59,300.0,0.005771
2000-05-31,98.75,99.88,98.75,99.88,2400.0,0.011443
2000-06-01,100.20,101.80,100.20,101.80,150900.0,0.015968
2000-06-02,103.50,104.70,103.50,104.70,258700.0,0.011594
...,...,...,...,...,...,...
2021-05-03,111.60,112.35,111.00,111.79,3144098.0,0.001703
2021-05-04,111.11,111.45,109.89,111.28,3804786.0,0.001530
2021-05-05,111.80,111.80,110.82,111.23,2225700.0,-0.005098
2021-05-06,111.27,111.97,109.84,111.97,4351412.0,0.006291


In [15]:
# Save the dataframe (date index included) to a CSV file for use as a label,
# or a modified label, in predictive machine-learning models.
# to_csv does not create missing folders, so make sure ./Data exists first;
# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs('./Data', exist_ok=True)
data3.to_csv('./Data/stock_ijr.csv', index=True)