## Import necessary modules

In [1]:
import datetime
import datetime as dt
import os
import time
import warnings
from collections import OrderedDict
from copy import copy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline

## Filepath management

In [2]:
# Root directory of the project. Other paths in this notebook are built from it
# by plain string concatenation, so the trailing slash is required.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
project_dir = r'/Users/hudson/Code/marketModel/'
# Sub-directory derived from the project root (presumably for per-stock price
# files -- confirm against the code that consumes it).
price_path = ''.join((project_dir, r'stock_price_data/'))

## Barchart API access

In [3]:
# Barchart API key. Supply it through the BARCHART_API_KEY environment variable;
# the literal is kept only as a fallback so existing runs keep working.
# NOTE(review): a key committed inside a notebook should be treated as leaked --
# rotate it and remove the fallback.
apikey = os.environ.get('BARCHART_API_KEY') or 'a207db3b2e61eac30ed9b9cd18b2e0d0'

def construct_barChart_url(sym, start_date, freq='minutes', interval='1', api_key=apikey):
    '''Build the Barchart getHistory CSV request URL for one symbol.

    * sym -- the ticker symbol
    * start_date -- the earliest time in format yyyymmddhhmmss
    * freq -- ['days', 'hours', 'minutes', 'seconds']; sent as the `type`
      query parameter
    * interval -- accepted for interface compatibility but NOT included in the
      generated URL. Callers that want to set `api_key` must pass it by
      keyword, because `interval` occupies the fourth positional slot.
    * api_key -- the barChart api key; defaults to the module-level `apikey`
      as it was when this function was defined

    Returns the URL as a string. No request is made here.
    '''
    query = 'key={}&symbol={}&type={}&startDate={}'.format(api_key, sym, freq, start_date)
    return 'http://marketdata.websol.barchart.com/getHistory.csv?' + query

In [4]:
# Try the API on a single symbol (GOOG) and load the returned CSV.
# The key is passed by keyword: the fourth positional parameter of
# construct_barChart_url is `interval`, not `api_key`.
api_test_url = construct_barChart_url('GOOG', '20170701000000', 'minutes', api_key=apikey)
goog = pd.read_csv(api_test_url, parse_dates=['timestamp'])

In [5]:
# Show the first rows and the summary statistics of the test download.
# Single-argument print(...) produces the same output on Python 2 and Python 3.
print(goog.head())
print(goog.describe())

  symbol           timestamp  tradingDay    open    high       low     close  \
0   GOOG 2017-06-06 13:30:00  2017-06-06  983.16  983.47  981.0200  983.4700   
1   GOOG 2017-06-06 13:31:00  2017-06-06  984.27  985.59  983.0842  983.6800   
2   GOOG 2017-06-06 13:32:00  2017-06-06  984.45  984.71  982.5900  983.3083   
3   GOOG 2017-06-06 13:33:00  2017-06-06  983.73  984.32  982.9550  984.3200   
4   GOOG 2017-06-06 13:34:00  2017-06-06  984.32  984.53  983.9740  983.9740   

   volume  
0   38054  
1    6259  
2    5213  
3    7581  
4    3415  
               open          high           low         close         volume
count  23940.000000  23940.000000  23940.000000  23940.000000   23940.000000
mean     937.999755    938.245102    937.742463    937.989021    2801.018212
std       20.727614     20.713608     20.740116     20.724061    8160.877618
min      894.860000    895.740000    894.790000    895.000000     100.000000
25%      921.840000    922.050000    921.610000    921.849750 

## Now start pulling in data for our sample stock symbols

In [6]:
# Load the table of sample symbols; its `ticker_symbol` column drives the
# download loop further down.
symbols = pd.read_csv(filepath_or_buffer=project_dir + 'data/stock_data/symbols.csv')

In [7]:
# Pull minute bars for all the test symbols, starting `days_prior_to_now` days
# before the current local time.
days_prior_to_now = 30
current = datetime.datetime.now()
# The API takes yyyymmddhhmmss; the seconds field is pinned to 00.
starttime = (current - datetime.timedelta(days=days_prior_to_now)).strftime('%Y%m%d%H%M00')
print("stock data start time: " + starttime)
prices = {}

for symbol in symbols.ticker_symbol:
    # Construct the appropriate URL. The key is passed by keyword because the
    # fourth positional parameter of construct_barChart_url is `interval`.
    url = construct_barChart_url(symbol, starttime, 'minutes', api_key=apikey)
    # Log the request without writing the API key into the saved cell output.
    print(url.replace(apikey, '<api_key>'))

    try:
        # Read the data from the url
        data = pd.read_csv(url, parse_dates=['timestamp']).set_index('timestamp')

        # Drop the symbol and trading day columns
        data = data.drop(['symbol', 'tradingDay'], axis=1)

        # Treat the timestamps as UTC and convert them to the eastern time zone
        data.index = data.index.tz_localize('utc').tz_convert('US/Eastern')

        # Add data to prices dictionary
        prices[symbol] = data
    except Exception as err:
        # Best effort: one bad symbol must not abort the whole pull. Catching
        # Exception (not a bare except) keeps KeyboardInterrupt usable for
        # stopping a long download, and the reason is reported, not hidden.
        print("Failed to load data for {}: {}".format(symbol, err))

stock data start time: 20170807160400
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=ARDM&type=minutes&startDate=20170807160400
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=MICR&type=minutes&startDate=20170807160400
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=AVIR&type=minutes&startDate=20170807160400
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=SEAC&type=minutes&startDate=20170807160400
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=CPST&type=minutes&startDate=20170807160400
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=GBR&type=minutes&startDate=20170807160400
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=DTRM&type=minutes&startDat

In [12]:
# Stack the per-symbol frames into one frame with a two-level index: the outer
# level holds the dictionary key (ticker), the inner level the timestamp.
stock_data = pd.concat(list(prices.values()), keys=list(prices.keys()))
stock_data.index = stock_data.index.set_names(['ticker', 'timestamp'])
stock_data

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume
ticker,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ASTC,2017-08-08 10:47:00-04:00,0.7311,0.7311,0.7311,0.7311,1000
ASTC,2017-08-08 10:57:00-04:00,0.7500,0.7500,0.7500,0.7500,300
ASTC,2017-08-08 11:04:00-04:00,0.7600,0.7600,0.7600,0.7600,100
ASTC,2017-08-08 11:33:00-04:00,0.7301,0.7301,0.7301,0.7301,100
ASTC,2017-08-08 11:34:00-04:00,0.7400,0.7401,0.7250,0.7250,18677
ASTC,2017-08-08 11:41:00-04:00,0.7400,0.7400,0.7400,0.7400,100
ASTC,2017-08-08 11:49:00-04:00,0.7323,0.7488,0.7301,0.7488,5177
ASTC,2017-08-08 12:21:00-04:00,0.7698,0.7698,0.7698,0.7698,4000
ASTC,2017-08-08 12:40:00-04:00,0.7251,0.7251,0.7251,0.7251,3250
ASTC,2017-08-08 12:48:00-04:00,0.7698,0.7698,0.7698,0.7698,1000


## Write to HDF

In [9]:
# Persist the combined frame to an HDF file, stored under the key 'table'.
stock_data.to_hdf(path_or_buf=project_dir + 'data/stock_data/raw_stock_data.hdf', key='table')