## Import necessary modules

In [1]:
import time
import datetime
import pandas as pd
import numpy as np
import datetime as dt
from collections import OrderedDict
from copy import copy
import warnings
import matplotlib.pyplot as plt
%matplotlib inline

## Filepath management

In [2]:
project_dir = r'/Users/hudson/Code/marketModel/'
price_path = project_dir + r'stock_price_data/'

## "BarChart" api access

In [3]:
apikey = 'a207db3b2e61eac30ed9b9cd18b2e0d0'

def construct_barChart_url(sym, start_date, freq='minutes', interval='1', api_key=apikey):
    '''Function to construct barchart api url
    Get the barChart url for
    * sym -- the ticker symbol
    * start_date -- the earliest time in format yyyymmddhhmmss
    * freq -- ['days', 'hours', 'minutes', 'seconds']
    * api_key the barChart api key
    '''
    url = 'http://marketdata.websol.barchart.com/getHistory.csv?' +\
            'key={}&symbol={}&type={}&startDate={}'\
    .format(api_key, sym, freq, start_date)

    return url

In [4]:
api_test_url = construct_barChart_url('GOOG', '20170701000000', 'minutes', apikey)
goog = pd.read_csv(api_test_url, parse_dates=['timestamp'])

In [5]:
print goog.head()
print goog.describe()

  symbol           timestamp  tradingDay     open    high       low    close  \
0   GOOG 2017-06-07 13:30:00  2017-06-07  979.650  982.64  978.9001  982.640   
1   GOOG 2017-06-07 13:31:00  2017-06-07  983.315  983.94  981.3300  981.970   
2   GOOG 2017-06-07 13:32:00  2017-06-07  981.990  982.70  981.6200  982.649   
3   GOOG 2017-06-07 13:33:00  2017-06-07  982.200  982.56  981.3000  981.520   
4   GOOG 2017-06-07 13:34:00  2017-06-07  981.920  982.26  981.5400  982.090   

   volume  
0   25071  
1    7533  
2    4347  
3    8057  
4    1329  
               open          high           low         close         volume
count  24171.000000  24171.000000  24171.000000  24171.000000   24171.000000
mean     936.935901    937.181437    936.678764    936.925488    2782.513632
std       19.821337     19.810959     19.831944     19.819011    8060.787691
min      894.860000    895.740000    894.790000    895.000000     100.000000
25%      921.870000    922.070000    921.640000    921.890000 

## Now start pulling in data for our sample stock symbols

In [6]:
symbols = pd.read_csv(project_dir + 'data/stock_data/symbols.csv')

In [7]:
# Pull data for all the test symbols
days_prior_to_now = 10
current = datetime.datetime.now()
starttime = (current - datetime.timedelta(days=days_prior_to_now)).strftime('%Y%m%d%H%M00')
print "stock data start time: " + starttime
prices = {}

for symbol in symbols.ticker_symbol:
    # Construct the appropriate URL
    url = construct_barChart_url(symbol, starttime, 'minutes', apikey)
    print(url)
    
    try:
        # Read the data from the url
        data = pd.read_csv(url, parse_dates=['timestamp']).set_index('timestamp')

        # Drop the symbol and trading day columns
        data = data.drop(['symbol','tradingDay'], axis=1)

        # Convert the times to eastern time zone
        data.index = data.index.tz_localize('utc').tz_convert('US/Eastern')

        # Add data to prices dictionary
        prices[symbol] = data
    except:
        print "Failed to load data for " + symbol
        continue

stock data start time: 20170828074300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=ARDM&type=minutes&startDate=20170828074300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=MICR&type=minutes&startDate=20170828074300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=AVIR&type=minutes&startDate=20170828074300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=SEAC&type=minutes&startDate=20170828074300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=CPST&type=minutes&startDate=20170828074300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=GBR&type=minutes&startDate=20170828074300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=DTRM&type=minutes&startDat

In [8]:
# Concatenate all of the stock data into a multiIndex dataframe
stock_data = pd.concat(prices.values(), keys=prices.keys())
stock_data.index.set_names(['ticker', 'timestamp'], inplace=True)
stock_data

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume
ticker,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ASTC,2017-08-28 09:30:00-04:00,0.7500,0.7500,0.7500,0.7500,161
ASTC,2017-08-28 09:37:00-04:00,0.7500,0.7500,0.7500,0.7500,200
ASTC,2017-08-28 10:08:00-04:00,0.7700,0.7700,0.7700,0.7700,100
ASTC,2017-08-28 10:13:00-04:00,0.7523,0.7523,0.7523,0.7523,234
ASTC,2017-08-28 10:24:00-04:00,0.7700,0.7700,0.7700,0.7700,100
ASTC,2017-08-28 11:39:00-04:00,0.7100,0.7100,0.7100,0.7100,100
ASTC,2017-08-28 11:42:00-04:00,0.7700,0.7700,0.7700,0.7700,100
ASTC,2017-08-28 13:05:00-04:00,0.7500,0.7500,0.7500,0.7500,100
ASTC,2017-08-28 13:12:00-04:00,0.7500,0.7500,0.7500,0.7500,100
ASTC,2017-08-28 13:49:00-04:00,0.7200,0.7500,0.7200,0.7500,500


## Write to hdf

In [9]:
stock_data.to_hdf(project_dir + 'data/stock_data/raw_stock_data.hdf', 'table')