## Import necessary modules

In [1]:
import datetime
import datetime as dt
import os
import time
import warnings
from copy import copy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

%matplotlib inline

## Filepath management

In [2]:
# Root of the project checkout. It can be overridden with the MARKET_MODEL_DIR
# environment variable; the original location is kept as the default so
# existing runs behave exactly as before. os.path.join(..., '') guarantees the
# trailing separator that the plain string concatenations in this notebook
# rely on.
project_dir = os.path.join(
    os.environ.get('MARKET_MODEL_DIR', r'/Users/hudson/Code/marketModel/'), '')
# Directory for stock price files, derived from the project root.
price_path = project_dir + r'stock_price_data/'

## Barchart API access

In [3]:
# Barchart API key. Prefer supplying it through the BARCHART_API_KEY
# environment variable so the credential does not have to live in the notebook.
# NOTE(review): the literal fallback is kept only so existing runs keep
# working; it is a committed secret (also echoed in saved cell output) and
# should be rotated and then removed.
apikey = os.environ.get('BARCHART_API_KEY', 'a207db3b2e61eac30ed9b9cd18b2e0d0')


def construct_barChart_url(sym, start_date, freq, api_key=apikey):
    '''Build the Barchart getHistory CSV request URL.

    Parameters
    * sym -- the ticker symbol (sent as the ``symbol`` query parameter)
    * start_date -- the earliest time, formatted yyyymmddhhmmss
      (sent as ``startDate``)
    * freq -- bar frequency, sent as the ``type`` query parameter
      (this notebook uses 'minutes')
    * api_key -- the Barchart API key (sent as ``key``); defaults to the
      module-level ``apikey`` as it was when this function was defined

    Returns the URL as a string. The values are inserted verbatim; no
    URL-encoding is applied.
    '''
    query = 'key={}&symbol={}&type={}&startDate={}'.format(
        api_key, sym, freq, start_date)
    return 'http://marketdata.websol.barchart.com/getHistory.csv?' + query

In [4]:
# Smoke-test the API: request minute bars for GOOG and parse the 'timestamp'
# column as datetimes.
api_test_url = construct_barChart_url(
    sym='GOOG',
    start_date='20170701000000',
    freq='minutes',
    api_key=apikey,
)
goog = pd.read_csv(api_test_url, parse_dates=['timestamp'])

In [5]:
# Preview the first rows rather than rendering the whole frame.
goog.head(10)

Unnamed: 0,symbol,timestamp,tradingDay,open,high,low,close,volume
0,GOOG,2017-06-06 13:30:00,2017-06-06,983.1600,983.4700,981.0200,983.4700,38054
1,GOOG,2017-06-06 13:31:00,2017-06-06,984.2700,985.5900,983.0842,983.6800,6259
2,GOOG,2017-06-06 13:32:00,2017-06-06,984.4500,984.7100,982.5900,983.3083,5213
3,GOOG,2017-06-06 13:33:00,2017-06-06,983.7300,984.3200,982.9550,984.3200,7581
4,GOOG,2017-06-06 13:34:00,2017-06-06,984.3200,984.5300,983.9740,983.9740,3415
5,GOOG,2017-06-06 13:35:00,2017-06-06,984.1050,984.6400,983.7120,984.3900,7966
6,GOOG,2017-06-06 13:36:00,2017-06-06,984.5400,985.8400,984.4000,985.6050,10213
7,GOOG,2017-06-06 13:37:00,2017-06-06,985.6215,986.0100,984.3700,985.6300,9851
8,GOOG,2017-06-06 13:38:00,2017-06-06,985.2900,985.7000,984.0800,984.0900,4300
9,GOOG,2017-06-06 13:39:00,2017-06-06,984.0800,985.1789,984.0600,984.0600,1900


## Now start pulling in data for our sample stock symbols

In [6]:
# Load the list of sample tickers; the download loop below reads its
# `ticker_symbol` column.
symbols = pd.read_csv(''.join([project_dir, 'data/stock_data/symbols.csv']))

In [7]:
# Show what a start date looks like in the yyyymmddhhmmss format used by the
# API URL, using a timestamp 30 days before now.
current = datetime.datetime.now()
starttime = current - datetime.timedelta(days=30)
# print(...) in call form runs on both Python 2 and 3 and matches the
# print(...) call already used in the download loop.
print(starttime.strftime('%Y%m%d%H%M%S'))

20170807065707


In [15]:
# Pull data for all the test symbols
current = datetime.datetime.now()
starttime = (current - datetime.timedelta(days=10)).strftime('%Y%m%d%H%M00')
print "stock data start time: " + starttime
prices = {}

for symbol in symbols.ticker_symbol:
    # Construct the appropriate URL
    url = construct_barChart_url(symbol, starttime, 'minutes', apikey)
    print(url)
    
    try:
        # Read the data from the url
        data = pd.read_csv(url, parse_dates=['timestamp']).set_index('timestamp')

        # Drop the symbol and trading day columns
        data = data.drop(['symbol','tradingDay'], axis=1)

        # Convert the times to eastern time zone
        data.index = data.index.tz_localize('utc').tz_convert('US/Eastern')

        # Add data to prices dictionary
        prices[symbol] = data
    except:
        print "Failed to load data for " + symbol
        continue

stock data start time: 20170827070300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=ARDM&type=minutes&startDate=20170827070300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=MICR&type=minutes&startDate=20170827070300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=AVIR&type=minutes&startDate=20170827070300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=SEAC&type=minutes&startDate=20170827070300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=CPST&type=minutes&startDate=20170827070300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=GBR&type=minutes&startDate=20170827070300
http://marketdata.websol.barchart.com/getHistory.csv?key=a207db3b2e61eac30ed9b9cd18b2e0d0&symbol=DTRM&type=minutes&startDat

In [16]:
# Concatenate all of the stock data into a multiIndex dataframe
stock_data = pd.concat(prices.values(), keys=prices.keys())
stock_data.index.set_names(['ticker', 'timestamp'], inplace=True)
stock_data

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume
ticker,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ASTC,2017-08-28 09:30:00-04:00,0.7500,0.7500,0.7500,0.7500,161
ASTC,2017-08-28 09:37:00-04:00,0.7500,0.7500,0.7500,0.7500,200
ASTC,2017-08-28 10:08:00-04:00,0.7700,0.7700,0.7700,0.7700,100
ASTC,2017-08-28 10:13:00-04:00,0.7523,0.7523,0.7523,0.7523,234
ASTC,2017-08-28 10:24:00-04:00,0.7700,0.7700,0.7700,0.7700,100
ASTC,2017-08-28 11:39:00-04:00,0.7100,0.7100,0.7100,0.7100,100
ASTC,2017-08-28 11:42:00-04:00,0.7700,0.7700,0.7700,0.7700,100
ASTC,2017-08-28 13:05:00-04:00,0.7500,0.7500,0.7500,0.7500,100
ASTC,2017-08-28 13:12:00-04:00,0.7500,0.7500,0.7500,0.7500,100
ASTC,2017-08-28 13:49:00-04:00,0.7200,0.7500,0.7200,0.7500,500


## Write the combined stock data to HDF

In [17]:
# Persist the combined frame to an HDF file under the key 'table'.
stock_data.to_hdf(
    ''.join([project_dir, 'data/stock_data/raw_stock_data.hdf']),
    key='table',
)