## Imports

In [1]:
import sys
sys.path.append('C:\\Users\\jchang\\AppData\\Local\\Programs\\Python\\Python37-32\\Lib\\site-packages')
import os
import datetime
import requests
import pandas as pd
import json
import boto3
from boto3.dynamodb.conditions import Key, Attr

## Get Data

In [23]:
## make timestart and timeend conditional on a table scan of the dates that already exist for the specified ticker.
## goal is to avoid duplicates

In [24]:
def getStockDailyURL(ticker, timestart, timeend):
    
    main_request = 'https://www.worldtradingdata.com/api/v1/history?symbol=' + ticker + '&api_token=' + os.environ['WORLD_TRADING_DATA_API_KEY']
    
    if timestart is not None:
        main_request = main_request + '&date_from=' + timestart
    
    if timeend is not None:
        main_request = main_request + '&date_end=' + timeend
        
    return main_request

In [25]:
def getStockDailyData(url):
    
    data_response = requests.get(url)
    
    return data_response

In [26]:
def convertStockResponseToDf(stock_response, ticker):
    
    stock_json = json.loads(stock_response.content)

    stock_df = pd.DataFrame()
    
    stock_df['session'] = list(stock_json['history'].keys())
    stock_df['ticker'] = ticker
    stock_df['open'] = ''
    stock_df['close'] = ''
    stock_df['high'] = ''
    stock_df['low'] = ''
    stock_df['volume'] = ''
    
    attributes = list(stock_json['history'][stock_df['session'][1]].keys())

    for i in stock_df['session']:
        for j in attributes:
            stock_df[j].loc[stock_df['session'] == i] = stock_json['history'][i][j]
    
    return stock_df

In [27]:
def getStockDF(ticker, timestart, timeend):
    
    df = convertStockResponseToDf(
        stock_response = getStockDailyData(
            getStockDailyURL(
                ticker = ticker,
                timestart = timestart,
                timeend = timeend
            )
        ),
        ticker = ticker
    )
    
    return df

## AWS ETL & Query 

In [28]:
def importStockDFtoAWS(stock_df):
    
    dynamodb = boto3.resource('dynamodb')
    stock_table = dynamodb.Table('stockprices')
    
    for i in stock_df.index:
        stock_table.put_item(
            Item = {
                'session': str(stock_df['session'][i]),
                'ticker': str(stock_df['ticker'][i]),
                'open': str(stock_df['open'][i]),
                'close': str(stock_df['close'][i]),
                'high': str(stock_df['high'][i]),
                'low': str(stock_df['low'][i]),
                'volume': str(stock_df['volume'][i]),
            }
)

In [29]:
def URLtoAWS(ticker, timestart = None, timeend = None):
    
    stock_df = getStockDF(
        ticker = ticker,
        timestart = timestart,
        timeend = timeend
    )
    
    importStockDFtoAWS(stock_df)

In [None]:
def queryTickerToDF(ticker):
    
    dynamodb = boto3.resource('dynamodb')
    stock_table = dynamodb.Table('stockprices')

    scan_response_json = stock_table.scan(
        FilterExpression=Attr('ticker').eq(ticker)
    )

    scan_df = pd.DataFrame.from_dict(scan_response_json['Items'], orient='columns')
    
    return scan_df

In [None]:
def exportCSV(stock_scan_df, ticker):
    stock_scan_df.to_csv(
        path_or_buf = 'C:\\Users\\jchang\\Desktop\\Projects\\Stocks\\data\\' + ticker + '.csv'
    )

In [44]:
# stock_list = ['AMZN', 'MSFT', 'BA', 'LMT', 'WMT', 'LULU', 'TSLA', 'TSM', 'JPM', 'V', 'MA']
stock_list = ['.DJI', 'SPY', 'SPGI', 'DIS', 'CRM', 'NKE', 'AXP', 'CHEV', 'CAT', 'HD', 'COST', 'SBUX', 'FB', 'NFLX', 'UPS', 'FDX', 'BRK.B', 'BAC', 'ADBE']

In [None]:
for i in stock_list:
    
    start = datetime.datetime.today()
    
    URLtoAWS(ticker = i)
    
    print(i + ' ' + str(datetime.datetime.today() - start) + str(datetime.datetime.today()))

.DJI 0:42:39.482504
SPY 0:21:58.666621
SPGI 0:40:57.457350
DIS 0:39:28.972816
CRM 0:12:31.604002
NKE 0:32:19.169526
AXP 0:39:31.740782
CHEV 0:06:36.955400
CAT 0:39:29.962295
HD 0:31:36.298820
COST 0:31:18.793956
SBUX 0:22:33.394848
FB 0:05:52.915557
NFLX 0:14:14.719590
UPS 0:16:21.124115
FDX 0:34:33.209901
BRK.B 0:19:16.904026


In [None]:
# start = datetime.datetime.now()

# importStockDFtoAWS(aapl)
# datetime.datetime.now() - start

In [18]:
dynamodb = boto3.resource('dynamodb')
stock_table = dynamodb.Table('stockprices')

print(stock_table.item_count)
print(stock_table.table_size_bytes)

85563
6380067
