In [1]:
import pandas as pd
import numpy as np

import datetime
import pytz
import requests
from pandas.io.json import json_normalize

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# URL template for the IEX chart endpoint; `{}` is filled with a ticker symbol.
# NOTE(review): the IEX 1.0 API may have been retired -- confirm the endpoint
# still responds before relying on it.
IEX_API_URL_TEMPLATE = 'https://api.iextrading.com/1.0/stock/{}/chart/5y'

# The 30 ticker symbols this notebook treats as the DJIA constituents.
djia_tickers = [
    'BA', 'PFE', 'MCD', 'WMT', 'KO', 'MRK',
    'HD', 'V', 'JNJ', 'VZ', 'CSCO', 'AXP',
    'TRV', 'DIS', 'MSFT', 'UNH', 'DWDP', 'CAT',
    'AAPL', 'UTX', 'MMM', 'JPM', 'IBM', 'GS',
    'XOM', 'INTC', 'NKE', 'CVX', 'PG', 'WBA',
]

In [3]:
def get_dataframe_from_ticker(ticker_symbol, timeout=30):
    """Download the chart history for one ticker and return it as a DataFrame.

    The lower-cased ``ticker_symbol`` is substituted into
    ``IEX_API_URL_TEMPLATE``; the JSON response is flattened with
    ``json_normalize`` and enriched with calendar columns derived from ``date``.

    Parameters
    ----------
    ticker_symbol : str
        Ticker to request. It is stored unchanged in the ``symbol`` column.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its timeout (default 30).

    Returns
    -------
    pandas.DataFrame
        ``symbol`` as the first column, ``date`` parsed as a datetime, the
        derived ``year``, ``month``, ``day``, ``week``, ``dayofweek``,
        ``dayofyear`` and ``timestamp`` columns, and every response column
        that is not in the dropped list below.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.exceptions.Timeout
        If the request does not complete within ``timeout``.
    """
    url = IEX_API_URL_TEMPLATE.format(ticker_symbol.lower())

    # Without a timeout a stalled connection would hang the kernel forever.
    r = requests.get(url=url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error body as data.
    r.raise_for_status()
    df = json_normalize(r.json())

    df.insert(loc=0, column='symbol', value=ticker_symbol)

    df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

    dates = df['date'].dt
    df['year'] = dates.year
    df['month'] = dates.month
    df['day'] = dates.day
    if hasattr(dates, 'isocalendar'):
        # Series.dt.week was deprecated and later removed; isocalendar() is the
        # replacement. Cast back to int64 to keep the historical dtype
        # (assumes there are no missing dates).
        df['week'] = dates.isocalendar().week.astype(np.int64)
    else:
        # Older pandas without isocalendar(): keep the legacy accessor.
        df['week'] = dates.week
    df['dayofweek'] = dates.dayofweek
    df['dayofyear'] = dates.dayofyear
    # Force nanosecond resolution so the integer value does not depend on the
    # datetime unit pandas happened to pick.
    df['timestamp'] = df['date'].values.astype('datetime64[ns]').astype(np.int64)

    df = df.drop(['label',
                  'change', 'changeOverTime', 'changePercent',
                  'high', 'low', 'open',
                  'unadjustedVolume', 'volume', 'vwap'],
                 axis=1)

    return df

In [12]:
# Fetch the AAPL history and inspect the dtypes of the resulting columns.
aapl_hist = get_dataframe_from_ticker('AAPL')
aapl_hist.dtypes

symbol               object
close               float64
date         datetime64[ns]
year                  int64
month                 int64
day                   int64
week                  int64
dayofweek             int64
dayofyear             int64
timestamp             int64
dtype: object

In [11]:
# Show the first and last date covered by the download. Series.min()/.max()
# are vectorised, whereas the builtins iterate the column element by element.
print(aapl_hist.date.min(), '-->', aapl_hist.date.max())

2014-03-17 00:00:00 --> 2019-03-14 00:00:00


In [14]:
# Download the same ticker twice (two separate requests) so that df1 and df2
# can be combined in the following cell.
df1 = get_dataframe_from_ticker('AAPL')
df2 = get_dataframe_from_ticker('AAPL')

In [17]:
# Stack df2 under df1, keeping each frame's own index. DataFrame.append was
# deprecated and later removed from pandas; pd.concat is the supported
# equivalent. The result is only displayed here, not stored.
pd.concat([df1, df2])

Unnamed: 0,symbol,close,date,year,month,day,week,dayofweek,dayofyear,timestamp
0,AAPL,69.1778,2014-03-17,2014,3,17,12,0,76,1395014400000000000
1,AAPL,69.7898,2014-03-18,2014,3,18,12,1,77,1395100800000000000
2,AAPL,69.7714,2014-03-19,2014,3,19,12,2,78,1395187200000000000
3,AAPL,69.4352,2014-03-20,2014,3,20,12,3,79,1395273600000000000
4,AAPL,69.9828,2014-03-21,2014,3,21,12,4,80,1395360000000000000
5,AAPL,70.8128,2014-03-24,2014,3,24,13,0,83,1395619200000000000
6,AAPL,71.5746,2014-03-25,2014,3,25,13,1,84,1395705600000000000
7,AAPL,70.8903,2014-03-26,2014,3,26,13,2,85,1395792000000000000
8,AAPL,70.5856,2014-03-27,2014,3,27,13,3,86,1395878400000000000
9,AAPL,70.5068,2014-03-28,2014,3,28,13,4,87,1395964800000000000


In [19]:
# Flag rows of df1 that are exact repeats of an earlier row.
# NOTE(review): this inspects df1 on its own -- the combined df1 + df2 frame
# displayed earlier was never assigned to a variable; confirm that is intended.
df1.duplicated()

0       False
1       False
2       False
3       False
4       False
5       False
6       False
7       False
8       False
9       False
10      False
11      False
12      False
13      False
14      False
15      False
16      False
17      False
18      False
19      False
20      False
21      False
22      False
23      False
24      False
25      False
26      False
27      False
28      False
29      False
        ...  
1228    False
1229    False
1230    False
1231    False
1232    False
1233    False
1234    False
1235    False
1236    False
1237    False
1238    False
1239    False
1240    False
1241    False
1242    False
1243    False
1244    False
1245    False
1246    False
1247    False
1248    False
1249    False
1250    False
1251    False
1252    False
1253    False
1254    False
1255    False
1256    False
1257    False
Length: 1258, dtype: bool

In [24]:
# For every (symbol, date) pair keep only the last occurrence. The result is
# displayed, df1 itself is left unchanged.
df1.drop_duplicates(subset=['symbol', 'date'], keep='last')

Unnamed: 0,symbol,close,date,year,month,day,week,dayofweek,dayofyear,timestamp
0,AAPL,69.1778,2014-03-17,2014,3,17,12,0,76,1395014400000000000
1,AAPL,69.7898,2014-03-18,2014,3,18,12,1,77,1395100800000000000
2,AAPL,69.7714,2014-03-19,2014,3,19,12,2,78,1395187200000000000
3,AAPL,69.4352,2014-03-20,2014,3,20,12,3,79,1395273600000000000
4,AAPL,69.9828,2014-03-21,2014,3,21,12,4,80,1395360000000000000
5,AAPL,70.8128,2014-03-24,2014,3,24,13,0,83,1395619200000000000
6,AAPL,71.5746,2014-03-25,2014,3,25,13,1,84,1395705600000000000
7,AAPL,70.8903,2014-03-26,2014,3,26,13,2,85,1395792000000000000
8,AAPL,70.5856,2014-03-27,2014,3,27,13,3,86,1395878400000000000
9,AAPL,70.5068,2014-03-28,2014,3,28,13,4,87,1395964800000000000


In [33]:
import pickle

# SECURITY: pickle.load can execute arbitrary code -- only open files that
# come from a trusted source.
# The context manager closes the file handle even if unpickling fails.
with open('djia_2019-02-28.pkl', 'rb') as file:
    df3 = pickle.load(file)

In [34]:
# Column dtypes of the frame loaded from the pickle file.
df3.dtypes

symbol                      object
change                     float64
changeOverTime             float64
changePercent              float64
close                      float64
date                datetime64[ns]
high                       float64
label                       object
low                        float64
open                       float64
unadjustedVolume             int64
volume                       int64
vwap                       float64
dtype: object

In [36]:
def preprocess_dataframe(df):
    """Return a copy of ``df`` with calendar columns added and unused columns dropped.

    The input frame is left untouched; all work happens on a copy, so the
    function can be re-run on the same raw frame with the same result.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime ``date`` column and every column named in the
        drop list below.

    Returns
    -------
    pandas.DataFrame
        The remaining input columns followed by ``year``, ``month``, ``day``,
        ``week``, ``dayofweek``, ``dayofyear`` and ``timestamp`` (nanoseconds
        since the epoch as int64).

    Raises
    ------
    KeyError
        If one of the columns to drop is missing from ``df``.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    out = df.copy()
    dates = out['date'].dt

    out['year'] = dates.year
    out['month'] = dates.month
    out['day'] = dates.day
    if hasattr(dates, 'isocalendar'):
        # Series.dt.week was deprecated and later removed; isocalendar() is the
        # replacement. Cast back to int64 to keep the historical dtype
        # (assumes there are no missing dates).
        out['week'] = dates.isocalendar().week.astype(np.int64)
    else:
        # Older pandas without isocalendar(): keep the legacy accessor.
        out['week'] = dates.week
    out['dayofweek'] = dates.dayofweek
    out['dayofyear'] = dates.dayofyear
    # Force nanosecond resolution so the integer value does not depend on the
    # datetime unit of the incoming column.
    out['timestamp'] = out['date'].values.astype('datetime64[ns]').astype(np.int64)

    return out.drop(['label',
                     'change', 'changeOverTime', 'changePercent',
                     'high', 'low', 'open',
                     'unadjustedVolume', 'volume', 'vwap'],
                    axis=1)

In [37]:
# Run the preprocessing on the frame loaded from the pickle file; the returned
# frame is only displayed here, it is not assigned to a variable.
preprocess_dataframe(df3)

Unnamed: 0,symbol,close,date,year,month,day,week,dayofweek,dayofyear,timestamp
0,BA,112.8392,2014-03-03,2014,3,3,10,0,62,1393804800000000000
1,BA,114.6081,2014-03-04,2014,3,4,10,1,63,1393891200000000000
2,BA,113.3409,2014-03-05,2014,3,5,10,2,64,1393977600000000000
3,BA,113.4025,2014-03-06,2014,3,6,10,3,65,1394064000000000000
4,BA,113.1209,2014-03-07,2014,3,7,10,4,66,1394150400000000000
5,BA,111.6688,2014-03-10,2014,3,10,11,0,69,1394409600000000000
6,BA,110.5951,2014-03-11,2014,3,11,11,1,70,1394496000000000000
7,BA,109.5039,2014-03-12,2014,3,12,11,2,71,1394582400000000000
8,BA,107.2686,2014-03-13,2014,3,13,11,3,72,1394668800000000000
9,BA,108.3422,2014-03-14,2014,3,14,11,4,73,1394755200000000000
