# Requirements

In [1]:
import pandas as pd
import numpy as np

import datetime
import pytz
import requests
from pandas.io.json import json_normalize

import pickle

import matplotlib.pyplot as plt
%matplotlib inline


from sklearn.linear_model import LinearRegression

# Dataset

In [2]:
IEX_API_URL_TEMPLATE = 'https://api.iextrading.com/1.0/stock/{}/chart/{}'

HIST_5Y = '5y'
HIST_1Y = '1y'
HIST_1M = '1m'

djia_tickers = [
    'BA',   'PFE', 'MCD', 'WMT', 'KO',   'MRK',  'HD',   'V',   'JNJ',  'VZ',
    'CSCO', 'AXP', 'TRV', 'DIS', 'MSFT', 'UNH',  'DWDP', 'CAT', 'AAPL', 'UTX',
    'MMM',  'JPM', 'IBM', 'GS',  'XOM',  'INTC', 'NKE',  'CVX', 'PG',   'WBA' ]

In [3]:
def expand_date(df):
    df['year'] = df.date.dt.year
    df['month'] = df.date.dt.month
    df['day'] = df.date.dt.day
    df['week'] = df.date.dt.week
    df['dayofweek'] = df.date.dt.dayofweek
    df['dayofyear'] = df.date.dt.dayofyear
    df['timestamp'] = df.date.values.astype(np.int64)

def format_dataset(df):
    formated_df = df.drop(['label',
                           'change', 'changeOverTime', 'changePercent',
                           'high', 'low', 'open',
                           'unadjustedVolume', 'volume', 'vwap'],
                          axis=1)

    expand_date(formated_df)
    return formated_df

def get_dataset_from_ticker(ticker_symbol, hist_period=HIST_5Y):
    r = requests.get(url = IEX_API_URL_TEMPLATE.format(ticker_symbol.lower(), hist_period))
    df = json_normalize(r.json())

    df.date = pd.to_datetime(df.date, format='%Y-%m-%d')
    df.insert(loc=0, column='symbol', value=ticker_symbol)

    return format_dataset(df)


def get_djia_dataset(hist_period=HIST_5Y):
    df = None

    for ticker_symbol in djia_tickers:
        if df is None:
            df = get_dataset_from_ticker(ticker_symbol, hist_period=hist_period)
        else:
            df = df.append(get_dataset_from_ticker(ticker_symbol, hist_period))

    return df


def update_djia_dataset(djia_ds):
    last_recorded_day = max(djia_ds.date)
    today = datetime.datetime.now()
    days_to_update = (today - last_recorded_day).days

    hist_period = HIST_5Y

    if days_to_update < 1:
        return djia_ds
    elif days_to_update < 28:
        hist_period = HIST_1M
    elif days_to_update < 365:
        hist_period = HIST_1Y

    last_djia_ds = get_djia_dataset(hist_period)

    return djia_ds.append(last_djia_ds).drop_duplicates(['symbol', 'date'], keep='last')


def save_dataset_to_file(ds, file_name):
    with open(file_name, 'wb') as fp:
        pickle.dump(ds, fp)


def load_dataset_from_file(file_name):
    with open(file_name, 'rb') as fp:
        return pickle.load(fp)

# Trading days

In [4]:
market_holidays = []

def load_market_holidays(market_holidays_file='market_holidays.txt'):
    market_holidays.clear()

    with open(market_holidays_file) as f:
        lines = f.readlines()

    for line in lines:
        market_holidays.append(datetime.datetime.strptime(line.strip(), '%Y-%m-%d'))

load_market_holidays()

In [5]:
def is_trading_day(day):
    day_of_week = day.weekday()
    if day_of_week == 5 or day_of_week == 6 or day in market_holidays:
        return False
    else:
        return True


def get_trading_days_in_range(start_date, end_date):
    trading_days = []

    current_day = start_date
    while (current_day <= end_date):
        if is_trading_day(current_day):
            trading_days.append(current_day)
        current_day += datetime.timedelta(days=1)

    return trading_days

In [6]:
def str_to_datetime(str_date):
    return datetime.datetime.strptime(str_date, '%Y-%m-%d')

def datetime_array_to_dataframe(days):
    return pd.DataFrame({'date':days_poc})

# Models

In [17]:
class StockForecasterModel:
    def __init__(self, ticker, dataset):
        self.ticker = ticker
        self.dataset = dataset

    def getDataset(self):
        return self.dataset

    def setDataset(self, dataset):
        self.dataset = dataset

    def train(self, start_date, end_date):
        raise NotImplementedError("Please Implement this method")

    def predict(self, from_date, to_date):
        raise NotImplementedError("Please Implement this method")

In [18]:
class LinearStockForecaster(StockForecasterModel):

    def __init__(self, ticker, dataset):
        StockForecasterModel.__init__(self, ticker, dataset)

    def train(self, start_date, end_date):
        training_set = self.dataset.query(
            "symbol == '{}' and date >= '{}' and date <= '{}'"
            .format(self.ticker, start_date, end_date))

        x = training_set.drop(['symbol', 'date', 'close'], axis=1)
        y = training_set.close

        self.model = LinearRegression()
        self.model.fit(x, y)

    def predict(self, from_date, to_date):
        days_to_predict = get_trading_days_in_range(from_date, to_date)
        x = datetime_array_to_dataframe(days_to_predict)
        expand_date(x)
        x = x.drop(['date'], axis=1)
        return self.model.predict(x)

In [19]:
djia_ds = load_dataset_from_file('data/djia_20140303-20190315.pkl')

m = LinearStockForecaster('AAPL', djia_ds)
m.train(str_to_datetime('2015-04-01'), str_to_datetime('2018-03-31'))
y = m.predict(str_to_datetime('2018-04-01'), str_to_datetime('2018-06-30'))
print(y)

[181.25231306 181.40353943 181.45395795 181.50437648 181.554795
 181.60521353 181.75643989 181.80685841 181.85727694 181.90769546
 181.95811399 182.10934035 182.15975888 182.2101774  182.26059592
 182.31101445 182.46224081 182.51265934 182.56307786 182.61349638
 182.66391491 183.32902807]
