In [1]:
import os

# Switch to the repository root so the relative paths used in later cells
# (e.g. 'cached_data/ingestion.sqlite') resolve from there.
# Set the PROJECT_BUFFALO_ROOT environment variable to run this notebook on
# another machine; the original author's checkout location is the fallback.
os.chdir(os.environ.get('PROJECT_BUFFALO_ROOT', r'C:\Users\carlo\GitHub\ProjectBuffalo'))

In [2]:
import buffalo.ingestion as ingestion
import buffalo.predictor as predictor

In [3]:
import torch
import pandas as pd

In [4]:
# Build the project's ingestion object, selecting the ADVANTAGE member of its API enum.
# NOTE(review): presumably this is the Alpha Vantage data source — confirm in buffalo.ingestion.
ingestor = ingestion.DataIngestion(ingestion.enum.API.ADVANTAGE)

In [5]:
# Load data from the SQLite file; the path is relative to the current working directory.
# NOTE(review): presumably this fills `ingestor.data`, which later cells index by table name — confirm.
ingestor.load_data(r'cached_data/ingestion.sqlite')

In [6]:
# Symbol whose rows are selected by the `@target_symbol` references in later `query` calls.
target_symbol = 'AAPL'

In [7]:
# Show the distinct values of the `symbol` column in the 'ADJUSTED_DAILY_STOCK' table.
ingestor.data['ADJUSTED_DAILY_STOCK'].symbol.unique()

array(['AAPL', 'MSFT', 'IBM', 'META', 'JNJ', 'PFE', 'UNH', 'MARK', 'XLV',
       'JPM', 'BAC', 'GS', 'MS', 'XLF', 'PG', 'KO', 'PEP', 'NKE', 'XLP',
       'GE', 'HON', 'UTX', 'MMM', 'XLI'], dtype=object)

In [8]:
# Rows for the target symbol, with the raw 'close' replaced by 'adjusted_close'
# (the adjusted column takes over the name 'close').
target_stock = (
    ingestor.data['ADJUSTED_DAILY_STOCK']
    .query('symbol == @target_symbol')
    .drop(columns='close')
    .rename(columns={'adjusted_close': 'close'})
)

In [9]:
# Every row that does NOT belong to the target symbol. The filter follows
# `target_symbol` (instead of a hard-coded ticker) so changing the target in
# one place keeps this complement consistent with `target_stock`.
other_stocks = ingestor.data['ADJUSTED_DAILY_STOCK'].query('symbol != @target_symbol')

In [10]:
# Attach each peer's price/volume columns to the target frame. The peer's
# adjusted close is renamed to 'close', and every column is prefixed with the
# peer's ticker (e.g. 'MSFT_open') before the time alignment.
for symbol in ('MSFT', 'IBM'):
    is_peer = other_stocks['symbol'] == symbol
    peer_prices = (
        other_stocks.loc[is_peer, ['open', 'high', 'low', 'adjusted_close', 'volume']]
        .rename(columns={'adjusted_close': 'close'})
        .add_prefix(symbol + '_')
    )
    target_stock = predictor.util.align_dataframe_by_time(target_stock, peer_prices)

In [11]:
# --- Quarterly fundamentals for the target symbol, bookkeeping columns removed ---
target_income_statement = (
    ingestor.data['COMPANY_INCOME_STATEMENT']
    .query('symbol == @target_symbol & freq == "quarterly"')
    .drop(columns=['reported_currency', 'symbol', 'freq', 'function'])
)
target_balance_sheet = (
    ingestor.data['COMPANY_BALANCE_SHEET']
    .query('symbol == @target_symbol & freq == "quarterly"')
    .drop(columns=['reported_currency', 'symbol', 'freq', 'function'])
)
# The cash-flow table additionally drops 'net_income'.
# NOTE(review): presumably because the income statement already carries that column — confirm.
target_cash_flow = (
    ingestor.data['COMPANY_CASH_FLOW']
    .query('symbol == @target_symbol & freq == "quarterly"')
    .drop(columns=['reported_currency', 'symbol', 'freq', 'function', 'net_income'])
)

# --- Single-series tables: keep only 'value' and give it a descriptive name ---
fed_funds_rate = (
    ingestor.data['FEDERAL_FUNDS_RATE'][['value']]
    .rename(columns={'value': 'effective_federal_funds_rate'})
)
payroll = (
    ingestor.data['NONFARM_PAYROLL'][['value']]
    .rename(columns={'value': 'total_nonfarm_payroll'})
)
cpi = (
    ingestor.data['CPI'][['value']]
    .rename(columns={'value': 'consumer_price_index'})
)
unemployment = (
    ingestor.data['UNEMPLOYMENT'][['value']]
    .rename(columns={'value': 'unemployment_rate'})
)
real_gdp = (
    ingestor.data['REAL_GDP'][['value']]
    .rename(columns={'value': 'real_gross_domestic_product'})
)
real_gdp_per_capita = (
    ingestor.data['REAL_GDP_PER_CAPITA'][['value']]
    .rename(columns={'value': 'real_gross_domestic_product_per_capita'})
)

# --- Treasury yields: one column per maturity ---
# The pivot yields two-level columns; the outer level is dropped so only the
# maturity label remains, then each label gets the 'treasury_yield_' prefix.
treasury_yield = (
    ingestor.data['TREASURY_YIELD'][['value', 'maturity']]
    .pivot(columns=['maturity'], values=['value'])
)
treasury_yield.columns = 'treasury_yield_' + treasury_yield.columns.droplevel(level=0)


In [12]:
# Fold every auxiliary frame into the target frame, one alignment call per
# frame, in the same order as before: fundamentals first, then macro series,
# then treasury yields.
for aligned_source in (
    target_income_statement,
    target_balance_sheet,
    target_cash_flow,
    fed_funds_rate,
    payroll,
    cpi,
    unemployment,
    real_gdp,
    real_gdp_per_capita,
    treasury_yield,
):
    target_stock = predictor.util.align_dataframe_by_time(target_stock, aligned_source)



In [16]:
import torch.nn as nn

In [20]:
class RNN(nn.Module):
    """
    Thin wrapper around :class:`torch.nn.RNN` that pins the network to a
    device and adds a simple train/validation loop.

    All constructor arguments except ``use_gpu`` are forwarded unchanged to
    :class:`torch.nn.RNN`.

    :param use_gpu: When True, the network is placed on CUDA if it is
        available; otherwise (or when False) it stays on the CPU.
    """

    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 nonlinearity='tanh',
                 bias: bool=True,
                 batch_first: bool=False,
                 dropout=0,
                 bidirectional: bool=False,
                 use_gpu: bool=True):
        super().__init__()
        # Always store a torch.device so the attribute has one consistent type.
        if use_gpu and torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')
        self.model = nn.RNN(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            nonlinearity=nonlinearity,
                            bias=bias,
                            batch_first=batch_first,
                            dropout=dropout,
                            bidirectional=bidirectional).to(self.device)

    def forward(self, input_v: torch.Tensor, h_0: torch.Tensor = None):
        """
        Run the wrapped RNN.

        :param input_v: Input sequence(s); moved to ``self.device`` first.
        :param h_0: Optional initial hidden state; moved to ``self.device``
            when given. ``None`` lets ``nn.RNN`` use its default (zeros).
        :return: The ``(output, h_n)`` tuple produced by ``nn.RNN``.
        """
        input_v = input_v.to(self.device)
        if h_0 is not None:
            h_0 = h_0.to(self.device)
        # nn.RNN names its second argument `hx`; passing it positionally
        # avoids the TypeError an `h_0=` keyword would raise.
        return self.model(input_v, h_0)

    def _predict(self, data: torch.Tensor, lens):
        """
        Turn a padded batch into one prediction row per sequence.

        The batch is packed with its true lengths so padding steps are
        skipped, and the final hidden state of the last layer is returned
        (both directions concatenated when the RNN is bidirectional).
        """
        # pack_padded_sequence requires lengths on the CPU when given as a tensor.
        lengths = lens.cpu() if isinstance(lens, torch.Tensor) else lens
        packed = nn.utils.rnn.pack_padded_sequence(
            data, lengths, batch_first=self.model.batch_first, enforce_sorted=False)
        _, h_n = self.model(packed)
        if self.model.bidirectional:
            # Last layer: h_n[-2] is the forward direction, h_n[-1] the backward one.
            return torch.cat((h_n[-2], h_n[-1]), dim=1)
        return h_n[-1]

    @staticmethod
    def _mean_loss(total: float, batches: int) -> float:
        """Average loss per batch; NaN when there were no batches."""
        return total / batches if batches else float('nan')

    def train_loop(self, opt, t_loader, v_loader, epochs, log_every: int = 5):
        """
        Train with cross-entropy loss and report the average train/validation
        loss every ``log_every`` epochs.

        :param opt: Optimizer over this module's parameters.
        :param t_loader: Iterable of ``(data, label, lens)`` training batches;
            must support ``len()``.
        :param v_loader: Iterable of ``(data, label, lens)`` validation
            batches; must support ``len()``.
        :param epochs: Number of passes over ``t_loader``.
        :param log_every: Print every this many epochs (falsy disables it).

        NOTE(review): the final hidden state is used directly as the logits,
        so labels are assumed to be class indices below ``hidden_size``
        (``2 * hidden_size`` when bidirectional), and ``data`` is assumed to
        be padded to a common length with ``lens`` holding the true lengths —
        confirm against the data loaders.
        """
        criterion = nn.CrossEntropyLoss()
        was_training = self.training
        try:
            for epoch in range(epochs):
                # Training pass: dropout (if any) enabled.
                self.train()
                t_loss_sum = 0.0
                for data, label, lens in t_loader:
                    opt.zero_grad()
                    data, label = data.to(self.device), label.to(self.device)
                    loss = criterion(self._predict(data, lens), label)
                    loss.backward()
                    opt.step()
                    t_loss_sum += loss.item()

                # Validation pass: dropout disabled, no gradient tracking.
                self.eval()
                v_loss_sum = 0.0
                with torch.no_grad():
                    for data, label, lens in v_loader:
                        data, label = data.to(self.device), label.to(self.device)
                        loss = criterion(self._predict(data, lens), label)
                        v_loss_sum += loss.item()

                if log_every and epoch % log_every == 0:
                    out = "Epoch {}: Train Loss {}, Val Loss {}"
                    avg_t_loss = self._mean_loss(t_loss_sum, len(t_loader))
                    avg_v_loss = self._mean_loss(v_loss_sum, len(v_loader))
                    print(out.format(epoch, avg_t_loss, avg_v_loss))
        finally:
            # Leave the module in whatever mode the caller had it in.
            self.train(was_training)

In [21]:
# Size the input layer from the assembled frame instead of a hard-coded 118,
# so it follows the column count if the set of merged frames changes.
# NOTE(review): the frame still holds non-numeric columns (e.g. 'symbol',
# 'interval'); revisit this once the features actually fed to the net are chosen.
rnn = RNN(input_size=target_stock.shape[1], hidden_size=32, num_layers=4, batch_first=True)

In [22]:
# Show the (rows, columns) shape of the assembled frame.
target_stock.shape

(946, 118)

In [13]:
# Display the assembled frame to inspect the merged columns.
target_stock

Unnamed: 0_level_0,open,high,low,close,volume,dividend_amount,split_coefficient,symbol,interval,adjusted,...,consumer_price_index,unemployment_rate,real_gross_domestic_product,real_gross_domestic_product_per_capita,treasury_yield_10year,treasury_yield_2year,treasury_yield_30year,treasury_yield_3month,treasury_yield_5year,treasury_yield_7year
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-04-01 00:00:00-05:00,191.640,191.6800,188.38,46.211150,27861964,0.0000,1.0,AAPL,daily,1,...,255.548,3.6,4753.025,57418.0,2.49,2.33,2.89,2.43,2.31,2.40
2019-04-02 00:00:00-05:00,191.090,194.4600,191.05,46.882908,22765732,0.0000,1.0,AAPL,daily,1,...,255.548,3.6,4753.025,57418.0,2.48,2.30,2.88,2.42,2.28,2.38
2019-04-03 00:00:00-05:00,193.250,196.5000,193.15,47.204289,23271830,0.0000,1.0,AAPL,daily,1,...,255.548,3.6,4753.025,57418.0,2.52,2.33,2.93,2.44,2.32,2.42
2019-04-04 00:00:00-05:00,194.790,196.3700,193.14,47.286446,19114275,0.0000,1.0,AAPL,daily,1,...,255.548,3.6,4753.025,57418.0,2.51,2.33,2.92,2.44,2.32,2.41
2019-04-05 00:00:00-05:00,196.450,197.1000,195.93,47.602994,18526644,0.0000,1.0,AAPL,daily,1,...,255.548,3.6,4753.025,57418.0,2.50,2.35,2.91,2.44,2.31,2.40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-22 00:00:00-05:00,134.352,134.5600,130.30,131.862207,77852108,0.0000,1.0,AAPL,daily,1,...,296.797,3.5,5018.093,60080.0,3.67,4.24,3.73,4.35,3.79,3.77
2022-12-23 00:00:00-05:00,130.920,132.4150,129.64,131.659472,63814893,0.1667,1.0,AAPL,daily,1,...,296.797,3.5,5018.093,60080.0,3.75,4.31,3.82,4.34,3.86,3.83
2022-12-27 00:00:00-05:00,131.380,131.4100,128.72,129.832255,69007830,0.0000,1.0,AAPL,daily,1,...,296.797,3.5,5018.093,60080.0,3.84,4.32,3.93,4.46,3.94,3.93
2022-12-28 00:00:00-05:00,129.670,131.0275,125.87,125.848323,85438391,0.0000,1.0,AAPL,daily,1,...,296.797,3.5,5018.093,60080.0,3.88,4.31,3.98,4.46,3.97,3.97
