# Bifrost Neural Engine
A generic autonomous model builder for various problems using neural networks.

Problem Areas:
- Regression
- Classification
- Time Series Predictions

## Import Dependencies

In [1]:
%run ./../../utilities/common/data_loader.ipynb

In [2]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import time
import logging as log
from tqdm import tqdm
import os.path
import matplotlib.pyplot as plt
%matplotlib inline

## Implementation

In [3]:
class BifrostNeuralEngine(nn.Module):
    """Fully connected network that sizes itself from a DataFrame and trains on it.

    The network is: input batch normalisation, then one
    ``Linear -> ReLU -> BatchNorm1d -> Dropout`` group per entry of
    ``layers_config``, then a final ``Linear`` producing one output per
    label column.

    Data preparation is delegated to ``DataLoader``, which is not defined in
    this cell (presumably it comes from the ``data_loader`` notebook executed
    earlier - confirm). Only these members of it are used here:
    ``get_train_test_split()``, ``get_featurized_data()``,
    ``apply_sigmoid_scale()`` and the ``state_dict`` attribute.
    """

    def __init__(self,
                 data: pd.DataFrame,
                 labels_column_names: list,
                 autobootstrap_filename: str = None,
                 date_column_name: str = None,
                 layers_config: list = None,
                 dropout_probability: float = 0.5,
                 learning_rate: float = 0.001):
        """Build the network and, when a checkpoint exists, restore it.

        Args:
            data: Raw data handed to ``DataLoader``.
            labels_column_names: Names of the target columns.
            autobootstrap_filename: Optional checkpoint path. If the file
                exists it is loaded here; ``fit`` periodically overwrites it.
            date_column_name: Forwarded to ``DataLoader`` unchanged.
            layers_config: Hidden layer widths. ``None`` or an empty sequence
                selects the automatic layout.
            dropout_probability: Dropout rate applied after every hidden layer.
            learning_rate: Learning rate of the Adam optimizer used by ``fit``.
        """
        super().__init__()

        self.data_loader = DataLoader(data=data,
                                      labels_column_names=labels_column_names,
                                      date_column_name=date_column_name)
        train_x, train_y, _, _ = self.data_loader.get_train_test_split()
        self.features_column_names = train_x.columns
        self.output_count = train_y.shape[1]
        self.learning_rate = learning_rate
        self.labels_column_names = labels_column_names
        self.autobootstrap_filename = autobootstrap_filename

        n_in = len(self.features_column_names)

        # Batch normalizer applied to the raw input features.
        self.bn_cont = nn.BatchNorm1d(n_in)

        # ``None`` replaces the former mutable ``list()`` default; an empty
        # sequence still triggers the automatic layout, as before.
        if layers_config is None or len(layers_config) == 0:
            layers_config = self.__auto_determine_layers_config__(input_feature_count=n_in)

        # Hidden layers.
        layers = []
        for width in layers_config:
            layers.append(nn.Linear(in_features=n_in, out_features=width))
            layers.append(nn.ReLU(inplace=True))
            layers.append(nn.BatchNorm1d(num_features=width))
            layers.append(nn.Dropout(p=dropout_probability))
            n_in = width

        # Output layer; ``n_in`` now holds the width of the last hidden layer.
        layers.append(nn.Linear(in_features=n_in, out_features=self.output_count))

        self.layers = nn.Sequential(*layers)

        if self.autobootstrap_filename is not None and os.path.isfile(self.autobootstrap_filename):
            # NOTE(security): torch.load unpickles the file content. Only point
            # ``autobootstrap_filename`` at checkpoints from a trusted source.
            self.load_state_dict(torch.load(self.autobootstrap_filename))
            self.data_loader.state_dict = torch.load(
                self._data_loader_state_path(self.autobootstrap_filename))

    @staticmethod
    def _data_loader_state_path(model_path: str) -> str:
        """Return the path of the data-loader state stored next to ``model_path``.

        Keeps the historical ``'.pt' -> '.data_loader.pt'`` substitution so
        existing checkpoints are still found. When the name contains no
        ``'.pt'`` the substitution is a no-op, which used to make the data
        loader state overwrite the model checkpoint; in that case the suffix
        is appended instead.
        """
        sidecar = model_path.replace('.pt', '.data_loader.pt')
        if sidecar == model_path:
            sidecar = model_path + '.data_loader.pt'
        return sidecar

    def __auto_determine_layers_config__(self, input_feature_count: int) -> list:
        """Derive four hidden layer widths from the number of input features.

        The widths are 125 %, 110 %, 70 % and 100 % of ``input_feature_count``,
        truncated to integers and clamped to at least 1 so that very small
        feature counts never yield a zero-width layer.
        """
        factors = (1.25, 1.10, 0.70)
        widths = [int(input_feature_count * factor) for factor in factors]
        widths.append(int(input_feature_count))
        return [max(1, width) for width in widths]

    def __get_tensors_for_dataframe__(self, data: pd.DataFrame) -> torch.Tensor:
        """Convert the values of ``data`` into a float32 tensor."""
        return torch.tensor(data.values, dtype=torch.float)

    def __eval_test_data__(self, test_x: torch.Tensor, test_y: torch.Tensor, criterion: nn.modules.loss._Loss):
        """Log the root of ``criterion`` on the held-out data.

        Runs in evaluation mode (dropout disabled, batch norm using its
        running statistics) and restores the previous mode afterwards.
        """
        log.info('Evaluating test data.')

        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                y_pred = self(test_x)
                loss = torch.sqrt(criterion(y_pred, test_y))
        finally:
            self.train(was_training)

        log.info(f'Test loss evaluation: {loss.item()}')

    def forward(self, x: torch.Tensor):
        """Normalise the input features and pass them through the layer stack."""
        x = self.bn_cont(x)
        x = self.layers(x)

        return x

    def fit(self, epochs: int):
        """Train on the full training split for ``epochs`` full-batch steps.

        Every 50 epochs (starting with the first) the progress is reported
        and, when ``autobootstrap_filename`` is set, the model and data
        loader states are written to disk. Afterwards the test split is
        evaluated and the loss curve is plotted.

        Returns:
            ``self``, so calls can be chained.
        """
        start_time: float = time.time()
        # NOTE: the loss is always RMSE (sqrt of MSE), for classification too.
        criterion: nn.modules.loss._Loss = nn.MSELoss()
        optimizer: torch.optim.Optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        train_x, train_y, test_x, test_y = (
            self.__get_tensors_for_dataframe__(data=frame)
            for frame in self.data_loader.get_train_test_split()
        )
        losses = []
        prev_displayed_loss = -1

        # Make sure dropout / batch norm are in training mode even if the
        # caller switched the module to eval mode earlier.
        self.train()

        pbar = tqdm(range(1, epochs + 1))

        for epoch in pbar:
            y_pred = self(train_x)
            loss = torch.sqrt(criterion(y_pred, train_y))
            # Keep a plain float: storing the tensor would keep the autograd
            # graph of every epoch alive.
            loss_value = loss.item()
            losses.append(loss_value)

            if epoch % 50 == 1:
                status = f'Epoch: {epoch}, Loss: {loss_value} (Prev: {prev_displayed_loss})'
                log.debug(status)
                pbar.set_description(status)
                prev_displayed_loss = loss_value

                if self.autobootstrap_filename is not None:
                    torch.save(self.state_dict(), self.autobootstrap_filename)
                    torch.save(self.data_loader.state_dict,
                               self._data_loader_state_path(self.autobootstrap_filename))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        duration = time.time() - start_time
        log.info(f'Training took: {duration / 60} minutes.')

        self.__eval_test_data__(test_x=test_x, test_y=test_y, criterion=criterion)
        plt.plot(range(epochs), losses)

        return self

    def predict(self, x: pd.DataFrame) -> pd.DataFrame:
        """Predict the label columns for the rows of ``x``.

        ``x`` is featurized by the data loader and reduced to the columns seen
        during training, in training order. Inference runs in evaluation mode
        so the result is deterministic and single-row inputs work; the
        previous mode is restored afterwards.

        Returns:
            A DataFrame with one column per label, after
            ``apply_sigmoid_scale(..., reverse=True)`` (presumably mapping the
            outputs back to the original label scale - confirm against
            ``DataLoader``).
        """
        x = self.data_loader.get_featurized_data(data=x)
        # Iterate the training columns so the feature order matches the one
        # the network was trained with, whatever order ``x`` arrives in.
        x = x[[c for c in self.features_column_names if c in x.columns]]
        x = self.__get_tensors_for_dataframe__(data=x)

        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                predictions = self(x)
        finally:
            self.train(was_training)

        predictions = pd.DataFrame(predictions.numpy())
        predictions.columns = self.labels_column_names

        return self.data_loader.apply_sigmoid_scale(data=predictions, reverse=True)