In [None]:
from google.colab import drive
drive.mount('/content/gdrive/')

## Add your own project path as a new variable below aman_path.
aman_path = '/content/gdrive/My Drive/School/Undergrad/Fall 2021/CS 490/Group Project/Code'

## cd into your own path instead of aman_path. Don't delete the existing line, just comment it out.
%cd -q $aman_path

Mounted at /content/gdrive/


In [None]:
!pip install treelib
!pip install bintrees
from structures.b_tree import BTree, Item
import pickle
import numpy as np
import bintrees
from sklearn.linear_model import LinearRegression
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization
import tensorflow as tf
import types
import tempfile
import tensorflow.keras.models
import argparse
from glob import glob
from tqdm import tqdm
from timeit import default_timer as timer
import re

# Ask TensorFlow for the name of a visible GPU device (per the TensorFlow docs
# this is an empty string when no GPU is available).
device_name = tf.test.gpu_device_name()
# The hard failure below is commented out, so the notebook keeps running even
# when no GPU is attached.
#if device_name != '/device:GPU:0':
  #raise SystemError('GPU device not found')
# NOTE(review): this prints "Found GPU at: " even when device_name is empty.
print('Found GPU at: {}'.format(device_name))

Collecting treelib
  Downloading treelib-1.6.1.tar.gz (24 kB)
Building wheels for collected packages: treelib
  Building wheel for treelib (setup.py) ... [?25l[?25hdone
  Created wheel for treelib: filename=treelib-1.6.1-py3-none-any.whl size=18386 sha256=453c2ef2ac3f96af95276f3d12f2babce1e971f9d189577d1f4b2c333995aedf
  Stored in directory: /root/.cache/pip/wheels/89/be/94/2c6d949ce599d1443426d83ba4dc93cd35c0f4638260930a53
Successfully built treelib
Installing collected packages: treelib
Successfully installed treelib-1.6.1
Collecting bintrees
  Downloading bintrees-2.2.0.zip (108 kB)
[K     |████████████████████████████████| 108 kB 5.3 MB/s 
[?25hBuilding wheels for collected packages: bintrees
  Building wheel for bintrees (setup.py) ... [?25l[?25hdone
  Created wheel for bintrees: filename=bintrees-2.2.0-cp37-cp37m-linux_x86_64.whl size=183857 sha256=84939e19d20d30c58d0add29c5f485ee9ed03b3d477ba1abbf45cfac464d9876
  Stored in directory: /root/.cache/pip/wheels/f3/2d/47/a75778

In [None]:
def get_data(path):
    """Read a pickled dataset file and return its two payload entries.

    Args:
        path: Filesystem path of a pickle file whose top-level object can be
            indexed with the keys ``'data'`` and ``'memory'``.

    Returns:
        A ``(data, memory)`` tuple holding the values stored under those keys.

    Raises:
        KeyError: If either key is missing from the unpickled object.
    """
    # NOTE: pickle.load can execute arbitrary code -- only open trusted files.
    with open(path, 'rb') as handle:
        payload = pickle.load(handle)
    records, locations = payload['data'], payload['memory']
    return records, locations

def construct_b_tree(data, memory):
    """Build a ``BTree`` from paired records and memory locations, timing it.

    Args:
        data: Iterable of records to index.
        memory: Iterable of memory locations, paired positionally with
            ``data`` (pairing stops at the shorter of the two).

    Returns:
        ``(tree, seconds)`` -- the populated tree and the elapsed time of the
        whole construction as measured by ``timeit.default_timer``.
    """
    began = timer()
    # BTree(2): presumably the tree's degree/order -- confirm in structures.b_tree.
    tree = BTree(2)
    for pair in zip(data, memory):
        tree.insert(Item(*pair))
    return tree, timer() - began

def construct_AVL(data, memory):
    """Populate a ``bintrees.AVLTree`` with record -> memory-location pairs.

    Args:
        data: Iterable of records used as tree keys.
        memory: Iterable of memory locations used as the values, paired
            positionally with ``data``.

    Returns:
        ``(tree, seconds)`` -- the filled AVL tree and the elapsed build time
        as measured by ``timeit.default_timer``.
    """
    began = timer()
    tree = bintrees.AVLTree()
    add_entry = tree.insert
    for key, location in zip(data, memory):
        add_entry(key, location)
    return tree, timer() - began

def construct_RBT(data, memory):
    """Populate a ``bintrees.RBTree`` with record -> memory-location pairs.

    Args:
        data: Iterable of records used as tree keys.
        memory: Iterable of memory locations used as the values, paired
            positionally with ``data``.

    Returns:
        ``(tree, seconds)`` -- the filled red-black tree and the elapsed build
        time as measured by ``timeit.default_timer``.
    """
    began = timer()
    tree = bintrees.RBTree()
    add_entry = tree.insert
    for key, location in zip(data, memory):
        add_entry(key, location)
    return tree, timer() - began

def construct_LR(data, memory):
    """Fit a scikit-learn ``LinearRegression`` from ``data`` to ``memory``.

    Both arrays are reshaped to a single column (``reshape(-1, 1)``) before
    fitting; the reshape is included in the measured time.

    Args:
        data: Array exposing ``reshape``; used as the regression input.
        memory: Array exposing ``reshape``; used as the regression target.

    Returns:
        ``(model, seconds)`` -- the fitted estimator and the elapsed fit time
        as measured by ``timeit.default_timer``.
    """
    began = timer()
    model = LinearRegression()
    inputs = data.reshape(-1, 1)
    targets = memory.reshape(-1, 1)
    # fit() returns the estimator itself, so `model` is the fitted object.
    model.fit(inputs, targets)
    return model, timer() - began

## dense_layers param indicates num nodes per dense layer after the first
def construct_ANN(data, memory, epochs=5, dense_layers=[32], batch_norm=True, dense_act='relu'):
    """Train a small fully connected network that regresses ``memory`` on ``data``.

    Architecture: a fixed 32-unit ReLU ``Dense`` layer with ``input_dim=1``,
    then one ``Dense`` layer per entry of ``dense_layers``, then a single-unit
    linear output. The model is compiled with mean-squared-error loss and the
    Adam optimizer.

    Args:
        data: NumPy array of inputs; cast to float32 before training.
            Presumably one scalar per sample, given ``input_dim=1`` -- confirm
            against the caller.
        memory: NumPy array of regression targets; cast to float32.
        epochs: Number of training epochs passed to ``fit``.
        dense_layers: Unit counts of the ``Dense`` layers that follow the
            fixed first layer. The default list is only iterated, never
            mutated, so sharing it between calls is safe.
        batch_norm: If true, a ``BatchNormalization`` layer follows every
            hidden ``Dense`` layer (including the first).
        dense_act: Activation for the layers listed in ``dense_layers``; the
            first layer always uses ``'relu'``.

    Returns:
        ``(ann, elapsed_time, predhis)`` -- the trained model, the elapsed
        time for build + compile + fit as measured by ``timeit.default_timer``,
        and a list with the model's predictions on the full training input
        after each epoch.
    """
    start = timer()

    ann = Sequential()
    ann.add(Dense(32, input_dim=1, activation='relu'))
    if batch_norm:
        ann.add(BatchNormalization())

    for layer in dense_layers:
        ann.add(Dense(layer, activation=dense_act))
        if batch_norm:
            ann.add(BatchNormalization())
    ann.add(Dense(1))

    # Prepare the training arrays before the callback is created so the
    # callback receives them explicitly instead of closing over a name that is
    # assigned later in the function.
    X = data.astype(np.float32)
    y = memory.astype(np.float32)

    class PredictionHistory(tf.keras.callbacks.Callback):
        """Records the model's predictions on ``inputs`` after every epoch."""

        def __init__(self, inputs):
            # Let the Keras base class initialise its own attributes.
            super().__init__()
            self.inputs = inputs
            self.predhis = []

        def on_epoch_end(self, epoch, logs=None):
            # self.model is set by Keras to the model being trained.
            self.predhis.append(self.model.predict(self.inputs))

    history = PredictionHistory(X)
    ann.compile(loss='mean_squared_error', optimizer='adam')

    # NOTE: the per-epoch predict() calls in the callback run inside fit(), so
    # their cost is part of the reported elapsed time.
    ann.fit(X, y, callbacks=[history], epochs=epochs)

    elapsed_time = timer() - start
    return ann, elapsed_time, history.predhis

def map_mods_to_struct(mods):
    """Map model short-codes to the functions that build those models.

    Recognised codes are ``'bt'``, ``'avl'``, ``'rbt'``, ``'lr'`` and
    ``'ann'``. Any other entry is silently left out of the result.

    Args:
        mods: Iterable of model short-codes.

    Returns:
        Dict from each recognised code in ``mods`` to its ``construct_*``
        function.
    """
    def builder_for(code):
        # Builders are looked up only for codes that are actually requested.
        if code == 'bt':
            return construct_b_tree
        if code == 'avl':
            return construct_AVL
        if code == 'rbt':
            return construct_RBT
        if code == 'lr':
            return construct_LR
        if code == 'ann':
            return construct_ANN
        return None

    candidates = ((code, builder_for(code)) for code in mods)
    return {code: builder for code, builder in candidates if builder is not None}

In [None]:
def main():
    """Build the selected index models for every dataset and persist them.

    For each model code in ``MODS`` and each ``*.dat`` file under
    ``DATA_PATH``, the matching ``construct_*`` function is called and its
    result collected. ANN models are written with Keras ``save`` (one
    directory per dataset/configuration) and their in-memory references are
    dropped; the remaining per-dataset results are then pickled to
    ``{SAVE_PATH}/{mod_name}.dat``.

    Raises:
        InvalidModsException: If ``MODS`` is not a comma-separated list of
            the supported codes (bt, avl, rbt, lr, ann) without spaces.
    """
    import os  # local import: only needed here to create the output directory

    IS_DATA_STD = True

    if IS_DATA_STD:
        SAVE_PATH = './models/Standardized'
        DATA_PATH = '../Data/Standardized'
    else:
        SAVE_PATH = './models/Unstandardized'
        DATA_PATH = '../Data/Unstandardized'
    #MODS = 'bt,avl,rbt,lr,ann'
    MODS = 'ann' # only build ANN index for hyperparameter tuning; waste of time to rebuild the other models because they don't have tunable params

    # Exactly one code per comma-separated slot. A repeated first group would
    # accept glued names such as 'btavl', which then map to no builder at all.
    if not re.fullmatch(r'(bt|avl|rbt|lr|ann)(,(bt|avl|rbt|lr|ann))*', MODS):
        class InvalidModsException(Exception):
            pass
        raise InvalidModsException('Invalid models passed as argument. Options are: [bt, avl, rbt, lr, ann] and must be passed separated by commas without spaces!')

    # Make sure the output directory exists before anything is written to it.
    if SAVE_PATH:
        os.makedirs(SAVE_PATH, exist_ok=True)

    files = glob(f'{DATA_PATH}/*.dat')
    mods = map_mods_to_struct(MODS.split(','))
    for mod_name in mods:
        datasets = {}
        for data_path in tqdm(files, total=len(files)):
            # Dataset name = file name up to its first dot.
            data_name = data_path.split('/')[-1].split('.')[0]
            print(f'Building {mod_name} for {data_name}.')
            data, memory = get_data(data_path)
            if mod_name == 'ann':
                datasets[data_name] = {}
                # hyperparameter tuning; play around with num_layer_ops. Maybe add [64, 64], [32, 64], [64, 32], [32, 64, 128], [128, 64, 32]. Add False to batch_norm_ops. Add "tanh" to act_func_ops.
                for num_layer_ops in [[32, 32]]:
                    for batch_norm_ops in [True]:
                        for act_func_ops in ['relu']:
                            # Forward the configuration being tuned; without
                            # this every entry would train the default network
                            # and only the label would differ. num_layer_ops is
                            # applied as the layers after the fixed first layer.
                            mod, mod_train_time, history = mods[mod_name](
                                data,
                                memory,
                                dense_layers=num_layer_ops,
                                batch_norm=batch_norm_ops,
                                dense_act=act_func_ops,
                            )
                            datasets[data_name][f'ann_{str(tuple(num_layer_ops))}_{str(batch_norm_ops).lower()}_{act_func_ops}'] = {'mod': mod, 'train_time': mod_train_time, 'history': history}
            else:
                mod, mod_train_time = mods[mod_name](data, memory)
                datasets[data_name] = {'mod': mod, 'train_time': mod_train_time}

        if mod_name == 'ann':
            for dataset in datasets:
                for ann in datasets[dataset]:
                    datasets[dataset][ann]['mod'].save(f'{SAVE_PATH}/{ann}_{dataset}')
                    # The model now lives on disk; drop the reference so the
                    # pickle below holds only train_time and history.
                    datasets[dataset][ann]['mod'] = None

        if SAVE_PATH:
            with open(f'{SAVE_PATH}/{mod_name}.dat', 'wb') as f:
                pickle.dump(datasets, f)

# Run the full build when this cell executes.
main()

  0%|          | 0/7 [00:00<?, ?it/s]

Building ann for fb.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


 14%|█▍        | 1/7 [02:23<14:22, 143.83s/it]

Building ann for amzn.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


 29%|██▊       | 2/7 [02:45<06:00, 72.03s/it] 

Building ann for random.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


 43%|████▎     | 3/7 [08:08<12:27, 186.77s/it]

Building ann for binomial.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


 57%|█████▋    | 4/7 [13:32<12:02, 240.68s/it]

Building ann for poisson.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


 71%|███████▏  | 5/7 [18:55<09:00, 270.46s/it]

Building ann for exponential.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


 86%|████████▌ | 6/7 [24:18<04:48, 288.41s/it]

Building ann for lognormal.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


100%|██████████| 7/7 [29:41<00:00, 254.55s/it]


INFO:tensorflow:Assets written to: ./models/Standardized/ann_[32, 32]_True_relu_fb/assets
INFO:tensorflow:Assets written to: ./models/Standardized/ann_[32, 32]_True_relu_amzn/assets
INFO:tensorflow:Assets written to: ./models/Standardized/ann_[32, 32]_True_relu_random/assets
INFO:tensorflow:Assets written to: ./models/Standardized/ann_[32, 32]_True_relu_binomial/assets
INFO:tensorflow:Assets written to: ./models/Standardized/ann_[32, 32]_True_relu_poisson/assets
INFO:tensorflow:Assets written to: ./models/Standardized/ann_[32, 32]_True_relu_exponential/assets
INFO:tensorflow:Assets written to: ./models/Standardized/ann_[32, 32]_True_relu_lognormal/assets
