In [5]:
from structures.b_tree import BTree, Item
import pickle
import numpy as np
import bintrees
from sklearn.linear_model import LinearRegression
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import types
import tempfile
import tensorflow.keras.models
from glob import glob

In [2]:
def make_keras_picklable():
    """Make ``tensorflow.keras.models.Model`` instances usable with ``pickle``.

    Installs ``__getstate__`` and ``__setstate__`` on the Model class. Pickling
    saves the model to a temporary ``.hdf5`` file and stores that file's bytes
    under the ``'model_str'`` key; unpickling writes those bytes back to a
    temporary file, loads the model from it and adopts the loaded model's
    ``__dict__``.

    The class is patched in place, so calling this once affects every Model
    instance in the process.
    """
    def __getstate__(self):
        # The temporary file is reopened by name while still open here.
        # NOTE(review): that is not possible on every platform (e.g. Windows) - confirm if portability matters.
        with tempfile.NamedTemporaryFile(suffix='.hdf5', delete=True) as fd:
            tensorflow.keras.models.save_model(self, fd.name, overwrite=True)
            model_str = fd.read()
        return {'model_str': model_str}

    def __setstate__(self, state):
        with tempfile.NamedTemporaryFile(suffix='.hdf5', delete=True) as fd:
            fd.write(state['model_str'])
            # Flush so the loader, which opens the file by name, sees all bytes.
            fd.flush()
            # Use the fully qualified module: a bare `keras` name is never
            # imported in this file, so `keras.models.load_model` raised
            # NameError whenever a pickled model was loaded.
            model = tensorflow.keras.models.load_model(fd.name)
        self.__dict__ = model.__dict__

    cls = tensorflow.keras.models.Model
    cls.__getstate__ = __getstate__
    cls.__setstate__ = __setstate__

# Patch the Keras Model class now, before any model is handed to pickle.
make_keras_picklable()

In [3]:
def get_data(path):
    """Read a pickled mapping from ``path`` and return two of its entries.

    Args:
        path: Location of a pickle file whose top-level object can be indexed
            with the keys ``'data'`` and ``'memory'``.

    Returns:
        A ``(data, memory)`` tuple holding the values stored under those keys.

    Raises:
        KeyError: If either key is missing from the unpickled mapping.
    """
    # NOTE: pickle.load can execute arbitrary code - only open trusted files.
    with open(path, 'rb') as handle:
        payload = pickle.load(handle)
    records = payload['data']
    locations = payload['memory']
    return records, locations

def construct_b_tree(data, memory):
    """Build a ``BTree(2)`` holding one ``Item`` per (record, location) pair.

    ``data`` and ``memory`` are walked in lockstep; pairing stops at the end of
    the shorter one.

    Returns:
        The populated ``BTree``.
    """
    tree = BTree(2)
    for pair in zip(data, memory):
        # Item takes the record first and its memory location second.
        tree.insert(Item(*pair))
    return tree

def construct_AVL(data, memory):
    """Build a ``bintrees.AVLTree`` keyed by record, valued by memory location.

    ``data`` and ``memory`` are walked in lockstep; pairing stops at the end of
    the shorter one.

    Returns:
        The populated ``AVLTree``.
    """
    tree = bintrees.AVLTree()
    for key, location in zip(data, memory):
        tree.insert(key, location)
    return tree

def construct_RBT(data, memory):
    """Build a ``bintrees.RBTree`` keyed by record, valued by memory location.

    ``data`` and ``memory`` are walked in lockstep; pairing stops at the end of
    the shorter one.

    Returns:
        The populated ``RBTree``.
    """
    tree = bintrees.RBTree()
    pairs = zip(data, memory)
    for key, location in pairs:
        tree.insert(key, location)
    return tree

def construct_LR(data, memory):
    """Fit a linear regression from each record in ``data`` to its ``memory`` entry.

    Args:
        data: Array-like of records; flattened and treated as one feature per
            sample.
        memory: Array-like of target values, one per record.

    Returns:
        The fitted ``LinearRegression`` estimator. Because the targets are
        passed as a column, ``predict`` returns an ``(n, 1)`` array.
    """
    # scikit-learn expects X as (n_samples, n_features). The previous
    # reshape(1, -1) presented the whole dataset as ONE sample with n features
    # and n targets, which does not learn a record -> location mapping and
    # allocates an n x n coefficient matrix.
    keys = np.asarray(data).reshape(-1, 1)
    targets = np.asarray(memory).reshape(-1, 1)
    reg = LinearRegression().fit(keys, targets)
    return reg

def construct_ANN(data, memory, epochs=10):
    """Train a small fully connected network from ``data`` to ``memory``.

    The network has two 32-unit ReLU layers followed by a single linear output
    unit, and is trained with mean squared error and the Adam optimizer.

    Args:
        data: NumPy array of inputs; cast to float32 before training.
        memory: NumPy array of targets; cast to float32 before training.
        epochs: Number of training epochs (default 10).

    Returns:
        The trained ``Sequential`` model.
    """
    ann = Sequential()
    ann.add(Dense(32, input_dim=1, activation='relu'))
    ann.add(Dense(32, activation='relu'))
    ann.add(Dense(1))
    ann.compile(loss='mean_squared_error', optimizer='adam')
    # Honour the caller's `epochs`; it was previously ignored in favour of a
    # hard-coded 10.
    ann.fit(data.astype(np.float32), memory.astype(np.float32), epochs=epochs)
    return ann
    


In [4]:
# Load the records and their memory locations from the amzn dump.
data, memory = get_data('../Data/amzn.dat')

# Build every index structure / model over the same (data, memory) pairs.
bt = construct_b_tree(data, memory)
avl = construct_AVL(data, memory)
rbt = construct_RBT(data, memory)
lr = construct_LR(data, memory)
ann = construct_ANN(data, memory)

# Persist all five in one pickle file, keyed by short name.
with open('./models/models.dat', 'wb') as f:
    mods = dict(bt=bt, avl=avl, rbt=rbt, lr=lr, ann=ann)
    pickle.dump(mods, f)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
# Scratch check: split the file name of the first '../Data/*.dat' match on '.'.
glob('../Data/*.dat')[0].rsplit('/', 1)[-1].split('.')

['lognormal', 'dat']