# openlis usage example

In [1]:
import openlis
import openlis.data
import openlis.model
import openlis.database
li = openlis
import numpy as np

## Set up dataset

In [2]:
## Generate a dataset of 100,000 uniform floats between 0.0 and 1.0

num_keys = 100000        # how many keys to generate
key_range = [0.0, 1.0]   # bounds of the uniform key distribution
# NOTE(review): iseed is presumably the random seed that makes the key set
# reproducible -- confirm against openlis.data.
raw_data_set = li.data.generate_uniform_floats_data_set(num_keys, 
                                                        key_range, 
                                                        iseed=17)

In [3]:
## Split into train/validate, using 100% for training (no validation needed)

# validation_size=0 requests an empty validation split; only data_sets.train
# is used in the rest of this example.
data_sets = li.data.create_train_validate_data_sets(raw_data_set, validation_size=0)

## Create a recursive-model index 

In [4]:
## Create a recursive-model index (RMI) based on the training data set

# NOTE(review): hidden_layer_widths presumably sizes the Stage 1 network and
# num_experts the number of Stage 2 models ("experts") -- confirm against
# openlis.model.RMI_simple.
rmi = li.model.RMI_simple(data_sets.train,
                          hidden_layer_widths=[8,8],
                          num_experts=100)

## Create a database interface to that model

In [5]:
## Create a learned index structure, which can be used like a database

# Wraps the RMI created above. The model has not been trained at this point;
# rmi_db is the object on which train/select/insert/delete are called later.
rmi_db = li.database.IndexStructurePacked(model=rmi)

## Train the model

In [6]:
## Train the database

# You may need to try different batch_sizes, max_steps, and learning_rates.
# Each is a list with two elements (one for Stage 1, one for Stage 2).

# Note that rmi_db.train() not only trains the model, but also
# calculates and saves the maximum errors for each "expert" and
# saves the trained weights and biases for use in fast Numpy
# inference calculations. Basically, this function does everything
# needed to get Select, Insert, and Delete ready to work.

# NOTE(review): the Stage 2 learning rate (1000) is six orders of magnitude
# larger than the Stage 1 rate (0.001). The recorded loss curve suggests this
# is intentional, but confirm before reusing these values on other data.
# model_save_dir names the directory used for the TensorFlow checkpoints.
rmi_db.train(batch_sizes=[10000,1000],
             max_steps=[500,500],
             learning_rates=[0.001,1000],
             model_save_dir='tf_checkpoints_example')

Step 0: loss = 44023.55 (0.052 sec, total 0.052 secs)
Step 100: loss = 10698.79 (0.006 sec, total 0.380 secs)
Step 200: loss = 4290.76 (0.006 sec, total 0.760 secs)
Step 300: loss = 1681.16 (0.007 sec, total 1.106 secs)
Step 400: loss = 891.59 (0.006 sec, total 1.495 secs)
Step 0: loss = 50030.11 (0.053 sec, total 1.973 secs)
Step 100: loss = 543.85 (0.012 sec, total 2.947 secs)
Step 200: loss = 195.52 (0.012 sec, total 3.860 secs)
Step 300: loss = 195.18 (0.012 sec, total 4.798 secs)
Step 400: loss = 183.91 (0.019 sec, total 5.813 secs)
INFO:tensorflow:Restoring parameters from tf_checkpoints_example/stage_2.ckpt


## Select, example usage

In [7]:
## Select: look up the position of keys in the index

# One key that is known to be present (the first training key)
keys = np.array(data_sets.train.keys[0])
pos = rmi_db.select(keys)
print("Select single key:")
print(" Key: {}\n Pos: {}\n".format(*map(np.squeeze, (keys, pos))))

# Several keys in one call (the first five training keys)
keys = np.array(data_sets.train.keys[0:5])
pos = rmi_db.select(keys)
print("Select multiple keys:")
print("Keys: {}\n Pos: {}\n".format(*map(np.squeeze, (keys, pos))))

# A key that lies outside the generated [0.0, 1.0] range, so it is not stored
keys = [17.0]
pos = rmi_db.select(keys)
print("Select non-existing key:")
print("Keys: {}\n Pos: {}".format(*map(np.squeeze, (keys, pos))))

print("Note: Pos=-1 indicates that the key is not in the dataset.")

Select single key:
 Key: 0.1540215085518002
 Pos: 15476

Select multiple keys:
Keys: [ 0.15402151  0.1999664   0.71770092  0.21420649  0.42162701]
 Pos: [15476 20118 71921 21498 42394]

Select non-existing key:
Keys: 17.0
 Pos: -1
Note: Pos=-1 indicates that the key is not in the dataset.


## Insert, example usage

In [8]:
## Insert: add keys to the index, then read their positions back

# One new key
keys = np.array([0.5])
success = rmi_db.insert(keys)
pos = rmi_db.select(keys)
print("Insert single key:")
print(" Success: {}\n Key: {}\n Pos: {}\n".format(
    *map(np.squeeze, (success, keys, pos))))

# Several new keys in one call; -17.0 and 17.0 fall outside the original
# [0.0, 1.0] key range
keys = np.array([-17.0, 0.2, 0.8, 17.0])
success = rmi_db.insert(keys)
pos = rmi_db.select(keys)
print("Insert multiple keys:")
print(" Success: {}\n Keys: {}\n Pos: {}\n".format(
    *map(np.squeeze, (success, keys, pos))))

# A key that is already stored (0.5 was inserted at the top of this cell)
keys = np.array([0.5])
success = rmi_db.insert(keys)
pos = rmi_db.select(keys)
print("Insert existing key:")
print(" Success: {}\n Keys: {}\n Pos: {}\n".format(
    *map(np.squeeze, (success, keys, pos))))

Insert single key:
 Success: True
 Key: 0.5
 Pos: 50115

Insert multiple keys:
 Success: [ True  True  True  True]
 Keys: [-17.    0.2   0.8  17. ]
 Pos: [     0  20121  80189 100004]

Insert existing key:
 Success: False
 Keys: 0.5
 Pos: 50117



## Delete, example usage

In [9]:
## Delete: remove keys from the index, then try to select them again

# One stored key
keys = np.array([0.5])
success = rmi_db.delete(keys)
pos = rmi_db.select(keys)
print("Delete single key:")
print(" Success: {}\n Key: {}\n Pos after deletion: {}\n".format(
    *map(np.squeeze, (success, keys, pos))))

# Several stored keys in one call
keys = np.array([-17.0, 0.2, 0.8, 17.0])
success = rmi_db.delete(keys)
pos = rmi_db.select(keys)
print("Delete multiple keys:")
print(" Success: {}\n Keys: {}\n Pos after deletion: {}\n".format(
    *map(np.squeeze, (success, keys, pos))))

# A key that is no longer stored (0.5 was deleted at the top of this cell)
keys = np.array([0.5])
success = rmi_db.delete(keys)
pos = rmi_db.select(keys)
print("Delete non-existing key:")
print(" Success: {}\n Keys: {}\n Pos after deletion: {}\n".format(
    *map(np.squeeze, (success, keys, pos))))

print("Note: Pos=-1 indicates that the key is not in the dataset.")

Delete single key:
 Success: True
 Key: 0.5
 Pos after deletion: -1

Delete multiple keys:
 Success: [ True  True  True  True]
 Keys: [-17.    0.2   0.8  17. ]
 Pos after deletion: [-1 -1 -1 -1]

Delete non-existing key:
 Success: False
 Keys: 0.5
 Pos after deletion: -1

Note: Pos=-1 indicates that the key is not in the dataset.


## Retrain the model if needed

In [10]:
## Retrain the model after many insertions and/or deletions.

# NOTE(review): train() is called with no arguments here, so batch sizes,
# step counts, learning rates and the save directory are whatever train()
# falls back to -- TODO confirm they match the first training run.
rmi_db.train()

Step 0: loss = 48241.78 (0.047 sec, total 0.047 secs)
Step 100: loss = 18202.67 (0.006 sec, total 0.405 secs)
Step 200: loss = 4730.72 (0.006 sec, total 0.796 secs)
Step 300: loss = 2723.77 (0.011 sec, total 1.189 secs)
Step 400: loss = 1835.27 (0.011 sec, total 1.621 secs)
Step 0: loss = 48809.59 (0.062 sec, total 2.258 secs)
Step 100: loss = 394.18 (0.012 sec, total 3.199 secs)
Step 200: loss = 170.76 (0.012 sec, total 4.129 secs)
Step 300: loss = 174.48 (0.021 sec, total 5.144 secs)
Step 400: loss = 185.45 (0.013 sec, total 6.193 secs)
INFO:tensorflow:Restoring parameters from tf_checkpoints_example/stage_2.ckpt
