# openlis usage example

In [1]:
import openlis
import openlis.data
import openlis.model
import openlis.database
li = openlis
import numpy as np

## Set up dataset

In [2]:
## Generate a dataset of 100,000 uniform floats between 0.0 and 1.0

num_keys = 100000        # number of keys to generate
key_range = [0.0, 1.0]   # [lower, upper] bounds for the generated keys

# NOTE(review): iseed is presumably the random seed, fixed so that the
# generated keys are reproducible -- confirm against the openlis docs.
raw_data_set = li.data.generate_uniform_floats(
    num_keys,
    key_range,
    iseed=17
)

In [3]:
## Split into train/validate data sets.
## validation_size=0 keeps 100% of the keys for training (no validation needed).

data_sets = li.data.create_train_validate_data_sets(
    raw_data_set,
    validation_size=0
)

## Create a recursive-model index 

In [4]:
## Build a recursive-model index (RMI) from the training data set

# NOTE(review): hidden_layer_widths presumably lists the width of each
# hidden layer, and num_experts the number of second-stage "expert"
# models -- confirm against the openlis docs.
rmi = li.model.RMI_simple(
    data_sets.train,
    hidden_layer_widths=[8, 8],
    num_experts=100
)

## Create database interface to that model

In [5]:
## Create a learned index structure, which can be used like a database.
## Choose either IndexStructurePacked or IndexStructureGapped.

# IndexStructureGapped is faster for insertions and deletions.
# NOTE(review): `scale` presumably sets how many array slots are allocated
# per key (the Select output in this notebook reports positions up to
# ~200,000 for 100,000 keys once scale=2) -- confirm against the openlis docs.
rmi_db = li.database.IndexStructureGapped(model=rmi, scale=3)
# If using IndexStructureGapped, you can rescale the array at any time.
# Here the array goes from the initial scale=3 to scale=2, before training.
rmi_db.rescale(scale=2)

# IndexStructurePacked uses less space.
# To use IndexStructurePacked instead, comment out the two statements
# above and uncomment the following line.
#rmi_db = li.database.IndexStructurePacked(model=rmi)

## Train the model

In [6]:
## Train the database

# rmi_db.train() does more than fit the model: it also calculates and
# saves the maximum error of each "expert", and saves the trained
# weights and biases for use in fast Numpy inference calculations.
# In short, it does everything needed to get Select, Insert, and
# Delete ready to work.

# Hyperparameters, one entry per training stage: [Stage 1, Stage 2].
# You may need to experiment with different values.
# NOTE(review): the Stage 2 learning rate of 1000 is unusually large;
# the recorded training log does show the Stage 2 loss decreasing with
# it, but confirm that the value is intentional.
stage_batch_sizes = [10000, 1000]
stage_max_steps = [500, 500]
stage_learning_rates = [0.001, 1000]

rmi_db.train(
    batch_sizes=stage_batch_sizes,
    max_steps=stage_max_steps,
    learning_rates=stage_learning_rates,
    model_save_dir='tf_checkpoints_example'
)

Stage 1 Training:
Step 0: loss = 63851.57 (0.054 sec, total 0.054 secs)
Step 100: loss = 41941.07 (0.008 sec, total 0.480 secs)
Step 200: loss = 28179.98 (0.008 sec, total 0.900 secs)
Step 300: loss = 21735.10 (0.008 sec, total 1.328 secs)
Step 400: loss = 17722.35 (0.008 sec, total 1.835 secs)

Stage 2 Training:
Step 0: loss = 107819.66 (0.064 sec, total 2.518 secs)
Step 100: loss = 20163.71 (0.012 sec, total 3.477 secs)
Step 200: loss = 2616.37 (0.012 sec, total 4.408 secs)
Step 300: loss = 1140.00 (0.012 sec, total 5.373 secs)
Step 400: loss = 1063.47 (0.012 sec, total 6.321 secs)
INFO:tensorflow:Restoring parameters from tf_checkpoints_example/stage_2.ckpt


## Select, example usage

In [7]:
## Use Select, some examples

# One entry per example: (headline, output template, keys to look up).
# The templates are kept verbatim per example because their labels and
# trailing newlines differ slightly.
select_cases = [
    # A single key taken from the training data
    ("Select single key:",
     " Key: {}\n Pos: {}\n",
     np.array(data_sets.train.keys[0])),
    # The first five keys of the training data
    ("Select multiple keys:",
     "Keys: {}\n Pos: {}\n",
     np.array(data_sets.train.keys[0:5])),
    # 17.0 lies outside the generated key range, so it is not in the dataset
    ("Select non-existing key:",
     "Keys: {}\n Pos: {}",
     [17.0]),
]

for headline, template, keys in select_cases:
    pos = rmi_db.select(keys)
    print(headline)
    print(template.format(np.squeeze(keys), np.squeeze(pos)))

print("Note: Pos=-1 indicates that the key is not in the dataset.")

Select single key:
 Key: 0.1540215085518002
 Pos: 30952

Select multiple keys:
Keys: [ 0.15402151  0.1999664   0.71770092  0.21420649  0.42162701]
 Pos: [ 30952  40236 143842  42996  84788]

Select non-existing key:
Keys: 17.0
 Pos: -1
Note: Pos=-1 indicates that the key is not in the dataset.


## Insert, example usage

In [8]:
## Use Insert, some examples

# One entry per example: (headline, output template, keys to insert).
# Each example gets its own freshly built key array.
insert_cases = [
    # A single new key
    ("Insert single key:",
     " Success: {}\n Key: {}\n Pos: {}\n",
     np.array([0.5])),
    # Several keys at once, including values outside the [0.0, 1.0] range
    ("Insert multiple keys:",
     " Success: {}\n Keys: {}\n Pos: {}\n",
     np.array([-42.0, -17.0, 0.2, 0.8, 17.0, 42.0])),
    # 0.5 was already inserted by the first example
    ("Insert existing key:",
     " Success: {}\n Keys: {}\n Pos: {}\n",
     np.array([0.5])),
]

for headline, template, keys in insert_cases:
    success = rmi_db.insert(keys)
    # Look the keys up again to show where they are stored after the insert
    pos = rmi_db.select(keys)
    print(headline)
    print(template.format(np.squeeze(success),
                          np.squeeze(keys),
                          np.squeeze(pos)))

Insert single key:
 Success: True
 Key: 0.5
 Pos: 100229

Insert multiple keys:
 Success: [ True  True  True  True  True  True]
 Keys: [-42.  -17.    0.2   0.8  17.   42. ]
 Pos: [     0      1  40239 160371 199998 199999]

Insert existing key:
 Success: False
 Keys: 0.5
 Pos: 100229



## Delete, example usage

In [9]:
## Use Delete, some examples

# One entry per example: (headline, output template, keys to delete).
# Each example gets its own freshly built key array.
delete_cases = [
    # A single key
    ("Delete single key:",
     " Success: {}\n Key: {}\n Pos after deletion: {}\n",
     np.array([0.5])),
    # Several keys at once
    ("Delete multiple keys:",
     " Success: {}\n Keys: {}\n Pos after deletion: {}\n",
     np.array([-42.0, -17.0, 0.2, 0.8, 17.0, 42.0])),
    # 0.5 was already deleted by the first example
    ("Delete non-existing key:",
     " Success: {}\n Keys: {}\n Pos after deletion: {}\n",
     np.array([0.5])),
]

for headline, template, keys in delete_cases:
    success = rmi_db.delete(keys)
    # Look the keys up again to show that they are gone after the delete
    pos = rmi_db.select(keys)
    print(headline)
    print(template.format(np.squeeze(success),
                          np.squeeze(keys),
                          np.squeeze(pos)))

print("Note: Pos=-1 indicates that the key is not in the dataset.")

Delete single key:
 Success: True
 Key: 0.5
 Pos after deletion: -1

Delete multiple keys:
 Success: [ True  True  True  True  True  True]
 Keys: [-42.  -17.    0.2   0.8  17.   42. ]
 Pos after deletion: [-1 -1 -1 -1 -1 -1]

Delete non-existing key:
 Success: False
 Keys: 0.5
 Pos after deletion: -1

Note: Pos=-1 indicates that the key is not in the dataset.


## Retrain the model if needed

In [10]:
## Retrain the model after many insertions and/or deletions.

# No arguments are passed here, so the hyperparameters are whatever
# train() falls back to (its defaults, or possibly the values supplied
# to the earlier call -- TODO confirm against the openlis docs).
rmi_db.train()

Stage 1 Training:
Step 0: loss = 39780.80 (0.048 sec, total 0.048 secs)
Step 100: loss = 41813.46 (0.006 sec, total 0.398 secs)
Step 200: loss = 18912.00 (0.006 sec, total 0.908 secs)
Step 300: loss = 8783.54 (0.006 sec, total 1.286 secs)
Step 400: loss = 4011.18 (0.009 sec, total 1.668 secs)

Stage 2 Training:
Step 0: loss = 102250.53 (0.061 sec, total 2.197 secs)
Step 100: loss = 20968.05 (0.012 sec, total 3.276 secs)
Step 200: loss = 2277.74 (0.012 sec, total 4.249 secs)
Step 300: loss = 427.80 (0.012 sec, total 5.198 secs)
Step 400: loss = 408.62 (0.020 sec, total 6.154 secs)
INFO:tensorflow:Restoring parameters from tf_checkpoints_example/stage_2.ckpt
