In [1]:
!pip install redis numpy lmdb

Collecting redis
  Downloading redis-4.5.1-py3-none-any.whl (238 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m238.5/238.5 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting numpy
  Downloading numpy-1.24.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting lmdb
  Downloading lmdb-1.4.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (306 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m306.5/306.5 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting async-timeout>=4.0.2
  Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)
Installing collected packages: lmdb, numpy, async-timeout, redis
Successfully installed async-timeout-4.0.2 lmdb-1.4.0 numpy-1.24.2 redis-4.5.1


In [2]:
import redis
import uuid
import pickle
import random
import time
import numpy as np
import lmdb

In [3]:
uuids = set(str(uuid.uuid4()) for _ in range(1000000))

In [4]:
uuids = list(uuids)

In [5]:
# with open("uuids.pickle", "wb") as f:
#     pickle.dump(uuids, f)

# Redis

In [6]:
# Connect to Redis
redis_client = redis.Redis(host='localhost', port=6379)

In [7]:
# Set the feature data in Redis
for entity_id in uuids:
    feature_value = round(random.random(), 4)
    redis_client.hset("feature_popularity", entity_id, feature_value)

In [8]:
redis_client.hmget("feature_popularity", [uuids[0], uuids[1]])

[b'0.3779', b'0.6428']

## Lookup test for 1000 features

In [9]:
times = []
for _ in range(1000):
    
    ids_list = random.sample(uuids, 1000)
    
    start = time.time()
    features = redis_client.hmget("feature_popularity", ids_list)
    features = [float(feat) for feat in features]
    end = time.time()
    
    times.append((end-start)*1000)
    
print(np.mean(times), np.std(times), np.percentile(times, 0.99))

3.2391490936279297 0.11083147143054363 3.1380653381347656


# LMDB

In [10]:
lmdb_env = lmdb.open("/tmp/lmdb", map_size=int(1e9))

In [11]:
with lmdb_env.begin(write=True) as txn:
    for entity_id in sorted(uuids):
        feature_value = round(random.random(), 4)
        txn.put(f"{entity_id}__feature_popularity".encode(), str(feature_value).encode(), append=True) 

In [19]:
with lmdb_env.begin(write=False) as txn:
    cursor = txn.cursor()  
    feat = cursor.getmulti([f"{uuids[0]}__feature_popularity".encode(), f"{uuids[1]}__feature_popularity".encode()])
    print(feat)

[(b'0dcb1936-98d6-4053-8694-6a9cd35341f6__feature_popularity', b'0.9508'), (b'656b8b4f-96d3-40bc-8d34-9a70021a84ba__feature_popularity', b'0.9425')]


## Lookup test for 1000 features

In [22]:
times = []
for _ in range(1000):
    
    ids_list = random.sample(uuids, 1000)
    
    start = time.time()
    __keys = [ f"{_uuid}__feature_popularity".encode() for _uuid in ids_list]
    with lmdb_env.begin(write=False) as txn:
        cursor = txn.cursor()
        features = cursor.getmulti(__keys)
    features = [float(feat[1]) for feat in features]
    end = time.time()
    
    times.append((end-start)*1000)
    
print(np.mean(times), np.std(times), np.percentile(times, 0.99))

1.1198091506958008 0.3922207714158349 0.9849071502685547
