In [29]:
import binascii

def shard_key(base, key, total_elements, shard_size):
    """Return the name of the shard that ``key`` belongs to.

    Numeric keys (ints, floats, and strings consisting only of decimal
    digits) are mapped to contiguous ranges of ``shard_size`` IDs. Every other
    key is hashed with CRC32 and spread over
    ``2 * total_elements // shard_size`` shards, with a minimum of one shard.

    Args:
        base: Prefix of the shard name.
        key: The item being placed. May be an int, float, str, or UTF-8
            encoded bytes.
        total_elements: Expected total number of items across all shards;
            only used for non-numeric keys.
        shard_size: Desired number of items per shard.

    Returns:
        The shard name as the string ``'<base>:<shard_id>'``.

    Raises:
        ZeroDivisionError: If ``shard_size`` is zero.
        ValueError / OverflowError: If ``key`` is a NaN or infinite float.
    """
    if isinstance(key, (int, float)):
        # int() truncates floats. The earlier int(str(key), 10) round-trip
        # raised ValueError for every float because of the '.' in its repr.
        shard_id = int(key) // shard_size
    else:
        # Accept bytes so that b'one' and 'one' land in the same shard.
        text = key.decode('utf-8') if isinstance(key, (bytes, bytearray)) else key
        # isdecimal() matches exactly what int() can parse; isdigit() also
        # accepts characters such as superscripts that int() rejects.
        if text.isdecimal():
            shard_id = int(text, 10) // shard_size
        else:
            # Never fewer than one shard, otherwise the modulo divides by zero
            # when 2 * total_elements < shard_size.
            shards = max(1, 2 * total_elements // shard_size)
            shard_id = binascii.crc32(text.encode('utf-8')) % shards
    return f'{base}:{shard_id}'

In [35]:
# Non-numeric key: hashed with CRC32 into one of 2 * 10_000 // 1024 == 19 shards.
shard_key('key', 'one', 10_000, 1024)

'key:10'

In [38]:
# A different non-numeric key, same sizing: it hashes to a different shard.
shard_key('key', 'two', 10_000, 1024)

'key:3'

## Sharding HASHes

In [32]:
def shard_hset(conn, base, key, value, total_elements, shard_size):
    """Set field ``key`` to ``value`` in the shard HASH chosen for ``key``.

    The HASH name comes from ``shard_key`` using ``base``, ``key``,
    ``total_elements`` and ``shard_size``. Returns whatever ``conn.hset``
    returns.
    """
    target_hash = shard_key(
        base=base,
        key=key,
        total_elements=total_elements,
        shard_size=shard_size,
    )
    return conn.hset(target_hash, key, value)

In [33]:
def shard_hget(conn, base, key, total_elements, shard_size):
    """Read field ``key`` from the shard HASH chosen for ``key``.

    The HASH name comes from ``shard_key`` using ``base``, ``key``,
    ``total_elements`` and ``shard_size``. Returns whatever ``conn.hget``
    returns.
    """
    source_hash = shard_key(
        base=base,
        key=key,
        total_elements=total_elements,
        shard_size=shard_size,
    )
    return conn.hget(source_hash, key)

In [37]:
# Sharded ip lookup.

# Sizing handed to the sharded HASH helpers by the city-lookup functions below:
# expected total number of entries, and desired entries per shard.
# NOTE(review): SHARD_SIZE is assigned again (to 512) in a later cell of this
# notebook, and the functions below read the global at call time, so they use
# whichever value was assigned most recently — confirm this is intended.
TOTAL_SIZE = 320_000
SHARD_SIZE = 1024

def import_cities_to_redis(conn, filename):
    """Load city records from the CSV file ``filename`` into sharded HASHes.

    Each record is stored as a JSON-encoded ``[city, region, country]`` list
    under its city ID, via ``shard_hset`` with the ``'cityid2city:'`` base and
    the module-level ``TOTAL_SIZE`` / ``SHARD_SIZE``.

    The per-row parsing that produces ``city_id``, ``city``, ``region`` and
    ``country`` is elided in this notebook (the ``# ...`` placeholder).
    """
    # Open via a context manager so the file handle is closed even if a row
    # fails; newline='' is what the csv module requires for correct handling
    # of newlines embedded in quoted fields.
    with open(filename, newline='') as csv_file:
        for row in csv.reader(csv_file):
            # ...
            shard_hset(conn, 'cityid2city:', city_id, json.dumps([city, region, country]),
                       TOTAL_SIZE, SHARD_SIZE)

def find_city_by_ip(conn, ip_address):
    """Return the decoded city record for ``ip_address``, or ``None``.

    The record is read with ``shard_hget`` from the ``'cityid2city:'`` shards
    using the module-level ``TOTAL_SIZE`` / ``SHARD_SIZE`` and decoded from
    JSON. ``None`` is returned when no record is stored for the city ID.

    The step that resolves ``ip_address`` to ``city_id`` is elided in this
    notebook (the ``# ...`` placeholder).
    """
    # ...
    data = shard_hget(conn, 'cityid2city:', city_id, TOTAL_SIZE, SHARD_SIZE)
    if data is None:
        # Missing hash field: json.loads(None) would raise TypeError.
        return None
    return json.loads(data)

## Sharding SETs

In [39]:
def shard_sadd(conn, base, member, total_elements, shard_size):
    """Add ``member`` to the shard SET chosen for it.

    The member is prefixed with ``'x'`` before being passed to ``shard_key``,
    so the key is never all digits and placement always takes the CRC32
    hashing branch, even for integer members. Returns whatever ``conn.sadd``
    returns.
    """
    hashed_name = f'x{member!s}'
    target_set = shard_key(base, hashed_name, total_elements, shard_size)
    return conn.sadd(target_set, member)

In [42]:
# Typical shard size for the intset encoding for SETs.
# NOTE(review): this rebinds the same SHARD_SIZE global that the city-lookup
# cell earlier in this notebook set to 1024 — confirm the HASH helpers are not
# expected to keep using 1024 after this cell runs.
SHARD_SIZE = 512

def count_visit(conn, session_id):
    """Record a visit for ``session_id`` in today's sharded unique-visitor SET.

    If the visitor was not already present, the daily counter stored at
    ``unique:<ISO date>`` is incremented. Returns ``None``.
    """
    current_day = date.today()
    counter_key = f'unique:{current_day.isoformat()}'

    # Expected number of unique visitors today; sizes the shard count.
    shard_budget = get_expected(conn, counter_key, current_day)

    # Reduce the UUID to an integer: strip the dashes and keep the first
    # 15 hex digits, which is 60 bits.
    hex_prefix = session_id.replace('-', '')[:15]
    visitor_id = int(hex_prefix, 16)

    newly_seen = shard_sadd(conn, counter_key, visitor_id, shard_budget, SHARD_SIZE)
    if not newly_seen:
        return
    conn.incr(counter_key)

In [44]:
# Fallback estimate of daily unique visitors when yesterday's count is missing.
DAILY_EXPECTED = 1_000_000
# In-process cache of resolved expectations, keyed by the daily counter key.
EXPECTED = {}

def get_expected(conn, key, today):
    """Return the expected number of unique visitors for the day behind ``key``.

    Resolution order:
      1. the in-process ``EXPECTED`` cache;
      2. the value stored at ``<key>:expected``;
      3. a fresh estimate: yesterday's ``unique:<date>`` counter (or
         ``DAILY_EXPECTED`` when that is missing or not positive), increased
         by 50% and rounded up to a power of two. The estimate is published
         with ``setnx``; if another value is already stored, that one is used.

    Args:
        conn: Connection object providing ``get`` and ``setnx``.
        key: Today's counter key.
        today: Date object for the current day; yesterday's key is derived
            from it.

    Returns:
        The expectation as an int. It is also stored in ``EXPECTED``.
    """
    if key in EXPECTED:
        return EXPECTED[key]

    exkey = key + ':expected'
    expected = conn.get(exkey)

    if not expected:
        yesterday = (today - timedelta(days=1)).isoformat()
        expected = conn.get(f'unique:{yesterday}')
        expected = int(expected or DAILY_EXPECTED)
        if expected <= 0:
            # A stored count of zero or less would make the logarithm below
            # raise "math domain error"; use the default estimate instead.
            expected = DAILY_EXPECTED

        # Add 50% to yesterday's count and round up to the next power of 2,
        # assuming today's views are at least 50% better than yesterday's.
        # log2 is more accurate than log(x, 2).
        expected = 2 ** math.ceil(math.log2(expected * 1.5))
        if not conn.setnx(exkey, expected):
            # Another value was stored first; use it.
            expected = conn.get(exkey)
    EXPECTED[key] = int(expected)
    return EXPECTED[key]