# Counters and Statistics

We can store the counters in Redis HASHes. 

Counter names can be as follows:
- site hits
- sales
- database queries

We can store them in time slices (1 s, 5 s, 1 min, and so on).

To keep track of every counter, we keep a record of them in a ZSET. That way, we can periodically clean up the counters as they grow in size.

In [2]:
# Widths, in seconds, of the time slices in which every counter is recorded.
PRECISION = [1, 5, 60, 300, 3600, 18000, 86400]

def update_counter(conn, name, count=1, now=None):
    """Add ``count`` to the counter ``name`` in every time-slice precision.

    For each width in ``PRECISION`` the counter is registered in the
    ``known:`` ZSET (score 0) and the field for the current slice is
    incremented in the ``count:<precision>:<name>`` HASH. All commands are
    queued on a single pipeline and sent with one ``execute()``.

    Args:
        conn: Redis client providing ``pipeline()``.
        name: Name of the counter, e.g. ``'hits'``.
        count: Amount to add to the counter.
        now: Unix timestamp (seconds) selecting the slice to update;
            defaults to the current time.
    """
    # Compare against None so that an explicit timestamp of 0 is honoured
    # instead of being replaced by the current time.
    if now is None:
        now = time.time()
    pipe = conn.pipeline()

    for prec in PRECISION:
        # Start of the time slice that contains `now`.
        pnow = int(now / prec) * prec

        # Name shared by the ZSET member and the data hash.
        counter_key = f'{prec}:{name}'

        # Record a reference to the counter with score 0 so it can be
        # cleaned up later. redis-py >= 3.0 takes a {member: score} mapping.
        pipe.zadd('known:', {counter_key: 0})

        # Update the counter for the given name and time precision.
        pipe.hincrby('count:' + counter_key, pnow, count)
    pipe.execute()

In [3]:
def get_counter(conn, name, precision):
    """Return the recorded samples of one counter, oldest first.

    Args:
        conn: Redis client providing ``hgetall()``.
        name: Name of the counter.
        precision: Time-slice width whose samples should be read.

    Returns:
        A list of ``(slice_start, count)`` integer tuples sorted by
        ``slice_start``; empty when the hash holds no data.
    """
    # Name of the key where the data is stored.
    counter_key = f'{precision}:{name}'

    # Fetch the counter data from Redis.
    data = conn.hgetall('count:' + counter_key)

    # Convert fields and values to integers and sort so that older samples
    # come first. dict.items() is used because iteritems() is Python 2 only.
    return sorted((int(key), int(value)) for key, value in data.items())

In [5]:
# Number of most recent samples to keep for each counter and precision.
SAMPLE_COUNT = 120

def clean_counters(conn):
    """Periodically delete old samples from every known counter.

    Loops until the module-level ``QUIT`` flag becomes truthy. On each pass
    it walks the ``known:`` ZSET, drops samples older than
    ``SAMPLE_COUNT * precision`` seconds from each ``count:`` hash, and
    removes a counter from ``known:`` once its hash is empty.

    Args:
        conn: Redis client providing ``pipeline()``, ``zcard()``,
            ``zrange()``, ``hkeys()`` and ``hdel()``.
    """
    pipe = conn.pipeline(True)
    # Keep a record of the number of passes so that we can balance
    # cleaning out per-second vs per-day counters.
    passes = 0
    while not QUIT:
        # Start time of the pass, used to calculate its total duration.
        start = time.time()

        # Incrementally iterate over all known counters. The size is
        # re-read every iteration because counters are added and removed
        # while we iterate.
        index = 0
        while index < conn.zcard('known:'):
            entry = conn.zrange('known:', index, index)
            index += 1
            if not entry:
                break

            counter = entry[0]
            # Redis clients may return bytes; the string operations below
            # need str.
            if isinstance(counter, bytes):
                counter = counter.decode()

            # Get the precision of the counter.
            prec = int(counter.partition(':')[0])

            # We take a pass roughly every 60 seconds, so counters are
            # cleaned at about the rate they are written to.
            bprec = prec // 60 or 1

            # Skip this counter if it is not due on this pass, e.g. a
            # 5 minute counter is only checked on every 5th pass.
            if passes % bprec:
                continue

            hkey = 'count:' + counter
            # Cutoff time for the earliest sample that we should keep,
            # given the precision and the number of samples to keep.
            cutoff = time.time() - SAMPLE_COUNT * prec

            # Fetch the sample times as a sorted list of integers.
            samples = sorted(map(int, conn.hkeys(hkey)))

            # Number of samples that need to be removed.
            remove = bisect.bisect_right(samples, cutoff)
            if not remove:
                continue

            conn.hdel(hkey, *samples[:remove])
            # Only when every sample was removed may the hash be empty.
            if remove != len(samples):
                continue

            try:
                pipe.watch(hkey)
                # Verify that the counter hash is empty, and if so
                # remove it from the known counters.
                if not pipe.hlen(hkey):
                    pipe.multi()
                    pipe.zrem('known:', counter)
                    pipe.execute()
                    # A counter was deleted, so the next one now sits at
                    # the index we just visited.
                    index -= 1
                else:
                    # The hash isn't empty, keep it in the known counters.
                    pipe.unwatch()
            except redis.exceptions.WatchError:
                # Someone else added data to the counter hash, so it is
                # not empty; leave it in the list of known counters.
                pass

        # Update the pass count and sleep so that passes start roughly
        # every 60 seconds, but always sleep at least 1 second.
        passes += 1
        duration = min(int(time.time() - start) + 1, 60)
        time.sleep(max(60 - duration, 1))