In [10]:
!pip install -e .. -q

In [5]:
import os
import hashlib
import unittest
import urllib.request

import cachejar

`ByteSum` computes the MD5 digest of the content at a supplied URL.  Large files can take a while to download and hash.

In [8]:
class ByteSum:
    """An MD5 summary of the content served at a URL.

    Attributes:
        hash: hexadecimal MD5 digest (str) of the bytes read from the URL.
    """

    def __init__(self, url):
        """Fetch ``url`` and store the MD5 hex digest of its body in ``self.hash``.

        Args:
            url: anything accepted by ``urllib.request.urlopen`` (a URL string
                or a ``Request`` object).

        Raises:
            urllib.error.URLError: if the URL cannot be opened.
        """
        chunk_size = 64 * 1024
        digest = hashlib.md5()
        # The context manager closes the response even if reading fails;
        # hashing chunk by chunk avoids holding a large body in memory at once.
        with urllib.request.urlopen(url) as response:
            for chunk in iter(lambda: response.read(chunk_size), b""):
                digest.update(chunk)
        self.hash = digest.hexdigest()

Each `cachejar` is identified by an "appid" (which could be the name of a Python package).  Each jar contains a map from a file name or URL to a list of signatures (length, timestamp and other identifying information) and an associated object image.  If a file or URL changes, the cached image will not be returned.

In the example below, we cache the checksum of a large file fetched from a URL:

In [9]:
# Application id that names the jar, and the resource whose checksum is cached
appid = 'example_notebbook'
url = "http://hl7.org/fhir/fhir.ttl"

# Open the jar that belongs to this application id
jar = cachejar.jar(appid)

# Empty the jar so that the first lookup below starts without a cached entry
jar.clear()

# Perform the identical lookup two times in a row
for _ in range(1, 3):
    obj = jar.object_for(url, ByteSum)
    if not obj:
        # Nothing usable came back from the jar: compute the summary and store it
        print("Not cached - computing md5")
        obj = ByteSum(url)
        jar.update(url, obj, ByteSum)
    else:
        print("Retrived from cache")
    print(f"MD5 for {url} = {obj.hash}")

Not cached - computing md5
MD5 for http://hl7.org/fhir/fhir.ttl = 8ce6c3545b7a238f0091abe05dbcb7dd
Retrived from cache
MD5 for http://hl7.org/fhir/fhir.ttl = 8ce6c3545b7a238f0091abe05dbcb7dd
