In [1]:
# json and hashlib are part of the Python standard library (nothing to install)
# pip install multiformats_cid

In [None]:
import json
import hashlib

def hash_data(data, *, sort_keys=False):
    """Return the SHA-256 hex digest of ``data`` serialised as JSON.

    Parameters
    ----------
    data : object
        Any value ``json.dumps`` can serialise (dict, list, str, number, ...).
    sort_keys : bool, default False
        When True, dictionary keys are sorted before hashing so that two
        dicts with the same content but different insertion order produce
        the same digest. The default keeps the historical behaviour, so
        digests computed before this option existed stay valid.

    Returns
    -------
    str
        64-character lowercase hexadecimal digest.

    Raises
    ------
    TypeError
        Propagated from ``json.dumps`` when ``data`` is not JSON-serialisable.
    """
    serialised = json.dumps(data, sort_keys=sort_keys)
    # Explicit codec: the digest must not depend on any implicit default.
    return hashlib.sha256(serialised.encode("utf-8")).hexdigest()

In [None]:
import geopandas as gpd
import pygeohash as pgh
import multihash as mh
from multiformats_cid import cid
import multicodec
import multibase

In [None]:
def merkle_tree(hash_list):
    """Reduce a list of hash strings to a single Merkle root.

    Adjacent entries are concatenated pairwise and re-hashed with
    ``hash_data`` level by level until one value remains. When a level has
    an odd number of entries, the last entry is paired with itself.

    Parameters
    ----------
    hash_list : sequence of str
        Leaf hashes, in the order they should appear in the tree.

    Returns
    -------
    str
        The root hash. A single-element input is returned unchanged.

    Raises
    ------
    ValueError
        If ``hash_list`` is empty (previously this recursed until
        ``RecursionError`` because an empty level never shrinks to one item).
    """
    if len(hash_list) == 0:
        raise ValueError("merkle_tree() requires at least one hash")

    # Work on a copy so the caller's list is never modified.
    level = list(hash_list)
    while len(level) > 1:
        # Odd number of hashes: duplicate the last one so it pairs with itself.
        if len(level) % 2 == 1:
            level.append(level[-1])
        level = [
            hash_data(level[i] + level[i + 1])
            for i in range(0, len(level), 2)
        ]
    return level[0]

In [None]:
# Load GeoJSON file.
# GeoJSON is UTF-8 by specification, so the encoding is set explicitly
# instead of relying on the platform default.
with open("../data/naive.geojson", "r", encoding="utf-8") as f:
    geojson = json.load(f)

# Extract features and hash them.
# hash_data() already JSON-serialises its argument, so each feature dict is
# passed directly; wrapping it in json.dumps() first would hash a
# double-encoded string that no other hash_data(feature) call could reproduce.
feature_hashes = [hash_data(feature) for feature in geojson['features']]

# A Merkle root is undefined for zero leaves; fail with a clear message.
if not feature_hashes:
    raise ValueError("GeoJSON contains no features to build a Merkle tree from")

# Create Merkle Tree
merkle_root = merkle_tree(feature_hashes)

print(f"Merkle Root: {merkle_root}")

In [2]:
# Cryptographic hash function: SHA-256 hex digest of the whole GeoJSON object
# (hash_data serialises it with json.dumps before hashing).
hash_data(geojson)

'cf967acf8ea0f56b0444894a2b81409e74447e3259bdda79b1f7d3d4abf08a95'

In [4]:
# Take the first coordinate row of the GeoJSON geometries and unpack it.
# NOTE(review): presumably x is longitude and y is latitude - confirm the CRS.
x, y = (
    gpd.GeoDataFrame.from_features(geojson)
    .geometry
    .get_coordinates()
    .iloc[0]
)

In [5]:
def geohash_from_geojson(gj,precision=8):
    """Return the geohash of the first coordinate found in a GeoJSON object.

    Parameters
    ----------
    gj : object
        Passed unchanged to ``gpd.GeoDataFrame.from_features``.
    precision : int, default 8
        Number of characters in the returned geohash.

    Returns
    -------
    str
        ``pgh.encode`` of the first coordinate row (x passed as longitude,
        y as latitude), or ``'a' * precision`` when there is no coordinate
        to encode. ``'a'`` is not part of the geohash alphabet, so the
        placeholder cannot collide with a real geohash.
    """
    # Placeholder meaning "no geometry".
    no_geometry = 'a' * precision

    gdf = gpd.GeoDataFrame.from_features(gj)
    if len(gdf) < 1:
        return no_geometry

    # Reuse the frame built above instead of parsing the GeoJSON twice.
    coords = gdf.geometry.get_coordinates()
    # Features whose geometries are null/empty yield no coordinate rows;
    # treat that like "no geometry" rather than failing on .iloc[0].
    if coords.empty:
        return no_geometry

    x, y = coords.iloc[0]
    return pgh.encode(latitude=y, longitude=x, precision=precision)


In [42]:
# Geographical hash function: geohash of the first coordinate in the GeoJSON,
# using the function's default precision of 8 characters.
geohash_from_geojson(geojson)

'9q8yv93y'

In [6]:

# Composite identifier: the geohash immediately followed by the SHA-256 hex digest.
ghsh2 = f"{geohash_from_geojson(geojson)}{hash_data(geojson)}"

In [46]:
# Wrap the UTF-8 bytes of the composite string in a multihash with function
# code 0x01. In the output below, the leading \x01 is that code and 'H'
# (0x48 == 72) is the payload length: 8 geohash chars + 64 hex chars.
mh.encode(ghsh2.encode('utf-8'), 0x01)

b'\x01H9q8yv93ycf967acf8ea0f56b0444894a2b81409e74447e3259bdda79b1f7d3d4abf08a95'

In [13]:
# New CID: a version-1 CID with the 'dag-pb' codec whose multihash carries
# the geohash + SHA-256 composite string.
geocid = cid.CIDv1(
    'dag-pb',
    mh.encode(ghsh2.encode('utf-8'), 0x01),
)

In [14]:
# Display the CID object's repr.
geocid

CIDv1(version=1, codec=dag-pb, multihash=b'\x01H9q8yv93ycf967acf8e..')

In [15]:
# Encode the CID with the 'base32' encoding; the result is a bytes object.
cidbytes = geocid.encode('base32')

In [48]:
# Display the base32-encoded CID.
cidbytes

b'bafyacsbzoe4hs5rzgn4wgzrzgy3wcy3ghbswcmdggu3gembugq2dqojumezgeobrgqydszjxgq2din3fgmzdkolcmrsgcnzzmiywmn3egnsdiylcmyydqyjzgu'

In [None]:
# Deciphering: recover the geohash from the encoded CID

In [17]:
# Round-trip check: parse the encoded bytes back into a CID object; the repr
# below matches the one shown for geocid.
cid.from_bytes(cidbytes)

CIDv1(version=1, codec=dag-pb, multihash=b'\x01H9q8yv93ycf967acf8e..')

In [21]:
cid = multibase.decode(cidbytes)

In [22]:
cid

b'\x01p\x01H9q8yv93ycf967acf8ea0f56b0444894a2b81409e74447e3259bdda79b1f7d3d4abf08a95'

In [23]:
data = bytes(cid[1:])
version = int(cid[0])
codec = multicodec.get_codec(data)
multihash = multicodec.remove_prefix(data)

In [40]:
# Retrieve geohash.
# The multihash payload is the UTF-8 bytes of ghsh2 (geohash followed by the
# 64-character SHA-256 hex digest), so dropping the last 64 bytes leaves only
# the geohash prefix.
ghash = mh.decode(multihash).digest[:-64].decode("utf-8")

In [41]:
# Display the recovered geohash; it matches the geohash_from_geojson(geojson) output.
ghash

'9q8yv93y'

In [None]:
# Bingo: the geohash was recovered from the CID.