# IPFS Concepts - SCRATCHPAD

## IPFS Service Node

A node that is running the IPFS daemon and is connected to the IPFS network. It is able to provide content to other nodes and to retrieve content from other nodes.

In [37]:
# docker compose -f "tests/ipfs/docker-compose.yml" up -d --build

### Default Server Address

In [38]:
__IPFS_DEFAULT_URL__ = '/ip4/127.0.0.1/tcp/5001'

## IPFS-Toolkit

In [39]:
# pip install IPFS-Toolkit

### Connect

In [40]:
import ipfs_api

# Connect to the IPFS daemon
def connect():
    client = ipfs_api.ipfshttpclient
    return client.connect(__IPFS_DEFAULT_URL__)

#### Check Connection

In [41]:
def print_status():
    ipfs = connect()
    print('Client ID: ', ipfs.id())
    print('Client Version: ', ipfs.version())

print_status()

Client ID:  <ipfshttpclient2.client.base.ResponseBase: {'ID': '12D3KooWNTinNPaFXPyoo2wGcrjFC4uekMgDM5pFY46zwxxgL8HA', 'PublicKey': 'CAESILveCqhrvYi7xfZkCFoytPVThX++ztHAh85jBT3xQ0Et', 'Addresses': ['/ip4/127.0.0.1/tcp/4001/p2p/12D3KooWNTinNPaFXPyoo2wGcrjFC4uekMgDM5pFY46zwxxgL8HA', '/ip4/127.0.0.1/udp/4001/quic-v1/p2p/12D3KooWNTinNPaFXPyoo2wGcrjFC4uekMgDM5pFY46zwxxgL8HA', '/ip4/127.0.0.1/udp/4001/quic-v1/webtransport/certhash/uEiAsG_OZi39bk9tp4XO6luYhWxo_vmHB30yD1HFNsCV1hg/certhash/uEiBU9zqLAAbnHBe6T6br9Bg29WLOe4hW0nsQrOBKeoPadA/p2p/12D3KooWNTinNPaFXPyoo2wGcrjFC4uekMgDM5pFY46zwxxgL8HA', '/ip4/144.202.90.11/tcp/4001/p2p/12D3KooWBvuV5RKq8WqhGhWTm25uJwyjBp8RV14sJQ9JwPiTu1KX/p2p-circuit/p2p/12D3KooWNTinNPaFXPyoo2wGcrjFC4uekMgDM5pFY46zwxxgL8HA', '/ip4/144.202.90.11/udp/4001/quic-v1/p2p/12D3KooWBvuV5RKq8WqhGhWTm25uJwyjBp8RV14sJQ9JwPiTu1KX/p2p-circuit/p2p/12D3KooWNTinNPaFXPyoo2wGcrjFC4uekMgDM5pFY46zwxxgL8HA', '/ip4/144.202.90.11/udp/4001/quic/p2p/12D3KooWBvuV5RKq8WqhGhWTm25uJwyjBp8RV14sJQ9JwPiT

### Publish

#### Strings

In [42]:
def add_str(string: str):
    with connect() as c:
        return c.add_str(string)

returned_hash = add_str(u'Hello World!')
print(returned_hash)

Qmf1rtki74jvYmGeqaaV51hzeiaa6DyWc98fzDiuPatzyy


#### Bytes

In [43]:
from cid import make_cid

def add_bytes(data: bytes):
    with connect() as c:
        return c.add_bytes(data)
    
returned_hash = add_bytes(b'Hello World!')

# convert to CIDv1
returned_hash = make_cid(returned_hash).to_v1()

print(returned_hash.__dict__)

print(returned_hash)

{'_version': 1, '_codec': 'dag-pb', '_multihash': b'\x12 \xf7\xc6k\x1c\x005\xd5-\\\x15\xbe\x7f\x07\xe8F\xe43\xcc\xa9g\x18n\x16\xc31mYW\xc5*\x944'}
zdj7Wn77GBmxYDXRUaaaKY6wPYZGvn1omPeUTYCf27aD2Tu9u


#### JSON

In [44]:
from cid import CIDv0, CIDv1, make_cid

def add_json(json: dict):
    with connect() as c:
        return c.add_json(json)
    
returned_hash = add_json({'hello': 'world'})

# convert to CIDv1
returned_hash = make_cid(returned_hash).to_v1()

print(returned_hash)

zdj7WVwV64Uhe5ZpkZvh7qm6UgHrRmJcUR6Lp6amyvRtX8opi


#### Files

In [45]:
def add_file(path: str):
    with connect() as c:
        return c.add(path)
    
returned_hash = add_file('concepts_cid_test.json')
print(returned_hash['Hash'])

print(returned_hash)
cidv1 = make_cid(returned_hash['Hash']).to_v1()
print(cidv1)

QmUJarh6GuMaNgpkvkC1CoRk8qDk6J39CopoYi9fVdSEiv
<ipfshttpclient2.client.base.ResponseBase: {'Name': 'concepts_cid_test.json', 'Hash': 'QmUJarh6GuMaNgpkvkC1CoRk8qDk6J39CopoYi9fVdSEiv', 'Size': '349'}>
zdj7WbPqE8A8NqBFQ8gfV9d5BGJm3RBoqTH58ELDWYLK518tr


## Hash calculation

In [46]:
# pip install pycroptodome
# pip install py-cid
# pip install merkly

In [47]:
import io
import base58
import json
# import BytesIO
from typing import Callable, Iterable
from cid import CIDv1, make_cid, CIDv0
# from multiformats_cid import make_cid, CIDv0, CIDv1, cid
# from Crypto.Hash import SHA256
import hashlib
from merkly.mtree import MerkleTree
import multihash
import codecs
import ipfs_api
import multibase

multipart = ipfs_api.ipfshttpclient.client.multipart

def as_chunks(stream: io.BytesIO, chunk_size: int) -> Iterable[bytes]:
    while len(chunk := stream.read(chunk_size)) > 0:
        yield chunk

def chunk_to_leaf(chunk: bytes) -> bytes:
    return hashlib.sha256(chunk).digest()

def data_to_tree(data: bytes, chunk_size: int) -> list[str]:
    # Create a buffer to hold the tree
    tree: list[bytes] = []

    # Iterate over the chunks
    for chunk in as_chunks(io.BytesIO(data), chunk_size):
        # Calculate the leaf hash
        tree.append(chunk_to_leaf(chunk))
        # print(tree)

    # Return the tree
    return tree

def tree_to_root(tree: list[str]) -> str:
    sha256_hash_funciton: Callable[[str], str] = lambda x, y: str(hashlib.sha256(x.encode() + y.encode()).hexdigest())

    # Calculate the root hash
    mt = MerkleTree(tree, sha256_hash_funciton)
    root = mt.root
    print(f'raw_root: {root}')
    # root = codecs.decode(root, 'hex')
    # print('root:', root.hex())
    return root

def calculate_ipfs_hash(data: bytes):
    # Calculate the SHA256 hash of the data
    sha256_hash = hashlib.sha256(data).digest()

    # Create a multihash for the SHA256 hash
    # Multihash format: <hash function code><digest size><digest>
    # SHA2-256 function code is 0x12, digest size is 0x20 (32 in decimal)
    mh = b'\x12\x20' + sha256_hash

    # Create a CIDv0
    # cidv0: CIDv0 = CIDv0(mh)

    # return cidv0

    # Create a CIDv0 for the multihash
    c_old: CIDv0 = make_cid(base58.b58encode(mh))
    print('CIDv0:', c_old)
    print(f'CIDv0 to CIDv1: {c_old.to_v1()}')

    c: CIDv1 = make_cid(1, 'dag-pb', mh)
    # c.buffer = c.encode()

    return str(c)

def calculate_cid_from_tree(data: bytes, chunk_size: int | None = 256 * 1024) -> str:
    # Calculate the tree
    # tree = data_to_tree(data, chunk_size)

    multipart_encoder = multipart.stream_bytes(data, chunk_size=chunk_size)

    # Calculate the root hash
    # root = tree_to_root(tree)
    # print('root:', root)
    # root_hashed_256 = hashlib.sha256(root.encode()).digest()
    print(f'root_hashed_256: {multipart_encoder[1]}')
    root_hashed_256 = json.dumps(multipart_encoder[1]).encode()
    # print(f'root_hashed_256: {root_hashed_256.hex()}')

    root_hashed_256 = hashlib.sha256(root_hashed_256).digest()
    mh = multihash.encode(root_hashed_256, 'sha2-256')
    print('mh:', mh)

    # Create a CIDv0 for the root hash
    # mh_root = b'\x12\x20' + codecs.decode(root, 'hex')
    # mh_root = b'\x12\x20' + codecs.decode(root, 'hex')
    mh_root = b'\x12\x20' + root_hashed_256
    print('mh_root:', mh_root)
    c_old: CIDv0 = make_cid(base58.b58encode(mh_root))
    print('CIDv0:', c_old)

    # Create a CIDv1 for the root hash
    cidv0_2: CIDv1= make_cid(1, 'dag-pb', mh_root)
    print('CIDv0_2:', cidv0_2)
    # print('CIDv0_2:', cidv0_2.to_v1())

    # Return the CIDv0
    return cidv0_2

# Read the file data
with open('concepts_cid_test.json', 'rb') as f:
    data = f.read()

# Calculate the IPFS hash
# ipfs_hash = calculate_ipfs_hash(data)
print(f'hash from mt: {calculate_cid_from_tree(data)}')
print(f'hash from ipfs add function: {returned_hash["Hash"]}')
# print(f'cidv1: {calculate_cid_from_tree(data).to_v1()}')

# print('IPFS Hash:', ipfs_hash)

root_hashed_256: {'Content-Disposition': 'form-data; name="file"; filename="bytes"', 'Content-Type': 'multipart/form-data; boundary="f58a35bf4148471c9a4c6f7fb3c2e358"'}
mh: b'\x12 \x86\x805\xdc\xc6M\xc9\xddd8c\xaf%a\x0b\xae@\\\xfa\xe7l\xb1\x85\x83\xc7=Y=\xe1\tZg'
mh_root: b'\x12 \x86\x805\xdc\xc6M\xc9\xddd8c\xaf%a\x0b\xae@\\\xfa\xe7l\xb1\x85\x83\xc7=Y=\xe1\tZg'
CIDv0: QmXPgjMDoCBxB1tECdoUxwMhhckhjgtDNedpaZswrjZWt6
CIDv0_2: zdj7WeUw6nHeffZ3jC9wNm6qKCGKpx9TEJMEy3MFNGcgB8R42
hash from mt: zdj7WeUw6nHeffZ3jC9wNm6qKCGKpx9TEJMEy3MFNGcgB8R42
hash from ipfs add function: QmUJarh6GuMaNgpkvkC1CoRk8qDk6J39CopoYi9fVdSEiv


#### Create IPLD object

In [48]:
import multihash
import cid

class IPLDObject:
    def __init__(self, data: bytes):
        self.data: bytes = data
        self.links = {}

    def add_link(self, name, other):
        self.links[name] = other.cid()

    

    def cid(self):
        # Calculate the multihash of the data
        mh = multihash.encode(hashlib.sha256(self.data).digest(), 'sha2-256')

        # Create a CID for the multihash
        return cid.make_cid(0, 'dag-pb', mh).encode()

    def serialize(self):
        # Serialize the data and links
        return {
            'data': self.data,
            'links': [{name: cid for name, cid in self.links.items()}],
        }

    @staticmethod
    def deserialize(serialized):
        # Deserialize the data and links
        obj = IPLDObject(serialized['data'])
        obj.links = serialized['links']
        return obj
        
# Create the objects
obj1 = IPLDObject(b'Hello World!')
obj2 = IPLDObject(b'Hello IPFS!')

# Link the objects
# obj1.add_link('next', obj2)
# obj2.add_link('prev', obj1)

# Serialize the objects
serialized = obj1.serialize()
serialized_two = obj2.serialize()

# Deserialize the objects
# obj1 = IPLDObject.deserialize(serialized)
# obj2 = IPLDObject.deserialize(serialized['links']['next'])

# convert to CIDv1
obj1 = make_cid(obj1.cid()).to_v1()

# Print the CIDs
print('obj1 CID:', obj1)
# print('obj2 CID:', obj2.cid())

# Print the links
# print('obj1 links:', obj1.links)
# print('obj2 links:', obj2.links)


obj1 CID: zdj7We1fKtuSjWA1zhER12jc7mydHG43F22AyC8eLrBaBF9Da


In [49]:
import cbor2

class IPLDObject:
    def __init__(self, data):
        self.data = data
        self.links = {}

    def add_link(self, name, other):
        self.links[name] = other.cid()

    def cid(self, version=1):
        # Encode the data and links in CBOR
        encoded = cbor2.dumps({'data': self.data, 'links': self.links})
        print('encoded:', encoded)

        # Calculate the multihash of the encoded data
        mh = multihash.encode(hashlib.sha256(encoded).digest(), 'sha2-256')
        # mh = multihash.encode(encoded, 'sha2-256')

        if version == 1:
        # Create a CIDv1 for the multihash
            return cid.make_cid(1, 'dag-cbor', mh).encode()
        else:
            print('CIDv0 cannot be created from CIDv1 with cbor encoding')

    def serialize(self) -> bytes:
        # Serialize the data and links
        return cbor2.dumps({'data': self.data, 'links': self.links})

    @staticmethod
    def deserialize(serialized):
        # Deserialize the data and links
        obj = IPLDObject(cbor2.loads(serialized)['data'])
        obj.links = cbor2.loads(serialized)['links']
        return obj
    
# Create the objects
obj1 = IPLDObject(b'Hello World!')
# obj2 = IPLDObject(b'Hello IPFS!')

# Link the objects
# obj1.add_link('next', obj2)
# obj2.add_link('prev', obj1)

# Serialize the objects
serialized = obj1.serialize()
# serialized_two = obj2.serialize()
print('serialized:', serialized)

# Deserialize the objects
obj1 = IPLDObject.deserialize(serialized)
# obj2 = IPLDObject.deserialize(serialized_two)

# Print the CIDs
print('obj1 CID:', obj1.cid())
# print('obj2 CID:', obj2.cid())

serialized: b'\xa2ddataLHello World!elinks\xa0'
encoded: b'\xa2ddataLHello World!elinks\xa0'
obj1 CID: b'zdpuAwHj7GmEXwDjMT34HzPWrDec9qZSWFofHUi4FAzeJDUNJ'


In [50]:
import json
import multihash
import cid

class IPLDObject:
    def __init__(self, data):
        self.data = data
        self.links = {}

    def add_link(self, name, other):
        self.links[name] = other.cid()

    def cid(self):
        # Encode the data and links in JSON
        encoded = json.dumps({'data': self.data, 'links': self.links}).encode('utf-8')
        print('encoded:', encoded)

        # Calculate the multihash of the encoded data
        mh = multihash.encode(hashlib.sha256(encoded).digest(), 'sha2-256')

        # Create a CIDv1 for the multihash
        return cid.make_cid(1, 'dag-json', mh)

    def serialize(self) -> str:
        # Serialize the data and links
        return json.dumps({'data': self.data, 'links': self.links})

    @staticmethod
    def deserialize(serialized):
        # Deserialize the data and links
        obj_dict = json.loads(serialized)
        obj = IPLDObject(obj_dict['data'])
        obj.links = obj_dict['links']
        return obj

# Create the objects
obj1 = IPLDObject('Hello World!')
obj2 = IPLDObject('Hello IPFS!')

# Link the objects
# obj1.add_link('next', obj2)
# obj2.add_link('prev', obj1)

# Serialize the objects
serialized = obj1.serialize()
serialized_two = obj2.serialize()

# Deserialize the objects
obj1 = IPLDObject.deserialize(serialized)
obj2 = IPLDObject.deserialize(serialized_two)

# Print the CIDs
print('obj1 CID:', obj1.cid())
print('obj2 CID:', obj2.cid())


encoded: b'{"data": "Hello World!", "links": {}}'
obj1 CID: z4EBG9j6p5ReZnuDBNmJ8a6imnkxz7CFwkhtTaoBjeAX6B2SiAW
encoded: b'{"data": "Hello IPFS!", "links": {}}'
obj2 CID: z4EBG9j8KgufqvLDcw6UukLChJKffi5f49KFk6LUjFs1kuDfurp


In [51]:
# import cbor2
import hashlib
from multihash import encode as multihash_encode
from cid import make_cid

# Your data
data = {'hello': 'world'}

# Serialize the data using DAG-CBOR
serialized = json.dumps(data).encode('utf-8')

# Hash the serialized data using SHA-256
hashed = hashlib.sha256(serialized).digest()

# Encode the hash as a multihash
mh = multihash_encode(hashed, 'sha2-256')

# Encode the multihash as a CIDv1 with the DAG-CBOR codec
cid_ = make_cid(1, 'dag-json', mh)

print(cid_)



z4EBG9j6YnWGhq3XEDXD1Ji8CJND3bY4NvrCo1qwPeqCfrxPuzx


In [52]:
import cid
import cbor2
from multihash import decode as multihash_decode

# # Your CID
# cid_str = cid_.buffer.hex()

# # Create a CID object
# cid__ = cid.CIDv1('json', cid_str)
# print('cid:', cid__)

# Decode the multihash
mh = multihash_decode(cid_.multihash)
print('mh:', mh)

# Hash function used (e.g., 'sha2-256')
hash_function = mh.name
print('hash_function:', hash_function)

# Original hash (as bytes)
original_hash = mh.digest
print('original_hash: ', original_hash.hex())


# If you have the original serialized data, you can deserialize it
# For example, if the data was serialized using DAG-CBOR:
# serialized_data = ...
# data = cbor2.loads(serialized_data)

mh: Multihash(code=18, name='sha2-256', length=32, digest=b'_\x8f\x04\xf6\xa3\xa8\x92\xaa\xab\xbd\xdbl\xf2s\x89D\x93w9`\xd4\xa3%\xb1\x05\xfe\xe4n\xefC\x04\xf1')
hash_function: sha2-256
original_hash:  5f8f04f6a3a892aaabbddb6cf273894493773960d4a325b105fee46eef4304f1


### IPLD

In [53]:
# pip install ipld - Not working on Python 3.12.0

In [54]:
# from ipld import Ipld, Link

# # Create a node
# node = Ipld()

# # Add data to the node
# node['data'] = b'Hello, world!'

# # Create a link to another node
# link = Link('/ipfs/QmT78zSuBmuS4z925WZfrqQ1qHaJ56DQaTfyMUF7F8ff5o')
# node['link'] = link

# # Serialize the node
# serialized_node = node.serialize()

# # Calculate the CID of the node
# cid = node.cid()

# print('CID:', cid)