- CID decoding algorithm (multiformats/cid README): https://github.com/multiformats/cid/blob/ef1b2002394b15b1e6c26c30545fd485f2c4c138/README.md#decoding-algorithm
- Stack Overflow answer: https://stackoverflow.com/a/51304779/6794086
- pb2nano writer source: https://github.com/jgraef/python3-pb2nano/blob/master/pb2nano/writer.py
- UnixFS protobuf schema (go-unixfs): https://github.com/ipfs/go-unixfs/blob/master/pb/unixfs.proto

## UnixFS protobuf messages with pb2nano

In [261]:
from pb2nano.protocol import Pb2Enum, Pb2Message, Pb2Protocol

# Enum naming the kinds of object a `Data` message can describe.
# Presumably mirrors the go-unixfs `unixfs.proto` schema linked at the top
# of the notebook -- verify against that file.
DataType = (
    Pb2Enum("DataType")
    .define("Raw", 0)
    .define("Directory", 1)
    .define("File", 2)
    .define("Metadata", 3)
    .define("Symlink", 4)
)

# The `Data` message: a required type tag, an optional payload, and optional
# size bookkeeping fields.
Data = (
    Pb2Message("Data")
    .field("required", "DataType", "Type", 1)
    .field("optional", "bytes", "Data", 2)
    .field("optional", "uint64", "filesize", 3)
    .field("repeated", "uint64", "blocksize", 4)
)

# The `Metadata` message: a single required MIME type string.
Metadata = Pb2Message("Metadata").field("required", "string", "MimeType", 1)

# Protocol object bundling the enum and both messages for the writer.
UnixFsProtocol = Pb2Protocol().enum(DataType).message(Data).message(Metadata)

In [266]:
import pb2nano
import pb2nano.writer

In [307]:
# Serialize a minimal `Data` message (Type=File, Data=b'hello') to ./test.txt.
with open('./test.txt', 'wb') as f:
    ww = pb2nano.writer.Pb2WireWriter(f)

    w = pb2nano.writer.Pb2Writer(ww, UnixFsProtocol, UnixFsProtocol.messages['Data'])
    w.write({
        'Type': 'File',
        'Data': b'hello',
    })

# Read the file back in binary mode. It was written as raw protobuf bytes, so
# text mode would decode it with the locale encoding (and translate newlines),
# which fails or corrupts the data for any non-text payload.
with open('./test.txt', 'rb') as f:
    content = f.read()

## CID and multihash experiments

In [49]:
import base58

In [81]:
import multihash

In [1]:
import sys
from pathlib import Path

# The parent of the current working directory must be the repository root;
# the assertion message shows the resolved path when it is not.
repo_dir = Path('..').resolve()
assert repo_dir.name == 'hicetnunc-dataset', repo_dir

# Put the repository root on the import path once, so `src` can be imported.
if sys.path.count(str(repo_dir)) == 0:
    sys.path.append(str(repo_dir))

# Import the project's reload helper and invoke it.
import src.reload
src.reload.reload()

In [72]:
import cid

In [104]:
# Split the CID's multihash into its fields (code, name, length, digest).
# NOTE(review): `c` is assigned in the following cell (In [103]), which was
# executed before this one -- run that cell first on a fresh kernel.
multihash.decode(c.multihash)

Multihash(code=18, name='sha2-256', length=32, digest=b'\xadc\x85j\x8b3\xe8D\x07\xe0\xdaW\xe9\xc9\xb1\x8b\x07UF\xd8n.K=8N\xaf\x95\x06\xcd\x05h')

In [103]:
# Parse a base58 CID string into a CID object.
# The call is qualified with the module name because the notebook only does
# `import cid`; a bare `make_cid` is not in scope on a fresh kernel.
c = cid.make_cid('Qma1VFw4VhCBcZM5wFXgeqLwYuFcxyVFGRFk6XKYM9xa7H')

In [79]:
# Base58-decode a 'Qm...' CID string into raw bytes.
# The recorded output starts with 0x12 0x20 (b'\x12 '), i.e. 18 and 32 --
# the same code/length values shown for sha2-256 in the decoded multihash
# elsewhere in this notebook.
base58.b58decode('Qma9cbtRfqsptPs3fuJ9xDUGK1zs4mccs4n8x5gM91HLUU')

b'\x12 \xafxx\xb8\x9c\xcb\xd7\x05\xa4\xb5r\xd4\x1f\xe2\x8a\xbf_\x86\xb6oB\xf8\x1c9\x0e\xa3\xc7\x9bPj3\xe5'

In [80]:
# Base58-encode the raw digest of the hash object `m`.
# NOTE(review): `m` is only assigned further down (In [335]); this cell was
# run as In [80], so its recorded output presumably reflects an older `m`.
base58.b58encode(m.digest())

b'6UxYbEZZtiko2CxQAE5R8bcCJqUvd8Vmx5a7qJthFpvq'

In [194]:
## https://github.com/multiformats/cid/blob/ef1b2002394b15b1e6c26c30545fd485f2c4c138/README.md#decoding-algorithm

In [329]:
# Size in bytes of the file at `fpath`.
# NOTE(review): `fpath` is assigned in the cell below (In [335]).
len(fpath.read_bytes())

599

In [335]:
import hashlib
import base64

# Use the first entry yielded by the directory listing as the sample file.
# `next` with a default replaces the enumerate/break loop, and the assertion
# makes an empty directory fail loudly instead of leaving `fpath` unset (or
# stale from an earlier run of the notebook). The old loop index `no` is no
# longer bound; nothing visible in the notebook reads it.
fpath = next(src.config.ipfs0_dir.iterdir(), None)
assert fpath is not None, f'no entries found in {src.config.ipfs0_dir}'

# Serialize the sample file's bytes as a `Data` message (Type=File) and write
# the encoded message to ./test.txt, overwriting any previous content.
with open('./test.txt', 'wb') as f:
    ww = pb2nano.writer.Pb2WireWriter(f)

    w = pb2nano.writer.Pb2Writer(ww, UnixFsProtocol, UnixFsProtocol.messages['Data'])
    w.write({
        'Type': 'File',
        'Data': fpath.read_bytes(),
        # The optional size fields are deliberately left out for now.
        # NOTE(review): the digest comparison at the end of this cell does not
        # match; presumably the real hash covers a different serialization
        # (e.g. with `filesize` set and/or an outer wrapper) -- confirm.
        # 'filesize': len(fpath.read_bytes()),
        # 'blocksize': [],
    })

# SHA-256 hash object over the serialized message stored in ./test.txt.
m = hashlib.sha256(Path('./test.txt').read_bytes())

# Sanity-check that the file name has the shape expected of a base58 CID
# string here: a 'Qm' prefix and 46 characters.
assert fpath.name.startswith('Qm')
assert len(fpath.name) == 46

# c = cid.make_cid(fpath.name)
# Parse the file name as a CID and decode the multihash it carries.
c = cid.from_string(fpath.name)
mh = multihash.decode(c.multihash)
# Show the digest taken from the file name next to the locally computed one;
# in the recorded output the two values differ.
mh.digest, m.digest()

(b'\xadDi\x17b\x1f$\xf9\xf7\xae\xcd\x92\xd8\x01\xbdI\xf9\x8c/.7\x9c\xa4\xbd\xeb\xec\xaa\xc4_\xab\xaaG',
 b'\x1b\xaf\xd6\x19uH\xf4z\x03.\x90E\xb4Z\xdb\xe6\x13\xf1\xe5\x04\xd6x;\x14(\x9f\x02p:%b\xc9')

In [248]:
# Build a CIDv0 object directly from the locally computed SHA-256 digest.
# NOTE(review): a bare digest is passed, without any multihash prefix --
# confirm that this is what CIDv0 expects.
cd = cid.CIDv0(m.digest())

In [249]:
# Encoded form of `cd`; the recorded output is a bytes value that does not
# start with 'Qm'.
cd.encode()

b'FqwCLKvMoJ16A8XBEfNHPAH1neBbM7CavjgF9apqD15L'

In [252]:
# Base58 string of the digest decoded from the file name's CID.
multihash.to_b58_string(mh.digest)

'DF6m9kvZLuBeQ28YZKsxCXtAqXFa3FbMhYDfMoz1NQTv'

In [251]:
# Base58 string of the locally computed digest; the recorded output has the
# same characters as the recorded `cd.encode()` result (str instead of bytes).
multihash.to_b58_string(m.digest())

'FqwCLKvMoJ16A8XBEfNHPAH1neBbM7CavjgF9apqD15L'

In [253]:
# The sample file's name and the length of that name.
fpath.name, len(fpath.name)

('QmaakUFtv57tfGzfYibXbRrhcAM17AguA3mrEX3yPKa6Xv', 46)

In [78]:
# Base58-encode the raw digest of `m`.
# NOTE(review): this cell ran as In [78], long before the cell that currently
# assigns `m` (In [335]); its recorded output presumably reflects an older `m`.
base58.b58encode(m.digest())

b'6UxYbEZZtiko2CxQAE5R8bcCJqUvd8Vmx5a7qJthFpvq'

In [40]:
import binascii

B58_DIGITS = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'

def encode(b):
    """Encode bytes to a base58-encoded string.

    The payload is read as one big-endian unsigned integer and written in
    base 58 using ``B58_DIGITS``. Each leading zero byte is emitted as one
    leading ``B58_DIGITS[0]`` character, since zero bytes do not contribute
    to the integer value.

    Args:
        b: bytes-like payload (``bytes`` or ``bytearray``).

    Returns:
        The base58 text as ``str``; an empty payload gives ``''``.
    """
    base = len(B58_DIGITS)

    # Convert big-endian bytes to integer (no hex-string round trip needed).
    n = int.from_bytes(b, 'big')

    # Peel off base-58 digits, least significant first, then reverse.
    res = []
    while n > 0:
        n, r = divmod(n, base)
        res.append(B58_DIGITS[r])
    res = ''.join(reversed(res))

    # Count leading zero bytes. Iterating a bytes-like object yields ints, so
    # no interpreter-version check (and no dependency on `sys`) is required.
    pad = 0
    for c in b:
        if c != 0:
            break
        pad += 1

    return B58_DIGITS[0] * pad + res

In [35]:
alphabet = '123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ'
base_count = len(alphabet)

def b58_encode(num):
    """Return the integer ``num`` written in base 58 using ``alphabet``.

    Note that ``alphabet`` lists lowercase letters before uppercase ones.
    Negative numbers and zero both yield an empty string.
    """
    if num < 0:
        return ''

    # Collect digits least-significant first, then flip at the end.
    digits = []
    while num >= base_count:
        num, remainder = divmod(num, base_count)
        digits.append(alphabet[remainder])

    # The leftover value is the most significant digit; zero adds nothing.
    if num:
        digits.append(alphabet[num])

    return ''.join(reversed(digits))