### Bencode Decoder 

| Type        | Format                                           |
| ----------- | ------------------------------------------------ |
| Integer     | `i<digits>e`                                     |
| Byte string | `<len>:<raw bytes>`                              |
| List        | `l<values>e`                                     |
| Dict        | `d<key><value>e` (keys are byte strings, sorted) |


In [2]:
'''
Pseudocode flow 

decode():
    peek next byte
    if 'i' → parse_int()
    if 'l' → parse_list()
    if 'd' → parse_dict()
    if digit → parse_string()
'''

"\nPseudocode flow \n\ndecode():\n    peek next byte\n    if 'i' → parse_int()\n    if 'l' → parse_list()\n    if 'd' → parse_dict()\n    if digit → parse_string()\n"

bdecode()
→ {b'cow': b'moo', b'spam': b'eggs'}

In [4]:
import sys

In [None]:
# Sample bencoded dictionary with two byte-string entries (cow -> moo, spam -> eggs).
s = b"d3:cow3:moo4:spam4:eggse"

def parse_dict(s, p):
    """Decode the bencoded dictionary whose ``d`` marker is at ``s[p]``.

    Args:
        s: The bencoded data (a bytes object).
        p: Index of the ``d`` marker that opens the dictionary.

    Returns:
        A ``(dict, new_pointer)`` tuple; ``new_pointer`` is the index just
        past the closing ``e``.

    Raises:
        ValueError: If the input ends before the closing ``e``, if a key is
            not a byte string, or if a key has no value after it.
    """
    end = len(s)
    p += 1  # step over the leading 'd'
    d = {}
    while p < end and s[p] != 101:  # 101 == ord('e'), the terminator
        # Bencode keys must be byte strings, which always start with a digit.
        if not 48 <= s[p] <= 57:
            raise ValueError(
                f"Invalid bencode dictionary: key at offset {p} is not a byte string"
            )
        key, p = decode(s, p)
        if p >= end:
            raise ValueError(
                f"Invalid bencode dictionary: key {key!r} has no value"
            )
        value, p = decode(s, p)
        d[key] = value

    if p >= end:
        # Ran off the end of the buffer without ever seeing the closing 'e'.
        raise ValueError("Invalid bencode dictionary: missing terminating 'e'")

    return d, p + 1  # + 1 moves past the closing 'e'

def parse_list(s, p):
    """Decode the bencoded list whose ``l`` marker is at ``s[p]``.

    Works like ``parse_dict`` but collects single values instead of
    key/value pairs.

    Args:
        s: The bencoded data (a bytes object).
        p: Index of the ``l`` marker that opens the list.

    Returns:
        A ``(list, new_pointer)`` tuple; ``new_pointer`` is the index just
        past the closing ``e``.

    Raises:
        ValueError: If the input ends before the closing ``e``.
    """
    end = len(s)
    p += 1  # step over the leading 'l'
    items = []
    while p < end and s[p] != 101:  # 101 == ord('e'), the terminator
        item, p = decode(s, p)
        items.append(item)

    if p >= end:
        # Ran off the end of the buffer without ever seeing the closing 'e'.
        raise ValueError("Invalid bencode list: missing terminating 'e'")

    return items, p + 1  # + 1 moves past the closing 'e'


def parse_int(s, p):
    """Decode the bencoded integer (``i<digits>e``) whose ``i`` is at ``s[p]``.

    Args:
        s: The bencoded data (a bytes object).
        p: Index of the ``i`` marker that opens the integer.

    Returns:
        An ``(int_value, new_pointer)`` tuple; ``new_pointer`` is the index
        just past the closing ``e``.

    Raises:
        ValueError: If there is no closing ``e`` or the text between the
            markers is not an optional ``-`` followed by ASCII digits.
    """
    end = s.find(b"e", p + 1)
    if end == -1:
        raise ValueError("Invalid bencode integer: missing terminating 'e'")

    body = s[p + 1:end]
    # Check the digits explicitly: int() alone would also accept forms such
    # as "1_0", " 5" or "+5", none of which are valid bencode.
    digits = body[1:] if body[:1] == b"-" else body
    if not digits.isdigit():
        raise ValueError(f"Invalid bencode integer: {bytes(body)!r}")

    return int(body), end + 1  # + 1 moves past the closing 'e'


def parse_string(s, p):
    """Decode the bencoded byte string (``<len>:<raw bytes>``) at ``s[p]``.

    Reads the decimal length up to the ``:`` and then slices that many raw
    bytes.

    Args:
        s: The bencoded data (a bytes object).
        p: Index of the first digit of the length prefix.

    Returns:
        A ``(byte_values, new_pointer)`` tuple; ``new_pointer`` is the index
        just past the last byte of the string.

    Raises:
        ValueError: If the length prefix is not followed by ``:`` or the
            declared length runs past the end of the input.
    """
    end = len(s)

    num = 0
    while p < end and 48 <= s[p] <= 57:   # '0'..'9'
        num = num * 10 + (s[p] - 48)
        p += 1

    # The length prefix must be followed by ':' (58); hitting the end of the
    # input first is just as invalid.
    if p >= end or s[p] != 58:
        raise ValueError("Invalid bencode string")

    p += 1  # move past the colon
    stop = p + num
    if stop > end:
        # A bare slice would silently return fewer bytes than declared.
        raise ValueError(
            f"Invalid bencode string: declared length {num} exceeds remaining data"
        )

    return s[p:stop], stop


def decode(s, p=0):
    """Decode the single bencoded value that starts at ``s[p]``.

    Dispatches on the byte under the pointer: ``d`` -> dictionary,
    ``i`` -> integer, ``l`` -> list, ASCII digit -> byte string.

    Args:
        s: The bencoded data (a bytes object).
        p: Index at which the value starts (defaults to 0).

    Returns:
        A ``(value, new_pointer)`` tuple; ``new_pointer`` is the index just
        past the decoded value.

    Raises:
        ValueError: If ``p`` is at or past the end of the input, or the byte
            at ``p`` does not start any bencode type. An exception is raised
            rather than exiting the interpreter so callers can handle
            malformed input.
    """
    if p >= len(s):
        raise ValueError(
            f"Invalid bencode data: unexpected end of input at offset {p}"
        )

    marker = s[p]
    if marker == 100:  # 'd'
        return parse_dict(s, p)
    if marker == 105:  # 'i'
        return parse_int(s, p)
    if marker == 108:  # 'l'
        return parse_list(s, p)
    if 48 <= marker <= 57:  # '0'..'9' starts a byte string's length prefix
        return parse_string(s, p)

    raise ValueError(
        f"Invalid bencode data: can't determine type of byte {marker} at offset {p}"
    )


In [None]:
# Hand-written bencoded sample with a nested b"info" dictionary and a b"files"
# list. It is not used by the rest of this cell.
# NOTE(review): the root dictionary looks like it is missing its closing 'e'
# (the trailing "eee" closes the integer, the file dict and the files list) —
# confirm before feeding this sample to decode().
s = b"d4:infod4:name11:sample-file6:lengthi98765e6:piecesl20:AAAAAAAAAAAAAAAAAAAA20:BBBBBBBBBBBBBBBBBBBBee5:filesld4:pathl6:folder9:file1.txte6:lengthi123eed4:pathl6:folder9:file2.txte6:lengthi456eee"


# Read the torrent file in binary mode so decode() receives raw bytes.
with open("ubuntu.iso.torrent", "rb") as f:
    data = f.read()

# decode() returns (value, new_pointer); only the decoded value is kept here.
root, _ = decode(data, 0)

### Tracker Discovery 

In [65]:
root[b'info']

{b'length': 5702520832,
 b'name': b'ubuntu-25.10-desktop-amd64.iso',
 b'piece length': 262144,
 b'pieces': b'U[\xb5\x8f\xab\x90\x93\xef\xce\xd0\xb4\x8e\x00X\xd0>\xe9\x1d\x17p9{\xe5\xebbA\xf1\xd7\xa3Q\x96\xaf&G7\x88\x17B\xb0\x90\xc4\xd1\x9a\xaf2\x91\ny,\xbd>LK\x16^S\x14#\xf7{\xc3\xce\x98\xdc\x99"[\x07\xf55q\rk\x8cPX\xc3\x87\xaeN}\xe0M\xbfP\xb16\x1f\xa3\xa1\x1d\x93\xe3\xe10\xec78\x16\x95\xedB,\xd0J\xc9!\xb7s\n"\xe8O\xe7O\xb3L\xf5\x8d\x0f\x1f:\xf4\x0b\x8c3\xe8I\xa8l\xad\x87\xe1lNF\x88n\xcfm\xa5\x17n\x8c\x93~\xa3\xb9\x82\x93j\x07\x99v7,*4\xb7\xb3\xdd\x1f\x87\xe3\xa4\x9c\x84\xa9\x84\xf3;\x902\xa2\xf7\x17/\x0e9S\x8f\xb6\x90j&\xa4vdrf\xad&d\x0b\xe0}8\x8b\xe4\xed\x85~1\xfeGL\xb9\xba\xd9\x9dHs%M\xa32ML\xc1\xa2\xcb"\xa8\x8bO\xfc\x87\xe9\x07\x94$\x04*\x9a%\xddm\xd5\xe9\xec\x8fw\xb3\xcd$\xad\xc1\x1c\x04\xdb<\xd49\xee\xa2\x8d9\xc2\xcf\x81\xaf\xbe\x93\xe2\xdf\xfc\xe4\xfb\xda\xc6\xc7\x14ec\xf0\x90\xe20\xd0x\xa1\tSy\t\xa7m\xdb-\x12#|\x8c\xd4\x1eI\xd1\xdc\x051w\xb78\xcd{\x00\x1c\xf7\x90|\x1f\xdb:\x9f\x

{
  b"name": b"ubuntu.iso",
  b"length": 12345
}

b"d4:name10:ubuntu.iso6:lengthi12345ee"

In [79]:
# Example dictionary to encode; note the keys are listed with b"name" before
# b"length", i.e. not in sorted order.
d = {
  b"name": b"ubuntu.iso",
  b"length": 12345
}

def encode_int(i):
    """Bencode an integer as ``i<digits>e``.

    Args:
        i: The integer to encode.

    Returns:
        The encoded bytes, e.g. ``b"i42e"``.
    """
    # bool (and other int subclasses) may stringify as something other than
    # digits, e.g. str(True) == "True"; normalise to a plain int first so the
    # output is always valid bencode.
    if isinstance(i, int):
        i = int(i)

    return b"i" + str(i).encode() + b"e"

def encode_string(s):
    """Bencode a byte string as ``<len>:<raw bytes>``.

    Args:
        s: The bytes to encode.

    Returns:
        The length prefix, a colon, and then ``s`` itself.
    """
    return b"%d:" % len(s) + s

def encode_dict(d):
    """Bencode a dictionary as ``d<key><value>...e`` with keys in sorted order.

    Keys and values are encoded recursively through ``encode``.

    Args:
        d: Dictionary whose keys are bytes and whose values are encodable.

    Returns:
        The encoded bytes.

    Raises:
        TypeError: If a key is not a bytes object (bencode dictionary keys
            must be byte strings), or a value has an unsupported type.
    """
    # Validate up front: an int key would otherwise be emitted as i<n>e,
    # which is not a valid bencode dictionary key.
    for key in d:
        if not isinstance(key, bytes):
            raise TypeError(
                f"Bencode dictionary keys must be bytes, got {type(key)}"
            )

    parts = [b"d"]
    for key in sorted(d):
        parts.append(encode(key))
        parts.append(encode(d[key]))
    parts.append(b"e")

    return b"".join(parts)

def encode_list(l):
    """Bencode a list as ``l<values>e``.

    Each element is encoded through ``encode`` in its original order.

    Args:
        l: The items to encode.

    Returns:
        The encoded bytes.
    """
    encoded_items = [encode(element) for element in l]
    return b"l" + b"".join(encoded_items) + b"e"

def encode(x):
    """Bencode ``x`` by handing it to the encoder that matches its type.

    Supported types are int, bytes, list and dict.

    Args:
        x: The value to encode.

    Returns:
        The encoded bytes.

    Raises:
        TypeError: If ``x`` is not one of the supported types.
    """
    if isinstance(x, int):
        return encode_int(x)

    if isinstance(x, bytes):
        return encode_string(x)

    if isinstance(x, list):
        return encode_list(x)

    if isinstance(x, dict):
        return encode_dict(x)

    # Nothing matched: refuse rather than guess at an encoding.
    raise TypeError(f"Unsupported type for bencode: {type(x)}")


# encode_dict writes keys in sorted order, so b"length" comes out before b"name".
print(encode(d))

b'd6:lengthi12345e4:name10:ubuntu.isoe'


In [None]:
# Round trip: the re-encoded bytes differ slightly from the input because the
# encoder writes dictionary keys in lexicographically sorted order, whereas the
# input has b"name" before b"length".
encode(decode(b"d4:name10:ubuntu.iso6:lengthi12345ee")[0])

b'd6:lengthi12345e4:name10:ubuntu.isoe'

In [89]:
root[b"info"]

{b'length': 5702520832,
 b'name': b'ubuntu-25.10-desktop-amd64.iso',
 b'piece length': 262144,
 b'pieces': b'U[\xb5\x8f\xab\x90\x93\xef\xce\xd0\xb4\x8e\x00X\xd0>\xe9\x1d\x17p9{\xe5\xebbA\xf1\xd7\xa3Q\x96\xaf&G7\x88\x17B\xb0\x90\xc4\xd1\x9a\xaf2\x91\ny,\xbd>LK\x16^S\x14#\xf7{\xc3\xce\x98\xdc\x99"[\x07\xf55q\rk\x8cPX\xc3\x87\xaeN}\xe0M\xbfP\xb16\x1f\xa3\xa1\x1d\x93\xe3\xe10\xec78\x16\x95\xedB,\xd0J\xc9!\xb7s\n"\xe8O\xe7O\xb3L\xf5\x8d\x0f\x1f:\xf4\x0b\x8c3\xe8I\xa8l\xad\x87\xe1lNF\x88n\xcfm\xa5\x17n\x8c\x93~\xa3\xb9\x82\x93j\x07\x99v7,*4\xb7\xb3\xdd\x1f\x87\xe3\xa4\x9c\x84\xa9\x84\xf3;\x902\xa2\xf7\x17/\x0e9S\x8f\xb6\x90j&\xa4vdrf\xad&d\x0b\xe0}8\x8b\xe4\xed\x85~1\xfeGL\xb9\xba\xd9\x9dHs%M\xa32ML\xc1\xa2\xcb"\xa8\x8bO\xfc\x87\xe9\x07\x94$\x04*\x9a%\xddm\xd5\xe9\xec\x8fw\xb3\xcd$\xad\xc1\x1c\x04\xdb<\xd49\xee\xa2\x8d9\xc2\xcf\x81\xaf\xbe\x93\xe2\xdf\xfc\xe4\xfb\xda\xc6\xc7\x14ec\xf0\x90\xe20\xd0x\xa1\tSy\t\xa7m\xdb-\x12#|\x8c\xd4\x1eI\xd1\xdc\x051w\xb78\xcd{\x00\x1c\xf7\x90|\x1f\xdb:\x9f\x