In [2]:
import matplotlib.pyplot as plt
import cv2, re
import numpy as np
from queue import PriorityQueue

In [6]:
# BMP to JPEG conversion
img = cv2.imread('snail.bmp')

s = img.shape
new_shape = (((s[0] + 15) // 16) * 16, ((s[1] + 15) // 16) * 16, s[2])
new_image = np.zeros(new_shape, dtype = 'float32')
new_image[:s[0], :s[1], :] = img
img = np.array(new_image)
old_shape = s
s = new_shape
s1 = (s[0] // 2, s[1] // 2)

# todo - padding

# conversion from RGB format to YCbCr (Y -> Luminance, Cb -> blue chrominance, Cr -> red chrominance)
imgYCC = cv2.cvtColor(img, cv2.COLOR_BGR2YCR_CB)
Y = imgYCC[:,:,0]
Cb = np.zeros(s1, dtype = int)
Cr = np.zeros(s1, dtype = int)

# average every 2x2 block for Cb and Cr
for i in range(s[0]//2):
    for j in range(s[1]//2):
        x = i*2
        y = j*2
        avg = (img[x, y, 1] + img[x, y + 1, 1] + img[x + 1, y, 1] + img[x + 1, y + 1, 1]) // 4
        Cb[i, j] = avg
        avg = (img[x][y][2] + img[x][y + 1][2] + img[x + 1][y][2] + img[x + 1][y + 1][2]) // 4

# Discrete Cosine Transform
Y -= 128
Cb -= 128
Cr -= 128
Y_blocks = []
Cb_blocks = []
Cr_blocks = []
for i in range(0, s[0], 8):
    for j in range(0, s[1], 8):
        grid = np.float32(Y[i:i+8, j:j+8])
        Y_blocks.append(cv2.dct(grid))
for i in range(0, s1[0], 8):
    for j in range(0, s1[1], 8):
        grid = np.float32(Cb[i:i+8, j:j+8])
        Cb_blocks.append(cv2.dct(grid))
        grid = np.float32(Cr[i:i+8, j:j+8])
        Cr_blocks.append(cv2.dct(grid))

len_y = len(Y_blocks)
len_c = len(Cb_blocks)

# Quantization
Y_q = np.array([[4., 3, 4, 4, 4, 6, 11, 15], [3, 3, 3, 4, 5, 8, 14, 19], [3, 4, 4, 5, 8, 12, 16, 20], [4, 5, 6, 7, 12, 14, 18, 20], [6, 6, 9, 11, 14, 17, 21, 23], [9, 12, 12, 18, 23, 22, 25, 21], [11, 13, 15, 17, 21, 23, 25, 21], [13, 12, 12, 13, 16, 19, 21, 21]])
C_q = np.array([[4., 4, 6, 10, 21, 21, 21, 21], [4, 5, 6, 21, 21, 21, 21, 21], [6, 6, 12, 21, 21, 21, 21, 21], [10, 14, 21, 21, 21, 21, 21, 21], [21, 21, 21, 21, 21, 21, 21, 21], [21, 21, 21, 21, 21, 21, 21, 21], [21, 21, 21, 21, 21, 21, 21, 21], [21, 21, 21, 21, 21, 21, 21, 21]])
for i in range(len_y):
    Y_blocks[i] //= Y_q
for i in range(len_c):
    Cb_blocks[i] //= C_q
for i in range(len_c):
    Cr_blocks[i] //= C_q

# Run length encoding
def spiral_traversal(block):
    p = 8
    arr = np.array([])
    k = 0
    for c in range(p):
        for i in range(0, k+1):
            if k%2!=0 :
                arr = np.append(arr, block[i][k-i])
            else :
                arr = np.append(arr, block[k-i][i])
        k+=1

    k = 1
    for c in range(p-1):
        j = p
        for i in range(k, p):
            j-=1
            if k%2 == 0:
                arr = np.append(arr, block[i][j])
            else:
                arr = np.append(arr, block[j][i])
        k+=1
    return arr

dc = 0
def encode(block):
    global dc
    temp = spiral_traversal(block)
    l = len(temp)

    encoded_block = []
    encoded_block.append((0, temp[0] - dc))
    dc = temp[0]
    c0 = 0
    for i in range(1, l):
        if temp[i] or c0==15 :
            encoded_block.append((c0, temp[i]))
            c0 = 0
        else:
            c0 += 1
    encoded_block.append((0, 0))

    encoded_block = np.array(encoded_block, dtype = int)
    return encoded_block

encoded_Y = list()
encoded_Cr = list()
encoded_Cb = list()
for i in range(len_y):
    encoded_Y.append(encode(Y_blocks[i]))
for i in range(len_c):
    encoded_Cb.append(encode(Cb_blocks[i]))
    encoded_Cr.append(encode(Cr_blocks[i]))
encoded_Y = np.array(encoded_Y, dtype = object)
encoded_Cr = np.array(encoded_Cr, dtype = object)
encoded_Cb = np.array(encoded_Cb, dtype = object)

# print(encoded_Y, encoded_Cb, encoded_Cr, sep = '\n')

# Huffman Tables
def get_cat(num): ##extracting position of msb to determine what length of bits it will need to be encoded
    num = int(abs(num))
    ans = 0
    pwr = 1
    while pwr < num:
        pwr <<= 1
        ans += 1
    return ans
##***********************************************************DANGER**********************


def make_freq_table(dc_freq, ac_freq, encoded_blocks):
    for block in encoded_blocks:
        #block of length 64 usually (we take 8*8 grids), first element is dc coeff, the rest are ac coeffs
        #dc block[0] is encoded as (0, value)
        dc = block[0]
        dc_run_length, dc_val = dc #dc_run_length will be 0
        
        cat = get_cat(dc_val)
        dc_freq[dc_run_length, cat] += 1
        
        for i in range(1, len(block)):
            
            #ac block[i] is encoded as (run length of zeros before value max 15, value of ith non zero ac coefficient)
            ac_i = block[i]
            run_length, value = ac_i
            cat = get_cat(value) # find the category of value
            ac_freq[run_length, cat] += 1
            
dc_huffman_Y = np.zeros((1, 16), dtype=int)
ac_huffman_Y = np.zeros((16,16), dtype=int)
dc_huffman_C = np.zeros((1, 16), dtype=int)
ac_huffman_C = np.zeros((16,16), dtype=int)

make_freq_table(dc_huffman_Y, ac_huffman_Y, encoded_Y)
make_freq_table(dc_huffman_C, ac_huffman_C, encoded_Cr)
make_freq_table(dc_huffman_C, ac_huffman_C, encoded_Cb)



# make huffman codes
depth = 0
def get_codebook(freq):
    codebook = np.empty(freq.shape, dtype = object)
    def get_code_lengths(freq):

        def make_tree(freq):
            global code
            q = PriorityQueue()
            m = len(freq)
            n = len(freq[0])

            cnt = 0
            for i in range(m):
                for j in range(n):
                    if freq[i,j]:
                        q.put((freq[i,j], str((i,j))))
                        cnt += 1

            for i in range(cnt-1):
                a = q.get()
                b = q.get()
                q.put((a[0]+b[0], str("[" + a[1] + "," + b[1] + "]")))

            tree = eval(q.get()[1])
            return tree

        tree = make_tree(freq)
        code_lengths = np.zeros(freq.shape, dtype=int)

        def get_depths(arr, code_lengths):
            global depth
            if type(arr[1]) == int and type(arr[0]) == int:
                code_lengths[arr[0], arr[1]] = depth
            else:
                depth += 1
                get_depths(arr[0], code_lengths)
                get_depths(arr[1], code_lengths)
                depth-=1

        get_depths(tree, code_lengths)
        return code_lengths
    
    code_lengths = get_code_lengths(freq)
    length_symbol_pairs = []
    
    m, n = freq.shape
    for i in range(m):
        for j in range(n):
            if code_lengths[i, j]:
                length_symbol_pairs.append((code_lengths[i, j], (i, j)))
    length_symbol_pairs.sort()
    
    def get_code(code, length):
        st = bin(code).replace("0b", "")[::-1]
        while len(st) < length:
            st += '0'
        return st[::-1]
    
    code = 0
    cur_len = 0
    for length, symbol in length_symbol_pairs:
        while cur_len < length:  #if there is no symbol with the current length
            cur_len += 1
            code *= 2
        codebook[symbol[0], symbol[1]] = get_code(code, length)
        code += 1
    
    return codebook, length_symbol_pairs

codes_dc_Y, sorted_symbols_dc_Y = get_codebook(dc_huffman_Y)
codes_ac_Y, sorted_symbols_ac_Y = get_codebook(ac_huffman_Y)
codes_dc_C, sorted_symbols_dc_C = get_codebook(dc_huffman_C)
codes_ac_C, sorted_symbols_ac_C = get_codebook(ac_huffman_C)

def to_two_bytes(num):
    bh = bin(num).replace("0b", "")
    bh = bh[::-1]
    while len(bh) < 16:
        bh += '0'
    bh = bh[::-1]

    return [int(bh[:8], 2), int(bh[8:], 2)]

# Writing jpeg image in binary
jpeg_image = [0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10]

header = [0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00]
jpeg_image.extend(header)

luminance_quantisation_table = [0xff, 0xdb, 0x00, 0x43, 0x00]
for ele in Y_q:
    for x in ele:
        luminance_quantisation_table.append(int(x))
jpeg_image.extend(luminance_quantisation_table)

chrominance_quantisation_table = [0xff, 0xdb, 0x00, 0x43, 0x01]
for ele in C_q:
    for x in ele:
        chrominance_quantisation_table.append(int(x))
jpeg_image.extend(chrominance_quantisation_table)

start_of_frame = [0xff, 0xc0, 0x00, 0x11, 0x08]
start_of_frame.extend(to_two_bytes(old_shape[0]))
start_of_frame.extend(to_two_bytes(old_shape[1]))
start_of_frame.extend([0x03, 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01])
jpeg_image.extend(start_of_frame)

def parse_table(sorted_symbols, typ):
    table = [0xff, 0xc4]
    ln = 19
    symbols = []
    freqs = [0 for i in range(16)]
    
    for length, symbol in sorted_symbols:
        freqs[length] += 1
        symbols.append(symbol[0] * 16 + symbol[1])
        ln += 1
    
    table.extend(to_two_bytes(ln))
    table.append(typ)
    table.extend(freqs)
    table.extend(symbols)
    return table

jpeg_image.extend(parse_table(sorted_symbols_dc_Y, 0x00))
jpeg_image.extend(parse_table(sorted_symbols_ac_Y, 0x10))
jpeg_image.extend(parse_table(sorted_symbols_dc_C, 0x01))
jpeg_image.extend(parse_table(sorted_symbols_ac_C, 0x11))

start_of_scan = [0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x11, 0x03, 0x11, 0x00, 0x3f, 0x00]
jpeg_image.extend(start_of_scan)

def bit_rep(value):
    if value == 0:
        return ''
    else : 
        v = bin(abs(value)).replace("0b", '')
        if value>0 :
            return v
        else:
            return v.replace('0', 'a').replace('1','0').replace('a','1')

def encode_data(data_blocks, dc_codebook, ac_codebook):
    s = ''
    for block in data_blocks:
        dc_val = block[0][1]
        cat = get_cat(dc_val)
        s+=(dc_codebook[0,cat]+bit_rep(dc_val))
        
        for i in range(1, len(block)):
            run, val = block[i]
            s+=(ac_codebook[run, get_cat(val)]+bit_rep(val))
    #pad with 0s
    a = len(s)%8
    s+='0'*a
    
    data = [int(s[i:i+8], 2) for i in range(0, len(s), 8)]
    return data
    
        
jpeg_image.extend(encode_data(encoded_Y, codes_dc_Y, codes_ac_Y))
jpeg_image.extend(encode_data(encoded_Cb, codes_dc_C, codes_ac_C))
jpeg_image.extend(encode_data(encoded_Cr, codes_dc_C, codes_ac_C))

end_of_image = [0xff, 0xd9]
jpeg_image.extend(end_of_image)

try:
    with open("my_image.jpeg", 'wb') as f:
        for byte in jpeg_image:
            f.write(byte.to_bytes(1, byteorder='big'))
except Exception as e:
    print(e)

file = open('my_image.jpeg', 'rb')
data = file.read()
s = data.hex()

l = re.findall('..?', s)
d = np.reshape(l, (-1, 4))
d = [' '.join(ele) for ele in d]
for ele in d:
    print(ele)
file.close()

ff d8 ff e0
00 10 4a 46
49 46 00 01
01 01 00 48
00 48 00 00
ff db 00 43
00 04 03 04
04 04 06 0b
0f 03 03 03
04 05 08 0e
13 03 04 04
05 08 0c 10
14 04 05 06
07 0c 0e 12
14 06 06 09
0b 0e 11 15
17 09 0c 0c
12 17 16 19
15 0b 0d 0f
11 15 17 19
15 0d 0c 0c
0d 10 13 15
15 ff db 00
43 01 04 04
06 0a 15 15
15 15 04 05
06 15 15 15
15 15 06 06
0c 15 15 15
15 15 0a 0e
15 15 15 15
15 15 15 15
15 15 15 15
15 15 15 15
15 15 15 15
15 15 15 15
15 15 15 15
15 15 15 15
15 15 15 15
15 15 ff c0
00 11 08 01
00 01 00 03
01 22 00 02
11 01 03 11
01 ff c4 00
1d 00 00 01
00 02 03 01
01 02 00 00
00 00 00 00
00 00 00 05
06 03 04 07
08 02 01 09
ff c4 00 40
10 00 00 01
04 02 01 03
03 03 03 01
06 02 08 08
00 00 01 02
03 f0 04 10
05 06 11 20
07 12 30 13
21 40 08 14
22 50 15 23
31 32 60 70
16 41 17 24
25 26 33 42
71 80 18 34
35 51 52 61
a0 c0 ff c4
00 16 01 00
01 02 00 00
00 00 00 00
00 00 00 00
00 00 00 09
00 08 ff c4
00 35 11 00
00 02 01 03
03 04 00 05
04 00 04 08
00 00 00 00
f0 01 02 03
10 04 05 20
06 11 12 30
07 1

6c c3 88 ce
e8 58 99 e1
79 20 38 a2
c0 4b 0f f9
1b 26 78 8e
be e8 6e 5a
44 40 43 3e
23 39 36 f2
e5 c6 53 90
32 28 31 06
1b 80 2a 66
24 cd 12 5c
4f 71 fe f4
a6 15 7b 1e
db c8 d4 49
2a 21 e5 97
a3 f9 de 1b
0b 9a e2 b9
df ec 98 25
e3 19 92 de
36 62 7e 82
66 fa bd b8
ca 88 38 62
8f 17 b6 03
13 03 e5 85
8f 4c 20 04
ef d9 dc 94
9b 7e 8c 64
39 ff 48 3d
41 87 82 e4
fb 72 7a 4b
ae 79 16 f8
29 79 28 48
71 9f 50 dc
ae 08 e5 b7
f6 95 7f e2
14 e4 5a 7a
52 7c ab cf
f7 5a 49 fe
1f 81 c6 60
f1 cc 0c 8f
0a 52 74 3b
95 af b9 5a
1a f2 7f d3
e0 7e 00 f0
00 00 00 6e
5b 45 b4 5b
45 b4 5b 45
b4 5b 45 b4
5b 45 b4 5b
45 b4 5b 4d
2e 57 95 e3
b8 2c 0c de
6b 21 3b 31
b1 fc 6e 1c
d9 79 53 2b
fc 31 43 0b
3b 9e a2 00
f8 4a 53 f0
07 c0 f0 3c
00 79 0b d2
d8 73 3d 5a
ea 4c bf d4
87 50 b1 31
dc cf 86 47
1b d2 98 4e
0d 37 c6 f0
f0 12 c0 e7
dc 3c 3e 5f
b8 a8 fd a3
44 f6 e9 2a
09 07 1f ea
be b2 eb 5f
d5 37 59 a7
a0 3a 17 26
6e 27 a7 fc
44 87 c5 9f
c9 8f 7b 1b
99 b2 48 32
27 60 f6 c1
4a 92 92 98
21 56 b7 e7
7e cf bb e9
17 e

de d9 4f d3
cc f9 e0 25
cc 3c a6 25
40 28 bd 4c
61 d2 b5 2f
b0 e8 9a 98
7d 1d cb 63
43 99 ca f4
bf ab fc 6a
70 32 99 ed
b2 7c ac 38
65 9a 14 bf
f8 fc c8 c5
91 0b 1f ed
9f f0 81 1d
8c 48 00 7b
00 4e 8d 68
5f d3 1d 2a
f7 f5 73 fa
c5 5d 6d d5
98 18 cf c5
87 9e c9 4a
53 c5 f0 42
70 5b 95 f0
c5 01 2d be
7e d5 00 22
4b f2 67 97
f8 db 63 18
97 95 24 91
e9 fe 74 3e
de 57 56 48
fa f8 bc 44
10 3d b3 95
90 29 04 fb
1b 63 1e e4
bf c0 24 2b
0e 18 00 90
47 f1 8c a9
19 69 4f 86
6c 90 3d 07
8b 97 ea 67
20 8f 8f e4
9b 9a 06 75
86 56 4a 73
23 aa 12 66
c6 8f 9a dc
53 b9 13 cb
15 11 4c 51
71 30 e2 62
fb d8 47 b1
f9 0f 0a 04
16 1e f7 6d
b6 44 c6 44
c1 a6 31 21
29 1f d0 00
00 1f fb 00
07 fe c7 ea
da 2d a2 da
2d a2 da 2d
a2 da 2d a2
da 2d a2 da
2d a2 da 2d
a2 da 2d a2
da 40 cd 00
4c ce 3c 91
e1 c7 96 25
20 04 a7 7f
d0 0f cd 00
0f 07 99 ff
50 fd 54 e4
bd 53 c3 f4
b2 4c 43 8d
e1 f8 96 66
67 15 29 45
bf a8 c8 3c
b1 8d b8 c6
30 90 4c 78
9c 63 60 3e
22 4c c0 33
6a 0c 49 38
ba b3 39 5e
a5 cf 87 a3
f8 7c 47 e4
39 2

In [5]:
file = open('snail.jpeg', 'rb')
data = file.read()
s = data.hex()

import re
l = re.findall('..?', s)
print(len(l))
d = np.reshape(l, (-1, 11))
d = [' '.join(ele) for ele in d]
for ele in d:
    print(ele)
file.close()

bit_rep(-63)

13211
ff d8 ff e0 00 10 4a 46 49 46 00
01 01 01 00 60 00 60 00 00 ff db
00 43 00 03 02 02 03 02 02 03 03
03 03 04 03 03 04 05 08 05 05 04
04 05 0a 07 07 06 08 0c 0a 0c 0c
0b 0a 0b 0b 0d 0e 12 10 0d 0e 11
0e 0b 0b 10 16 10 11 13 14 15 15
15 0c 0f 17 18 16 14 18 12 14 15
14 ff db 00 43 01 03 04 04 05 04
05 09 05 05 09 14 0d 0b 0d 14 14
14 14 14 14 14 14 14 14 14 14 14
14 14 14 14 14 14 14 14 14 14 14
14 14 14 14 14 14 14 14 14 14 14
14 14 14 14 14 14 14 14 14 14 14
14 14 14 14 ff c0 00 11 08 01 00
01 00 03 01 22 00 02 11 01 03 11
01 ff c4 00 1f 00 00 01 05 01 01
01 01 01 01 00 00 00 00 00 00 00
00 01 02 03 04 05 06 07 08 09 0a
0b ff c4 00 b5 10 00 02 01 03 03
02 04 03 05 05 04 04 00 00 01 7d
01 02 03 00 04 11 05 12 21 31 41
06 13 51 61 07 22 71 14 32 81 91
a1 08 23 42 b1 c1 15 52 d1 f0 24
33 62 72 82 09 0a 16 17 18 19 1a
25 26 27 28 29 2a 34 35 36 37 38
39 3a 43 44 45 46 47 48 49 4a 53
54 55 56 57 58 59 5a 63 64 65 66
67 68 69 6a 73 74 75 76 77 78 79
7a 83 84 85 86 87 88 89 8a 92 93
94 9

c0 a3 c3 fa ea a7 9e 9a 92 46 a4
5b 5c c2 5b 01 e5 8f cb 23 e6 64
69 57 7c 0c 16 40 e1 bd 5c 5e 5b
57 27 95 3c 45 58 fb 4a 50 83 a3
56 df 6e 83 5a 4a df cd 4e f6 9f
57 14 9a b2 6d af 9c a3 55 54 4e
31 5a a7 75 f3 d5 c7 e7 d3 d7 ca
c4 1e 3a b8 4f d9 cf f6 94 d0 be
2f 69 17 36 f7 9f 0c 7e 21 a1 37
f3 88 1a 45 86 2b af 2a e2 77 24
87 23 2f 8b c8 d7 00 90 b3 46 17
11 9d de 9b e2 7d 60 fc 17 f8 f1
75 e2 c8 a7 b5 4f 05 f8 e2 3b 78
2e ee a4 31 0b 58 35 88 d3 30 cb
33 83 85 8e 54 59 65 2e 72 ac 5e
62 48 d9 b9 7e 7f f0 4e ad 0f 81
23 d5 3f 67 7f 8d e9 f6 5f 08 dd
b0 7f 0c f8 8d 70 20 85 d9 d9 92
58 27 38 09 1b 49 2f 9a b2 33 01
13 07 8a 60 15 e4 d9 d8 fc 3d f1
a2 7c 07 86 ef e0 5f c7 5b 4b 6d
5f c0 77 82 4b 5d 2b c4 93 c6 e6
c6 44 ca bf d9 ae 30 04 b1 3c 6d
b4 a9 dc b2 db b6 c1 c2 14 75 f2
2b 61 64 a1 ec 66 bd b5 e9 f2 3e
5b 7e fa 82 6a 50 a9 0e 92 ab 49
db 9a 3f 13 b3 4a 56 6d 9d 34 ab
c7 9a f1 f4 d7 ee e5 7a 5d 7f 75
f9 2d 2e ac f5 ed ed 74 af 80 36
3e 22 f0 7f 89 34 bb df f8 52 1e
39 93 cd 8

'000000'