# Comparing Pickle File Size Against Manually Writing File as Bytes


In [1]:
import os
from signal_processing_utilities import process_signal
import pickle
import numpy as np

In [2]:
file_path = os.getcwd() + "/data/test_file.txt"

In [3]:
file_path

'/Volumes/T7/Github/Neuralink-Compression-Challenge/analysis/tutorials/data/test_file.txt'

In [4]:
test_data = [65536, 65537, 65538]

In [5]:
with open(file_path, "wb+") as fp:
    return_value = fp.write(pickle.dumps(test_data))
    fp.close()

In [6]:
process_signal.print_file_size(file_path)

File Name: "test_file.txt".
Size: 31 Bytes.


31

In [7]:
return_value

31

In [8]:
return_value.to_bytes(1, "big")

b'\x1f'

In [9]:
with open(file_path, "wb+") as fp:
    return_value_l = [fp.write(data.to_bytes(4, "big")) for data in test_data]

In [10]:
return_value_l

[4, 4, 4]

In [11]:
sum(return_value_l)

12

In [12]:
process_signal.print_file_size(file_path)

File Name: "test_file.txt".
Size: 12 Bytes.


12

In [13]:
byte_string = process_signal.read_file_bytes(file_path)
byte_string

b'\x00\x01\x00\x00\x00\x01\x00\x01\x00\x01\x00\x02'

In [14]:
# Reconstruct the array from the byte_string

reconstructed_array = []
byte_size = 4
for index in range(0, len(byte_string), byte_size):
    reconstructed_array.append(
        int.from_bytes(byte_string[index : index + byte_size], byteorder="big")
    )

In [15]:
reconstructed_array

[65536, 65537, 65538]

In [16]:
(12 / 31) * 97000

37548.38709677419

#### Node Mapping Dictionary


In [17]:
node_mapping_dict_list = [
    ("e0", "000000000"),
    ("b8", "000000001"),
    ("7d", "000000010"),
    ("ed", "000000011"),
    ("a7", "000000100"),
    ("2b", "000000101"),
    ("21", "000000110"),
    ("90", "000000111"),
    ("bc", "000001000"),
    ("e2", "000001001"),
    ("a6", "000001010"),
    ("3e", "000001011"),
    ("ec", "000001100"),
    ("f8", "000001101"),
    ("03", "00000111"),
    ("a5", "000010000"),
    ("a4", "000010001"),
    ("e6", "000010010"),
    ("b9", "000010011"),
    ("c3", "000010100"),
    ("da", "000010101"),
    ("c6", "000010110"),
    ("e7", "000010111"),
    ("4d", "000011000"),
    ("85", "000011001"),
    ("cd", "000011010"),
    ("bd", "000011011"),
    ("b1", "000011100"),
    ("94", "000011101"),
    ("e3", "000011110"),
    ("1d", "000011111"),
    ("0f", "00010000"),
    ("54", "000100010"),
    ("d5", "000100011"),
    ("8c", "000100100"),
    ("1a", "000100101"),
    ("10", "00010011"),
    ("d6", "000101000"),
    ("ad", "000101001"),
    ("d1", "000101010"),
    ("f9", "000101011"),
    ("71", "000101100"),
    ("c0", "000101101"),
    ("66", "000101110"),
    ("33", "000101111"),
    ("de", "000110000"),
    ("d9", "000110001"),
    ("84", "000110010"),
    ("f0", "000110011"),
    ("45", "000110100"),
    ("ef", "000110101"),
    ("69", "000110110"),
    ("9c", "000110111"),
    ("8e", "000111000"),
    ("a3", "000111001"),
    ("70", "000111010"),
    ("ee", "000111011"),
    ("4a", "000111100"),
    ("59", "000111101"),
]

In [18]:
bytes(node_mapping_dict_list[0][0], encoding="utf-8")

b'e0'

In [19]:
bit_string_test = bytes(node_mapping_dict_list[0][1], encoding="utf-8")

In [20]:
node_mapping_dict_list[0][0]

'e0'

In [21]:
bit_string_test

b'000000000'

In [22]:
bytes(bit_string_test)

b'000000000'

In [23]:
len(bit_string_test)

9

In [24]:
final_byte_string = b""

In [25]:
final_byte_string += bit_string_test

In [26]:
final_byte_string

b'000000000'

In [27]:
bit_string_test

b'000000000'

In [28]:
int(bit_string_test[0:8], base=2)

0

In [29]:
int(bit_string_test[0:8], base=2)

0

## Examining RLE Byte String


In [30]:
byte_string = b"0000000000000000010000000100000000110000001000000001010000001100000001110000010000000010010000010100000010110000011000000011010000011100000011110000100000000100010000100100000100110000101000000101010000101100000101110000110000000110010000110100000110110000111000001111000001111100010010000000010000010010000100010000110010001000010001010010001100010001110010010000010010010010010100010010110010011000010011010010011100010011110010100000010100010010100100010100110010101000010101010010101100010101110010110000010110010010110100101110000101110100101111000101111100110000000110000100110001000110001100110010000110010100110011000110011100110100000110100100110101000110101100110110000110110100110111000110111100111000000111000100111001000111001100111010000111010100111011000111011100111100000111100100111101000111101100111110000111110100111111000111111101000000001000000101000001001000001101000010001000010101000011001000011101000100010001010010001011010001100010001101010001110010001111010010000010010001010010010010010011010010100010010101010010110010010111010011000010011001010011010010011011010011100010011101010011110010011111010100000010100001010100010010100011010100100010100101010100110010100111010101000010101001010101010010101011010101100010101101010101110010101111010110000010110001010110010010110011010110100010110101010110110010110111010111000010111001010111010010111011010111100010111101010111110010111111011000000011000001011000010011000011011000100011000101011000110011000111011001000011001001011001010011001011011001100011001101011001110011001111011010000011010001011010010110101000110101010110101100110101110110110000110110010110110100110110110110111000110111010110111100110111110111000000111000010111000100111000110111001000111001010111001100111001110111010111100000111100010111100101111010111110000111110010111110101111110001111110101111111001111111110110000000110000001110000010110000011110000100110000101110000110110000111110001000110001001110001010110001011110001101100011111001110111101111000011110001111100100111100101111100111111011111100011111001011111001101111100111111110100011111010011111101010111110101111111011001111101101111110111011111011111111110011111101001111110101111111011111111100111111101011111110111111111100111111110111111111101111111111"

In [31]:
byte_string

b'00000000000000000100000001000000001100000010000000010100000011000000011100000100000000100100000101000000101100000110000000110100000111000000111100001000000001000100001001000001001100001010000001010100001011000001011100001100000001100100001101000001101100001110000011110000011111000100100000000100000100100001000100001100100010000100010100100011000100011100100100000100100100100101000100101100100110000100110100100111000100111100101000000101000100101001000101001100101010000101010100101011000101011100101100000101100100101101001011100001011101001011110001011111001100000001100001001100010001100011001100100001100101001100110001100111001101000001101001001101010001101011001101100001101101001101110001101111001110000001110001001110010001110011001110100001110101001110110001110111001111000001111001001111010001111011001111100001111101001111110001111111010000000010000001010000010010000011010000100010000101010000110010000111010001000100010100100010110100011000100011010100011100100011110100100000100100

In [32]:
len(byte_string)

2275

In [33]:
file_path

'/Volumes/T7/Github/Neuralink-Compression-Challenge/analysis/tutorials/data/test_file.txt'

In [34]:
# byte_string = bit_string_test

with open(file_path, "wb+") as fp:
    fp.write(byte_string)
    fp.close()
process_signal.print_file_size(file_path)

File Name: "test_file.txt".
Size: 2275 Bytes.


2275

In [35]:
byte_string = "0x"

In [36]:
byte_string = bytes(byte_string, encoding="utf-8")

In [37]:
byte_string

b'0x'

In [38]:
len(byte_string)

2

In [39]:
## compressing the byte string
byte_string = b"0000000000000000010000000100000000110000001000000001010000001100000001110000010000000010010000010100000010110000011000000011010000011100000011110000100000000100010000100100000100110000101000000101010000101100000101110000110000000110010000110100000110110000111000001111000001111100010010000000010000010010000100010000110010001000010001010010001100010001110010010000010010010010010100010010110010011000010011010010011100010011110010100000010100010010100100010100110010101000010101010010101100010101110010110000010110010010110100101110000101110100101111000101111100110000000110000100110001000110001100110010000110010100110011000110011100110100000110100100110101000110101100110110000110110100110111000110111100111000000111000100111001000111001100111010000111010100111011000111011100111100000111100100111101000111101100111110000111110100111111000111111101000000001000000101000001001000001101000010001000010101000011001000011101000100010001010010001011010001100010001101010001110010001111010010000010010001010010010010010011010010100010010101010010110010010111010011000010011001010011010010011011010011100010011101010011110010011111010100000010100001010100010010100011010100100010100101010100110010100111010101000010101001010101010010101011010101100010101101010101110010101111010110000010110001010110010010110011010110100010110101010110110010110111010111000010111001010111010010111011010111100010111101010111110010111111011000000011000001011000010011000011011000100011000101011000110011000111011001000011001001011001010011001011011001100011001101011001110011001111011010000011010001011010010110101000110101010110101100110101110110110000110110010110110100110110110110111000110111010110111100110111110111000000111000010111000100111000110111001000111001010111001100111001110111010111100000111100010111100101111010111110000111110010111110101111110001111110101111111001111111110110000000110000001110000010110000011110000100110000101110000110110000111110001000110001001110001010110001011110001101100011111001110111101111000011110001111100100111100101111100111111011111100011111001011111001101111100111111110100011111010011111101010111110101111111011001111101101111110111011111011111111110011111101001111110101111111011111111100111111101011111110111111111100111111110111111111101111111111"

In [40]:
byte_string_str = str(byte_string).lstrip("b'").rstrip("'")

In [41]:
byte_string_str[0]

'0'

In [42]:
# RLE
def rle_bit_compression(byte_string: bytes, compress=True):
    byte_string_str = str(byte_string).lstrip("b'").rstrip("'")
    if compress == True:
        # Convert byte string to string of bits

        initial_index = 0
        second_index = 1
        frequency = 0
        rle_compression = []

        while second_index < len(byte_string_str):
            if byte_string_str[initial_index] == byte_string_str[second_index]:
                rle_compression.append(int(byte_string_str[initial_index]))
                frequency += 1
                while byte_string_str[initial_index] == byte_string_str[second_index]:
                    second_index += 1
                    frequency += 1
                    if second_index >= len(byte_string_str):
                        break
                rle_compression.append(frequency)
            else:
                rle_compression.append(byte_string_str[initial_index])
                rle_compression.append(-1)
            frequency = 0
            initial_index = second_index
            second_index += 1
        rle_compression = "".join(rle_compression)
        rle_compressed_bytes = bytes(rle_compression, encoding="utf-8")

        return rle_compressed_bytes

In [43]:
bit_string_test

b'000000000'

In [44]:
bit_string_test_compressed = rle_bit_compression(bit_string_test)

TypeError: sequence item 0: expected str instance, int found

In [97]:
len(bit_string_test_compressed)

2

In [98]:
with open(file_path, "wb+") as fp:
    fp.write(bit_string_test_compressed)
    fp.close()
process_signal.print_file_size(file_path)

File Name: "test_file.txt".
Size: 2 Bytes.


2

In [99]:
bit_string_test

b'000000000'

In [103]:
len(byte_string)

2275

In [111]:
byte_string_compressed = rle_bit_compression(byte_string)

In [112]:
byte_string_compressed

b'0171x071x0812061x081x0x1x06120713051x081x021x051x0x1x061x0x12051207120x1x05130614041x081x031x041x021x051x0212041x0x1x061x0x1x0x1x041x0x12051x0x1304120712021x04120x1x05120x12041305140515031x021x081x051x021x041x031x0412021x031x041x031x0x1x021x0312031x0313021x021x051x021x021x021x021x0x1x031x021x0x12021x0212041x02120x1x021x0213031x0214021x0x1x061x0x1x031x021x0x1x021x031x0x1x0212021x0x1x0x1x041x0x1x0x1x0x1x021x0x1x0x12031x0x1x0x13021x0x12051x0x12021x021x0x120x1x021x0x13041x0x130x1x021x0x14031x0x1502120712041x0212031x031203120212021x0412021x0x1x021202120312021302120x1x05120x1x021x02120x1x0x1x03120x1x0x1202120x1204120x120x1x02120x1303120x1402130613031x0213021x0313021202130x1x04130x1x0x1x02130x1203130x1302140514021x02140x1x03140x12021504150x1x021603170x1x081x061x0x1x051x021x05120x1x041x031x041x0x1x0x1x0412021x04130x1x031x031x031x0x1x021x031x0x120x1x0312031x03120x1x0x1x0313021x03140x1x021x051x021x031x0x1x021x021x021x021x02120x1x021x0x1x031x021x0x1x0x1x0x1x021x0x12021x021x0x130x1x0212041x02120

In [113]:
with open(file_path, "wb+") as fp:
    fp.write(byte_string_compressed)
    fp.close()
process_signal.print_file_size(file_path)

File Name: "test_file.txt".
Size: 2181 Bytes.


2181

In [110]:
rle_bit_compression(byte_string)

b'0171x071x0812061x081x0x1x06120713051x081x021x051x0x1x061x0x12051207120x1x05130614041x081x031x041x021x051x0212041x0x1x061x0x1x0x1x041x0x12051x0x1304120712021x04120x1x05120x12041305140515031x021x081x051x021x041x031x0412021x031x041x031x0x1x021x0312031x0313021x021x051x021x021x021x021x0x1x031x021x0x12021x0212041x02120x1x021x0213031x0214021x0x1x061x0x1x031x021x0x1x021x031x0x1x0212021x0x1x0x1x041x0x1x0x1x0x1x021x0x1x0x12031x0x1x0x13021x0x12051x0x12021x021x0x120x1x021x0x13041x0x130x1x021x0x14031x0x1502120712041x0212031x031203120212021x0412021x0x1x021202120312021302120x1x05120x1x021x02120x1x0x1x03120x1x0x1202120x1204120x120x1x02120x1303120x1402130613031x0213021x0313021202130x1x04130x1x0x1x02130x1203130x1302140514021x02140x1x03140x12021504150x1x021603170x1x081x061x0x1x051x021x05120x1x041x031x041x0x1x0x1x0412021x04130x1x031x031x031x0x1x021x031x0x120x1x0312031x03120x1x0x1x0313021x03140x1x021x051x021x031x0x1x021x021x021x021x02120x1x021x0x1x031x021x0x1x0x1x0x1x021x0x12021x021x0x130x1x0212041x02120

In [114]:
with open(file_path, "rb+") as fp:
    byte_string_compressed_bytes = fp.read()
    fp.close()

In [118]:
byte_string_compressed_string = (
    str(byte_string_compressed_bytes).lstrip("b'").rstrip("'")
)

In [151]:
byte_string_compressed_string[1]

'1'

In [122]:
expanded_byte_string = []
for index in range(0, len(byte_string_compressed_string), 2):
    byte_sub_string = [
        byte_string_compressed_string[index]
        for value in range(byte_string_compressed_string[index + 1])
    ]
    expanded_byte_string.extend(byte_sub_string)

TypeError: 'str' object cannot be interpreted as an integer

In [145]:
bit_string_test = b"000000000000"

In [148]:
bit_string_test

b'000000000000'