In [None]:
import os
import time
import math
import re
import struct
import traceback
import codecs
import bitstring

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# import cv2

from PIL import Image
import IPython.display as display

```
.
├── ETL4
│   ├── ETL4C
│   └── ETL4INFO
├── README.md
├── classes.tsv
├── co59-utf8.txt
└── note.ipynb
```

In [None]:
# 64-entry lookup table for the 6-bit character codes stored in the record
# header, laid out as four rows of 16 characters.
t56s = (
    '0123456789[#@:>?'
    ' ABCDEFGHI&.](< '
    ' JKLMNOPQR-$*);\''
    '|/STUVWXYZ ,%="!'
)


def T56(c):
    """Return the character that the 6-bit code ``c`` maps to in ``t56s``."""
    return t56s[c]

# Load the code table from co59-utf8.txt. The file holds whitespace-separated
# entries of the form "<character>:<n1>,<n2>"; CO59 maps (n1, n2) -> character.
# The built-in open() with an explicit encoding replaces the legacy
# codecs.open() call.
with open('co59-utf8.txt', 'r', encoding='utf-8') as co59f:
    co59t = co59f.read()

co59l = co59t.split()
CO59 = {}
for entry in co59l:
    parts = entry.split(':')
    codes = parts[1].split(',')
    CO59[(int(codes[0]), int(codes[1]))] = parts[0]

In [None]:
# Raw data files to process. The directory tree documented at the top of this
# notebook keeps them under ETL4/, so a file that is not found next to the
# notebook is looked up in that directory instead.
DATA_DIR = 'ETL4'
file_list = [
    name if os.path.exists(name) else os.path.join(DATA_DIR, name)
    for name in [
        'ETL4C',
    ]
]

In [None]:
# Open the first data file as a bit stream; the record fields are not
# byte-aligned (36-bit and 6-bit integers), so they are read bit by bit.
filename = file_list[0]
file_stream = bitstring.ConstBitStream(filename=filename)

In [None]:
# Size of one record in bytes (216 header bytes + 2736 image bytes).
RECORD_LENGTH = 2952
# Number of records to skip before reading.
skip = 0
# RECORD_LENGTH is a byte count, so seek with bytepos; pos is a bit offset
# and would land in the wrong place for any skip > 0.
file_stream.bytepos = skip * RECORD_LENGTH

In [None]:
# Read exactly one 2952-byte record. pad tokens produce no value, so the
# resulting list has 24 entries: 23 header fields followed by the image bytes.
record = file_stream.readlist(','.join([
    '2*uint:36',
    'uint:8',
    'pad:28',
    'uint:8',
    'pad:28',
    '4*uint:6', # 4 Character Code - [4:8]
    'pad:12',
    '15*uint:36',
    'pad:1008',
    # 16 Gray Level (4bit/pixel) Image Data 72(X-axis size) * 76(Y-axis size) = 5472 pixels - [23]
    # 5472 pixels * 4 bit = 21888 bits = 2736 bytes; 'bytes:' takes a byte count.
    'bytes:2736',
]))

In [None]:
# Dump every header field with its list index; the last entry (raw image
# bytes) is left out. The loop variable is deliberately not called `idx`,
# because that name is the record cursor used by the browsing cell.
print(type(record), len(record))
for field_index, field_value in enumerate(record[:-1]):
    print(f'{field_index} - {field_value}')

In [None]:
# Keep only the two fields used below: the decoded label and the raw image.
record_dict = dict(
    # 4 Character Code - [4:8]
    character=''.join(T56(code) for code in record[4:8]),
    # 16 Gray Level (4bit/pixel) Image Data 72(X-axis size) * 76(Y-axis size) = 5472 pixels - [23]
    image_data=record[23],
)

In [None]:
# Show the decoded 4-character label of the record that was just read.
record_dict['character']

In [None]:
# Decode the packed 4-bit-per-pixel image data into a float image and plot it.
width, height = 72, 76
decoded_image = Image.frombytes(
    'F', (width, height), record_dict['image_data'], 'bit', 4
)
np_img = np.array(decoded_image)

plt.imshow(np_img)
plt.colorbar()

In [None]:
def show_image(filename, pos=0):
    """Print the label and plot the image of one record of a data file.

    Args:
        filename: Path of the data file to open.
        pos: Zero-based record index; the stream is positioned at
            ``pos * 2952`` bytes before reading.

    The image size is taken from header fields 18 and 19 of the record, and
    the image is decoded as 4 bits per pixel.
    """
    stream = bitstring.ConstBitStream(filename=filename)
    stream.bytepos = pos * 2952
    fields = stream.readlist('2*uint:36,uint:8,pad:28,uint:8,pad:28,4*uint:6,pad:12,15*uint:36,pad:1008,bytes:2736')

    label = ''.join(t56s[code] for code in fields[4:8])
    print(label)

    size = (fields[18], fields[19])
    pixels = np.array(Image.frombytes('F', size, fields[-1], 'bit', 4))
    plt.imshow(pixels)
    plt.colorbar()

In [None]:
# Record index at which browsing with show_image starts.
idx = 500

In [None]:
# Show the record at the current index, then advance the index so that
# re-running this cell steps through the file one record at a time.
show_image(filename, pos=idx)
idx += 1

In [None]:
# Count the records in every data file and how many samples each label has.
total_samples = 0
record_count = {}

for filename in file_list:
    file_stream = bitstring.ConstBitStream(filename=filename)
    while True:
        try:
            r = file_stream.readlist('2*uint:36,uint:8,pad:28,uint:8,pad:28,4*uint:6,pad:12,15*uint:36,pad:1008,bytes:2736')
        except bitstring.ReadError:
            # Reading ran off the end of the file: all records are consumed.
            # Any other error (or a keyboard interrupt) is left to propagate
            # instead of being mistaken for end-of-file.
            break

        total_samples += 1

        # 4 Character Code - [4:8]
        name = ''.join([t56s[c] for c in r[4:8]])
        record_count[name] = record_count.get(name, 0) + 1

In [None]:
# Total number of records read across all files in file_list.
total_samples

In [None]:
# Number of distinct labels seen.
len(record_count)

In [None]:
# Sample count per label.
record_count

In [None]:
# Save the per-class sample counts as a tab-separated table sorted by label.
csv_filename = 'classes.tsv'
# Build the table straight from the counts. Writing raw lines and parsing them
# back with read_csv would let the parser reinterpret labels (quote
# characters, NA-like or numeric-looking text) before the final write.
pd_df = pd.DataFrame(list(record_count.items()), columns=['class', 'num_samples'])
pd_df = pd_df.sort_values(['class'])
pd_df.to_csv(csv_filename, encoding='utf-8', index=False, sep='\t')