In [1]:
# !unzip ./data/normal.zip -d ./data/unzipped_data/

In [2]:
# !unzip ./data/hdr.zip -d ./data/unzipped_data/

In [3]:
import pandas as pd
from PIL import Image
import os
import cv2
import pickle
from segment_chars import get_segmented_chars

In [4]:
# The CSV is read with header=None (its first row is treated as data), so the
# two columns are named explicitly right after loading.
column_names = ['file_location', 'annotation']
dataset_df = pd.read_csv('./data/dataset.csv', header=None)
dataset_df.columns = column_names

In [5]:
# Preview the first five rows (head() defaults to n=5).
dataset_df.head()

Unnamed: 0,file_location,annotation
0,crop_m1/I00000.png,9B52145
1,crop_h1/I00000.png,9B52145
2,crop_m1/I00001.png,6B94558
3,crop_h1/I00001.png,6B94558
4,crop_m1/I00002.png,8B90164


In [6]:
def get_abs_file_location(file_location):
    """Return the absolute path of a dataset image.

    Args:
        file_location: Path of the image relative to ``data/unzipped_data``
            (e.g. ``'crop_m1/I00000.png'``).

    Returns:
        ``<current working directory>/data/unzipped_data/<file_location>``.
        The result depends on the directory the notebook is run from.
    """
    unzipped_root = os.path.join(os.path.abspath(os.getcwd()), 'data/unzipped_data')
    return os.path.join(unzipped_root, file_location)

In [7]:
# Resolve every relative image path to an absolute one.
dataset_df['abs_file_location'] = dataset_df['file_location'].map(get_abs_file_location)

In [8]:
# Sanity check: show the relative and the resolved path of the first row.
for column in ('file_location', 'abs_file_location'):
    print(dataset_df.loc[0, column])

crop_m1/I00000.png
/mnt/ebs-1/indranil_chandra/code/code_snippets/anpr/ANPR/Licence_plate_recognition/USA_plates/data/unzipped_data/crop_m1/I00000.png


In [9]:
def _segment_row(row):
    """Run get_segmented_chars on one dataset row (image path + plate text)."""
    return get_segmented_chars(row['abs_file_location'], row['annotation'])


# Each call returns a pair; zip(*...) transposes the per-row pairs into the
# two new columns.
segmentation_results = dataset_df.apply(_segment_row, axis=1)
dataset_df['segmented_chars'], dataset_df['segmented_img'] = zip(*segmentation_results)

In [10]:
# A row counts as successfully segmented when the number of extracted
# characters equals the number of characters in its annotation.
segmented_char_counts = dataset_df['segmented_chars'].map(len)
annotation_lengths = dataset_df['annotation'].map(len)
dataset_df['segmentation_success_flag'] = segmented_char_counts == annotation_lengths

In [11]:
# Preview the first five rows, now including the segmentation columns.
dataset_df.head()

Unnamed: 0,file_location,annotation,abs_file_location,segmented_chars,segmented_img,segmentation_success_flag
0,crop_m1/I00000.png,9B52145,/mnt/ebs-1/indranil_chandra/code/code_snippets...,"([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...","[[[17, 13, 12], [17, 13, 12], [17, 13, 12], [1...",False
1,crop_h1/I00000.png,9B52145,/mnt/ebs-1/indranil_chandra/code/code_snippets...,"([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...","[[[54, 44, 37], [53, 43, 36], [52, 42, 35], [5...",False
2,crop_m1/I00001.png,6B94558,/mnt/ebs-1/indranil_chandra/code/code_snippets...,"([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...","[[[9, 11, 111], [9, 11, 111], [9, 11, 111], [1...",False
3,crop_h1/I00001.png,6B94558,/mnt/ebs-1/indranil_chandra/code/code_snippets...,"([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...","[[[44, 45, 227], [42, 43, 225], [44, 45, 227],...",False
4,crop_m1/I00002.png,8B90164,/mnt/ebs-1/indranil_chandra/code/code_snippets...,"([[0.0, 25.18518556047369, 25.18518556047369, ...","[[[92, 80, 68], [95, 80, 71], [96, 81, 72], [9...",False


In [12]:
# Fraction of rows per flag value (normalize=True yields proportions, not counts).
dataset_df['segmentation_success_flag'].value_counts(normalize=True)

False    0.759202
True     0.240798
Name: segmentation_success_flag, dtype: float64

In [13]:
from collections import Counter

# Tally how often each character occurs in the annotations of the rows whose
# segmentation succeeded.
successful_annotations = dataset_df.loc[dataset_df['segmentation_success_flag'] == True, 'annotation']
count = Counter()
for plate_text in successful_annotations.tolist():
    count.update(plate_text)
print(dict(count))

{'M': 7, 'J': 4, 'T': 10, '8': 115, '6': 93, 'S': 2, 'Z': 12, '9': 81, '0': 82, '4': 86, 'E': 2, '1': 92, 'L': 8, 'B': 128, '5': 83, 'G': 2, '3': 94, 'A': 6, 'D': 1, '2': 83, '7': 92, 'I': 3, 'H': 4, 'C': 5}


In [14]:
def save_segmented_image(row):
    """Write every segmented character of one row to its per-character folder.

    For each character of ``row['annotation']`` the entry at the same position
    in ``row['segmented_chars']`` is written to
    ``<cwd>/data/segregated_data/<CHAR>/<CHAR>_<source>_<position>.jpg``,
    where ``<CHAR>`` is the upper-cased character.

    ``<source>`` is built from the complete relative ``row['file_location']``
    (folder included, extension removed, ``/`` replaced by ``_``). Using only
    the file's base name made images that share a base name across folders
    (e.g. ``crop_m1/I00000.png`` and ``crop_h1/I00000.png``) overwrite each
    other's output files.

    Args:
        row: Mapping-like row with ``'annotation'``, ``'file_location'`` and
            ``'segmented_chars'``. ``segmented_chars`` must have at least as
            many entries as ``annotation`` has characters.

    Returns:
        None. A warning is issued for every image ``cv2.imwrite`` reports it
        could not write.
    """
    import warnings

    # 'crop_m1/I00000.png' -> 'crop_m1_I00000'
    source_stem = os.path.splitext(row['file_location'])[0].replace('/', '_')
    output_root = os.path.join(os.path.abspath(os.getcwd()), 'data/segregated_data')
    for index, annotated_char in enumerate(row['annotation']):
        label = annotated_char.upper()
        image_path = os.path.join(output_root, label, label + '_' + source_stem + '_' + str(index) + '.jpg')
        os.makedirs(os.path.dirname(image_path), exist_ok=True)
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(image_path, row['segmented_chars'][index]):
            warnings.warn('Could not write segmented character image: ' + image_path)
    return

In [15]:
def save_annotated_image(row):
    """Write the image stored in ``row['segmented_img']`` to ``data/segmented_data``.

    The file is written to
    ``<cwd>/data/segmented_data/<source>_<ANNOTATION>.jpg`` where
    ``<ANNOTATION>`` is the upper-cased ``row['annotation']``.

    ``<source>`` is built from the complete relative ``row['file_location']``
    (folder included, extension removed, ``/`` replaced by ``_``). Using only
    the file's base name made images that share a base name and annotation
    across folders (e.g. ``crop_m1/I00000.png`` and ``crop_h1/I00000.png``)
    overwrite each other.

    Args:
        row: Mapping-like row with ``'annotation'``, ``'file_location'`` and
            ``'segmented_img'``.

    Returns:
        None. A warning is issued if ``cv2.imwrite`` reports it could not
        write the image.
    """
    import warnings

    # 'crop_m1/I00000.png' -> 'crop_m1_I00000'
    source_stem = os.path.splitext(row['file_location'])[0].replace('/', '_')
    image_path = os.path.join(
        os.path.abspath(os.getcwd()),
        'data/segmented_data',
        source_stem + '_' + row['annotation'].upper() + '.jpg',
    )
    os.makedirs(os.path.dirname(image_path), exist_ok=True)
    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(image_path, row['segmented_img']):
        warnings.warn('Could not write annotated image: ' + image_path)
    return

In [16]:
# Only rows whose segmentation matched the annotation length are written out.
successful_rows = dataset_df[dataset_df['segmentation_success_flag'] == True]
successful_rows.apply(save_segmented_image, axis=1)
successful_rows.apply(save_annotated_image, axis=1)

7      None
11     None
24     None
28     None
29     None
30     None
37     None
41     None
68     None
69     None
72     None
73     None
80     None
81     None
88     None
89     None
90     None
91     None
96     None
97     None
98     None
99     None
100    None
101    None
102    None
103    None
104    None
105    None
106    None
107    None
       ... 
569    None
576    None
585    None
586    None
587    None
592    None
593    None
601    None
603    None
608    None
609    None
611    None
612    None
613    None
615    None
616    None
617    None
633    None
636    None
637    None
640    None
641    None
643    None
644    None
645    None
647    None
648    None
649    None
650    None
651    None
Length: 157, dtype: object

## Prepare alternate dataset for the training pipeline

In [17]:
# Grayscale images collected from every class folder
data = []

# Label (the class folder's name) for the image at the same position in `data`
labels = []

input_path = './data/alternate_data'
# Class folders are the direct children of `input_path`. os.walk() would also
# yield nested folders on later iterations, and their names would then be
# looked up directly under `input_path` (wrong folder / FileNotFoundError),
# so only its first, top-level result is used.
for _root, class_dirs, _files in os.walk(input_path):
    for class_name in class_dirs:
        print(" Class : \t \t " + class_name)
        class_path = os.path.join(input_path, class_name)
        for filename in os.listdir(class_path):
            # Keep only the .jpg images
            if filename.endswith('.jpg'):
                img = cv2.imread(os.path.join(class_path, filename))
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                data.append(gray)
                labels.append(class_name)
    break

 Class : 	 	 7
 Class : 	 	 P
 Class : 	 	 M
 Class : 	 	 D
 Class : 	 	 A
 Class : 	 	 H
 Class : 	 	 6
 Class : 	 	 2
 Class : 	 	 4
 Class : 	 	 L
 Class : 	 	 8
 Class : 	 	 F
 Class : 	 	 Y
 Class : 	 	 C
 Class : 	 	 X
 Class : 	 	 5
 Class : 	 	 Q
 Class : 	 	 G
 Class : 	 	 E
 Class : 	 	 J
 Class : 	 	 0
 Class : 	 	 U
 Class : 	 	 W
 Class : 	 	 R
 Class : 	 	 9
 Class : 	 	 N
 Class : 	 	 1
 Class : 	 	 3
 Class : 	 	 S
 Class : 	 	 B
 Class : 	 	 Z
 Class : 	 	 T
 Class : 	 	 I
 Class : 	 	 K
 Class : 	 	 O
 Class : 	 	 V


In [18]:
# Persist the loaded images and their labels. The `with` blocks make sure each
# file is flushed and closed as soon as its dump has finished.
with open("./data/alternate_data.pickle", "wb") as data_file:
    pickle.dump(data, data_file)
with open("./data/alternate_data_labels.pickle", "wb") as labels_file:
    pickle.dump(labels, labels_file)

print('Length data: ' + str(len(data)))
print('Length labels: ' + str(len(labels)))

Length data: 36576
Length labels: 36576
