## Preprocess image files for CNN model input

# Imports

In [6]:
import json
import os

import cv2
import numpy as np
import pandas as pd

# Read JSON into a DataFrame

In [16]:
# Parse the class definitions and the per-image annotations from disk
with open('Images/classes.json') as classes_file:
    data_class = json.load(classes_file)

with open('Images/annotations.json') as annotations_file:
    data = json.load(annotations_file)

In [17]:
# Render the class definitions as indented JSON with keys in sorted order
classes_pretty = json.dumps(data_class, sort_keys=True, indent=4)
print(classes_pretty)

[
    {
        "attribute_groups": [],
        "color": "#32a852",
        "id": 1,
        "name": "Heat Number",
        "opened": true
    },
    {
        "attribute_groups": [],
        "color": "#8293b3",
        "id": 2,
        "name": "Dimension",
        "opened": true
    },
    {
        "attribute_groups": [],
        "color": "#962de6",
        "id": 3,
        "name": "Grade",
        "opened": true
    }
]


In [18]:
# Display the "name" field of the first entry in the class list
data_class[0]["name"]

'Heat Number'

In [20]:
# Flatten the per-image annotations into parallel lists, one entry per labelled box.
# Every list receives exactly one value per box, so they always stay aligned.
key_list = []
class_list = []
class_type = []
x1_list = []
x2_list = []
y1_list = []
y2_list = []

# Build the class id -> class name lookup once instead of rescanning
# data_class for every box; this also removes the fixed "3 classes" limit.
class_names = {entry['id']: entry['name'] for entry in data_class}

for k, annotations in data.items():
    # NOTE(review): assumes each image key maps to a list of annotation dicts;
    # any other value (e.g. a metadata string) is skipped - confirm against the export format.
    if not isinstance(annotations, list):
        continue
    # Walk every entry, not just the first three, so no label is dropped
    for annotation in annotations:
        # Entries without a "classId" carry no label and are ignored
        if not isinstance(annotation, dict) or "classId" not in annotation:
            continue
        # Read everything before appending so a malformed entry cannot leave
        # the lists with different lengths (a missing key still raises KeyError).
        class_id = annotation["classId"]
        points = annotation["points"]
        x1, x2, y1, y2 = points["x1"], points["x2"], points["y1"], points["y2"]

        key_list.append(k)
        class_list.append(class_id)
        # None marks a class id that is not defined in data_class, keeping
        # class_type in step with the other lists.
        class_type.append(class_names.get(class_id))
        x1_list.append(x1)
        x2_list.append(x2)
        y1_list.append(y1)
        y2_list.append(y2)

In [21]:
# Combine lists into dataframe:
# stack the lists as rows, then transpose so that each list becomes one column
column_names = ["filename", "class", "class_type", "x1", "x2", "y1", "y2"]
stacked_lists = pd.DataFrame(
    [key_list, class_list, class_type, x1_list, x2_list, y1_list, y2_list]
)
coord_df = stacked_lists.transpose()
coord_df.columns = column_names
coord_df

Unnamed: 0,filename,class,class_type,x1,x2,y1,y2
0,IMG_6149.JPG,1,Heat Number,2551.5,3727.7,1154.3,1439.6
1,IMG_6149.JPG,2,Dimension,1327.0,2533.9,1163.1,1487.9
2,IMG_6149.JPG,3,Grade,602.8,1261.1,1198.2,1487.9
3,IMG_6148.JPG,3,Grade,374.54541,1081.199951,1176.249146,1439.599976
4,IMG_6148.JPG,2,Dimension,1147.007568,2362.800049,1154.304199,1444.0
5,IMG_6148.JPG,1,Heat Number,2375.9,3503.9,1171.9,1422.0
6,IMG_6147.JPG,3,Grade,84.872086,835.400024,1246.473022,1544.900024
7,IMG_6147.JPG,2,Dimension,901.2,2222.3,1228.9,1566.9
8,IMG_6147.JPG,1,Heat Number,2257.4,3402.9,1268.4,1562.5
9,IMG_6126.JPG,3,Grade,1454.236816,2222.300049,1211.361084,1382.5


## Batch crop images and resize

In [22]:
# Batch crop and resize images
#
# For every bounding box in coord_df: load the source image, cut out the box,
# and save both the raw crop and a resized copy in a per-class folder.
CLASS_DIRS = {1: 'heat_number', 2: 'dimension', 3: 'grade'}
TARGET_SIZE = (224, 224)  # (width, height) passed to cv2.resize

# cv2.imwrite reports a missing folder by returning False instead of raising,
# so make sure every output folder exists before writing.
for output_root in ('cropped_images', 'resized_images'):
    for class_dir in CLASS_DIRS.values():
        os.makedirs(os.path.join(output_root, class_dir), exist_ok=True)

im_count = 0
loaded_name, image = None, None
for x in coord_df.index:
    filename = coord_df.loc[x, 'filename']
    class_id = coord_df.loc[x, 'class']
    print('Processing ' + filename + ' Class ' + str(class_id) + ' ..')

    # Boxes of the same image are usually on consecutive rows: decode only when the file changes
    if filename != loaded_name:
        image = cv2.imread('Images/' + filename)
        loaded_name = filename
    # cv2.imread returns None (no exception) when the file is missing or unreadable
    if image is None:
        print('  Skipped: could not read Images/' + filename)
        continue

    height, width = image.shape[:2]
    # Order each coordinate pair and clamp it to the image; a negative index
    # would otherwise wrap around to the opposite edge when slicing.
    x1, x2 = sorted((round(coord_df.loc[x, 'x1']), round(coord_df.loc[x, 'x2'])))
    y1, y2 = sorted((round(coord_df.loc[x, 'y1']), round(coord_df.loc[x, 'y2'])))
    x1, x2 = max(0, x1), min(width, x2)
    y1, y2 = max(0, y1), min(height, y2)

    ROI = image[y1:y2, x1:x2]
    # cv2.resize raises on an empty array, so skip boxes with no area
    if ROI.size == 0:
        print('  Skipped: empty bounding box')
        continue
    resized = cv2.resize(ROI, TARGET_SIZE, interpolation=cv2.INTER_AREA)

    # Any id other than 1 or 2 is stored under "grade", as in the original branches
    class_dir = CLASS_DIRS.get(class_id, 'grade')
    crop_ok = cv2.imwrite('cropped_images/' + class_dir + '/' + filename, ROI)
    resized_ok = cv2.imwrite('resized_images/' + class_dir + '/' + filename, resized)
    if not (crop_ok and resized_ok):
        print('  Warning: could not write output for ' + filename)
        continue
    # Only boxes that were actually written are counted
    im_count += 1
print("Total Images Processed: " + str(im_count))

Processing IMG_6149.JPG Class 1 ..
Processing IMG_6149.JPG Class 2 ..
Processing IMG_6149.JPG Class 3 ..
Processing IMG_6148.JPG Class 3 ..
Processing IMG_6148.JPG Class 2 ..
Processing IMG_6148.JPG Class 1 ..
Processing IMG_6147.JPG Class 3 ..
Processing IMG_6147.JPG Class 2 ..
Processing IMG_6147.JPG Class 1 ..
Processing IMG_6126.JPG Class 3 ..
Processing IMG_6126.JPG Class 2 ..
Processing IMG_6126.JPG Class 1 ..
Processing IMG_6122.JPG Class 3 ..
Processing IMG_6122.JPG Class 2 ..
Processing IMG_6122.JPG Class 1 ..
Processing IMG_6121.JPG Class 1 ..
Processing IMG_6121.JPG Class 2 ..
Processing IMG_6121.JPG Class 3 ..
Processing IMG_6131.JPG Class 1 ..
Processing IMG_6131.JPG Class 3 ..
Processing IMG_6131.JPG Class 2 ..
Processing IMG_6139.JPG Class 2 ..
Processing IMG_6139.JPG Class 3 ..
Processing IMG_6139.JPG Class 1 ..
Processing IMG_6137.JPG Class 1 ..
Processing IMG_6137.JPG Class 2 ..
Processing IMG_6137.JPG Class 3 ..
Processing IMG_6143.JPG Class 1 ..
Processing IMG_6143.