In [1]:
import pandas as pd
import numpy as np
import cv2
from keras.preprocessing import image
import os

Using TensorFlow backend.


In [3]:
images_directory = "data/resized_train_cropped/resized_train_cropped"

df_labels = pd.read_csv('data/trainLabels_cropped.csv')
images_names = sorted(df_labels.image.values) 
df_labels.head()
images_names


['10003_left',
 '10003_right',
 '10007_left',
 '10007_right',
 '10009_left',
 '10009_right',
 '1000_left',
 '1000_right',
 '10010_left',
 '10010_right',
 '10013_left',
 '10013_right',
 '10014_left',
 '10014_right',
 '10015_left',
 '10015_right',
 '10017_left',
 '10017_right',
 '10022_left',
 '10022_right',
 '10028_left',
 '10028_right',
 '10029_left',
 '10029_right',
 '1002_left',
 '1002_right',
 '10030_left',
 '10030_right',
 '10031_left',
 '10031_right',
 '10032_left',
 '10032_right',
 '10035_left',
 '10035_right',
 '10042_left',
 '10042_right',
 '10043_left',
 '10043_right',
 '10046_left',
 '10046_right',
 '10047_left',
 '10047_right',
 '10048_left',
 '10048_right',
 '10050_left',
 '10050_right',
 '10053_left',
 '10053_right',
 '10058_left',
 '10058_right',
 '10059_left',
 '10059_right',
 '10061_left',
 '10061_right',
 '10065_left',
 '10065_right',
 '10069_left',
 '10069_right',
 '10073_left',
 '10073_right',
 '10078_left',
 '10078_right',
 '10081_left',
 '10081_right',
 '10085_left

In [4]:
def show_image(img):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    
def get_severity(image_name):
    """
    Finds an appropriate observatiion by image name. Returns its image severity
    """
    return df_labels[df_labels['image'] == image_name].level.values[0]

def create_multilabels(severity):
    """
    Create a multilabel from severity level.
    severity: 0 -> multilabel: [1, 0, 0, 0, 0]
    severity: 1 -> multilabel: [1, 1, 0, 0, 0]
    ...
    severity: 4 -> multilabel: [1, 1, 1, 1, 1]
    """
    multi_label = [0, 0, 0, 0, 0]
    for i in range(severity+1):
        multi_label[i] = 1
    return multi_label

def get_image_array(path_to_image):
    """
    Return an resised array of an image.
    """
    image = cv2.imread(path_to_image)
    image = cv2.resize(image, dsize=(224, 224))
    return image 
    
def get_images_and_labels():
    """
    Goes through all sorted images names. Return 3 arrays: images, labels and multilabels.
    Order of those 3 is the same, because images_names is sorted,
    so images[0], labels[0] and multi_labels[0] represent the same image
    """
    images = []
    labels = []
    multi_labels = []
    for filename in images_names[:1000]:
        image_name = filename
        filename = filename + ".jpeg"
        path_to_img = os.path.join(images_directory, filename)
        
        image = get_image_array(path_to_img)
        images.append(image)
        
        label = get_severity(image_name)
        labels.append(label)
        
        multi_label = create_multilabels(label)
        multi_labels.append(multi_label)
        
    return np.array(images), np.array(labels), np.array(multi_labels)

In [5]:
images, labels, multi_labels = get_images_and_labels()

In [6]:
np.save("data/npy_files/images_arrays.npy", images)
np.save("data/npy_files/labels.npy", labels)
np.save("data/npy_files/multi_labels.npy", multi_labels)