In [1]:
import pandas as pd
import numpy as np
import cv2
from keras.preprocessing import image
import os
import matplotlib.pyplot as plt
import seaborn as sns

Using TensorFlow backend.


In [2]:
# Folder the image files are read from (each file is looked up as "<image name>.jpeg")
images_directory = "data/resized_train_cropped/resized_train_cropped"

# Label table; the "image" and "level" columns are the ones used by the functions below
df_labels = pd.read_csv('data/trainLabels_cropped.csv')
# Every image name from the label table, in ascending order
images_names = sorted(df_labels["image"].values)

### Preprocessing functions

In [3]:
def show_image(img):
    """
    Plot an image with matplotlib.
    The channel order is converted from BGR to RGB before the image is drawn.
    """
    rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb_img)
    
def get_severity(image_name):
    """
    Looks up the severity level stored in df_labels for the given image name.
    Returns the 'level' value of the first row whose 'image' equals image_name.
    """
    matching_rows = df_labels[df_labels['image'] == image_name]
    severity_levels = matching_rows.level.values
    # Only the first match is used
    return severity_levels[0]

def create_multilabels(severity, num_levels=5):
    """
    Create a cumulative multilabel from severity level:
    positions 0..severity are set to 1, all later positions stay 0.
    severity: 0 -> multilabel: [1, 0, 0, 0, 0]
    severity: 1 -> multilabel: [1, 1, 0, 0, 0]
    ...
    severity: 4 -> multilabel: [1, 1, 1, 1, 1]

    severity: integer severity level, 0 <= severity < num_levels
    num_levels: length of the returned multilabel (default 5, i.e. levels 0..4)

    Returns a list of num_levels ints, each 0 or 1.
    Raises IndexError if severity is outside 0..num_levels-1.
    """
    # Validate up front: a negative severity used to yield an all-zero label
    # silently, and a too large one failed with an unexplained IndexError.
    # The exception type stays IndexError so existing handlers keep working.
    if not 0 <= severity < num_levels:
        raise IndexError(
            "severity {} is outside the valid range 0..{}".format(severity, num_levels - 1)
        )

    multi_label = [0] * num_levels
    for i in range(severity + 1):
        multi_label[i] = 1
    return multi_label

def get_image_array(path_to_image):
    """
    Read the image file at path_to_image and return the result of cv2.imread.
    No resizing or other processing is applied here.
    """
    # NOTE(review): cv2.imread is documented to return None (rather than raise)
    # when a file cannot be read - callers may want to check for that.
    return cv2.imread(path_to_image)
    
def get_images_and_labels():
    """
    Goes through all names in images_names and loads each image with its labels.

    For every name the file "<name>.jpeg" is read from images_directory, its
    severity is looked up with get_severity and the multilabel is built with
    create_multilabels.

    Returns 3 numpy arrays: images, labels and multilabels.
    All three are filled in the same loop iteration, so images[i], labels[i]
    and multi_labels[i] always represent the same image.

    Raises OSError if an image file cannot be read.
    """
    images = []
    labels = []
    multi_labels = []
    for image_name in images_names:
        path_to_img = os.path.join(images_directory, image_name + ".jpeg")

        image_array = get_image_array(path_to_img)
        if image_array is None:
            # cv2.imread (which get_image_array relies on) is documented to
            # return None for a missing or undecodable file. Stop here with
            # the offending path instead of letting a None slip into the
            # list that is stacked into the images array below.
            raise OSError("Could not read image file: {}".format(path_to_img))
        images.append(image_array)

        label = get_severity(image_name)
        labels.append(label)

        multi_labels.append(create_multilabels(label))

    return np.array(images), np.array(labels), np.array(multi_labels)

In [4]:
# Load every image listed in images_names together with its severity label and
# multilabel; the three returned arrays share the same ordering.
images, labels, multi_labels = get_images_and_labels()

In [5]:
# np.save does not create missing folders, so make sure the output directory
# exists before writing (no-op when it is already there).
os.makedirs("data/npy_files", exist_ok=True)

# Store the three arrays on disk so they can be reloaded without re-reading every image
np.save("data/npy_files/images_arrays.npy", images)
np.save("data/npy_files/labels.npy", labels)
np.save("data/npy_files/multi_labels.npy", multi_labels)