In [None]:
import os
import time

import numpy as np
import pandas as pd
from PIL import Image

In [None]:
# Directory (relative to the notebook's working directory) that holds the
# resized training images; passed as `file_path` to
# convert_images_to_arrays_train in the conversion cell further down.
resized_train_path = '../../data/diabetic-retinopathy/train-resized-256/'

In [None]:
def change_image_name(df, column):
    """
    Builds image file names by adding the '.jpeg' suffix to every value of
    one DataFrame column.

    INPUT
        df: Pandas DataFrame holding the column of image names.
        column: Name of the column to read. Takes a string input.

    OUTPUT
        A new Python list containing each value of the column with '.jpeg'
        appended, in row order. The DataFrame itself is not modified; assign
        the result back to a column if the frame should be updated.
    """
    suffix = '.jpeg'
    renamed = []
    for image_name in df[column]:
        renamed.append(image_name + suffix)
    return renamed


def convert_images_to_arrays_train(file_path, df, column='train_image_name'):
    """
    Loads every image named in the DataFrame and collects the pixel data
    into a single NumPy array.

    INPUT
        file_path: Directory holding the resized images. A trailing path
            separator is optional.
        df: Pandas DataFrame whose `column` holds the image file names
            (including the file extension).
        column: Name of the column with the file names. Defaults to
            'train_image_name'.

    OUTPUT
        NumPy array with one entry per row of df, in row order. An empty
        DataFrame yields an empty array.
        NOTE(review): presumably all images share the same dimensions so the
        entries stack into one regular array -- confirm for new datasets.
    """

    def _load(image_name):
        # Pillow keeps the file open until the pixel data has been read;
        # the context manager releases the handle deterministically, which
        # matters when many thousands of files are opened in one call.
        with Image.open(os.path.join(file_path, image_name)) as img:
            # np.array copies the pixels, so the result stays valid after
            # the image is closed.
            return np.array(img)

    return np.array([_load(image_name) for image_name in df[column]])


def save_to_array(arr_name, arr_object):
    """
    Writes a NumPy array to disk with np.save. Used for persisting the
    train and test image arrays.

    INPUT
        arr_name: Destination file name, given as a path string.
        arr_object: NumPy array (e.g. an array of image arrays) to write.

    OUTPUT
        The return value of np.save, which is None; the useful effect of
        this function is the file written to arr_name.
    """
    outcome = np.save(arr_name, arr_object)
    return outcome

In [None]:
# labels = pd.read_csv("trainLabels_master_256_v2.csv")
# X_train = convert_images_to_arrays_train(resized_train_path, labels)
# print(X_train.shape)
# print("Saving Train Array")
# save_to_array('X_train.npy', X_train)

In [None]:
# Load the label table; its 'train_image_name' column is what
# convert_images_to_arrays_train reads by default.
labels = pd.read_csv("trainLabels_master_256_v2.csv")

# Split the rows into two consecutive, near-equal chunks. The row positions
# are split (instead of handing the DataFrame itself to np.array_split)
# because NumPy would otherwise go through the deprecated
# DataFrame.swapaxes; slicing with .iloc keeps every chunk a DataFrame with
# its original index.
df_split = [labels.iloc[rows] for rows in np.array_split(np.arange(len(labels)), 2)]

In [None]:
# Write every chunk of the label table to its own CSV. The chunk number is
# used as the file-name prefix (0X_..., 1X_..., ...), so the loop keeps
# working unchanged if the number of chunks in df_split is altered.
for chunk_idx, chunk in enumerate(df_split):
    chunk.to_csv(str(chunk_idx) + "X_trainLabels_master_256_v2.csv", index=False)

In [None]:
# Wall-clock timer for the whole conversion run.
start_time = time.time()

labels = pd.read_csv("trainLabels_master_256_v2.csv")

# Process the label table in two consecutive chunks (presumably to limit
# how many images are held in memory at once). Row positions are split and
# applied with .iloc so the deprecated DataFrame.swapaxes path inside
# np.array_split is never taken.
df_split = [labels.iloc[rows] for rows in np.array_split(np.arange(len(labels)), 2)]
print("Writing Train Array")

for i, chunk in enumerate(df_split):
    # X_train is rebound on every pass, so the previous chunk's array can
    # be reclaimed once the next one has been built.
    X_train = convert_images_to_arrays_train(resized_train_path, chunk)

    print(X_train.shape)

    print("Saving Train Array")
    # One file per chunk: 0X_train.npy, 1X_train.npy, ...
    save_to_array(str(i) + 'X_train.npy', X_train)

print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Drop the reference to the last chunk array left over from the conversion
# loop so its memory can be reclaimed before the saved chunks are reloaded.
del X_train

In [None]:
# NOTE(review): numpy is already imported in the first cell; this repeat
# presumably lets the reload cells run on a fresh kernel without redoing the
# conversion above -- confirm before removing.
import numpy as np
# Reload the first chunk array ('0X_train.npy') written by the conversion loop.
X_train0 = np.load('0X_train.npy')

In [None]:
# Display the shape of the first chunk as the cell output (sanity check).
X_train0.shape

In [None]:
# Reload the second chunk array ('1X_train.npy') written by the conversion loop.
X_train1 = np.load('1X_train.npy')

In [None]:
# Display the shape of the second chunk as the cell output (sanity check).
X_train1.shape

In [None]:
# Join the two chunks back into one array along the first axis (chunk 0
# first). Both chunks and the combined result are referenced at the same
# time here, so peak memory is roughly twice the size of the full array.
x_train = np.concatenate([X_train0,X_train1])

In [None]:
# Persist the combined array as 'x_train.npy' via the save_to_array helper.
save_to_array('x_train.npy', x_train)