In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Input, Dropout, Convolution2D, MaxPooling2D, Flatten
from keras.utils import np_utils


In [None]:
# Side length, in pixels, of the square images used throughout the notebook.
PICTURE_SIZE = 64
# One character per colour channel; iterating the string yields 'r', 'g', 'b'.
CHANNELS = 'rgb'

# Channel-prefixed column names: all 'r' columns first, then 'g', then 'b'
# (r0 .. r4095, g0 .. g4095, b0 .. b4095 for a 64x64 picture).
INPUT_COLUMNS = [
    '%s%i' % (color, i)
    for color in CHANNELS
    for i in range(PICTURE_SIZE ** 2)
]

# Integer column labels 0 .. N-1, i.e. the default labels pandas assigns when a
# DataFrame is built from a 2-D array without column names. N is derived from
# the constants above instead of being hard-coded, so it cannot drift out of
# sync when PICTURE_SIZE or CHANNELS change.
old_column = list(range(PICTURE_SIZE ** 2 * len(CHANNELS)))

In [None]:
import cv2

def load_images(path, size=(64, 64)):
    """Load an image dataset laid out as one sub-folder per class.

    Parameters
    ----------
    path : str
        Root folder. Every sub-directory is treated as one class and its name
        is used as the class label. Non-directory entries are ignored.
    size : tuple of int, optional
        ``(width, height)`` every image is resized to. Defaults to (64, 64).

    Returns
    -------
    images : numpy.ndarray
        The resized images, converted from OpenCV's BGR order to RGB.
    indices : numpy.ndarray
        For each image, the position of its class in ``labels``.
    labels : list of str
        Class names (sub-folder names) in sorted order.
    """
    import warnings

    img_data = []
    idx_to_label = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting makes the
    # class-index assignment reproducible and identical for any two folders
    # that contain the same class names.
    for fruit in sorted(os.listdir(path)):
        fruit_path = os.path.join(path, fruit)
        if not os.path.isdir(fruit_path):
            # Stray files in the root (e.g. a README) are not classes.
            continue
        class_index = len(labels)
        labels.append(fruit)
        for img in sorted(os.listdir(fruit_path)):
            img_path = os.path.join(fruit_path, img)
            image = cv2.imread(img_path)
            if image is None:
                # cv2.imread signals an unreadable/non-image file with None;
                # passing that on to cv2.resize would raise an opaque error.
                warnings.warn('Skipping unreadable image: %s' % img_path)
                continue
            image = cv2.resize(image, size)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            img_data.append(image)
            idx_to_label.append(class_index)
    return np.array(img_data), np.array(idx_to_label), labels

In [None]:
# Root folder that contains the 'Training' and 'Validation' sub-folders.
# It can be overridden with the FRUITS_DATA_DIR environment variable so the
# notebook also runs on machines where the original absolute path is absent.
DATA_DIR = os.environ.get(
    'FRUITS_DATA_DIR',
    '/home/fede/Documentos/Machine-learning/PT-1/data',
)
training_data_path = os.path.join(DATA_DIR, 'Training')
validation_data_path = os.path.join(DATA_DIR, 'Validation')

# Each call returns (images, class indices, class names). The class names of
# the validation split are kept under label_data_garbage.
X_train, y_train, label_data = load_images(training_data_path)
X_test, y_test, label_data_garbage = load_images(validation_data_path)

In [None]:
# Flatten every training image into a single row: one row per sample.
train_rearmado = np.reshape(X_train, (len(y_train), -1))

trainDF = pd.DataFrame(train_rearmado)

# Series of class indices (one per image), with each index then replaced by
# the class name found at that position in label_data.
etiqueta = pd.Series(y_train).replace(dict(enumerate(label_data)))

trainDF['etiqueta'] = etiqueta
print(trainDF.shape)

In [None]:
# Show five randomly sampled rows of trainDF as the cell output.
trainDF.sample(5)

In [None]:
# Flatten every validation image into a single row; -1 lets NumPy infer the
# row length instead of hard-coding 64 * 64 * 3.
test_rearmado = X_test.reshape(len(y_test), -1)

testDF = pd.DataFrame(data=test_rearmado)

# y_test holds positions in the label list that load_images returned together
# with X_test (label_data_garbage), NOT positions in the training label list.
# Both lists come from separate directory listings whose order is not
# guaranteed to match, so the indices must be translated with their own list.
etiqueta = pd.Series(data=y_test).map(dict(enumerate(label_data_garbage)))

testDF['etiqueta'] = etiqueta

In [None]:
def show_images(samples):
    """Display every row of ``samples`` as a picture, one figure per row.

    ``samples`` is a DataFrame whose ``old_column`` columns hold one image
    flattened in (row, column, channel) order — the layout produced when the
    (PICTURE_SIZE, PICTURE_SIZE, channels) arrays are reshaped into rows, and
    the same layout ``extract_inputs`` restores.
    """
    for _, sample in samples.iterrows():
        # The row also carries the string label, so its values are objects;
        # convert the pixel columns to the builtin float (np.float was removed
        # from NumPy) before scaling them for imshow.
        pixels = sample[old_column].values.astype(float)
        # Rebuild height x width x channels directly: the data is not stored
        # channel-first, so no transpose is needed.
        sample_as_grid = pixels.reshape(PICTURE_SIZE, PICTURE_SIZE, len(CHANNELS)) / 255

        plt.axis('off')
        plt.imshow(sample_as_grid, interpolation='nearest')

        plt.show()

# Show five randomly sampled rows of testDF as the cell output.
testDF.sample(5)

In [None]:
# Plot five randomly sampled training rows as images.
show_images(trainDF.sample(5))

In [None]:
# Independent copies of both frames, so later steps do not touch the originals.
train, test = trainDF.copy(), testDF.copy()

# (name, frame) pairs for the two splits.
sets = (('train', train), ('test', test))

In [1]:
def extract_inputs(dataset):
    """
    Pull out of the dataset only the columns used as model input.

    The columns listed in ``old_column`` are reshaped into an array of shape
    (len(dataset), PICTURE_SIZE, PICTURE_SIZE, len(CHANNELS)) and divided
    by 255.
    """
    image_shape = (PICTURE_SIZE, PICTURE_SIZE, len(CHANNELS))
    pixels = dataset[old_column].values
    # quick and simple scaling: divide everything by 255
    return pixels.reshape((len(dataset),) + image_shape) / 255


def extract_outputs(dataset):
    """
    Pull out of the dataset only the column used as model output.

    Returns one boolean column per entry of ``label_data``: row i, column j
    is True when row i of ``dataset`` has ``etiqueta`` equal to
    ``label_data[j]``.
    """
    labels_column = dataset.etiqueta
    masks = []
    for fruit in label_data:
        # One boolean mask per class: True where the row belongs to it.
        masks.append((labels_column == fruit).values)
    # Stacked masks are (classes, rows); transpose to (rows, classes).
    return np.transpose(np.array(masks))

In [None]:
# Small CNN assembled layer by layer.
model = Sequential()

# Convolutional feature extractor: two 4x4 convolutions, pooling and dropout.
model.add(Convolution2D(
    8, (4, 4),
    activation='relu',
    input_shape=(PICTURE_SIZE, PICTURE_SIZE, len(CHANNELS)),
))
model.add(Convolution2D(8, (4, 4), activation='relu'))
model.add(MaxPooling2D(pool_size=(4, 4)))
model.add(Dropout(0.2))
model.add(Flatten())

# Fully connected head: two tanh layers, each followed by dropout.
model.add(Dense(10, activation='tanh'))
model.add(Dropout(0.2))
model.add(Dense(10, activation='tanh'))
model.add(Dropout(0.2))

# One softmax output unit per class name in label_data.
model.add(Dense(len(label_data), activation='softmax'))

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Fit on the training split, passing the test split as validation data.
history = model.fit(
    x=extract_inputs(train),
    y=extract_outputs(train),
    validation_data=(extract_inputs(test), extract_outputs(test)),
    batch_size=128,
    epochs=5,
)