### Import libraries

In [1]:
import pandas as pd
import numpy as np
import os
import glob

os.environ['KERAS_BACKEND'] = 'tensorflow'
import keras
from sklearn.model_selection import train_test_split
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from PIL import Image
from matplotlib.pyplot import imshow


Using TensorFlow backend.


### Exploratory Analysis

In [2]:
# Discover the dataset: every sub-directory of `dataset-original/` is treated as
# one class, and every `.jpg` file directly inside it as one sample of that class.
# Non-directory entries (e.g. a stray README or archive next to the class
# folders) are skipped so they do not turn into empty, mislabeled classes.
# NOTE(review): glob returns entries in arbitrary, filesystem-dependent order, so
# the integer assigned to each class may differ between machines; wrap the glob
# in sorted() if stable label ids are required.
image_filepaths = [p for p in glob.glob('dataset-original/*') if os.path.isdir(p)]
classes = []  # classes[i] -> list of image file paths belonging to class i
category_mapping = {}  # class index -> class (directory) name

for i, class_dir in enumerate(image_filepaths):
    classes.append(glob.glob(os.path.join(class_dir, '*.jpg')))
    # basename instead of split('/')[1]: works with any path separator and does
    # not depend on how deeply the dataset root is nested.
    category_mapping[i] = os.path.basename(os.path.normpath(class_dir))

In [3]:
# Show which integer label was assigned to each class directory name.
print(category_mapping)

{0: 'paper', 1: 'metal', 2: 'cardboard', 3: 'trash', 4: 'glass', 5: 'plastic'}


In [4]:
# Summarise the dataset: number of classes, images per class, and the overall total.
print("Number of classes:",len(classes),'\n')

# Class names are derived with os.path.basename rather than split('/')[1], which
# breaks with a different path separator or a more deeply nested dataset root.
for class_dir, class_files in zip(image_filepaths, classes):
    print(os.path.basename(os.path.normpath(class_dir)),":",len(class_files),"images")

# Total sample count across all classes.
NUM_IMAGES = sum(len(c) for c in classes)
print("\nTotal number of images",NUM_IMAGES)

Number of classes: 6 

paper : 594 images
metal : 410 images
cardboard : 403 images
trash : 137 images
glass : 501 images
plastic : 482 images

Total number of images 2527


### Load images

In [5]:
# Target size, in pixels, that every image is resized to when it is loaded.
HEIGHT = 150
WIDTH = 150

In [6]:
# Pre-allocate the image tensor (one HEIGHT x WIDTH x 3 slot per image) and a
# single-column label array with one row per image.
# The pixels are stored as float32: half the memory of NumPy's default float64,
# which matters at NUM_IMAGES * HEIGHT * WIDTH * 3 values.
X = np.zeros((NUM_IMAGES, HEIGHT, WIDTH, 3), dtype=np.float32)
y = np.zeros((NUM_IMAGES,1))
print(X.shape,y.shape)

(2527, 150, 150, 3) (2527, 1)


## IMPORTANT: only 10 images per class are loaded for now — increase `images_per_class` in the next cell to use the full dataset

In [7]:
# Load up to `images_per_class` images from every class into X (pixels) and
# y (integer class index).
# `k` is the next free row of X / y; once the loop finishes it equals the number
# of images actually loaded.
k = 0
images_per_class = 10 # TODO: CHANGE HERE

for idx, c in enumerate(classes):
    for image in c[:images_per_class]:
        # `with` closes the underlying file as soon as the pixels are copied.
        with Image.open(image) as im:
            # convert('RGB') guarantees three channels, so grayscale / RGBA /
            # CMYK files still fit the (HEIGHT, WIDTH, 3) slot.
            # PIL's resize expects (width, height), not (height, width).
            X[k] = np.asarray(im.convert('RGB').resize((WIDTH, HEIGHT)))
        y[k] = idx
        k += 1

### Basic CNN 

In [8]:
# Training hyper-parameters.
batch_size = 1
num_classes = 6
epochs = 2

# Only the first `k` rows of X / y were filled by the loading loop; the rest are
# still all-zero placeholders. Using the loop counter (instead of
# num_classes * images_per_class) stays correct when a class holds fewer than
# `images_per_class` images.
total_images = k

# Hold out 20% of the loaded images.
# random_state makes the split reproducible across runs; stratify keeps the
# class proportions the same in both parts, which matters because the classes
# are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X[:total_images],
    y[:total_images],
    test_size=0.20,
    random_state=42,
    stratify=y[:total_images])

In [9]:
# Sanity check: print the shapes of the train/test images and of their labels.
print(X_train.shape,X_test.shape, y_train.shape, y_test.shape)

(48, 150, 150, 3) (12, 150, 150, 3) (48, 1) (12, 1)


In [13]:
print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'training samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices (one-hot representation)
# The shape guards make this cell safe to re-run: labels that already have
# `num_classes` columns are left alone instead of being one-hot encoded a
# second time.
if y_train.shape[-1] != num_classes:
    y_train = keras.utils.to_categorical(y_train, num_classes)
if y_test.shape[-1] != num_classes:
    y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (48, 150, 150, 3)
48 training samples
12 test samples


In [10]:
# Print the shapes of the training and test label arrays.
# NOTE(review): the execution counts suggest this cell ran as In [10], i.e.
# before the one-hot encoding cell (In [13]); the recorded output therefore
# appears to show the labels before encoding — confirm with Restart & Run All.
print(y_train.shape,y_test.shape)

(48, 1) (12, 1)


In [11]:
# Small CNN: two convolutional stages (32 then 64 filters), each followed by
# max-pooling and dropout, then a 512-unit dense layer and a softmax output
# with one unit per class.
model = Sequential([
    # --- convolutional stage 1: 32 filters ---
    Conv2D(32, (3, 3), padding='same', input_shape=X_train.shape[1:]),
    Activation('relu'),
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # --- convolutional stage 2: 64 filters ---
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # --- classifier head ---
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])

# Categorical cross-entropy expects one-hot encoded labels.
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train the CNN on the in-memory arrays; the held-out split is passed as
# validation data so its metrics are reported after every epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

# Final metrics on the held-out split: index 0 is the loss, index 1 the accuracy.
score = model.evaluate(X_test, y_test, verbose=0)

print('Test loss:', score[0])
print('Test accuracy:', score[1])

## Trying out data augmentation (this can also be done while reading the images straight from a directory)

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1./255)

# Step counts must be whole numbers; round up so a final partial batch is still
# visited when the sample count is not a multiple of batch_size.
train_steps = int(np.ceil(len(X_train) / batch_size))
validation_steps = int(np.ceil(len(X_test) / batch_size))

# NOTE(review): this reuses the existing `model` object. If the earlier training
# cell was run, the weights have already been fitted on unscaled 0-255 pixels —
# rebuild the model first for a clean comparison.
# fits the model on batches with real-time data augmentation:
model.fit_generator(train_datagen.flow(X_train, y_train, batch_size=batch_size),
                    steps_per_epoch=train_steps,
                    epochs=epochs, verbose=1,
                    # Same batch size as assumed by validation_steps; flow()'s
                    # default of 32 would not match the step count.
                    validation_data=test_datagen.flow(X_test, y_test,
                                                      batch_size=batch_size),
                    validation_steps=validation_steps)

# Evaluate on pixels scaled exactly like the generators scale them (1/255);
# feeding raw 0-255 values to a model trained on rescaled inputs would make the
# reported loss and accuracy meaningless.
score = model.evaluate(X_test * (1./255), y_test, verbose=0)

print('Test loss:', score[0])
print('Test accuracy:', score[1])

Epoch 1/2
