# Data Challenge - Group 16

This notebook contains the model and results for Data Challenge 1, made by Group 16.

Niels van der Heijden, Oula Osman Abou, Dalton Harmsen, Jip van Rooij, Phillipp Hauck, Shahrukh Tufail, Shadiah Ricardo

### Importing all the libraries we need

In [0]:
%tensorflow_version 1.x
import itertools
import multiprocessing.pool
import threading
from functools import partial
import argparse
import glob
import json
import os
import time

from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

import pandas as pd

import keras
from keras import backend as K
from keras import layers, models
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.backend import relu, sigmoid
from keras.preprocessing.image import ImageDataGenerator
from keras import applications

import numpy as np

import tensorflow as tf
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import utils
from tensorflow.python.saved_model import tag_constants, signature_constants
from tensorflow.python.saved_model.signature_def_utils_impl import build_signature_def, predict_signature_def
from tensorflow.contrib.session_bundle import exporter
from tensorflow.python.lib.io import file_io

Using TensorFlow backend.


## Loading the data

In [0]:
# Fetch the test and train archives only when they are not already present:
# `[ -f FILE ] || wget ...` runs wget only if the file-exists check fails.
![ -f testDataSmall.npz ] || wget -O testDataSmall.npz "https://www.win.tue.nl/~cdecampos/testDataSmall.npz"
![ -f trainDataSmall.npz ] || wget -O trainDataSmall.npz "https://www.win.tue.nl/~cdecampos/trainDataSmall.npz"
# Delete any `jobdir/` directory left in the working directory.
!rm -fr jobdir/

In [0]:
 def read_train_data():
    """Load the training set and augment it with rotated copies.

    Reads the ``X_train`` and ``Y_train`` arrays from ``trainDataSmall.npz``
    in the current working directory, then passes the images through
    ``rotateimg`` and the labels through ``rotateclass``.

    Returns:
        A two-element list ``[images, labels]`` holding the outputs of
        ``rotateimg`` and ``rotateclass`` respectively.
    """
    # The context manager closes the archive's file handle as soon as both
    # arrays have been read into memory.
    with np.load("trainDataSmall.npz") as data:
        X_train = data["X_train"]
        Y_train = data["Y_train"]
    rot = rotateimg(X_train)
    rotclass = rotateclass(Y_train)
    return [rot, rotclass]


def read_test_data():
    """Load the test set.

    Reads the ``X_test`` and ``Y_test`` arrays from ``testDataSmall.npz`` in
    the current working directory. The arrays are returned as stored; no
    rotation is applied here.

    Returns:
        A two-element list ``[X_test, Y_test]``.
    """
    # The context manager closes the archive's file handle as soon as both
    # arrays have been read into memory.
    with np.load("testDataSmall.npz") as data:
        X_test = data["X_test"]
        Y_test = data["Y_test"]
    return [X_test, Y_test]

def rotateimg(data):
    """
    Build an augmented image stack holding every input image rotated by
    0, 1, 2 and 3 quarter turns (via np.rot90).

    Takes images in shape [?,128,128,3] and returns a float array in shape
    [?*4,128,128,3]. The output is grouped by rotation: all unrotated images
    first, then all images rotated once, and so on, keeping the input order
    inside each group.
    """
    count = len(data)
    rotated = np.empty((count * 4, 128, 128, 3))
    for quarter_turns in range(4):
        # Each rotation fills its own contiguous run of `count` slots.
        first_slot = quarter_turns * count
        for offset, image in enumerate(data):
            rotated[first_slot + offset] = np.rot90(image, quarter_turns)
    return rotated

def rotateclass(data):
    """
    Repeat the classification rows four times so they line up with an image
    stack that contains four rotations of every image.

    Takes classifications in shape [?,5] and returns a float array in shape
    [?*4,5]: the complete set of input rows, in order, written four times.
    """
    count = len(data)
    repeated = np.empty((count * 4, 5))
    for copy_number in range(4):
        # Each copy fills its own contiguous run of `count` slots.
        first_slot = copy_number * count
        for offset, label_row in enumerate(data):
            repeated[first_slot + offset] = label_row
    return repeated

In [0]:
# Load both datasets once. Each call returns a two-element list
# [images, labels]; the training set comes back with rotated copies added.
train_data = read_train_data()
test_data = read_test_data()

## Our model

In [0]:
# Small CNN: two convolutions, one max-pool, dropout, then a 5-way softmax.
model = models.Sequential([
    Conv2D(32, kernel_size=(6, 6), activation='relu', input_shape=(128, 128, 3)),
    Conv2D(32, (3, 3), activation='relu', kernel_initializer='glorot_uniform'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(5, activation='softmax', kernel_initializer='glorot_uniform'),
])
model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy'],
)

In [0]:
# Train for 10 epochs, holding out 20% of the samples for validation.
# NOTE(review): per the Keras docs, validation_split takes the *last* samples
# before shuffling. rotateimg() stacks the rotated copies after the originals,
# so the held-out slice presumably consists of rotated versions of images that
# are also trained on -- validation accuracy may be optimistic; confirm.
model.fit (train_data[0], train_data[1], validation_split=0.2,  epochs=10)

In [0]:
# Evaluate on the test set; 'accuracy' is the metric configured at compile time.
model.evaluate(test_data[0], test_data[1])

## Post-processing

### Average confidence over classes

#### Functions

In [0]:
def average_confidence(model, test):
    """Print the mean prediction for each of the five true classes.

    Arguments
    ---------
    model: object with a ``predict`` method; it is called once per sample
           on a batch of size one.
    test:  indexable pair where ``test[0]`` holds the samples and
           ``test[1]`` the matching one-hot label rows of length five.

    Five lines are printed, one per true class in label order. Each line is
    a list with the element-wise mean of the predictions made for that
    class, or an empty list when the class has no samples. Label rows that
    are not an exact one-hot vector are skipped. Nothing is returned.
    """
    one_hot_rows = (
        [1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.],
    )

    # Sample positions grouped by true class.
    positions_by_class = [[] for _ in one_hot_rows]
    for position in range(len(test[1])):
        for bucket, pattern in zip(positions_by_class, one_hot_rows):
            if np.array_equal(test[1][position], pattern):
                bucket.append(position)
                break

    # Run every prediction, class by class, before anything is printed.
    predictions_by_class = [
        [model.predict(np.expand_dims(test[0][position], axis=0))
         for position in bucket]
        for bucket in positions_by_class
    ]

    for class_predictions in predictions_by_class:
        print([list(np.mean(rows, axis=0)) for rows in zip(*class_predictions)])

def average_confidence3(model, test):
    """Print mean predictions after merging the five classes into three.

    The merged groups are class 0 alone, classes 1 and 2 together, and
    classes 3 and 4 together (zero-based positions in the one-hot label).

    Arguments
    ---------
    model: object with a ``predict`` method; it is called once per sample
           on a batch of size one and is expected to return one row of
           five values.
    test:  indexable pair where ``test[0]`` holds the samples and
           ``test[1]`` the matching one-hot label rows of length five.

    Three lines are printed, one per merged true group. Each line is a list
    of three values: the group's five-way predictions are averaged and the
    columns belonging to the same merged group are then added together. An
    empty list is printed for a group that has no samples. Label rows that
    are not an exact one-hot vector are skipped. Nothing is returned.
    """
    merged_groups = (
        ([1., 0., 0., 0., 0.],),
        ([0., 1., 0., 0., 0.], [0., 0., 1., 0., 0.]),
        # Both members are matched explicitly; a misplaced parenthesis used
        # to drop every sample of the last class from this group.
        ([0., 0., 0., 1., 0.], [0., 0., 0., 0., 1.]),
    )

    # Sample positions grouped by merged true class.
    positions_by_group = [[] for _ in merged_groups]
    for position, label in enumerate(test[1]):
        for bucket, patterns in zip(positions_by_group, merged_groups):
            if any(np.array_equal(label, pattern) for pattern in patterns):
                bucket.append(position)
                break

    # Run every prediction, group by group, before anything is printed.
    predictions_by_group = [
        [model.predict(np.expand_dims(test[0][position], axis=0))
         for position in bucket]
        for bucket in positions_by_group
    ]

    for group_predictions in predictions_by_group:
        averaged = [list(np.mean(rows, axis=0)) for rows in zip(*group_predictions)]
        if not averaged:
            # No samples for this group: report that instead of failing on
            # the missing first row.
            print([])
            continue
        row = averaged[0]
        print([row[0], row[1] + row[2], row[3] + row[4]])


#### Computing the average confidence over 3 and 5 classes

In [0]:
# Print the average confidence per true class. average_confidence prints five
# lists: the first is the mean prediction when class 0 is the true value, and
# so on. average_confidence3 then prints three lists: the confidences after
# post-processing the five classes into three merged groups.
average_confidence(model, test_data)
average_confidence3(model, test_data)

### Confusion matrices

#### Functions

In [0]:
#creates a list of all of the true class values.
def label_creator(array):
    """Convert one-hot label rows into a flat list of class indices.

    For every row, each of the first five positions whose value converts to
    the integer 1 contributes its index (0-4) to the result, in row order.
    """
    return [
        position
        for row in array
        for position in range(0, 5)
        if int(row[position]) == 1
    ]

#defines a helper function for plotting confusion matrices
import numpy as np
import numpy as np
import matplotlib.pyplot as plt

from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    given a sklearn confusion matrix (cm), make a nice plot

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix
                  (an array or a nested list of counts)

    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low']
                  pass None to leave the default tick labels

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues
                  defaults to 'Blues' when None

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions (each row divided by its
                  total; a row without any samples is shown as zeros)

    Returns
    -------
    None. The figure is displayed with plt.show().

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    import matplotlib.pyplot as plt
    import numpy as np
    import itertools

    # Accept nested lists as well as arrays; the code below uses array methods.
    cm = np.asarray(cm)

    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    plt.figure(figsize=(8, 6))
    # NOTE(review): the image is drawn from the raw counts, before the optional
    # normalization below, so cell colours follow counts while the cell text
    # (and its black/white threshold) follows proportions -- confirm intended.
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    if normalize:
        # Divide each row by its total. Rows whose total is zero keep the
        # zeros from `out` instead of turning into NaN, which would also make
        # the threshold below NaN.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    # Cells above the threshold get white text so it stays readable on the
    # darker end of the colour map.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.show()

#### Plotting the confusion matrix

In [0]:
#plots 5x5 confusion matrix
classes = [0,1,2,3,4]
y_predicted = model.predict_classes(test_data[0])
Y_test_lst = label_creator(test_data[1])
# TensorFlow computation of the same matrix; not used by the plot below.
con_mat = tf.math.confusion_matrix(labels = Y_test_lst, predictions = y_predicted)
# `labels` is passed by keyword: newer scikit-learn releases no longer accept
# it as a third positional argument.
cm = confusion_matrix(Y_test_lst, y_predicted, labels=classes)

plot_confusion_matrix(cm, classes, title = 'frist_prototype_contrast', normalize = True, cmap = 'Reds')

In [0]:
#plots 3x3 confusion matrix
# Merge the five classes into three groups: class 0, classes 1+2, classes 3+4.
# Row g of `grouping` marks which of the five original classes form group g.
grouping = np.array([[1, 0, 0, 0, 0],
                     [0, 1, 1, 0, 0],
                     [0, 0, 0, 1, 1]])

# 5x3: predicted-label columns merged, true-label rows still separate.
new_cm = np.dot(cm, grouping.T)

# 3x3: true-label rows merged as well.
new_cm2 = np.dot(grouping, new_cm)

# One tick label per merged group; the five-element `classes` list does not
# fit a 3x3 matrix.
classes3 = ['0', '1+2', '3+4']
plot_confusion_matrix(new_cm2, classes3, title = 'frist_prototype_contrast', normalize = True, cmap = 'Reds')