In [1]:
import numpy as np
import pandas as pd
import sys
import os
import matplotlib.pyplot as plt

import tensorflow as tf
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Activation, Input, Dropout, Add
from keras.layers.normalization import BatchNormalization
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam

Using TensorFlow backend.


In [7]:
ls

A2 Tests.ipynb               nohup.out
A2 model evaluation.ipynb    predicate-matrix-binary.txt
HW2.pdf                      predicates.txt
[1m[34m__pycache__[m[m/                 sample_submission.txt
classes.txt                  test_images.txt
eval_awa.py                  testclasses.txt
[1m[34mimages_128x128[m[m/              train_model_1.py
model_evaluate.py            trainclasses.txt
[1m[34mmodels[m[m/


In [8]:
cat testclasses.txt

chimpanzee
giant+panda
leopard
persian+cat
pig
hippopotamus
humpback+whale
raccoon
rat
seal


In [None]:
def load_imgs(data_dir):
    """Read every image below ``data_dir`` into memory, grouped by sub-directory.

    Each immediate sub-directory of ``data_dir`` is treated as one animal class
    and each regular file inside it as one image, decoded with ``plt.imread``.

    Hidden entries (names starting with ``.``, e.g. ``.DS_Store`` or
    ``.ipynb_checkpoints``) and anything that is not a directory at the top
    level / not a regular file at the image level are skipped, so stray files
    do not abort the load. Directory listings are sorted so the resulting
    order does not depend on the filesystem.

    Args:
        data_dir: Path of the directory holding one sub-directory per class.

    Returns:
        dict mapping sub-directory name -> list of images as returned by
        ``plt.imread``, in sorted file-name order.
    """
    animal_to_imgs = {}
    for animal_name in sorted(os.listdir(data_dir)):
        animal_dir = os.path.join(data_dir, animal_name)
        # Only real class folders count; loose or hidden entries are ignored.
        if animal_name.startswith(".") or not os.path.isdir(animal_dir):
            continue

        images = []
        for img_name in sorted(os.listdir(animal_dir)):
            img_path = os.path.join(animal_dir, img_name)
            if img_name.startswith(".") or not os.path.isfile(img_path):
                continue
            images.append(plt.imread(img_path))
        animal_to_imgs[animal_name] = images
    return animal_to_imgs

def load_info(info_dir="."):
    """Load class names, per-class attribute vectors and the train/test split.

    Reads four text files from ``info_dir``: ``classes.txt``,
    ``predicate-matrix-binary.txt``, ``trainclasses.txt`` and
    ``testclasses.txt``. Every file is read as a single text column (one entry
    per line) and then split on whitespace.

    Args:
        info_dir: Directory containing the four metadata files. Defaults to
            the current working directory, which is where the files were
            previously hard-coded to live.

    Returns:
        Tuple ``(animal_to_feat, id_to_name, name_to_id, train_classes,
        test_classes)`` where

        * ``animal_to_feat`` maps class name -> 1-D integer ``np.ndarray``
          parsed from the matching row of the predicate matrix,
        * ``id_to_name`` / ``name_to_id`` map the zero-based row position in
          ``classes.txt`` to the class name and back,
        * ``train_classes`` / ``test_classes`` are lists of class names.
    """
    def _read_lines(file_name):
        # header=None: the files have no header row; column 0 holds each line.
        return pd.read_csv(os.path.join(info_dir, file_name), header=None)[0]

    class_lines = _read_lines("classes.txt")
    predicate_rows = _read_lines("predicate-matrix-binary.txt")
    test_lines = _read_lines("testclasses.txt")
    train_lines = _read_lines("trainclasses.txt")

    animal_to_feat = {}
    id_to_name, name_to_id = {}, {}
    for i, c in enumerate(class_lines):
        # Each line is "<number> <name>"; the second token is the class name.
        c_name = c.split()[1]
        id_to_name[i] = c_name
        name_to_id[c_name] = i
        # Row i of the predicate matrix belongs to the i-th class line.
        animal_to_feat[c_name] = np.array([int(binary) for binary in predicate_rows.iloc[i].split()])

    train_classes = [c.split()[0] for c in train_lines]
    test_classes = [c.split()[0] for c in test_lines]

    return animal_to_feat, id_to_name, name_to_id, train_classes, test_classes

In [None]:
# Decode every image under images_128x128/ into memory, keyed by sub-directory name.
animal_to_imgs = load_imgs("images_128x128")
# Class metadata: attribute vector per class, id<->name lookups and the train/test class lists.
animal_to_feat, id_to_name, name_to_id, train_classes, test_classes = load_info()
# Train classes first, then test classes.
all_classes = train_classes + test_classes

In [None]:
def pred_class(model, img, classes, animal_feats=None, eps=1e-5):
    """Pick the class whose binary attribute vector best matches the model output.

    ``model.predict`` is run on ``img`` as a batch of one, giving one score
    ``s`` per attribute. For a class with attribute vector ``a`` the score is

        sum(log(eps + |s - 1 + a|)) - sum(log(eps + |s - a|))

    i.e. the log of "agreement with ``a``" minus the log of "agreement with
    the complement of ``a``". An earlier variant of this function ranked
    classes by the plain product of the agreement terms; this version works
    in log space instead.

    Args:
        model: Object exposing ``predict(batch)`` that returns one row of
            attribute scores per input image.
        img: A single image array (no batch axis).
        classes: Sequence of candidate class names.
        animal_feats: Optional mapping class name -> attribute vector. When
            omitted, the notebook-level ``animal_to_feat`` table is used.
        eps: Small constant added inside the logarithms so that an exact 0
            does not yield ``-inf``.

    Returns:
        int: index into ``classes`` of the best-scoring class. Exact ties
        resolve to the first such class.

    Raises:
        ValueError: if ``classes`` is empty.
    """
    if len(classes) == 0:
        raise ValueError("classes must contain at least one class name")
    if animal_feats is None:
        # Fall back to the table loaded by the notebook.
        animal_feats = animal_to_feat

    s = model.predict(np.expand_dims(img, axis=0))[0]
    # One row per candidate class so all classes are scored in one shot.
    signatures = np.stack([np.asarray(animal_feats[animal]) for animal in classes])
    pos = np.log(eps + np.abs(s - 1.0 + signatures)).sum(axis=1)
    neg = np.log(eps + np.abs(s - signatures)).sum(axis=1)
    return int(np.argmax(pos - neg))

def pred_features(model, img):
    """Return the model's attribute predictions for one image as rounded integers.

    The image is wrapped into a batch of size one, passed to
    ``model.predict``, and the single output row is rounded with ``np.round``
    and cast to ``int``.
    """
    batch = np.expand_dims(img, axis=0)
    activations = model.predict(batch)[0]
    rounded = np.round(activations)
    return rounded.astype(int)

def predictions(model, classes, animal_to_images):
    """Classify every image of the given classes.

    Walks ``classes`` in order and, for each image stored under that class in
    ``animal_to_images``, calls ``pred_class`` with the full ``classes`` list
    as the candidate set.

    Returns:
        ``(y_pred, y_true)`` — two parallel lists; ``y_true`` holds the
        position of the image's class in ``classes`` and ``y_pred`` holds the
        value returned by ``pred_class``.
    """
    outcomes = [
        (label, pred_class(model, picture, classes))
        for label, class_name in enumerate(classes)
        for picture in animal_to_images[class_name]
    ]
    truths = [label for label, _ in outcomes]
    guesses = [guess for _, guess in outcomes]
    return guesses, truths

def feature_preds(model, classes, animal_to_images):
    """Collect predicted and reference attribute vectors for every image.

    For each image of each class in ``classes`` the reference vector is looked
    up in the notebook-level ``animal_to_feat`` table and the prediction comes
    from ``pred_features``.

    Returns:
        ``(y_pred, y_true)`` — two parallel lists with one attribute vector
        per image.
    """
    pairs = [
        (animal_to_feat[class_name], pred_features(model, picture))
        for class_name in classes
        for picture in animal_to_images[class_name]
    ]
    reference = [truth for truth, _ in pairs]
    predicted = [guess for _, guess in pairs]
    return predicted, reference

In [None]:
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(y_true, y_pred, classes, title="", normalize=False, cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map.

    Args:
        y_true: Sequence of true labels.
        y_pred: Sequence of predicted labels, parallel to ``y_true``.
        classes: Tick labels for both axes, in label order.
        title: Title shown above the plot.
        normalize: When True, each row is divided by its total so cells show
            the fraction of that true class. Rows whose total is zero are
            left at 0 instead of turning into NaN.
        cmap: Matplotlib colour map used for the cells.

    Returns:
        The ``Axes`` the matrix was drawn on.
    """
    # NOTE: this changes numpy's print precision for the whole process.
    np.set_printoptions(precision=2) # auto-rounds np numbers
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row has samples; empty rows stay all-zero
        # rather than producing NaN cells and a NaN colour threshold.
        cm = np.divide(
            cm.astype('float'),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )

    fig, ax = plt.subplots(figsize=(8,8))
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True class',
           xlabel='Predicted class')

    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax

In [None]:
# Load the first saved network (HDF5 file) from the models/ directory.
model = load_model("models/model_1.h5")

In [None]:
# Classify every image of the test classes with model 1; labels are indices into test_classes.
y_pred, y_test = predictions(model, test_classes, animal_to_imgs)

In [None]:
# Fraction of test images whose predicted class index equals the true one.
print(np.mean(np.equal(np.array(y_pred), np.array(y_test))))
plot_confusion_matrix(
    y_test,
    y_pred,
    classes=test_classes,
    title="Confusion matrix, model 1",
    normalize=True,
)
plt.show()

In [None]:
# f_pred / f_test were never assigned anywhere in the notebook, so this cell
# raised NameError on a fresh kernel. Compute them here from model 1 on the
# test classes before scoring.
f_pred, f_test = feature_preds(model, test_classes, animal_to_imgs)
# Per-attribute accuracy: mean over images (axis 0) of exact 0/1 agreement.
scores = np.mean(np.array(f_pred) == np.array(f_test), axis=0)

In [None]:
# predicates.txt is read as one text column; the last whitespace-separated
# token of each line is kept as the attribute name.
predicate_file = pd.read_csv("predicates.txt", header=None)
predicates = [entry.split()[-1] for entry in predicate_file.iloc[:, 0]]

In [None]:
# Load the second saved network from the models/ directory.
final_model = load_model("models/final-model-90.hdf5")

In [None]:
# Classify every image of the test classes with final_model; labels are indices into test_classes.
final_pred, final_test = predictions(final_model, test_classes, animal_to_imgs)

In [None]:
# Fraction of test images whose predicted class index equals the true one.
print(np.mean(np.equal(np.array(final_pred), np.array(final_test))))
plot_confusion_matrix(
    final_test,
    final_pred,
    classes=test_classes,
    title="Confusion matrix, model 2",
    normalize=True,
)
plt.show()

In [None]:
# Predicted vs. reference attribute vectors of final_model for every test-class image.
# NOTE(review): the names say "train" but the call passes test_classes — confirm which was intended.
final_pred_train, final_true_train = feature_preds(final_model, test_classes, animal_to_imgs)

In [None]:
# Overall attribute accuracy of final_model (mean over all images and attributes).
# The previous names final_feat_pred / final_feat_test were never assigned; the
# feature_preds call for final_model binds final_pred_train / final_true_train.
np.mean(np.array(final_pred_train) == np.array(final_true_train))

In [None]:
import texttable as tt

# Per-attribute accuracy of final_model: mean over images (axis 0) of exact
# agreement. final_scores was used below without ever being assigned.
final_scores = np.mean(np.array(final_pred_train) == np.array(final_true_train), axis=0)

# Each table row shows this many (feature, score) pairs side by side.
PAIRS_PER_ROW = 5

table = tt.Texttable()
table.set_cols_align(["l", "r"] * PAIRS_PER_ROW)
table.set_cols_valign(["m"] * (2 * PAIRS_PER_ROW))
table.set_cols_width([8] * (2 * PAIRS_PER_ROW))
header = ["Feature", "Score"] * PAIRS_PER_ROW
rows = [header]
# Iterate over the scores that are actually printed (not the model-1 `scores`).
for i in range(0, len(final_scores), PAIRS_PER_ROW):
    temp = []
    for j in range(i, min(i + PAIRS_PER_ROW, len(final_scores))):
        temp.append(predicates[j])
        temp.append(round(final_scores[j], 3))
    # Pad a short final row so every row has the same number of columns.
    temp.extend([""] * (2 * PAIRS_PER_ROW - len(temp)))
    rows.append(temp)
table.add_rows(rows)
print(table.draw())