# Montevideo: Evaluation

Now we evaluate model over validation set and only "schools" class.

In [1]:
#@title Check GPU presence { display-mode: "form" }

import tensorflow as tf
print("GPU device name: {}".format(tf.test.gpu_device_name()))

GPU device name: /device:GPU:0


In [2]:
#@title Download datasets { display-mode: 'form' }
#@test {'output': 'ignore'}

#!mkdir -p 1/
#!gcloud config set project golden-system-178513
#!gsutil -m cp gs://dym-temp/school-mapping/datasets/1.tar.gz 1/
#!cd 1/ && tar xzf 1.tar.gz

In [3]:
import numpy as np
import os
import csv
import gc

from glob import glob
from keras.applications.resnet50 import ResNet50
from keras.layers import Flatten, Dense, Dropout
from keras.models import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras import optimizers
from keras import backend as K 

Using TensorFlow backend.


In [4]:
DATASET_DIR = '../data/ds2/'

WIDTH = 300
HEIGHT = 300
CLASSES = ('urban', 'rural', 'school')

In [5]:
TRAIN_DIR = os.path.join(DATASET_DIR, 'train')
VAL_DIR = os.path.join(DATASET_DIR, 'test')

train_files = glob(os.path.join(TRAIN_DIR, '*.jpg'))
val_files = glob(os.path.join(VAL_DIR, '*.jpg'))

n_train_samples = len(train_files)
n_val_samples = len(val_files)

n_train_samples, n_val_samples

(6055, 9054)

## Data augmentation

In [6]:
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator

In [7]:
BATCH_SIZE = 40

## Evaluation

Now we evaluate model over validation set and only "schools" class

In [8]:
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input
from sklearn.metrics import confusion_matrix
import numpy as np

In [9]:
#!gsutil cp gs://dym-temp/school-mapping/models/*.h5 .

In [10]:
WEIGHTS_PATH = '../data/models/checkpoint.h5'

FC_SIZE = 1024
DROPOUT = 0.5

print("Building model...")
model = ResNet50(weights='imagenet',
                 include_top=False,
                 input_shape=(WIDTH, HEIGHT, 3))
x = model.output
x = Flatten()(x)
x = Dense(FC_SIZE, activation='relu')(x)
x = Dropout(DROPOUT)(x)
predictions = Dense(len(CLASSES), activation='sigmoid')(x)
model_final = Model(inputs=model.input, outputs=predictions)

print("Loading weights...")
model_final.load_weights(WEIGHTS_PATH)
print("Done")

Building model...




Loading weights...
Done


In [11]:
model_final.output_shape

(None, 3)

In [12]:
target_size = model_final.input_shape[1:3]

In [13]:
img_path = os.path.join(DATASET_DIR, 'test', '0_93.jpg')
img = image.load_img(img_path, target_size=target_size)
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

preds = model_final.predict(x)
preds

array([[0.19569321, 0.12140249, 0.0098942 ]], dtype=float32)

In [14]:
from itertools import zip_longest
from random import shuffle
from tqdm import tqdm_notebook as tqdm

def grouper(iterable, n, fillvalue=None):
    "Collect data into fixed-length chunks or blocks"
    # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx"
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)

def predict_images(img_path, batch_size=40):
    images = glob(os.path.join(img_path, '*.jpg'))
    #shuffle(images)
    all_preds = None
    groups = list(enumerate(grouper(images, batch_size)))
    for g, img_group in tqdm(groups):
        imgs = [image.load_img(path, target_size=target_size) for path in img_group if path]
        arrays = np.array([image.img_to_array(img) for img in imgs])
        pre_arrays = preprocess_input(arrays)
        preds = model_final.predict(pre_arrays)
        preds = np.round(preds)
        if all_preds is None:
            all_preds = preds
        else:    
            all_preds = np.concatenate([all_preds, preds])            
    return all_preds

In [15]:
y_pred = predict_images(os.path.join(DATASET_DIR, 'train'))
y_pred.shape

HBox(children=(IntProgress(value=0, max=152), HTML(value='')))




(6055, 3)

In [16]:
def print_class_stats(y):
    total = y.shape[0]

    c_none = np.sum(np.all(y[:] == 0, axis=1))

    c_urban = np.sum(y[:, 0])
    c_rural = np.sum(y[:, 1])
    c_school = np.sum(y[:, 2])

    print("Total", total)
    print("None", c_none, c_none / total)
    print("Urban", c_urban, c_urban / total)
    print("Rural", c_rural, c_rural / total)
    print("School", c_school, c_school / total)

In [17]:
print_class_stats(y_pred)

Total 6055
None 5771 0.9530966143682906
Urban 0.0 0.0
Rural 284.0 0.04690338563170933
School 0.0 0.0


## To do

* Get labels from test dataset
* Evaluate: Confusion matrix

In [18]:
def parse_label_row(row):
    labels = list(row.values())[1:]
    labels = [int(label) for label in labels]
    return labels

def get_labels_from_data_subset(dataset_dir, subset_name):
    labels_path = os.path.join(dataset_dir, 'labels.csv')
    with open(labels_path, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        rows_by_img = { row['img']: row for row in reader }
    images = glob(os.path.join(dataset_dir, subset_name, '*.jpg'))
    basenames = [os.path.basename(img) for img in images]
    instances_subset = [rows_by_img[img] for img in basenames]
    y = np.array([parse_label_row(instance) for instance in instances_subset])
    return y

In [19]:
y_true = get_labels_from_data_subset(DATASET_DIR, 'test')
y_true.shape, y_true

ValueError: invalid literal for int() with base 10: '99_249.jpg'