In [8]:
import os
import keras.backend as K
import numpy as np
import numpy.ma as ma
import pandas as pd
import pylab as pl
import matplotlib.cm as cm
import math

from scipy import misc
from sklearn.metrics import accuracy_score, precision_score, recall_score
from scipy.stats import pearsonr

from keras.models import model_from_json
from mpl_toolkits.axes_grid1 import make_axes_locatable

BATCH_SIZE = 20

In [2]:
def restore_model(json_filepath, weights_filepath):
    """
    Restore the pretrained model.
    :param json_filepath: Path of the .json file containing model architecture
    :param weights_filepath: Path of the .h5 file containing weights of pretrained model
    :return `model` object
    """
    print("Loading model from disk...")
    json_file = open(json_filepath, "r")
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json)
    model.load_weights(weights_filepath)
    print("Done")
    return model


# -----------------------------------------------------------------------------
# https://github.com/julienr/
# ipynb_playground/blob/master/keras/convmnist/keras_cnn_mnist.ipynb
def nice_imshow(ax, data, vmin=None, vmax=None, cmap=None):
    """Wrapper around pl.imshow"""
    if cmap is None:
        cmap = cm.jet
    if vmin is None:
        vmin = data.min()
    if vmax is None:
        vmax = data.max()
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    im = ax.imshow(data, vmin=vmin, vmax=vmax, interpolation='nearest', cmap=cmap)
    pl.colorbar(im, cax=cax)


def make_mosaic(imgs, nrows, ncols, border=1):
    """
    Given a set of images with all the same shape, makes a
    mosaic with nrows and ncols
    """
    nimgs = imgs.shape[0]
    imshape = imgs.shape[1:]

    mosaic = ma.masked_all((nrows * imshape[0] + (nrows - 1) * border,
                            ncols * imshape[1] + (ncols - 1) * border),
                           dtype=np.float32)

    paddedh = imshape[0] + border
    paddedw = imshape[1] + border
    for i in range(nimgs):
        row = int(np.floor(i / ncols))
        col = i % ncols

        mosaic[row * paddedh:row * paddedh + imshape[0],
        col * paddedw:col * paddedw + imshape[1]] = imgs[i]
    return mosaic

    
# -----------------------------------------------------------------------------

# https://blog.keras.io/
# how-convolutional-neural-networks-see-the-world.html

# http://ankivil.com/visualizing-deep-neural-networks-classes-and-features/

# -----------------------------------------------------------------------------
def deprocess_image(x, alter_dim=True):
    """
    Utility function to convert a tensor into a valid image
    """
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to RGB array
    x *= 255
    if alter_dim and K.image_dim_ordering() == 'th':
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x

In [9]:
import util
model = util.restore_model('../models/developmental/best_model_architecture.json', '../models/developmental/best_model_weights.h5')

Loading model from disk...
Done


In [4]:
def img_data_generator(file_paths, batch_size):
    """
    Data generator for the model.
    :param file_paths: List of paths for the images
    :param batch_size: Batch size to be used for prediction
    """
    while True:
        x_train = []
        for file_path in file_paths:
            img = misc.imread(file_path)
            x_train.append(img)
            if len(x_train) == batch_size:
                x_to_yield = np.array(x_train, dtype=np.float32)
                if K.image_dim_ordering() == "th":
                    x_to_yield = x_to_yield.transpose((0, 3, 1, 2))
                yield x_to_yield
                x_train = []
        if len(x_train) > 0:
            x_to_yield = np.array(x_train, dtype=np.float32)
            if K.image_dim_ordering() == "th":
                x_to_yield = x_to_yield.transpose((0, 3, 1, 2))
            yield x_to_yield
                

def generate_predictions(model, img_dir, out_filepath, batch_size=BATCH_SIZE):
    """
    Generate predictions for the model and save them to the specified path.
    :param model: The pretrained model object
    :param img_dir: The directory containing images that are to be fed to the model
    :param out_filepath: File path to write the predictions
    :param batch_size: Batch size to be used for generating predictions
    """
    file_paths = [os.path.join(img_dir, f) for f in os.listdir(img_dir) if os.path.isfile(os.path.join(img_dir, f))]
    steps = int(len(file_paths) / batch_size)
    if len(file_paths) % batch_size > 0:
        steps += 1
    data_generator_obj = img_data_generator(file_paths, batch_size)

    print("Generating predictions...")
    predictions = model.predict_generator(data_generator_obj,
                                          val_samples=steps * batch_size,
                                          pickle_safe=True)
    
    pd_dict = dict()
    order = ['village_code']
    pd_dict['village_code'] = [os.path.split(f)[1].split('.')[0] for f in file_paths]
    for ii in range(len(predictions)):
        predictions[ii] = np.array(predictions[ii], dtype=np.float32)

        for idx in range(predictions[ii].shape[-1]):
            pd_dict[str(ii) + "_" + str(idx)] = np.transpose(predictions[ii])[idx]
            order.append(str(ii) + "_" + str(idx))
            
    compare = pd.DataFrame.from_dict(data=pd_dict)
    
    compare = compare[order]
    compare.to_csv(out_filepath, index=False)
    print("Done")  

    
def show_filter_responses(model, layer_index, input_img_path, save_dir=None, filter_index=None, dpi=100.0, save_original=False):
    """
    Show and save the filter responses for all or a selected filter at given layer.
    :param model: pre-trained model object
    :param layer_index: index of the layer
    :param input_img_path: path of the input image
    :param save_dir: path of directory to save the filters/original image. Filters are only displayed but not saved if this is None
    :param filter_index: index of the filter in the given layer. All filter responses are displayed if this is None
    :param dpi: DPI of the display
    """
    input_img = np.array(misc.imread(input_img_path), dtype=np.float32)
    if K.image_dim_ordering() == "th":
        input_img = input_img.transpose((2, 0, 1))
    
    layer = model.layers[layer_index]
    inputs = [K.learning_phase()] + model.inputs
    _layer_f = K.function(inputs, [layer.output])

    def layer_f(X):
        # The [0] is to disable the training phase flag
        return _layer_f([0] + [X])

    if K.image_dim_ordering() == "th":
        display_image = input_img.transpose((1, 2, 0))
    else:
        display_image = np.copy(input_img)
    display_image = util.deprocess_image(display_image, alter_dim=False)

    pl.figure(
        figsize=(display_image.shape[0] / dpi, display_image.shape[1] / dpi),
        dpi=dpi
    )
    util.nice_imshow(pl.gca(), display_image, cmap=cm.binary)

    images = np.array([input_img])
    c1 = layer_f(images)
    c1 = np.squeeze(c1)

    if K.image_dim_ordering() == "tf":
        c1 = c1.transpose((2, 0, 1))

    print("c1 shape : ", c1.shape)

    if filter_index is None:
        grid_dim = int(math.ceil(math.sqrt(c1.shape[0])))
        out_img = make_mosaic(c1, grid_dim, grid_dim)
    else:
        out_img = c1[filter_index]
    
    if save_dir is not None:
        prefix = "layer_" + str(layer_index)
        if filter_index is not None:
            prefix += "_filter_" + str(filter_index)
        if save_original:
            misc.imsave(os.path.join(save_dir, prefix + "_input.png"), display_image)
        misc.imsave(os.path.join(save_dir, prefix + "_output.png"), util.deprocess_image(out_img, alter_dim=False))

    pl.figure(
        figsize=(out_img.shape[0] / dpi, out_img.shape[1] / dpi),
        dpi=dpi
    )
    pl.suptitle(layer.name)
    util.nice_imshow(pl.gca(), out_img, cmap=cm.binary)

In [12]:
generate_predictions(model, '../images', '../data/predicted_developmental.csv')

Generating predictions...


Process Process-4:


TypeError: Error when checking : data should be a Numpy array, or list/dict of Numpy arrays. Found: None...

In [11]:
model_pd = util.restore_model('../models/income_poverty_pd/best_model_architecture.json', '../models/income_poverty_pd/best_model_weights.h5')
model_cd = util.restore_model('../models/income_poverty_cd/best_model_architecture.json', '../models/income_poverty_cd/best_model_weights.h5')

Loading model from disk...
Done
Loading model from disk...
Done


In [6]:
def generate_prediction_income(model, developmental_filepath, village_info_filepath, out_filepath, batch_size=BATCH_SIZE):
    """
    Generate predictions of income level for each sub-district and save the results at specified path.
    :param model: pre-trained 'Model' object
    :param developmental_filepath: path of file containing values of developmental parameters for each village
    :param village_info_filepath: path of file containing information (including 'subdistrict_code' and 'num_households') 
    for each village
    :param out_filepath: path of file to write the predictions
    :param batch_size: Batch size to be used for generating predictions
    """
    print("Reading developmental parameter values...")
    village_data_dict = {}
    data = pd.read_csv(developmental_filepath)
    header_list = list(data)[1:]
    for ii, row in data.iterrows():
        village_code = row['village_code']
        village_data_dict[village_code] = {}
        for header in header_list:
            param_idx, column_idx = int(header.split('_')[0]), int(header.split('_')[1])
            if param_idx not in village_data_dict[village_code]:
                village_data_dict[village_code][param_idx] = {}
            village_data_dict[village_code][param_idx][column_idx] = row[header]
    print("Done.")
    print(village_data_dict)

    print("Aggregating values at sub-district level...")
    subdistrict_data_dict = {}
    data = pd.read_csv(village_info_filepath)
    for ii, row in data.iterrows():
        village_code = row['village_code']
        subdistrict_code = row['subdistrict_code']
        num_households = row['num_households']
        if village_code in village_data_dict:
            if subdistrict_code not in subdistrict_data_dict:
                subdistrict_data_dict[subdistrict_code] = {
                    'num_households': 0,
                }
            village_dict = village_data_dict[village_code]
            subdistrict_data_dict[subdistrict_code]['num_households'] += num_households
            for param_idx in village_dict:
                if param_idx not in subdistrict_data_dict[subdistrict_code]:
                    subdistrict_data_dict[subdistrict_code][param_idx] = {}
                for column_idx in village_dict[param_idx]:
                    if column_idx not in subdistrict_data_dict[subdistrict_code][param_idx]:
                        subdistrict_data_dict[subdistrict_code][param_idx][column_idx] = 0
                    subdistrict_data_dict[subdistrict_code][param_idx][column_idx] += \
                        village_dict[param_idx][column_idx] * num_households

    data_matrix = []
    subdistrict_code_list = []
    for subdistrict_code in subdistrict_data_dict:
        data_row = []
        subdistrict_dict = subdistrict_data_dict[subdistrict_code]
        num_households = subdistrict_dict.pop('num_households')
        for param_idx in sorted(subdistrict_dict.keys()):
            for column_idx in sorted(subdistrict_dict[param_idx].keys()):
                data_row.append(subdistrict_dict[param_idx][column_idx] / num_households)
        data_matrix.append(data_row)
        subdistrict_code_list.append(subdistrict_code)

    data_matrix = np.array(data_matrix)
    print("Done.")

    print("Predicting income level values...")
    predictions = model.predict(data_matrix, batch_size=batch_size)
    print("Done.")

    print("Writing predictions to file...")
    param_idx = 0
    pd_dict = dict()
    order = ['subdistrict_code']
    pd_dict['subdistrict_code'] = subdistrict_code_list
    predictions = np.array(predictions, dtype=np.float32)
    for column_idx in range(predictions.shape[-1]):
        pd_dict[str(param_idx) + "_" + str(column_idx)] = np.transpose(predictions)[column_idx]
        order.append(str(param_idx) + "_" + str(column_idx))

    compare = pd.DataFrame(data=pd_dict)

    compare = compare[order]
    compare.to_csv(out_filepath, index=False)
    print("Done.")


def compare_income_predictions(original_filepath, predicted_filepath):
    """
    Compare actual and predicted income levels
    :param original_filepath: Path of the file containing actual income level values
    :param predicted_filepath:  Path of the file containing predicted income level values
    :return: 
    """
    original_subdistrict_dict = {}
    original_values = []
    predicted_values = []
    data_original = pd.read_csv(original_filepath)
    data_predicted = pd.read_csv(predicted_filepath)
    header_list = list(data_predicted)[1:]
    for ii, row in data_original.iterrows():
        original_subdistrict_dict[row['subdistrict_code']] = [row[header] for header in header_list]
    for ii, row in data_predicted.iterrows():
        predicted_values.append([row[header] for header in header_list])
        original_values.append(original_subdistrict_dict[row['subdistrict_code']])
    original_values = np.array(original_values)
    predicted_values = np.array(predicted_values)
    print(original_values, predicted_values)

    print("Correlation for each class:")
    print("[0] " + str(pearsonr(original_values[:, 0], predicted_values[:, 0])))
    print("[1] " + str(pearsonr(original_values[:, 1], predicted_values[:, 1])))
    print("[2] " + str(pearsonr(original_values[:, 2], predicted_values[:, 2])))

    print("\nPoverty prediction after thresholding on class [0]: ")
    t = 0.1
    while t < 1.0:
        p1m = np.copy(original_values[:, 0])
        p1m[p1m >= t] = 1
        p1m[p1m < t] = 0
        frac = np.sum(p1m) / len(p1m)
        ot = [1 if i >= t else 0 for i in original_values[:, 0]]
        pt = [1 if i >= t else 0 for i in predicted_values[:, 0]]
        print(
            "Threshold: " + str(t)
            + " Accuracy: " + str(accuracy_score(ot, pt))
            + " Baseline: " + str(max(frac, 1 - frac))
            + " Precision: " + str(precision_score(ot, pt))
            + " Recall: " + str(recall_score(ot, pt))
        )
        t += 0.1

In [9]:
generate_prediction_income(model_pd, '../data/predicted_developmental.csv', '../data/region_info.csv', '../data/pd_subdistrict_income.csv')
generate_prediction_income(model_cd, '../data/data_developmental.csv', '../data/region_info.csv', '../data/cd_subdistrict_income.csv')

Reading developmental parameter values...
Done.
{147351.0: {0: {0: 0.28278059999999994, 1: 0.011615037, 2: 0.13054937, 3: 0.011615037, 4: 0.049804337000000004, 5: 0.39766294, 6: 0.011615037, 7: 0.09274268, 8: 0.011615037}, 1: {0: 0.003231786, 1: 0.003231786, 2: 0.003231786, 3: 0.797096, 4: 0.18997684, 5: 0.003231786}, 2: {0: 0.0012439464, 1: 0.13586083, 2: 0.06828685, 3: 0.0012439464, 4: 0.76895934, 5: 0.0012439464, 6: 0.0012439464, 7: 0.0012439464, 8: 0.0012439464, 9: 0.01942938}}, 167240.0: {0: {0: 0.24246058, 1: 0.011961095, 2: 0.15415749, 3: 0.011961095, 4: 0.091117874, 5: 0.34361759999999997, 6: 0.011961095, 7: 0.12080202, 8: 0.011961095}, 1: {0: 0.00023692388, 1: 0.00023692388, 2: 0.00023692388, 3: 0.8125315, 4: 0.18652086, 5: 0.00023692388}, 2: {0: 0.0012364873, 1: 0.13572401, 2: 0.06850255, 3: 0.0012364873, 4: 0.76859534, 5: 0.0012364873, 6: 0.0012364873, 7: 0.0012364873, 8: 0.0012364873, 9: 0.019759275}}, 122907.0: {0: {0: 0.21550864, 1: 0.013756309, 2: 0.21235971, 3: 0.013756

Done.
{143688.0: {0: {0: 0.155, 1: 0.06, 2: 0.0, 3: 0.24100000000000002, 4: 0.0, 5: 0.5429999999999999, 6: 0.0, 7: 0.0, 8: 0.0}, 1: {0: 0.517, 1: 0.483, 2: 0.0, 3: 0.0, 4: 0.0, 5: 0.0}, 2: {0: 0.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 0.0, 6: 0.026000000000000002, 7: 0.9740000000000001, 8: 0.0, 9: 0.0}}, 143703.0: {0: {0: 0.564, 1: 0.0, 2: 0.09, 3: 0.0, 4: 0.0, 5: 0.33299999999999996, 6: 0.006, 7: 0.006, 8: 0.0}, 1: {0: 0.436, 1: 0.5579999999999999, 2: 0.0, 3: 0.0, 4: 0.006, 5: 0.0}, 2: {0: 0.0, 1: 0.0, 2: 0.0, 3: 0.276, 4: 0.22399999999999998, 5: 0.0, 6: 0.0, 7: 0.5, 8: 0.0, 9: 0.0}}, 122957.0: {0: {0: 0.006999999999999999, 1: 0.0, 2: 0.017, 3: 0.028999999999999998, 4: 0.934, 5: 0.01, 6: 0.002, 7: 0.0, 8: 0.0}, 1: {0: 0.433, 1: 0.5670000000000001, 2: 0.0, 3: 0.0, 4: 0.0, 5: 0.0}, 2: {0: 0.0, 1: 0.0, 2: 0.0, 3: 0.254, 4: 0.076, 5: 0.002, 6: 0.0, 7: 0.66, 8: 0.006999999999999999, 9: 0.0}}, 122905.0: {0: {0: 0.09, 1: 0.0, 2: 0.0, 3: 0.019, 4: 0.7170000000000001, 5: 0.172, 6: 0.0, 7: 0.0, 8

In [10]:
compare_income_predictions('../data/data_subdistrict_income.csv', '../data/pd_subdistrict_income.csv')  # For model trained on predicted data, model P.D.

[[0.6164 0.2845 0.0991]
 [0.6469 0.2514 0.1018]
 [0.7687 0.1462 0.0851]] [[0.36849174 0.5237405  0.10776771]
 [0.7322767  0.21580862 0.05191469]
 [0.7534018  0.18838634 0.05821174]]
Correlation for each class:
[0] (0.6909683011591856, 0.5143688083255451)
[1] (0.7369407973006125, 0.47253926280199154)
[2] (0.26608199524575404, 0.8285416106548102)

Poverty prediction after thresholding on class [0]: 
Threshold: 0.1 Accuracy: 1.0 Baseline: 1.0 Precision: 1.0 Recall: 1.0
Threshold: 0.2 Accuracy: 1.0 Baseline: 1.0 Precision: 1.0 Recall: 1.0
Threshold: 0.30000000000000004 Accuracy: 1.0 Baseline: 1.0 Precision: 1.0 Recall: 1.0
Threshold: 0.4 Accuracy: 0.6666666666666666 Baseline: 1.0 Precision: 1.0 Recall: 0.6666666666666666
Threshold: 0.5 Accuracy: 0.6666666666666666 Baseline: 1.0 Precision: 1.0 Recall: 0.6666666666666666
Threshold: 0.6 Accuracy: 0.6666666666666666 Baseline: 1.0 Precision: 1.0 Recall: 0.6666666666666666
Threshold: 0.7 Accuracy: 0.6666666666666666 Baseline: 0.6666666666666667 

In [17]:
# First 3 convolutional layer
first3_conv_layers = [1, 5, 8]
for i in first3_conv_layers:
    layer_index = i
    filter_index = None
    input_img_path = '../images/122913.png'
    save_dir = '../images_filter'
    show_filter_responses(model, layer_index, input_img_path, save_dir, filter_index)
"""
layer_index = 1
filter_index = None
input_img_path = '../images/122913.png'
save_dir = '../images_filter'
show_filter_responses(model, layer_index, input_img_path, save_dir, filter_index)
"""

c1 shape :  (8, 960, 960)
c1 shape :  (16, 480, 480)
c1 shape :  (16, 480, 480)


"\nlayer_index = 1\nfilter_index = None\ninput_img_path = '../images/122913.png'\nsave_dir = '../images_filter'\nshow_filter_responses(model, layer_index, input_img_path, save_dir, filter_index)\n"

In [19]:
# Roof type, source of light, source of water
rlw_conv_layers = [40, 41, 42]
for i in rlw_conv_layers:
    layer_index = i
    filter_index = None
    input_img_path = '../images/122913.png'
    save_dir = '../images_filter'
    show_filter_responses(model, layer_index, input_img_path, save_dir, filter_index)



c1 shape :  (64, 120, 120)
c1 shape :  (64, 120, 120)
c1 shape :  (64, 120, 120)
