In [13]:
import os
import tensorflow as tf

from modules.run import load_config
from modules.run import Trainer, Metrics
from modules.data import DataManager
from modules.models import pretrained_cnn, pretrained_cnn_multichannel

from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score, accuracy_score

## Testing imports

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [1]:
def run_experiment_from_config(config_file, country):
    """Train the multichannel pretrained CNN for one experiment configuration.

    Loads the configuration, builds the training/validation generators for
    ``country``, registers a ``Metrics`` callback on the ``Trainer`` and fits
    the network with ``fit_generator``.

    Args:
        config_file: Identifier passed to ``load_config``.
        country: ``'kenya'`` or ``'peru'``; selects which ``DataManager``
            generators and which class weights are used.

    Raises:
        ValueError: If ``country`` is neither ``'kenya'`` nor ``'peru'``.
    """
    from itertools import islice

    # Fail fast: previously an unknown country left both generators as None
    # and only blew up deep inside label collection or fit_generator.
    if country not in ('kenya', 'peru'):
        raise ValueError(
            "Unsupported country {!r}; expected 'kenya' or 'peru'.".format(country)
        )

    config = load_config(config_file)
    data_manager = DataManager(config)

    if country == 'kenya':
        train_generator, val_generator, _ = data_manager.generate_kenya()
        class_weight = data_manager.class_weight("kenya")
    else:
        train_generator, val_generator, _ = data_manager.generate_peru()
        # Keras documents `class_weight` as a dict mapping class index -> weight;
        # the former bare list [1.64, 1, 2] is expressed in that form here.
        class_weight = {0: 1.64, 1: 1, 2: 2}

    convnet = pretrained_cnn_multichannel(config, image_size=config["image_size"], n_channels=config["n_channels"])

    sample_size = config["sample"]["size"]
    batch_size = config["batch_size"]
    validation_split = config["validation_split"]

    # Multiplying by the split fraction yields floats; step counts are cast to
    # int so they can be used as iteration limits. The values are unchanged.
    train_steps = int(sample_size * (1 - validation_split) // batch_size + 1)
    val_steps = int(sample_size * validation_split // batch_size + 1)

    labels = None
    if config['mask'] is not None:
        # Record the class id of every sample in the first `val_steps`
        # validation batches. islice stops after exactly `val_steps` draws, so
        # the generator is not advanced past the batches whose labels were
        # recorded (the previous loop fetched one extra batch before breaking).
        labels = []
        for _, label_batch in islice(val_generator, val_steps):
            labels.extend(np.argmax(label_batch, axis=1))
        labels = np.array(labels)

    trainer = Trainer(config)
    metrics_callback = Metrics(val_generator, trainer.tensorboard_dir, labels, val_steps)
    trainer.callbacks.append(metrics_callback)

    # NOTE(review): the config key is called "weighted_metrics" but is passed
    # as `metrics` -- confirm this is intended.
    convnet.compile(loss=trainer.loss, optimizer=trainer.optimizer, metrics=config["weighted_metrics"])

    convnet.fit_generator(
        train_generator,
        train_steps,  # steps per epoch
        epochs=config["n_epochs"],
        callbacks=trainer.callbacks,
        validation_data=val_generator,
        validation_steps=val_steps,
        class_weight=class_weight,
        use_multiprocessing=True
    )

In [52]:
# Train every final Xception experiment in sequence: (config name, country).
experiment_runs = (
    ("cls_final_xception_kenya_rgb", "kenya"),
    ("cls_final_xception_peru_rgb", "peru"),
    ("cls_final_xception_kenya_masked", "kenya"),
    ("cls_final_xception_kenya_masked-inverted", "kenya"),
    ("cls_final_xception_kenya_two_with_mask", "kenya"),
)
for experiment_config, experiment_country in experiment_runs:
    run_experiment_from_config(experiment_config, experiment_country)

Declouded dataframe length: 152505


In [2]:
def setup_cross_domain_from_config(config_file, country):
    """Build the network and the validation generator for a cross-domain check.

    No training happens here and no weights are loaded; the caller is expected
    to load a checkpoint into the returned model before predicting. Class
    weights and the training generator are not kept because nothing returned
    from this function uses them.

    Args:
        config_file: Identifier passed to ``load_config``.
        country: ``'kenya'`` or ``'peru'``; selects which ``DataManager``
            generator pair is built.

    Returns:
        A ``(convnet, val_generator)`` tuple: the model produced by
        ``pretrained_cnn_multichannel`` and the validation generator for
        ``country``.

    Raises:
        ValueError: If ``country`` is neither ``'kenya'`` nor ``'peru'``.
    """
    # Fail fast: previously an unknown country returned val_generator=None,
    # which only surfaced later as an obscure error at predict time.
    if country not in ('kenya', 'peru'):
        raise ValueError(
            "Unsupported country {!r}; expected 'kenya' or 'peru'.".format(country)
        )

    config = load_config(config_file)
    data_manager = DataManager(config)

    # The generate_* calls return (train generator, validation generator,
    # dataframe); only the validation generator is needed here.
    if country == 'kenya':
        _, val_generator, _ = data_manager.generate_kenya()
    else:
        _, val_generator, _ = data_manager.generate_peru()

    convnet = pretrained_cnn_multichannel(config, image_size=config["image_size"], n_channels=config["n_channels"])
    return convnet, val_generator

In [23]:
# Cross-domain evaluation: build the Peru model/validation generator, then load
# a checkpoint saved under the Kenya RGB experiment directory.
model, val_gen = setup_cross_domain_from_config(
    config_file="cls_final_xception_peru_balanced",
    country="peru",
)
model.load_weights("./data/cls_final_xception_kenya_rgb/checkpoints/weights.02-0.46.hdf5")

# Highest-scoring class per validation sample.
val_scores = model.predict(val_gen)
val_predict = np.argmax(val_scores, axis=1)

# NOTE(review): comparing against `val_gen.classes` assumes the generator
# yields samples in that same order (i.e. no shuffling) -- confirm.
print(accuracy_score(val_gen.classes, val_predict))

Declouded dataframe length: 97281
Found 10800 validated image filenames belonging to 3 classes.
Found 1200 validated image filenames belonging to 3 classes.
0.455


In [21]:
# Cross-domain evaluation: build the Kenya model/validation generator, then
# load a checkpoint saved under the Peru RGB experiment directory.
model, val_gen = setup_cross_domain_from_config(
    config_file="cls_final_xception_kenya_balanced",
    country="kenya",
)
model.load_weights("./data/cls_final_xception_peru_rgb/checkpoints/weights.02-0.71.hdf5")

# Highest-scoring class per validation sample.
val_scores = model.predict(val_gen)
val_predict = np.argmax(val_scores, axis=1)

# NOTE(review): comparing against `val_gen.classes` assumes the generator
# yields samples in that same order (i.e. no shuffling) -- confirm.
print(accuracy_score(val_gen.classes, val_predict))

Declouded dataframe length: 152505
Found 10800 validated image filenames belonging to 3 classes.
Found 1200 validated image filenames belonging to 3 classes.
0.5975


In [22]:
# Class balance of the current validation generator: the distinct class ids in
# `val_gen.classes` and how many samples carry each one.
np.unique(val_gen.classes, return_counts=True)

(array([0, 1, 2]), array([391, 423, 386]))

In [54]:
# Predict batch by batch over the first `val_steps` batches of `val`, keeping
# the true and predicted class id of every sample seen.
# NOTE(review): `val` is not defined in any cell shown in this notebook; it
# must come from a cell that was executed earlier and since removed -- confirm
# which generator it should be.
val_steps = 10
labels = []
pred = []
for batch_index, (data, label) in enumerate(val):
    if batch_index >= val_steps:
        break
    # argmax over axis 1 picks the highest-scoring class for each row.
    labels.extend(np.argmax(label, axis=1))
    pred.extend(np.argmax(model.predict(data), axis=1))
labels = np.array(labels)
pred = np.array(pred)

Found 15000 validated image filenames belonging to 3 classes.
Found 15000 validated image filenames belonging to 3 classes.


In [55]:
# Dump the predicted and true class ids, then the accuracy and macro-F1
# computed between them.
for class_ids in (pred, labels):
    print(class_ids)
batch_accuracy = accuracy_score(labels, pred)
print(batch_accuracy)
batch_macro_f1 = f1_score(labels, pred, average='macro')
print(batch_macro_f1)

[1 1 2 1 1 1 1 1 1 1 2 1 1 0 1 1 1 0 0 1 2 1 2 2 2 2 1 1 1 1 1 1 2 1 1 1 2
 2 1 1 1 1 1 1 1 2 2 2 2 1 2 1 1 1 1 2 1 2 1 1 1 2 2 1 1 1 2 2 1 1 2 2 1 1
 1 0 1 1 1 2 2 1 1 1 2 1 1 0 1 1 1 1 1 1 2 2 1 1 2 2 2 1 1 1 2 2 1 2 1 1 1
 1 1 1 1 1 1 1 2 1 1 1 1 2 1 2 1 1 2 1 2 2 2 1 1 2 1 2 1 2 1 1 2 1 2 0 1 1
 2 2 1 2 2 1 1 1 1 1 2 1 1 2 2 1 1 2 2 2 2 0 1 2 1 2 1 1 1 2 1 1 1 1 1 1 2
 1 1 1 2 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 2 1 1 2 1 2 1 1 1 1 1 2 1 1 1 1 1 1 2 1 2 2 1 1 1 1 2 1 1 2 2 1 1 2 1 1 2
 1 1 2 2 2 1 2 1 1 1 1 1 2 1 1 1 2 2 1 1 2 1 1 2 1 1 1 1 2 1 2 2 1 1 1 1 1
 0 1 2 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 2 1 1 1 0 1]
[1 1 1 1 1 1 1 1 2 1 2 2 1 1 1 1 2 0 0 1 2 1 2 2 1 2 1 1 1 2 1 1 2 1 1 1 2
 1 1 2 1 1 1 1 1 2 1 2 2 1 2 1 1 2 1 1 1 2 1 1 1 2 2 1 1 1 2 2 1 1 2 2 1 1
 1 0 1 1 1 2 2 1 1 1 2 1 1 0 1 1 1 1 1 1 1 2 1 1 2 1 2 1 2 1 2 1 1 2 1 1 1
 1 1 1 1 1 1 2 2 1 1 1 1 2 1 1 2 1 2 1 2 2 1 1 1 2 1 2 1 1 1 1 2 1 2 1 1 1
 2 2 1 2 1 1 2 2 2 1 2 1 1 1 2 1 1 2 2 2 1 0 1 1 1

In [32]:
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score, accuracy_score

# Macro-F1 and accuracy of the argmax of `val_predict` against `labels`.
# NOTE(review): this cell expects `val_predict` to hold per-class scores (2-D),
# whereas the cross-domain cells in this notebook store already-argmaxed class
# ids under the same name -- confirm which value this was run against.
print(labels)
predicted_classes = np.argmax(val_predict, axis=1)
print(predicted_classes)
_val_f1 = f1_score(labels, predicted_classes, average='macro')
print(_val_f1)
print(accuracy_score(labels, predicted_classes))

[2 2 1 2 2 2 1 1 1 2 1 2 1 2 1 1 1 2 2 2 2 2 2 1 1 0 2 1 1 1 1 1 2 2 1 1 1
 1 1 2 1 2 2 2 2 1 2 1 1 1 1 2 1 2 1 2 1 2 1 1 1 2 1 2]
[1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 0 1 1 2 1 2 1 2 2 1 1 1 1 1 1 2 1 1 1 1
 1 1 0 1 1 1 1 1 2 2 2 2 1 2 1 1 1 1 1 1 2 1 1 1 2 2 1]
0.3282828282828283
0.53125
