In [1]:
import json
import utils
import data_manager as dm
import matplotlib.pyplot as plt

import tensorflow as tf
import numpy as np

from imp import reload

<h1><center>Cross-Validation</center></h1>

Notebook de validação cruzada da tarefa de qualidade de imagem sismográfica

### Load dataset

Carregamos as informações do dataset, já particionadas em treino-validação-teste

In [2]:
# Path of the JSON file that describes the dataset partitions.
load_from = 'data/result_path.json'

# Parse the description once; later cells read its 'train', 'dev', 'test'
# and 'id_to_class' entries.
with open(load_from) as desc_file:
    data_desc = json.load(desc_file)

Pré-carregamos o dataset em memória

In [3]:
# One consecutive integer index per sample across the three partitions.
every_sample = data_desc['train'] + data_desc['dev'] + data_desc['test']
all_indices = np.arange(len(every_sample))

# Load every sample into memory in a single call.
# NOTE(review): the same index array is passed both positionally and as
# `partition`; confirm against dm.load_batch that this is the intended usage.
all_images = dm.load_batch(
    data_desc,
    all_indices,
    partition=all_indices,
    normalize=True,
)

In [4]:
# Unpack the pair returned by dm.load_batch. Later cells index both arrays
# with the same sample indices and feed X to the image placeholder and Y to
# the label placeholder.
X,Y = all_images

Carregamos uma imagem para definir as dimensões da entrada

In [5]:
# Use the first preloaded image to derive the network input dimensions.
sample = X[0]
# The input placeholder is declared as (batch, img_h, img_w, n_channels) and
# is fed slices of X directly, so the first axis of a sample must be the
# height and the second the width. Reading them the other way round only
# worked because the images are square.
img_h = sample.shape[0]
img_w = sample.shape[1]
# Take the channel count from the data when a channel axis is present;
# fall back to a single channel otherwise.
n_channels = sample.shape[2] if sample.ndim == 3 else 1
# Number of target classes, taken from the dataset description.
n_classes = len(data_desc['id_to_class'])

### GPU Limit

Limitamos o uso de memória da GPU a 30% da capacidade máxima e o processo a uma única GPU

In [6]:
# Session configuration passed to tf.Session inside train_schedule.
# device_count caps the number of GPU devices this process will use at one.
default_config = tf.ConfigProto(device_count={'GPU':1})
# Let this process allocate at most 30% of the GPU memory.
default_config.gpu_options.per_process_gpu_memory_fraction=0.3

### Model

Segue o modelo que usamos para a tarefa

In [7]:
import models
import models.busson_model as my_model

In [8]:
# Re-import the model modules so that edits made to their source files take
# effect without restarting the kernel.
reload(my_model)
reload(models.architecture_manager)

<module 'models.architecture_manager' from '/home/rafael/git/learngeo/classificacao_sismo/evaluate/models/architecture_manager.py'>

We get information from the model and define the placeholders

In [9]:
# Name exported by the model module; train_schedule uses it as the checkpoint
# file name and the last cells use it in the results file name.
model_name = my_model.model_name

# Start from an empty default graph so re-running this cell does not stack
# a second copy of the model on top of the previous one.
tf.reset_default_graph()

with tf.name_scope('Input'):
    # Batch of images; the batch dimension is left open (None).
    x = tf.placeholder(tf.float32, shape=(None, img_h, img_w, n_channels), name='X')
    # One integer class id per image in the batch.
    y = tf.placeholder(tf.int64, shape=(None,), name='Y')
    # Learning rate, fed at every optimisation step so it can change between epochs.
    lr_placeholder = tf.placeholder(tf.float32)
    
# Build the network on top of the image placeholder.
# NOTE(review): the meaning of `alpha` is defined in models.busson_model and
# is not visible from this notebook.
output_logits, pred, model_description = my_model.make_model(x, n_classes, alpha=4)
# Build the loss and accuracy tensors and the training op; all three are
# evaluated with sess.run in train_schedule.
loss, accuracy, optimizer = my_model.make_model_loss(y, lr_placeholder, output_logits)

In [10]:
# Show the model name and its textual description, separated by a rule.
print(model_name, '----', model_description, sep='\n')

busson_model
----
inception network architecture


In [11]:
def compute_class_f1(CM):
    """Return the per-class F1 scores of a square confusion matrix.

    For every class k the diagonal count CM[k, k] is divided once by the sum
    of row k and once by the sum of column k; the F1 score is the harmonic
    mean of those two ratios. With rows indexed by the true class (the way
    train_schedule fills the matrix) the row ratio is the recall and the
    column ratio the precision; because F1 is symmetric in the two, the
    result does not depend on the orientation of the matrix.

    Parameters
    ----------
    CM : array-like of shape (n_classes, n_classes)
        Confusion matrix of counts.

    Returns
    -------
    numpy.ndarray
        One F1 value per class, in row order. A class whose row and column
        are both empty gets a score of 0 rather than NaN.
    """
    eps = 1e-8  # keeps every division finite for empty rows/columns
    hits = np.diag(CM)
    row_totals = np.sum(CM, axis=1)
    col_totals = np.sum(CM, axis=0)
    recall = hits / (row_totals + eps)
    precision = hits / (col_totals + eps)
    return 2 * recall * precision / (recall + precision + eps)

### Training

Função de treino, recebe um conjunto de índices de treino, validação e outro de teste. 
Essa função ajusta o modelo com as imagens de treino por um número máximo de épocas e mede o desempenho na validação ao fim de cada época. Se não houver melhora por 'early_stop_epochs' épocas, o treino para e o teste é avaliado com o melhor modelo de acordo com a validação.

Retorna a acurácia no treino e teste, além da matriz de confusão no teste e <i>Recall</i> da última classe

In [12]:
def _iter_batches(n_items, batch_size):
    """Yield consecutive slices that cover range(n_items) in chunks of batch_size."""
    for first in range(0, n_items, batch_size):
        yield slice(first, min(first + batch_size, n_items))


def _mean_accuracy(sess, x, y, sample_indices, batch_size):
    """Return the example-weighted mean of the `accuracy` tensor over X[sample_indices]."""
    weighted_sum = 0.0
    seen = 0
    for batch in _iter_batches(len(sample_indices), batch_size):
        batch_ids = sample_indices[batch]
        y_batch = Y[batch_ids]
        batch_acc = sess.run(accuracy, feed_dict={x: X[batch_ids], y: y_batch})
        weighted_sum += float(batch_acc) * len(y_batch)
        seen += len(y_batch)
    # An empty index set has no accuracy; report 0.0 instead of dividing by zero.
    return weighted_sum / seen if seen else 0.0


def train_schedule(
    x,y,lr_placeholder,
    train_indices, dev_indices,test_indices,
    lr=0.001,
    early_stop_epochs=40,
    max_epochs=40,
    batch_size=64,
    display_freq=100,
    normalization=True):
    """Train the current graph on one data split and evaluate it on a held-out set.

    The function relies on notebook-level globals: the preloaded arrays `X`
    and `Y`, the graph tensors/ops `loss`, `accuracy`, `optimizer` and `pred`,
    the session configuration `default_config`, the checkpoint name
    `model_name`, the class count `n_classes` and `compute_class_f1`.

    Variables are re-initialised at every call, so each call trains from
    scratch. After every epoch the model is scored on `dev_indices`; the
    best-scoring weights are checkpointed and training stops once the dev
    accuracy has not improved for `early_stop_epochs` consecutive epochs.
    When `dev_indices` is empty there is no early stopping and the weights of
    the last epoch are checkpointed instead. The checkpointed weights are
    then restored and evaluated on the training and test indices.

    Parameters
    ----------
    x, y, lr_placeholder : tf.Tensor
        Placeholders for the image batch, the integer labels and the
        learning rate.
    train_indices, dev_indices, test_indices : sequence of int
        Positions in `X`/`Y` of the training, validation and test samples.
        `dev_indices` may be empty.
    lr : float
        Initial learning rate; multiplied by 0.999 at the start of every epoch.
    early_stop_epochs : int
        Number of consecutive non-improving epochs tolerated before stopping.
    max_epochs : int
        Upper bound on the number of training epochs.
    batch_size : int
        Number of samples per training/evaluation batch.
    display_freq, normalization
        Unused; kept so existing calls keep working.

    Returns
    -------
    tuple
        (train accuracy, test accuracy, test confusion matrix indexed as
        [target, prediction], recall of the last class on the test set).
    """
    import os  # local import: only needed to create the checkpoint folder

    use_dev = len(dev_indices) > 0
    if not use_dev:
        print('no early stopping, save last model at epoch ', max_epochs)

    # Saver.save requires the destination folder to exist.
    checkpoint_dir = 'checkpoints/'
    os.makedirs(checkpoint_dir, exist_ok=True)
    checkpoint_file = checkpoint_dir + model_name

    # Convert all three index sets (the test set used to be left as passed in,
    # which broke batched indexing when a plain list was given).
    train_indices = np.asarray(train_indices, dtype=np.int64)
    dev_indices = np.asarray(dev_indices, dtype=np.int64)
    test_indices = np.asarray(test_indices, dtype=np.int64)

    saver = tf.train.Saver()

    test_pred = []
    test_target = []
    loss_sum = 0.0
    epoch = -1  # keeps the summary line valid when max_epochs == 0
    checkpoint_written = False

    # The context manager closes the session (and releases its resources)
    # when the fold is done, even if training is interrupted.
    with tf.Session(config=default_config) as sess:
        sess.run(tf.global_variables_initializer())

        order = np.arange(len(train_indices))
        # Start below any reachable accuracy so the first epoch always writes
        # a checkpoint; otherwise a dev accuracy stuck at 0 left nothing to
        # restore.
        best_dev_acc = -1.0
        epochs_without_gain = 0

        for epoch in range(max_epochs):
            print('Training epoch: {}'.format(epoch + 1))
            lr *= 0.999
            np.random.shuffle(order)
            for batch in _iter_batches(len(order), batch_size):
                batch_ids = train_indices[order[batch]]
                # Run optimization op (backprop)
                sess.run(optimizer, feed_dict={
                    x: X[batch_ids], y: Y[batch_ids], lr_placeholder: lr})

            if use_dev:
                dev_acc = _mean_accuracy(sess, x, y, dev_indices, batch_size)
                print('current valid acc: ', dev_acc)
                if dev_acc > best_dev_acc:
                    best_dev_acc = dev_acc
                    saver.save(sess, checkpoint_file)
                    checkpoint_written = True
                    epochs_without_gain = 0
                else:
                    epochs_without_gain += 1
                if epochs_without_gain >= early_stop_epochs:
                    break
            elif epoch == max_epochs - 1:
                print('Save last model')
                saver.save(sess, checkpoint_file)
                checkpoint_written = True

        # Evaluate the checkpointed weights. With max_epochs == 0 nothing was
        # written, so the freshly initialised weights are evaluated instead.
        if checkpoint_written:
            saver.restore(sess, checkpoint_file)

        mean_train_acc = _mean_accuracy(sess, x, y, train_indices, batch_size)

        for batch in _iter_batches(len(test_indices), batch_size):
            batch_ids = test_indices[batch]
            y_batch = Y[batch_ids]
            batch_pred, batch_loss = sess.run(
                [pred, loss], feed_dict={x: X[batch_ids], y: y_batch})
            test_pred.extend(batch_pred)
            test_target.extend(y_batch)
            # NOTE(review): assumes `loss` is averaged over the batch, so it is
            # re-weighted by the batch size here - confirm in make_model_loss.
            loss_sum += float(np.mean(batch_loss)) * len(y_batch)

    # Confusion matrix indexed as [target, prediction], sized from the
    # notebook-level class count instead of a hard-coded 3.
    CM = np.zeros((n_classes, n_classes))
    for target, predicted in zip(test_target, test_pred):
        CM[target, predicted] += 1

    n_test = len(test_target)
    mean_valid_acc = int(np.trace(CM)) / n_test if n_test else 0.0
    mean_valid_loss = loss_sum / n_test if n_test else 0.0

    # Recall of the last class, which is what the notebook documents and
    # reports as 'Ugly recall' (the previous code read class index 1).
    class2_recall = float(CM[-1, -1] / (np.sum(CM[-1]) + 1e-8))
    print(CM)
    cf1 = compute_class_f1(CM)

    print('---------------------------------------------------------')
    print('Training accuracy: {:.01%}'.format(mean_train_acc))
    print("Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".
          format(epoch + 1, mean_valid_loss, mean_valid_acc))
    print('F1 score per class (good,bad,ugly): ', cf1)
    print('Mean F1 score: ', np.mean(cf1))
    print('---------------------------------------------------------')

    return mean_train_acc, mean_valid_acc, CM, class2_recall

### Cross-Validation

A seguir configuramos o treino em validação cruzada. Realizamos o experimento na união dos conjuntos de treino, validação e teste do dataset original, particionada em 10 folds.

In [13]:
# Seed of the fold partition. A dedicated generator keeps the folds identical
# across kernel restarts without touching NumPy's global random state.
FOLD_SEED = 0

# Shuffle a copy so that all_indices itself keeps its original order.
shared_indices = all_indices.copy()
np.random.RandomState(FOLD_SEED).shuffle(shared_indices)
n = len(shared_indices)
n_folds = 10
# Nominal fold size, kept for reference; the split below does not need it.
fold_size = n//n_folds

# np.array_split always returns exactly n_folds parts whose sizes differ by
# at most one. Stepping through the indices with range(0, n, fold_size)
# produced an extra, shorter fold whenever n was not a multiple of n_folds,
# and that fold was never used by the cross-validation loop.
folds = [np.array(part) for part in np.array_split(shared_indices, n_folds)]

Realizamos o procedimento de validação cruzada, guardando o desempenho médio e o desvio no treino e na validação, assim como a cobertura da classe 2 e o tempo do procedimento.

In [14]:
# Display the shape of the preloaded image array as a quick sanity check.
X.shape

(6610, 299, 299, 1)

In [15]:
import time

# Experiment settings (also recorded in the experiment description below).
epochs = 40
early_stop_epochs=0
batch_size = 64
learning_rate = 0.0001
print(learning_rate)

start = time.time()
# One entry per fold: the tuple returned by train_schedule.
td_results = []
findices = list(np.arange(n_folds))
for i in range(n_folds):
    print('Fold ', (i+1), '/',n_folds)
    fold_start = time.time()
    # NOTE(review): presumably rotates the fold order by one position so that
    # every fold is held out exactly once - confirm in utils.rotate_list.
    findices = utils.rotate_list(findices,1)
    # The first fold of the rotated order is held out; the others are merged
    # into the training set. Picking the folds from the list one by one
    # (instead of going through np.array(folds)) also works when the folds
    # do not all have the same length.
    train_folds = np.concatenate([folds[j] for j in findices[1:]])
    # No validation fold: train_schedule then skips early stopping and keeps
    # the weights of the last epoch.
    dev_fold = []
    test_fold = folds[findices[0]]
    
    td_results.append(train_schedule(
        x,y,lr_placeholder,
        train_folds, dev_fold,test_fold,
        max_epochs=epochs,
        early_stop_epochs=early_stop_epochs,
        normalization=True,batch_size=batch_size,lr=learning_rate))
    
    fold_end = time.time()
    fold_time = fold_end-fold_start
    print('fold processing time: ', fold_time, ' s')
    end = time.time()
    elapsed = end-start
    print('accumulated time: ', elapsed, ' s')

0.0001
Fold  1 / 10
no early stopping, save last model at epoch  40
Training epoch: 1
Training epoch: 2
Training epoch: 3
Training epoch: 4
Training epoch: 5
Training epoch: 6
Training epoch: 7
Training epoch: 8
Training epoch: 9
Training epoch: 10
Training epoch: 11
Training epoch: 12
Training epoch: 13
Training epoch: 14
Training epoch: 15
Training epoch: 16
Training epoch: 17
Training epoch: 18
Training epoch: 19
Training epoch: 20
Training epoch: 21
Training epoch: 22


KeyboardInterrupt: 

Separamos os resultados em listas

In [None]:
def compute_class_f1(CM):
    """Return one F1 score per class of a square confusion matrix.

    This cell rebinds a name that an earlier cell of the notebook already
    defines with the same computation.

    For each class the diagonal count is divided by its row total and by its
    column total, and the F1 score is the harmonic mean of the two ratios.
    A small constant keeps the divisions finite, so a class with an empty
    row and column scores 0.

    Parameters
    ----------
    CM : array-like of shape (n_classes, n_classes)
        Confusion matrix of counts.

    Returns
    -------
    numpy.ndarray
        F1 values in row order.
    """
    tiny = 1e-8
    diagonal = np.diag(CM)
    ratio_over_rows = diagonal / (np.sum(CM, axis=1) + tiny)
    ratio_over_cols = diagonal / (np.sum(CM, axis=0) + tiny)
    numerator = 2 * ratio_over_rows * ratio_over_cols
    denominator = ratio_over_rows + ratio_over_cols + tiny
    return numerator / denominator

In [None]:
# Every entry of td_results is the tuple returned by train_schedule:
# (train accuracy, held-out accuracy, confusion matrix, class recall).
train_acc = [fold_result[0] for fold_result in td_results]
dev_acc = [fold_result[1] for fold_result in td_results]
c2_rec = [fold_result[3] for fold_result in td_results]
dev_cm = [fold_result[2] for fold_result in td_results]

# Per-fold F1 scores derived from the confusion matrices.
class_f1 = list(map(compute_class_f1, dev_cm))
mean_f1 = [np.mean(scores) for scores in class_f1]
# Index 2 is the third class, labelled 'ugly' in the training printout.
ugly_f1 = [scores[2] for scores in class_f1]

Computamos a média e desvio de cada valor de interesse, normalizada pela raiz do número de folds.
Ou seja, o erro padrão da estimativa

[https://en.wikipedia.org/wiki/Standard_error]

In [None]:
# Standard error of each mean: standard deviation over the folds divided by
# the square root of the number of folds that actually produced a result.
# That count is taken from the collected results rather than from n_folds,
# so a run that was interrupted part-way is not divided by too large a value.
# NOTE(review): np.std defaults to the population standard deviation
# (ddof=0); a standard error is usually computed from the sample standard
# deviation (ddof=1) - confirm which one is intended.
t_mean_acc = np.mean(train_acc)
factor = np.sqrt(max(len(train_acc), 1))
t_dp_acc = np.std(train_acc)/factor

d_mean_acc = np.mean(dev_acc)
d_dp_acc = np.std(dev_acc)/factor

c2_rec_mean = np.mean(c2_rec)
c2_rec_dp = np.std(c2_rec)/factor

d_mean_f1 = np.mean(mean_f1)
d_dp_f1 = np.std(mean_f1)/factor

d_mean_uf1 = np.mean(ugly_f1)
d_dp_uf1 = np.std(ugly_f1)/factor

# Wall-clock time accumulated by the cross-validation loop, in seconds.
elapsed_time = elapsed

Seguem os resultados

In [None]:
# One row per reported metric: label, mean, separator, standard error.
report_rows = (
    ('Train Accuracy: ', t_mean_acc, ' +/- ', t_dp_acc),
    ('Dev accuracy: ', d_mean_acc, ' +/- ', d_dp_acc),
    ('Ugly recall: ', c2_rec_mean, ' +/- ', c2_rec_dp),
    ('Mean F1-score', d_mean_f1, ' +/- ', d_dp_f1),
    ('Ugly F1-score', d_mean_uf1, '+/-', d_dp_uf1),
)
for row in report_rows:
    print(*row)

Também salvamos a configuração experimental

In [None]:
# Human-readable summary of the experimental setup; the placeholders are
# filled positionally, in the order of experiment_settings below.
experiment_template = """
{} fold cross-validation
initial learning rate = {}
Adam optimizer with decaying learning rate
batch_size = {}
max_epochs_per_fold = {}
early_stop_epochs = {}
"""

experiment_settings = (n_folds, learning_rate, batch_size, epochs, early_stop_epochs)
experiment_description = experiment_template.format(*experiment_settings)

In [None]:
# Show the filled-in experiment description that is stored with the results.
print(experiment_description)

Salvamos os resultados em um arquivo descrevendo o experimento

In [None]:
# Everything written to the results file: model identification, experiment
# setup, aggregated metrics (mean and standard error) and per-fold lists.
dict_result = dict(
    model_name=model_name,
    model_description=model_description,
    experiment_description=experiment_description,
    train_mean_acc=t_mean_acc,
    train_dp_acc=t_dp_acc,
    dev_mean_acc=d_mean_acc,
    dev_dp_acc=d_dp_acc,
    ugly_mean_recall=c2_rec_mean,
    ugly_dp_recall=c2_rec_dp,
    dev_mean_f1=d_mean_f1,
    dev_dp_f1=d_dp_f1,
    dev_mean_ugly_f1=d_mean_uf1,
    dev_dp_ugly_f1=d_dp_uf1,
    train_acc_list=train_acc,
    dev_acc_list=dev_acc,
    ugly_recall_list=c2_rec,
    elapsed_time=elapsed_time,
)

In [None]:
# Display the dictionary for a visual check before it is written to disk.
dict_result

In [None]:
import os
import time

# The timestamp makes every run write to its own file.
model_stamp = model_name + '_' + str(time.time())
result_dir = 'results/'
# Create the output folder on first use instead of failing in open().
os.makedirs(result_dir, exist_ok=True)
result_path = result_dir + model_stamp + '.json'
with open(result_path, 'w') as f:
    # `default` is only called for values json cannot encode natively (for
    # example NumPy scalars that are not float subclasses); they are stored
    # as plain floats.
    json.dump(dict_result, f, default=float)

In [None]:
# Report where the results file was written.
print('saved at: ', result_path)