In [1]:
from skimage.transform import resize
from functools import partial
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.utils.np_utils import to_categorical
from keras.layers import Conv2D, MaxPooling2D, Dense, Input, Flatten
from keras.models import Model
from keras.optimizers import SGD
from keras.losses import categorical_crossentropy
from keras.utils.training_utils import multi_gpu_model

import keras.backend as K

import tensorflow as tf

Using TensorFlow backend.


In [2]:
def f1_micro(y_true, y_pred):
    tp = tf.reduce_sum(y_true * y_pred)
    prec = tp/tf.reduce_sum(y_pred)
    rec = tp/tf.reduce_sum(y_true)
    return 2*prec*rec/(prec + rec + 1e-10)

def simple_cnn(img_size, n_classes):
        
    inputs = Input(img_size)
    conv1 = Conv2D(kernel_size=5, filters=32)(inputs)
    pool1 = MaxPooling2D(pool_size=2, strides=2)(conv1)
    conv2 = Conv2D(kernel_size=5, filters=64)(pool1)
    pool2 = MaxPooling2D(pool_size=2, strides=2)(conv2)
    flat = Flatten()(pool2)
    dense = Dense(units=512, activation='relu')(flat)
    out = Dense(units=n_classes, activation='softmax')(dense)
    model = Model(inputs=inputs, outputs=out)
    
    return model
    
def get_model(config, multi_gpu=False):
    model = simple_cnn(config.data.img_size, config.data.n_classes)
    if multi_gpu:
        model = multi_gpu_model(model, 2)
    model.compile(optimizer=SGD(lr=config.train.learning_rate),
                  loss='categorical_crossentropy',
                  metrics=[f1_micro])
    return model

def datagen(files, labels, batch_size, n_classes,
            img_size, shuffle=True):
    pos = 0
    n_files = len(files)
    while True:
        if (pos == 0) and shuffle:
            files = np.random.permutation(files)
        slc = slice(pos, pos + batch_size)
        batch = list(map(plt.imread, files[slc]))
        batch = [resize_crop_or_pad(b, img_size[:2]) for b in batch]
        batch = np.array(batch)
        pos = min((pos + batch_size), n_files) % n_files
        yield (batch, to_categorical(labels[slc], config.data.n_classes))

def get_dataset(df_path, num=None, shuffle=False):
    df = pd.read_csv(df_path)
    if num is not None:
        df = df[df[config.data.client_column] == num]
    return (datagen(df.filename.values, 
                   df.label.values, 
                   config.data.batch_size,
                   config.data.n_classes,
                   config.data.img_size, shuffle=shuffle), 
int(np.ceil(len(df)/config.data.batch_size)), len(df))

        

In [3]:
from easydict import EasyDict
config = EasyDict(
{
 'data': {
     'train_df_path' : 'seedlings_train.csv',
     'val_df_path': 'seedlings_val.csv',
     'img_size': (200, 200, 3),
     'batch_size': 10,
     'n_classes': 12,
     'client_column': 'shard_non_iid',
     
 },
 'train' : {
     'learning_rate': 1e-3,
     'epochs': 5,
      'client_fraction':0.2,
     'num_clients': 10,
     'num_rounds': 10000
     
 },
    'log': {
        'path': './results/01-fed-avg-non_iid'
    },
    
#     'resume': {
#         'path': './results/03-fed-avg-non_iid'
#     }
}
)


In [4]:
def resize_crop_or_pad(img, size):
    ratio = np.floor_divide(img.shape[:2], size)
    stride = [slice(None, None, r if r > 1 else None) for r in ratio]
    img = img[tuple(stride)]
    
    if len(img.shape) == 2:
        img = img[..., None]
    if img.shape[-1] == 1:
        img = np.tile(img, [1, 1, 3])
    if img.shape[-1] == 4:
        img = img[...,:3]
    diff = np.subtract(img.shape[:2], size)
    orig_shape = img.shape
    for i, diff_i in enumerate(diff):
        if diff_i != 0:
            points = np.array([np.floor(diff_i/2), np.ceil(diff_i/2)]).astype('int')
            points = np.random.permutation(points)
            if diff_i > 0:
                slc = [slice(None, None), slice(None, None)]
                slc[i] = slice(points[0], img.shape[i]-points[1])
                img = img[tuple(slc)]
            if diff_i < 0:
                padding = [(0, 0), (0, 0), (0, 0)]
                padding[i] = tuple(-points)
                img = np.pad(img, pad_width=padding, mode='constant')
                
    assert np.all(np.equal(img.shape, [size[0], size[1], 3])), img.shape 
    return img
    

In [5]:
def client_update(config, num, model, weights):
    print(num)
    print(pd.DataFrame(pd.read_csv(config.data.train_df_path).query('{}=={}'.format(
        config.data.client_column, num)).label.value_counts()).T)
    model.set_weights(weights)
    dataset, steps_per_epoch, n_examples = get_dataset(config.data.train_df_path, num)
    history = model.fit_generator(dataset, 
                        steps_per_epoch=steps_per_epoch,
                        epochs=config.train.epochs, verbose=True)
    weights = model.get_weights()
    return (weights,
            n_examples,
            history.history['loss'][-1], 
            history.history['f1_micro'][-1])
    

In [6]:
def average_weights(weights, n_examples):
    weight_lists = map(list, zip(*weights))
    total_examples = np.sum(n_examples)
    return [np.sum(np.stack(w, axis=-1) * n_examples, axis=-1) / total_examples for w in weight_lists]

def fed_averaging(config):
    if not os.path.exists(config.log.path):
        os.makedirs(config.log.path)
    
    logpath = os.path.join(config.log.path, 'csvlogs')
    if not os.path.exists(logpath):
        os.makedirs(logpath)
    model = get_model(config, model, multi_gpu=True)
    client_model = get_model(config, model, multi_gpu=False)
    valid_data, valid_steps, _ = get_dataset(config.data.val_df_path, shuffle=False)
    valid_log = pd.DataFrame({'round': [], 
                        'loss': [],
                        'f1_micro': []})
    train_log = pd.DataFrame({'round': [], 
                    'loss': [],
                    'f1_micro': []})
    
    best_score = 0
    
    if 'resume' in config.keys():
        resume_ckpt = os.path.join(config.resume.path, 'ckpt')
        print('Resuming from {}'.format(resume_ckpt))
        model.load_weights(resume_ckpt)
        best_score = pd.read_csv(os.path.join(config.resume.path, 'csvlogs', 'valid')).f1_micro.max()
    print('Best valid f1 so far: {}'.format(best_score))
    for t in range(1, config.train.num_rounds + 1):
        print('Round {}'.format(t))
        print('-' * 10)
        print('Training')
        global_weights = model.get_weights()
        m = int(np.ceil(max(config.train.client_fraction * config.train.num_clients, 1)))
        clients = np.random.permutation(config.train.num_clients)[:m]
        local_results = []
        
        for client in clients:
            local_results.append(client_update(config, client, client_model, global_weights))
        
        
        local_weights, n_examples, _tloss, _tf1 = zip(*local_results)
        tloss = np.mean(_tloss)
        tf1 = np.mean(_tf1)
        model.set_weights(average_weights(local_weights, n_examples))
        print('train_loss {:.4f}, train_f1 {:.4f}'.format(tloss, tf1))
        print('Validation')
        vloss, vf1 = model.evaluate_generator(valid_data, valid_steps, verbose=True)
        
        valid_log = valid_log.append(pd.DataFrame({'round': [t], 
                                 'loss': vloss,
                                 'f1_micro': vf1}), ignore_index=True)
        train_log = train_log.append(pd.DataFrame({'round': [t], 
                         'loss': tloss,
                         'f1_micro': tf1}), ignore_index=True)
        
        if vf1 > best_score:
            model.save_weights(os.path.join(config.log.path, 'ckpt'))
            best_score = vf1
            
        valid_log[['round', 'loss', 'f1_micro']].to_csv(os.path.join(logpath, 'valid'), index=False)
        train_log[['round', 'loss', 'f1_micro']].to_csv(os.path.join(logpath, 'train'), index=False)
        
        print('val_loss {:.4f}, val_f1 {:.4f}'.format(vloss, vf1))
        print()
        print()

    pool.close()
    pool.join()
        

In [7]:
fed_averaging(config)

UnboundLocalError: local variable 'model' referenced before assignment