In [1]:
import os
from itertools import cycle

import tensorflow as tf
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc, cohen_kappa_score, confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from scipy import interp
from sklearn.metrics import roc_auc_score


import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D, Flatten, Dropout, InputLayer
from tensorflow.keras import optimizers, models
from keras.applications.resnet50 import ResNet50
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.models import Model
from keras.models import Sequential
from keras import optimizers
from keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator



import torch 
from torchvision import transforms



In [3]:
def process(X, y):
    """Resize every non-empty patch to 224x224 and encode its label as an integer.

    Parameters
    ----------
    X : sequence of array-like patches
        Each patch must expose ``.shape`` with at least two axes when its
        first axis is non-empty.
    y : sequence of str
        One label per patch; only the text before the first space is kept.

    Returns
    -------
    tuple
        ``(X, y)``: a float32 array of the resized patches and an int32 array
        of class indices. Patches whose first or second axis has size zero
        are dropped together with their label.

    Notes
    -----
    Labels are translated through the module-level ``rev_subs`` mapping. A
    label that is missing from the mapping is passed through unchanged and
    must then be convertible to int32.
    """
    first_tokens = [label.split(' ')[0] for label in y]

    kept_patches = []
    kept_labels = []
    for idx, patch in enumerate(X):
        # Skip degenerate patches: cv2.resize cannot handle an empty image.
        if patch.shape[0] <= 0 or patch.shape[1] <= 0:
            continue
        as_float = np.array(patch, dtype=np.float32)
        kept_patches.append(cv2.resize(as_float, (224, 224)))
        token = first_tokens[idx]
        kept_labels.append(rev_subs.get(token, token))

    X = np.array(kept_patches, dtype=np.float32)
    y = np.array(kept_labels, dtype=np.int32)
    return (X, y)

In [3]:
def train_model(num_classes=63, learning_rate=0.1):
    """Build and compile a MobileNetV2-based classifier.

    Despite its name, this function does not train anything: it only
    assembles the network and compiles it.

    The MobileNetV2 backbone is created without its classification top, with
    the library's default weights, and is frozen (``trainable = False``). A
    small head is stacked on top: global average pooling, two 128-unit ReLU
    layers and a softmax output layer.

    Parameters
    ----------
    num_classes : int, optional
        Number of units in the softmax output layer (default 63).
    learning_rate : float, optional
        Learning rate of the plain SGD optimizer (default 0.1).

    Returns
    -------
    Model
        The compiled model, using categorical cross-entropy loss and the
        accuracy metric.
    """
    base_model = MobileNetV2(include_top=False, input_shape=(224, 224, 3))
    # Only the new classification head is trained; the backbone stays fixed.
    base_model.trainable = False

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation=tf.nn.relu)(x)
    x = Dense(128, activation=tf.nn.relu)(x)
    preds = Dense(num_classes, activation=tf.nn.softmax)(x)

    model = Model(inputs=base_model.input, outputs=preds)

    # `learning_rate` is the supported keyword; the legacy `lr` alias is
    # deprecated and rejected by recent optimizer implementations.
    opt = tf.keras.optimizers.SGD(learning_rate=learning_rate)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    return model

In [4]:
def train_model_basic(num_classes=63):
    """Build and compile a small four-block convolutional classifier.

    Despite its name, this function does not train anything: it only
    assembles the network and compiles it.

    Architecture: four blocks of Conv2D -> MaxPooling2D -> BatchNormalization
    (64, 64, 32 and 32 filters; the first convolution uses a 6x6 kernel, the
    others 3x3), followed by Flatten, a 128-unit ReLU layer and a softmax
    output layer. The expected input is a 224x224 image with 3 channels.

    Parameters
    ----------
    num_classes : int, optional
        Number of units in the softmax output layer (default 63).

    Returns
    -------
    Sequential
        The compiled model, using categorical cross-entropy loss, SGD with
        momentum and a decaying learning rate, and the accuracy metric.
    """
    img_size = 224

    # (filters, kernel_size) of each convolutional block, in order.
    conv_specs = [(64, 6), (64, 3), (32, 3), (32, 3)]

    model = Sequential()
    for block_idx, (filters, kernel_size) in enumerate(conv_specs):
        # Only the very first layer declares the input shape.
        extra = {'input_shape': (img_size, img_size, 3)} if block_idx == 0 else {}
        model.add(Conv2D(filters, kernel_size=kernel_size, activation='relu', **extra))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        # NOTE(review): momentum=0.01 is far from the Keras default (0.99) and
        # looks like a PyTorch-style value; kept as-is, confirm it is intended.
        model.add(BatchNormalization(momentum=0.01))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    epochs = 5
    learning_rate = 0.01
    decay_rate = learning_rate / epochs
    momentum = 0.8

    # The legacy `lr=` / `decay=` optimizer keywords are deprecated and
    # rejected by recent optimizer implementations. An inverse-time schedule
    # with decay_steps=1 yields learning_rate / (1 + decay_rate * step), which
    # is intended to reproduce the per-iteration decay of the old `decay=`.
    lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=learning_rate,
        decay_steps=1,
        decay_rate=decay_rate)
    opt = tf.keras.optimizers.SGD(learning_rate=lr_schedule,
                                  momentum=momentum,
                                  nesterov=False)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    return model
    

## Testing Set

The test set consists of 5 folds taken from the true matching.

In [5]:
# Load the class names; the position of a name in the array is its class id.
# NOTE: allow_pickle=True unpickles objects stored in the file, so only load
# files from a trusted source.
list_species = np.load('data/list_species_reduced.npy', allow_pickle=True)
print(len(list_species))

# Forward mapping: class id -> name.
dico_species = dict(enumerate(list_species))

# Reverse mapping: name -> class id (used by `process` to encode labels).
rev_subs = {name: class_id for class_id, name in dico_species.items()}
print(rev_subs)

63
{'Abies': 0, 'Acer': 1, 'Aesculus': 2, 'Amelanchier': 3, 'Arctostaphylos': 4, 'Asimina': 5, 'Bejaria': 6, 'Betula': 7, 'Calocedrus': 8, 'Carpinus': 9, 'Carya': 10, 'Castanea': 11, 'Ceanothus': 12, 'Celtis': 13, 'Cephalanthus': 14, 'Cercis': 15, 'Cornus': 16, 'Corylus': 17, 'Crataegus': 18, 'Diospyros': 19, 'Fagus': 20, 'Frangula': 21, 'Fraxinus': 22, 'Gaultheria': 23, 'Hamamelis': 24, 'Ilex': 25, 'Juglans': 26, 'Juniperus': 27, 'Liquidambar': 28, 'Liriodendron': 29, 'Lupinus': 30, 'Lyonia': 31, 'Maclura': 32, 'Magnolia': 33, 'Morella': 34, 'Morus': 35, 'Nyssa': 36, 'Ostrya': 37, 'Persea': 38, 'Picea': 39, 'Pinaceae': 40, 'Pinus': 41, 'Platanus': 42, 'Populus': 43, 'Prunus': 44, 'Pseudotsuga': 45, 'Pyrus': 46, 'Quercus': 47, 'Rhamnus': 48, 'Rhus': 49, 'Robinia': 50, 'Salix': 51, 'Sassafras': 52, 'Serenoa': 53, 'Taxus': 54, 'Thuja': 55, 'Tilia': 56, 'Triadica': 57, 'Tsuga': 58, 'Ulmus': 59, 'Unknown': 60, 'Vaccinium': 61, 'Vitis': 62}


## Training Steps

In [6]:
def train_on_matching(method, sigma, model_choice, epochs=5):
    """Train one model on a matching-derived training set and score it on the test folds.

    The training patches and labels are read from
    ``cnn/train/patches_{method}_sigma_{sigma}.npy`` and
    ``cnn/train/labels_{method}_sigma_{sigma}.npy``; 15% of them are held out
    for validation (fixed ``random_state=2``). The five test folds are read
    from ``cnn/test/patches_test_{i}.npy`` / ``cnn/test/labels_test_{i}.npy``
    for ``i`` in 1..5.

    Parameters
    ----------
    method : str
        Name of the matching method; used to build the training file names.
    sigma : float
        Sigma value; used to build the training file names.
    model_choice : {'basic', 'mobilenetv2'}
        ``'basic'`` uses ``train_model_basic`` with inputs scaled by 1/255;
        ``'mobilenetv2'`` uses ``train_model`` with ``preprocess_input``.
    epochs : int, optional
        Number of training epochs (default 5).

    Returns
    -------
    list of float
        Accuracy on each test fold, in fold order.

    Raises
    ------
    ValueError
        If ``model_choice`` is not one of the supported values.
    """
    # Validate up front so a typo fails immediately instead of after all the
    # data has been loaded (previously it surfaced as an undefined `model`).
    if model_choice == 'basic':
        generator_kwargs = {'rescale': 1/255}
        build_model = train_model_basic
    elif model_choice == 'mobilenetv2':
        generator_kwargs = {'preprocessing_function': preprocess_input}
        build_model = train_model
    else:
        raise ValueError(
            "model_choice must be 'basic' or 'mobilenetv2', got {!r}".format(model_choice))

    # Load and resize the five test folds.
    patches_test = []
    labels_test = []
    for fold in range(1, 6):
        fold_patches = np.load('cnn/test/patches_test_{}.npy'.format(fold), allow_pickle=True)
        fold_labels = np.load('cnn/test/labels_test_{}.npy'.format(fold))
        fold_patches, fold_labels = process(fold_patches, fold_labels)
        patches_test.append(fold_patches)
        labels_test.append(fold_labels)

    # Load the training set produced by the requested matching method.
    train_dir = 'cnn/train'
    path_patches = os.path.join(train_dir, 'patches_{}_sigma_{}.npy'.format(method, sigma))
    path_labels = os.path.join(train_dir, 'labels_{}_sigma_{}.npy'.format(method, sigma))
    patches = np.load(path_patches, allow_pickle=True)
    labels = np.load(path_labels, allow_pickle=True)
    X, y = process(patches, labels)

    model = build_model()

    # Fix the one-hot width to the size of the model's output layer. Letting
    # to_categorical infer it from max(y) + 1 yields too few columns whenever
    # the highest class ids are absent from this particular training file.
    num_classes = model.output_shape[-1]
    y_onehot = to_categorical(y, num_classes=num_classes)
    if model_choice == 'mobilenetv2':
        y_onehot = y_onehot.astype(int)

    X_train, X_val, y_train, y_val = train_test_split(
        X, y_onehot, test_size=0.15, random_state=2)

    # Feed training and validation images in batches of 64; the validation
    # stream is not shuffled.
    train_generator = ImageDataGenerator(**generator_kwargs).flow(
        X_train, y_train, batch_size=64)
    validation_generator = ImageDataGenerator(**generator_kwargs).flow(
        X_val, y_val, batch_size=64, shuffle=False)

    model.fit(train_generator,
              epochs=epochs,
              verbose=1,
              validation_data=validation_generator)

    acc_tests = []
    for X_test, y_test in zip(patches_test, labels_test):
        # Apply the same input scaling that was used during training.
        if model_choice == 'basic':
            X_test = X_test / 255
        else:
            # NOTE(review): preprocess_input may modify float arrays in place;
            # the folds are reloaded on every call so this is harmless here —
            # confirm before caching the test folds across calls.
            X_test = preprocess_input(X_test)

        # Predicted class = index of the highest predicted probability.
        yhat_probs = model.predict(X_test, verbose=0)
        yhat_classes = np.argmax(yhat_probs, axis=1)

        print('classes: ', yhat_classes[:3])
        print('y_test: ', y_test[:3])

        accuracy = accuracy_score(y_test, yhat_classes)
        print('Accuracy: %f' % accuracy)
        acc_tests.append(accuracy)
    return acc_tests

In [7]:
# Sweep every (matching method, sigma) pair with the MobileNetV2 classifier
# and write one CSV of per-fold test accuracies per pair.
methods = ['NN', 'GMN', 'OT non greedy', 'OT greedy', 'GW']

sigmas = np.array([0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0])

results_dir = 'cnn/results_mobilenetv2'
# to_csv does not create missing directories.
os.makedirs(results_dir, exist_ok=True)

for method in methods:
    for sigma in sigmas:
        print(method, sigma)

        acc_tests = train_on_matching(method, sigma, model_choice='mobilenetv2')

        # Size the table from the returned accuracies rather than assuming 5 folds.
        n_folds = len(acc_tests)
        df_sup = pd.DataFrame({
            'sigma': [sigma] * n_folds,
            'accuracy': acc_tests,
            'method': [method] * n_folds,
            'test': list(range(1, n_folds + 1)),
        })
        df_sup.to_csv(os.path.join(
            results_dir, 'res_{}_sigma_{}_mobilenetv2.csv'.format(method, sigma)))
        print(df_sup)


OT non greedy 0.0


KeyboardInterrupt: 

In [8]:
# Sweep every (matching method, sigma) pair with the basic CNN classifier
# and write one CSV of per-fold test accuracies per pair.
methods = ['NN', 'GMN', 'OT non greedy', 'OT greedy', 'GW']

sigmas = np.array([0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5])

results_dir = 'cnn/results_basic'
# to_csv does not create missing directories.
os.makedirs(results_dir, exist_ok=True)

for method in methods:
    for sigma in sigmas:
        print(method, sigma)

        acc_tests = train_on_matching(method, sigma, model_choice='basic')

        # Size the table from the returned accuracies rather than assuming 5 folds.
        n_folds = len(acc_tests)
        df_sup = pd.DataFrame({
            'sigma': [sigma] * n_folds,
            'accuracy': acc_tests,
            'method': [method] * n_folds,
            'test': list(range(1, n_folds + 1)),
        })
        df_sup.to_csv(os.path.join(
            results_dir, 'res_{}_sigma_{}_basic.csv'.format(method, sigma)))
        print(df_sup)

GW 0.0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
classes:  [41  3 39]
y_test:  [41  3 39]
Accuracy: 0.801550
classes:  [22 45 58]
y_test:  [41 45 58]
Accuracy: 0.799697
classes:  [47  8 47]
y_test:  [47 41 47]
Accuracy: 0.811012
classes:  [ 0  0 12]
y_test:  [ 0  0 12]
Accuracy: 0.805766
classes:  [ 0 41 45]
y_test:  [ 0 41  0]
Accuracy: 0.787365
   sigma  accuracy method  test
0    0.0  0.801550     GW     1
1    0.0  0.799697     GW     2
2    0.0  0.811012     GW     3
3    0.0  0.805766     GW     4
4    0.0  0.787365     GW     5
GW 0.5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
classes:  [41  3 39]
y_test:  [41  3 39]
Accuracy: 0.804651
classes:  [41 45 58]
y_test:  [41 45 58]
Accuracy: 0.790592
classes:  [47  8 47]
y_test:  [47 41 47]
Accuracy: 0.811012
classes:  [ 0  0 12]
y_test:  [ 0  0 12]
Accuracy: 0.801214
classes:  [39 41  0]
y_test:  [ 0 41  0]
Accuracy: 0.802773
   sigma  accuracy method  test
0    0.5  0.804651     GW     1
1    0.5  0.790592     GW     



In [9]:
# Gather every per-run CSV of the basic model into one table and save it.
results_dir = 'cnn/results_basic'
summary_name = 'classification_results_basic.csv'

# The summary is written into the same directory it is built from, so it must
# be excluded here; otherwise re-running this cell duplicates every row.
# Sorting makes the row order independent of the filesystem's listing order.
result_files = sorted(
    name for name in os.listdir(results_dir)
    if name.endswith('.csv') and name != summary_name
)

frames = []
for file in result_files:
    print(file)
    df_sup = pd.read_csv(os.path.join(results_dir, file), index_col=0)
    frames.append(df_sup)

# Concatenate once; pd.concat raises on an empty list, so fall back to an
# empty table with the expected columns when no result file is present.
if frames:
    res = pd.concat(frames, ignore_index=True)
else:
    res = pd.DataFrame(columns=['sigma', 'accuracy', 'method', 'test'])

print(res)
res.to_csv(os.path.join(results_dir, summary_name))

                

res_GMN_sigma_0.0_basic.csv
res_GMN_sigma_0.5_basic.csv
res_GMN_sigma_1.0_basic.csv
res_GMN_sigma_1.5_basic.csv
res_GMN_sigma_2.0_basic.csv
res_GMN_sigma_2.5_basic.csv
res_GMN_sigma_3.0_basic.csv
res_GMN_sigma_3.5_basic.csv
res_GMN_sigma_4.0_basic.csv
res_GW_sigma_0.0_basic.csv
res_GW_sigma_0.5_basic.csv
res_GW_sigma_1.0_basic.csv
res_GW_sigma_1.5_basic.csv
res_GW_sigma_2.0_basic.csv
res_GW_sigma_2.5_basic.csv
res_GW_sigma_3.0_basic.csv
res_GW_sigma_3.5_basic.csv
res_GW_sigma_4.0_basic.csv
res_NN_sigma_0.0_basic.csv
res_NN_sigma_0.5_basic.csv
res_NN_sigma_1.0_basic.csv
res_NN_sigma_1.5_basic.csv
res_NN_sigma_2.0_basic.csv
res_NN_sigma_2.5_basic.csv
res_NN_sigma_3.0_basic.csv
res_NN_sigma_3.5_basic.csv
res_NN_sigma_4.0_basic.csv
res_OT greedy_sigma_0.0_basic.csv
res_OT greedy_sigma_0.5_basic.csv
res_OT greedy_sigma_1.0_basic.csv
res_OT greedy_sigma_1.5_basic.csv
res_OT greedy_sigma_2.0_basic.csv
res_OT greedy_sigma_2.5_basic.csv
res_OT greedy_sigma_3.0_basic.csv
res_OT greedy_sigma_4.0_