In [1]:
# Import libraries
import keras
import dask.array as da
from pathlib import Path
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
import pickle
from sklearn import svm

import dask
dask.config.set(scheduler='synchronous') # single-threaded scheduler: dask tasks run one at a time in this process, to avoid memory issues

2024-06-18 17:09:06.222393: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-18 17:09:06.222743: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-18 17:09:06.232430: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-18 17:09:06.326371: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


<dask.config.set at 0x7fd34190b310>

### Construct test samples from different datasets

In [2]:
# Hand-written lookup from integer class label to class name.
# The labels are not contiguous: this mapping has no entry for 4.
class_map = dict(
    [
        (1, 'buriti'),
        (2, 'tucuma'),
        (3, 'banana'),
        (5, 'fruit'),
        (6, 'palm_label'),
    ]
)

In [3]:
# Directory containing the selected cutout stores (one zarr store per class).
path_cutouts_selected = Path("/home/oku/Developments/XAI4GEO/data/cleaned_data/selected_cutouts")


def _open_cutouts(store_name, sample_range=None):
    """Open one zarr store, optionally keep a subset of samples, and replace missing values with 0."""
    cutouts = xr.open_zarr(path_cutouts_selected / store_name)
    if sample_range is not None:
        cutouts = cutouts.isel(sample=sample_range)
    return cutouts.fillna(0)


ds_label1 = _open_cutouts("label1_netflora_buriti_emprapa00.zarr", range(37,50))
ds_label2 = _open_cutouts("label2_netflora_tucuma_emprapa00.zarr", range(16,29))
ds_label3 = _open_cutouts("label3_reforestree_banana.zarr", range(29,42))
# NB: despite its name, ds_label4 is read from the "label5" (fruit) store.
ds_label4 = _open_cutouts("label5_reforestree_fruit.zarr", range(19,32))
# The manual cutouts keep all samples; their labels are shifted by 6.
ds_label_mannual = _open_cutouts("labelx_mannual.zarr")
ds_label_mannual = ds_label_mannual.assign(Y=ds_label_mannual['Y'] + 6)

# Sanity check: report the image stack dimensions and the labels present in each dataset.
for ds in (ds_label1, ds_label2, ds_label3, ds_label4, ds_label_mannual):
    print(f"shape:{ds['X'].sizes}")
    print(f"label:{np.unique(ds['Y'].values)}")
    print("---")

shape:Frozen({'sample': 13, 'x': 128, 'y': 128, 'channel': 3})
label:[1]
---
shape:Frozen({'sample': 13, 'x': 128, 'y': 128, 'channel': 3})
label:[2]
---
shape:Frozen({'sample': 13, 'x': 128, 'y': 128, 'channel': 3})
label:[3]
---
shape:Frozen({'sample': 13, 'x': 128, 'y': 128, 'channel': 3})
label:[5]
---
shape:Frozen({'sample': 9, 'x': 128, 'y': 128, 'channel': 3})
label:[6]
---


In [4]:
# Reference cutouts for each class, keyed by the same integer labels as class_map.
class_data = dict(
    zip(
        (1, 2, 3, 5, 6),
        (ds_label1, ds_label2, ds_label3, ds_label4, ds_label_mannual),
    )
)
class_data

{1: <xarray.Dataset>
 Dimensions:  (sample: 13, x: 128, y: 128, channel: 3)
 Dimensions without coordinates: sample, x, y, channel
 Data variables:
     X        (sample, x, y, channel) float32 dask.array<chunksize=(13, 128, 128, 3), meta=np.ndarray>
     Y        (sample) int64 dask.array<chunksize=(13,), meta=np.ndarray>,
 2: <xarray.Dataset>
 Dimensions:  (sample: 13, x: 128, y: 128, channel: 3)
 Dimensions without coordinates: sample, x, y, channel
 Data variables:
     X        (sample, x, y, channel) float32 dask.array<chunksize=(13, 128, 128, 3), meta=np.ndarray>
     Y        (sample) int64 dask.array<chunksize=(13,), meta=np.ndarray>,
 3: <xarray.Dataset>
 Dimensions:  (sample: 13, x: 128, y: 128, channel: 3)
 Dimensions without coordinates: sample, x, y, channel
 Data variables:
     X        (sample, x, y, channel) float32 dask.array<chunksize=(13, 128, 128, 3), meta=np.ndarray>
     Y        int64 3,
 5: <xarray.Dataset>
 Dimensions:  (sample: 13, x: 128, y: 128, channel: 3

In [5]:
# Shallow copy: the test samples are the very same datasets used as class references.
test_samples = dict(class_data)

In [6]:
@keras.saving.register_keras_serializable(package="MyLayers")
class euclidean_lambda(keras.layers.Layer):
    """Custom Keras layer returning the element-wise squared difference of two tensors.

    The class is registered as a Keras serializable under the "MyLayers" package.
    """

    def __init__(self, **kwargs):
        """Build the layer; all keyword arguments are forwarded to ``keras.layers.Layer``."""
        super().__init__(**kwargs)
        # The layer name is always forced to this fixed value after construction.
        self.name = 'euclidean_lambda'

    def call(self, featA, featB):
        """Return ``(featA - featB) ** 2`` element-wise; no sum or square root is applied."""
        difference = featA - featB
        return keras.ops.square(difference)

# Load the saved siamese model (path is relative to the working directory) and print its summary.
# NOTE(review): presumably the saved model contains the custom `euclidean_lambda` layer,
# which is why that class is registered before loading -- confirm.
siamese_model = keras.saving.load_model('../optimized_models/siamese_model.keras')
siamese_model.summary()

In [7]:
# Due to memory limit, we make a function to compute the similarity score per batch
batch_size = 10  # number of samples to process at once to compute similarity score
def predict_per_chunk(x, y):
    """Compute similarity scores between two sets of images in the same batch.

    Parameters
    ----------
    x, y : array-like
        The two image batches passed to the siamese model as its pair of inputs.
        Presumably both share the same leading (sample) dimension -- confirm.

    Returns
    -------
    numpy.ndarray
        1-D array of model outputs, one entry per image pair.
    """
    scores = siamese_model.predict([x, y], verbose=0)
    # Flatten rather than squeeze(): squeeze() turns a single-pair batch into a
    # 0-d scalar, which cannot be concatenated with the other 1-D blocks.
    return np.reshape(scores, -1)

# Compute similarity scores between every test sample and every reference class.
# Result layout: similarity_scores[test_class][sample_index][reference_class] -> score array
# NOTE(review): test_samples holds the same datasets as class_data, so each test
# sample is also scored against itself inside its own class -- confirm this is intended.
similarity_scores = {}

# Normalise and chunk the reference images of each class once; this is lazy and
# does not depend on the test sample, so it is hoisted out of the loops.
class_X_norm = {
    class_i: (class_data[class_i]["X"] / 255.0).chunk({"sample": batch_size})
    for class_i in class_map.keys()
}

for key, samples in test_samples.items():
    print(f"#####class{key} as test sample#####")
    sample_class = {}
    for sample_idx in range(samples.sizes['sample']):
        test_sample = samples.isel(sample=sample_idx)
        sample_class_id = {}
        for class_i, X_class_norm in class_X_norm.items():

            # Repeat the test image once per reference image to form the pairs
            n_refs = X_class_norm.sizes["sample"]
            X_sample_norm = test_sample.expand_dims({"sample": n_refs})["X"] / 255.0
            X_sample_norm = X_sample_norm.chunk({"sample": batch_size})

            # Compute similarity scores per batch. The output chunks are derived
            # from the input's sample chunks (with the dropped axes removed), so a
            # shorter final block is described correctly.
            scores = da.map_blocks(
                predict_per_chunk,
                X_sample_norm.data,
                X_class_norm.data,
                dtype="float32",
                drop_axis=(1, 2, 3),
            )
            scores = scores.compute()

            sample_class_id[class_i] = scores
        sample_class[sample_idx] = sample_class_id
    similarity_scores[key] = sample_class


#####class1 as test sample#####


  self.gen.throw(typ, value, traceback)
2024-06-18 17:09:09.555858: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:09:10.248208: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-06-18 17:09:10.525979: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:09:10.886383: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:09:11.189694: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
  self.gen.throw(typ, value, traceb

#####class2 as test sample#####


  self.gen.throw(typ, value, traceback)
2024-06-18 17:09:36.440686: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-06-18 17:09:36.832486: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
  self.gen.throw(typ, value, traceback)
2024-06-18 17:09:37.205822: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:09:37.638706: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:09:38.074024: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceb

#####class3 as test sample#####


2024-06-18 17:10:01.836869: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
  self.gen.throw(typ, value, traceback)
2024-06-18 17:10:02.276158: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-06-18 17:10:02.776225: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:10:03.182888: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:10:03.596369: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:10:03.895118: W tenso

#####class5 as test sample#####


2024-06-18 17:10:28.799817: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:10:29.228418: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:10:29.645037: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:10:30.163859: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:10:30.665475: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
  self.gen.throw(typ, value, traceb

#####class6 as test sample#####


2024-06-18 17:10:55.586617: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:10:56.010500: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:10:56.431309: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
  self.gen.throw(typ, value, traceback)
2024-06-18 17:10:56.834612: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)
2024-06-18 17:10:57.245039: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-06-18 17:10:57.551844: W tenso

In [8]:
# Persist the computed similarity scores as a pickle file
with open('data/similarity_scores_acc.pkl', 'wb') as pkl_out:
    pickle.dump(similarity_scores, pkl_out)

## Classification

In [9]:
# Restore the similarity scores from the pickle file (only unpickle files you trust)
with open('data/similarity_scores_acc.pkl', 'rb') as pkl_in:
    similarity_scores = pickle.load(pkl_in)

In [10]:
# Show the nested mapping: test class -> sample index -> reference class -> array of scores
similarity_scores

{1: {0: {1: array([0.5136911 , 0.84681785, 0.9559784 , 0.6465236 , 0.6332368 ,
          0.69302267, 0.01273153, 0.9667048 , 0.7848781 , 0.5416552 ,
          0.4087268 , 0.47685915, 0.74672985], dtype=float32),
   2: array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32),
   3: array([1.51679124e-04, 1.73586886e-02, 1.24558428e-04, 1.08700719e-04,
          1.97481540e-06, 2.09440486e-11, 8.51900836e-07, 1.26134155e-05,
          6.45189516e-07, 1.31420078e-04, 5.93882260e-06, 1.23446473e-04,
          4.04628023e-04], dtype=float32),
   5: array([8.3888123e-09, 3.7318960e-06, 4.9225299e-04, 1.5087739e-05,
          1.7852409e-04, 2.1073260e-05, 1.4723715e-07, 2.4218737e-04,
          4.0570255e-02, 2.0954375e-04, 9.2734699e-06, 8.2853610e-08,
          3.6114110e-07], dtype=float32),
   6: array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
          0.0000000e+00, 1.3322859e-37, 0.0000000e+00, 0.0000000e+00,
          0.0000000e+00], dtype=float32)

### Method 1: Average similarity score

In [11]:
# For every test sample, predict the class whose mean similarity score is highest,
# then report the fraction of correctly predicted samples per true class.
accuracy_avg = {}
for key, samples in similarity_scores.items():
    n_hits = 0
    for per_class_scores in samples.values():
        mean_by_class = {label: np.mean(values) for label, values in per_class_scores.items()}
        best_label = max(mean_by_class, key=mean_by_class.get)
        n_hits += int(best_label == key)
    accuracy_avg[key] = n_hits/len(samples)

accuracy_avg

{1: 0.9230769230769231,
 2: 1.0,
 3: 0.6923076923076923,
 5: 0.8461538461538461,
 6: 0.7777777777777778}

### Method 2: K-nearest neighbors

In [12]:
# Manual input: the number of K
k = 7

# K-nearest-neighbour vote: for every test sample take the k highest similarity
# scores over all reference images and predict the most frequent class among them.
accuracy_knn = {}
for key, samples in similarity_scores.items():
    truth = key
    correct_num = 0
    for sample_id, sample_class in samples.items():
        # Keep scores and their class labels in two parallel arrays. Looking the
        # class up by score value would be wrong: equal scores from different
        # classes (e.g. several exact zeros) would all map to a single class.
        all_scores = np.concatenate([np.ravel(scores) for scores in sample_class.values()])
        all_labels = np.concatenate(
            [np.full(np.size(scores), class_i) for class_i, scores in sample_class.items()]
        )
        # Stable descending sort: ties keep the class iteration order, so the
        # result is deterministic.
        top_k_idx = np.argsort(-all_scores, kind="stable")[:k]
        top_k_classes = all_labels[top_k_idx].tolist()
        predicted_class = Counter(top_k_classes).most_common(1)[0][0]
        if predicted_class == truth:
            correct_num += 1
    accuracy_knn[key] = correct_num/len(samples)

accuracy_knn

{1: 0.8461538461538461,
 2: 1.0,
 3: 0.6923076923076923,
 5: 0.6153846153846154,
 6: 0.8888888888888888}

In [None]:
# Method 3: SVM classifier on the per-class mean similarity scores.
# NOTE: pickle.load can execute arbitrary code; only load model files you trust.
with open('../optimized_models/svm_classifier.pkl', 'rb') as f:
    svm_model = pickle.load(f)

# Feature vector layout: one mean score per reference class, in class_map order.
# The class labels are not 0..N-1, so they must be looked up by key, not by position.
# NOTE(review): assumes the SVM was trained with features in this order -- confirm.
feature_classes = list(class_map.keys())

accuracy_svm = {}
for key, samples in similarity_scores.items():
    # The true class of these samples is the key they are stored under.
    # NOTE(review): assumes the SVM predicts the class_map labels; if it was trained
    # on 0-based class indices, map the prediction through feature_classes first.
    truth = key
    correct_num = 0
    for sample_id, sample_class in samples.items():
        mean_scores = np.array(
            [np.mean(sample_class[class_i]) for class_i in feature_classes]
        ).reshape(1, -1)
        predicted_class = int(svm_model.predict(mean_scores)[0])
        if predicted_class == truth:
            correct_num += 1
    accuracy_svm[key] = correct_num/len(samples)

accuracy_svm