In [None]:

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from osgeo import gdal
from osgeo import gdalconst
import tensorflow_datasets as tfds
from __future__ import print_function

# Single seed shared by every random number generator seeded below.
SEED = 42

import random as rn
# Seed Python's built-in `random` module.
rn.seed(SEED)


# Seed NumPy's legacy global random state.
np.random.seed(SEED)

# Seed TensorFlow through its TF1-compatibility API.
tf.compat.v1.random.set_random_seed(SEED)

import os
import argparse
import json
import importlib

import csv


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the project folder used below lives on it.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Absolute path of the project folder on the mounted Drive (mount point:
# /content/drive), so file accesses and sys.path entries built from it do not
# depend on the current working directory.
data_path = '/content/drive/My Drive/Bigearthnet-MM19'

In [None]:



# Requirements ----------
# 1 The root folder containing the raw images of the downloaded BigEarthNet-S1 dataset.
# 2 The root folder containing the raw images of the downloaded BigEarthNet-S2 dataset.
# 3 The output folder where the resulting files will be created.
# 4 A list of CSV files, each containing the patch names of the corresponding split.
# 5 A flag indicating that this script will also modify the original BigEarthNet-MM JSON files by updating the labels.


In [None]:
# SAR band names to read related GeoTIFF files
band_names_s1 = ["VV", "VH"]

# Spectral band names to read related GeoTIFF files
band_names_s2 = ['B01', 'B02', 'B03', 'B04', 'B05',
                 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12']


def prep_example(bands, BigEarthNet_19_labels, BigEarthNet_19_labels_multi_hot, patch_name_s1, patch_name_s2):
    """Build the ``tf.train.Example`` describing one Sentinel-1/Sentinel-2 patch pair.

    Args:
        bands: mapping from band name to the band's pixel array; it must hold
            every name listed in ``band_names_s1`` and ``band_names_s2``.
        BigEarthNet_19_labels: iterable of label strings (stored UTF-8 encoded).
        BigEarthNet_19_labels_multi_hot: iterable of integers stored as an
            int64 list.
        patch_name_s1: name of the Sentinel-1 patch (string).
        patch_name_s2: name of the Sentinel-2 patch (string).

    Returns:
        A ``tf.train.Example`` with one flattened feature per band, the two
        label features and the two patch names.
    """
    def _bytes_feature(texts):
        # Strings are stored as UTF-8 encoded byte strings.
        return tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[text.encode('utf-8') for text in texts]))

    feature = {}

    # Sentinel-2 bands are flattened and stored as int64 lists.
    for band_name in band_names_s2:
        feature[band_name] = tf.train.Feature(
            int64_list=tf.train.Int64List(value=np.ravel(bands[band_name])))

    # Sentinel-1 bands are flattened and stored as float lists.
    for band_name in band_names_s1:
        feature[band_name] = tf.train.Feature(
            float_list=tf.train.FloatList(value=np.ravel(bands[band_name])))

    feature['BigEarthNet-19_labels'] = _bytes_feature(BigEarthNet_19_labels)
    feature['BigEarthNet-19_labels_multi_hot'] = tf.train.Feature(
        int64_list=tf.train.Int64List(value=BigEarthNet_19_labels_multi_hot))
    feature['patch_name_s1'] = _bytes_feature([patch_name_s1])
    feature['patch_name_s2'] = _bytes_feature([patch_name_s2])

    return tf.train.Example(features=tf.train.Features(feature=feature))
    
def create_split(root_folder_s1, root_folder_s2, patch_names, TFRecord_writer, label_indices, GDAL_EXISTED, RASTERIO_EXISTED, UPDATE_JSON):
    """Serialize every patch pair of one split into ``TFRecord_writer``.

    For each entry of ``patch_names`` the Sentinel-1 and Sentinel-2 GeoTIFF
    bands are read, the labels found in the Sentinel-1 patch's
    ``<patch>_labels_metadata.json`` are converted with
    ``label_indices['label_conversion']`` and one serialized
    ``tf.train.Example`` (built by ``prep_example``) is written.

    Args:
        root_folder_s1: folder holding one sub-folder per Sentinel-1 patch.
        root_folder_s2: folder holding one sub-folder per Sentinel-2 patch.
        patch_names: sequence of rows; ``row[0]`` is the Sentinel-2 patch name
            and ``row[1]`` the Sentinel-1 patch name.
        TFRecord_writer: object whose ``write`` method receives each
            serialized example.
        label_indices: mapping with the keys ``'original_labels'``
            (label -> index), ``'BigEarthNet-19_labels'`` (label -> index) and
            ``'label_conversion'`` (presumably one list of original-label
            indices per BigEarthNet-19 class -- verify against the JSON file).
        GDAL_EXISTED: when true, bands are read with GDAL.
        RASTERIO_EXISTED: when true and ``GDAL_EXISTED`` is false, bands are
            read with rasterio.
        UPDATE_JSON: when true, the converted labels are written back to the
            patch JSON file under the key ``'BigEarthNet_19_labels'``.

    Raises:
        IOError: GDAL could not open a band file.
        RuntimeError: a band has to be read but neither reader is enabled.
    """
    label_conversion = label_indices['label_conversion']
    original_label_idx = label_indices['original_labels']
    BigEarthNet_19_label_idx = {v: k for k, v in label_indices['BigEarthNet-19_labels'].items()}

    # Pick the band reader once instead of branching for every single band.
    if GDAL_EXISTED:
        # Import from `osgeo`, as the top of this file does.
        from osgeo import gdal

        def read_band(band_path):
            band_ds = gdal.Open(band_path, gdal.GA_ReadOnly)
            if band_ds is None:
                # Without this check a missing/unreadable file only shows up
                # as an AttributeError on None further down.
                raise IOError('GDAL could not open ' + band_path)
            return np.array(band_ds.GetRasterBand(1).ReadAsArray())
    elif RASTERIO_EXISTED:
        import rasterio

        def read_band(band_path):
            # Context manager so the dataset handle is released per band.
            with rasterio.open(band_path) as band_ds:
                return np.array(band_ds.read(1))
    else:
        def read_band(band_path):
            raise RuntimeError(
                'Neither GDAL nor rasterio is enabled; cannot read ' + band_path)

    progress_bar = tf.keras.utils.Progbar(target=len(patch_names))
    for patch_idx, patch_name in enumerate(patch_names):
        patch_name_s1, patch_name_s2 = patch_name[1], patch_name[0]
        patch_folder_path_s1 = os.path.join(root_folder_s1, patch_name_s1)
        patch_folder_path_s2 = os.path.join(root_folder_s2, patch_name_s2)

        # Read every GeoTIFF band of the pair: <patch>/<patch>_<band>.tif
        bands = {}
        for band_name in band_names_s1:
            bands[band_name] = read_band(os.path.join(
                patch_folder_path_s1, patch_name_s1 + '_' + band_name + '.tif'))
        for band_name in band_names_s2:
            bands[band_name] = read_band(os.path.join(
                patch_folder_path_s2, patch_name_s2 + '_' + band_name + '.tif'))

        # `patch_name` is a whole row, so the file name must be built from the
        # Sentinel-1 patch name whose folder is being read.
        patch_json_path = os.path.join(
            patch_folder_path_s1, patch_name_s1 + '_labels_metadata.json')
        with open(patch_json_path, 'r') as f:
            patch_json = json.load(f)

        # Multi-hot vector over the original label set.
        original_labels_multi_hot = np.zeros(len(original_label_idx), dtype=int)
        for label in patch_json['labels']:
            original_labels_multi_hot[original_label_idx[label]] = 1

        # A converted class is set when any of the original labels selected by
        # label_conversion[i] is set.
        BigEarthNet_19_labels_multi_hot = np.zeros(len(label_conversion), dtype=int)
        for i in range(len(label_conversion)):
            BigEarthNet_19_labels_multi_hot[i] = int(
                np.sum(original_labels_multi_hot[label_conversion[i]]) > 0)

        BigEarthNet_19_labels = [
            BigEarthNet_19_label_idx[i]
            for i in np.where(BigEarthNet_19_labels_multi_hot == 1)[0]
        ]

        if UPDATE_JSON:
            patch_json['BigEarthNet_19_labels'] = BigEarthNet_19_labels
            # json.dump writes str, so the file has to be opened in text mode.
            with open(patch_json_path, 'w') as f:
                json.dump(patch_json, f)

        example = prep_example(
            bands,
            BigEarthNet_19_labels,
            BigEarthNet_19_labels_multi_hot,
            patch_name_s1,
            patch_name_s2
        )
        TFRecord_writer.write(example.SerializeToString())
        # Report the number of finished patches so the bar reaches its target.
        progress_bar.update(patch_idx + 1)

def prep_tf_record_files(root_folder_s1, root_folder_s2, out_folder, split_names, patch_names_list, label_indices, GDAL_EXISTED, RASTERIO_EXISTED, UPDATE_JSON):
    """Write one ``<split_name>.tfrecord`` file per split into ``out_folder``.

    Args:
        root_folder_s1: passed through to ``create_split``.
        root_folder_s2: passed through to ``create_split``.
        out_folder: folder receiving the TFRecord files; created if missing.
        split_names: name of each split, index-aligned with
            ``patch_names_list``.
        patch_names_list: one collection of patch-name rows per split.
        label_indices: passed through to ``create_split``.
        GDAL_EXISTED: passed through to ``create_split``.
        RASTERIO_EXISTED: passed through to ``create_split``.
        UPDATE_JSON: passed through to ``create_split``.
    """
    os.makedirs(out_folder, exist_ok=True)

    for split_idx in range(len(patch_names_list)):
        split_name = split_names[split_idx]
        # One writer at a time, closed even when create_split raises, so a
        # failing split does not leave file handles open.
        writer = tf.compat.v1.python_io.TFRecordWriter(
            os.path.join(out_folder, split_name + '.tfrecord'))
        try:
            print('INFO: creating the split of', split_name, 'is started')
            create_split(
                root_folder_s1,
                root_folder_s2,
                patch_names_list[split_idx],
                writer,
                label_indices,
                GDAL_EXISTED,
                RASTERIO_EXISTED,
                UPDATE_JSON
            )
        finally:
            writer.close()
        


In [None]:
   # Open one TFRecord writer per split.
   # NOTE(review): `split_names` and `out_folder` are assigned in a later cell,
   # and prep_tf_record_files opens its own writers for the same paths, so this
   # cell looks redundant -- confirm before keeping it.
   try:
        writer_list = []
        for split_name in split_names:
            # `tf.python_io` is not available in TensorFlow 2.x; the writer
            # lives under `tf.io`.
            writer_list.append(
                    tf.io.TFRecordWriter(os.path.join(
                        out_folder, split_name + '.tfrecord'))
                )
   except Exception:
        print('ERROR: TFRecord writer is not able to write files')
        # Re-raise so the actual cause is shown instead of being swallowed.
        raise

ERROR: TFRecord writer is not able to write files


In [None]:
 # The function's arguments must be initialised first.
# Which raster reader to use and whether the patch JSON files get updated.
GDAL_EXISTED = True
RASTERIO_EXISTED = False
UPDATE_JSON = True

# Input/output locations, all below the project folder `data_path`.
# NOTE(review): both root folders point at the same directory -- confirm that
# the Sentinel-1 and Sentinel-2 patches really live side by side.
root_folder_s1 = data_path + '/data/mband'
root_folder_s2 = data_path + '/data/mband'
out_folder = data_path + '/outf'
split = data_path + '/splits'
splits = [split + '/val.csv', split + '/train.csv', split + '/test.csv']

for csv_file in splits:
    print(csv_file)

# One list of CSV rows per split; the split name is the CSV file name up to
# its first dot.
patch_names_list = []
split_names = []
for csv_file in splits:
    split_names.append(os.path.basename(csv_file).split('.')[0])
    print(split_names)
    # newline='' is the mode the csv module asks for when reading files.
    with open(csv_file, 'r', newline='') as fp:
        patch_names_list.append(list(csv.reader(fp, delimiter=',')))

with open(data_path + '/label_indices.json', 'rb') as f:
    label_indices = json.load(f)

prep_tf_record_files(root_folder_s1, root_folder_s2, out_folder, split_names, patch_names_list, label_indices, GDAL_EXISTED, RASTERIO_EXISTED, UPDATE_JSON)


drive/My Drive/Bigearthnet-MM19/splits/val.csv
drive/My Drive/Bigearthnet-MM19/splits/train.csv
drive/My Drive/Bigearthnet-MM19/splits/test.csv
['val']
['val', 'train']
['val', 'train', 'test']
INFO: creating the split of val is started


AttributeError: ignored

### Training

In [None]:
class BigEarthNet:
    """Input pipeline that reads, parses and batches examples from TFRecord files.

    Attributes set by ``__init__``:
        label_type: prefix of the label features to parse.
        dataset: the shuffled/repeated/parsed/batched/prefetched dataset.
        batch_iterator: one-shot iterator over ``dataset``.
    """

    # (band name, side length in pixels) of the square Sentinel-2 bands,
    # stored as int64.
    _S2_BAND_SIDES = (
        ('B01', 20), ('B02', 120), ('B03', 120), ('B04', 120),
        ('B05', 60), ('B06', 60), ('B07', 60), ('B08', 120),
        ('B8A', 60), ('B09', 20), ('B11', 60), ('B12', 60),
    )
    # Same for the Sentinel-1 bands, stored as float32.
    _S1_BAND_SIDES = (('VV', 120), ('VH', 120))

    def __init__(self, TFRecord_paths, batch_size, nb_epoch, shuffle_buffer_size, label_type):
        """Build the dataset and its one-shot iterator.

        Args:
            TFRecord_paths: path(s) handed to ``tf.data.TFRecordDataset``.
            batch_size: number of examples per batch (last batch may be smaller).
            nb_epoch: number of times the dataset is repeated.
            shuffle_buffer_size: shuffle buffer size; shuffling is skipped
                when it is not positive.
            label_type: prefix of the label features, see ``parse_function``.
        """
        self.label_type = label_type
        dataset = tf.data.TFRecordDataset(TFRecord_paths)
        if shuffle_buffer_size > 0:
            dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
        dataset = dataset.repeat(nb_epoch)

        dataset = dataset.map(
            lambda x: self.parse_function(x, self.label_type),
            num_parallel_calls=10
        )

        dataset = dataset.batch(batch_size, drop_remainder=False)
        self.dataset = dataset.prefetch(10)
        # The method form `dataset.make_one_shot_iterator()` only exists on
        # TF1-style datasets; the compat function accepts both kinds.
        self.batch_iterator = tf.compat.v1.data.make_one_shot_iterator(self.dataset)

    def parse_function(self, example_proto, label_type):
        """Parse one serialized example into a dict of tensors.

        Args:
            example_proto: serialized ``tf.train.Example``.
            label_type: prefix of the label features; ``'original'`` selects
                43 classes, anything else 19.
                NOTE(review): the writer in this notebook only stores the
                ``'BigEarthNet-19'`` label features.

        Returns:
            Dict with every band reshaped to ``[side, side]``, the two patch
            names, ``<label_type>_labels`` and ``<label_type>_labels_multi_hot``.
        """
        nb_class = 43 if label_type == 'original' else 19
        labels_key = label_type + '_labels'
        multi_hot_key = label_type + '_labels_multi_hot'

        feature_spec = {}
        for band_name, side in self._S2_BAND_SIDES:
            feature_spec[band_name] = tf.io.FixedLenFeature([side * side], tf.int64)
        for band_name, side in self._S1_BAND_SIDES:
            feature_spec[band_name] = tf.io.FixedLenFeature([side * side], tf.float32)
        feature_spec['patch_name_s1'] = tf.io.VarLenFeature(dtype=tf.string)
        feature_spec['patch_name_s2'] = tf.io.VarLenFeature(dtype=tf.string)
        feature_spec[labels_key] = tf.io.VarLenFeature(dtype=tf.string)
        feature_spec[multi_hot_key] = tf.io.FixedLenFeature([nb_class], tf.int64)

        parsed_features = tf.io.parse_single_example(example_proto, feature_spec)

        # Every band is reshaped from its own flat feature (the 'VH' entry
        # used to be built from the 'VV' data).
        parsed = {}
        for band_name, side in self._S2_BAND_SIDES + self._S1_BAND_SIDES:
            parsed[band_name] = tf.reshape(parsed_features[band_name], [side, side])
        for key in ('patch_name_s1', 'patch_name_s2', labels_key, multi_hot_key):
            parsed[key] = parsed_features[key]
        return parsed


In [None]:
import sys
sys.path.append(data_path)
from utils import get_metrics
import BigEarthNet as ben

In [None]:
pip install --upgrade tf_slim

Collecting tf_slim
  Downloading tf_slim-1.1.0-py2.py3-none-any.whl (352 kB)
[K     |████████████████████████████████| 352 kB 4.2 MB/s 
Installing collected packages: tf-slim
Successfully installed tf-slim-1.1.0


In [None]:
# Load the run configuration (JSON) that is later passed to run_model(args).
with open(data_path+'/configs/base.json', 'rb') as f:
        args = json.load(f)


In [None]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import tf_slim as contrib_slim
slim = contrib_slim

In [None]:
!python /content/drive/MyDrive/Bigearthnet-MM19/models/main_model.py

Instructions for updating:
non-resource variables are not supported in the long term
Traceback (most recent call last):
  File "/content/drive/MyDrive/Bigearthnet-MM19/models/main_model.py", line 13, in <module>
    from content.drive.MyDrive.BigearthnetMM19.nets.resnet_utils import resnet_arg_scope
ModuleNotFoundError: No module named 'content'


In [None]:

sys.path.append(data_path+'/models')
import main_model

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def run_model(args):
    """Train the configured model on the training TFRecord files.

    Args:
        args: mapping that provides the keys read below:
            'tr_tf_record_files', 'batch_size', 'nb_epoch',
            'shuffle_buffer_size', 'label_type', 'training_size',
            'model_name', 'modality', 'learning_rate', 'fine_tune',
            'model_file' (read only when 'fine_tune' is truthy), 'out_dir',
            'save_checkpoint_per_iteration' and
            'save_checkpoint_after_iteration'.

    Side effects:
        Writes summaries to ``<out_dir>/logs/training`` and checkpoints to
        ``<out_dir>/models/iteration-<n>``.
    """
    with tf.Session() as sess:
        iterator = BigEarthNet(
            args['tr_tf_record_files'],
            args['batch_size'],
            args['nb_epoch'],
            args['shuffle_buffer_size'],
            args['label_type']
        ).batch_iterator
        nb_iteration = int(np.ceil(float(args['training_size'] * args['nb_epoch']) / args['batch_size']))
        iterator_ins = iterator.get_next()

        # The model class is looked up by name inside the `models` package.
        model = importlib.import_module('models.' + args['model_name']).DNN_model(args['label_type'], args['modality'])
        model.create_network()
        loss = model.define_loss()

        # Run the collected update ops together with every training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = tf.train.AdamOptimizer(learning_rate=args['learning_rate']).minimize(loss)

        variables_to_save = tf.global_variables()
        _, metric_means, metric_update_ops = get_metrics(model.multi_hot_label, model.predictions, model.probabilities)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        model_saver = tf.train.Saver(max_to_keep=0, var_list=variables_to_save)
        iteration_idx = 0

        if args['fine_tune']:
            model_saver.restore(sess, args['model_file'])
            # Resume the iteration counter from a checkpoint name such as
            # '.../iteration-1234'.
            if 'iteration' in args['model_file']:
                iteration_idx = int(args['model_file'].split('iteration-')[-1])

        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(os.path.join(args['out_dir'], 'logs', 'training'), sess.graph)
        checkpoint_prefix = os.path.join(args['out_dir'], 'models', 'iteration')

        # `tf.contrib` is not available in TensorFlow 2.x; the same progress
        # bar is exposed as tf.keras.utils.Progbar.
        progress_bar = tf.keras.utils.Progbar(target=nb_iteration)
        try:
            while True:
                try:
                    batch_dict = sess.run(iterator_ins)
                except tf.errors.OutOfRangeError:
                    # The iterator is exhausted after nb_epoch passes.
                    break
                _, _, batch_loss, batch_summary = sess.run(
                    [train_op, metric_update_ops, loss, summary_op],
                    feed_dict=model.feed_dict(batch_dict, is_training=True))
                iteration_idx += 1
                summary_writer.add_summary(batch_summary, iteration_idx)
                if (iteration_idx % args['save_checkpoint_per_iteration'] == 0) and (iteration_idx >= args['save_checkpoint_after_iteration']):
                    model_saver.save(sess, checkpoint_prefix, iteration_idx)
                progress_bar.update(iteration_idx, values=[('loss', batch_loss)])
            # Final checkpoint once all the data has been consumed.
            model_saver.save(sess, checkpoint_prefix, iteration_idx)
        finally:
            # Flush and release the event file even when training fails.
            summary_writer.close()



In [None]:

sys.path.append(data_path+'/nets')
import resnet_utils
import resnet_v1

In [None]:
run_model(args)

AttributeError: ignored