# Mount Google Drive

In [1]:
# Connect to Google Drive
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import os
os.environ['KAGGLE_CONFIG_DIR'] = "/content/gdrive/My Drive/Kaggle"

In [3]:
# Copying the files to the Colab instance gives an error
# !cp -r /content/gdrive/MyDrive/Projekte/deforestation /home/
# I/O error!

# Download Kaggle data

In [4]:
#!kaggle datasets download -d nikitarom/planets-dataset

In [5]:
#unzipping the zip files and deleting the zip files
#!unzip \*.zip  && rm *.zip

# Clone own code from GitHub

In [6]:
%cd /content

/content


In [7]:
!git clone https://github.com/danielwiegand/deforestation

Cloning into 'deforestation'...
remote: Enumerating objects: 238, done.[K
remote: Counting objects: 100% (238/238), done.[K
remote: Compressing objects: 100% (154/154), done.[K
remote: Total 238 (delta 138), reused 175 (delta 75), pack-reused 0[K
Receiving objects: 100% (238/238), 34.75 MiB | 39.31 MiB/s, done.
Resolving deltas: 100% (138/138), done.


# Unzip data from Google Drive to Colab

In [8]:
!unzip "/content/gdrive/MyDrive/Projekte/deforestation/data.zip" -d "/content/deforestation/"

[1;30;43mDie letzten 5000 Zeilen der Streamingausgabe wurden abgeschnitten.[0m
  inflating: /content/deforestation/data/images/test-jpg-additional/file_19478.jpg  
  inflating: /content/deforestation/data/images/test-jpg-additional/file_1949.jpg  
  inflating: /content/deforestation/data/images/test-jpg-additional/file_19492.jpg  
  inflating: /content/deforestation/data/images/test-jpg-additional/file_19503.jpg  
  inflating: /content/deforestation/data/images/test-jpg-additional/file_19511.jpg  
  inflating: /content/deforestation/data/images/test-jpg-additional/file_19583.jpg  
  inflating: /content/deforestation/data/images/test-jpg-additional/file_19641.jpg  
  inflating: /content/deforestation/data/images/test-jpg-additional/file_19649.jpg  
  inflating: /content/deforestation/data/images/test-jpg-additional/file_19689.jpg  
  inflating: /content/deforestation/data/images/test-jpg-additional/file_19703.jpg  
  inflating: /content/deforestation/data/images/test-jpg-additional/fi

In [9]:
ls /content/deforestation/data

[0m[01;34mimages[0m/                [01;34msample_submission_v2.csv[0m/  [01;34my_labels[0m/
rainforest_chips.jpeg  [01;34mtest_v2_file_mapping.csv[0m/


# Training

In [10]:
%cd /content/deforestation/train

/content/deforestation/train


In [11]:
!pip install wandb

Collecting wandb
[?25l  Downloading https://files.pythonhosted.org/packages/f6/28/4aefc543967839bdb4e139831b82004279f1c435cede2a9557ccf8369875/wandb-0.10.27-py2.py3-none-any.whl (2.1MB)
[K     |▏                               | 10kB 25.1MB/s eta 0:00:01[K     |▎                               | 20kB 33.1MB/s eta 0:00:01[K     |▌                               | 30kB 23.9MB/s eta 0:00:01[K     |▋                               | 40kB 26.9MB/s eta 0:00:01[K     |▉                               | 51kB 25.1MB/s eta 0:00:01[K     |█                               | 61kB 27.8MB/s eta 0:00:01[K     |█                               | 71kB 18.7MB/s eta 0:00:01[K     |█▎                              | 81kB 19.6MB/s eta 0:00:01[K     |█▍                              | 92kB 18.4MB/s eta 0:00:01[K     |█▋                              | 102kB 18.2MB/s eta 0:00:01[K     |█▊                              | 112kB 18.2MB/s eta 0:00:01[K     |██                              | 122kB 18

In [12]:
import os
import pickle

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from wandb.keras import WandbCallback
import wandb
from functions import load_labels, predict_on_testset, create_model, create_callbacks, evaluate_model, generate_generators

## Load data

In [13]:
y_labels, UNIQUE_LABELS = load_labels()
weight_dict = pickle.load(open("pickle/weight_dict.p", "rb"))

## Initialize wandb

In [14]:
run = wandb.init(project = "deforestation",
           reinit = True,
           name = "transfer learning #1",
           config = {"cnn_layers": None,
                     "filter_layout": None,
                     "batch_norm": None,
                     "max_pooling": None,
                     "dense_layers": None,
                     "dense_units": None,
                     "full_data": True,
                     "data_size": None,
                     "epochs": 100,
                     "patience": 3,
                     "augmentation": False,
                     "class_weight": False,
                     "finetuning": True,
                     "early_stop": True,
                     "transfer_learning": True,
                     "batch_size": 32,
                     "activation": "elu",
                     "optimizer": "adam"})

config = wandb.config

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


## Create model

In [15]:
train_set, val_set = train_test_split(y_labels, test_size = 0.2)

In [17]:
train_generator, valid_generator = generate_generators(train_set, val_set, config, UNIQUE_LABELS, 
                                                       transfer_learning = config.transfer_learning, augmentation = config.augmentation)

m, base_model, F2Score = create_model(config, UNIQUE_LABELS, transfer_learning = config.transfer_learning)

Found 40479 validated image filenames belonging to 17 classes.
Found 8096 validated image filenames belonging to 17 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/nasnet/NASNet-mobile-no-top.h5
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
NASNet (Functional)          (None, 1056)              4269716   
_________________________________________________________________
dense (Dense)                (None, 50)                52850     
_________________________________________________________________
dropout (Dropout)            (None, 50)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 17)                867       
Total params: 4,323,

In [18]:
early_stopping, checkpoint, reduce_lr = create_callbacks(model_name = wandb.run.name, patience = config.patience)

In [19]:
if config.class_weight == True:
  class_weight = weight_dict
else:
  class_weight = None

## Run model

In [20]:
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size

In [None]:
history = m.fit(train_generator,
                steps_per_epoch = STEP_SIZE_TRAIN,
                validation_data = valid_generator,
                validation_steps = STEP_SIZE_VALID,
                epochs = config.epochs,
                class_weight = class_weight,
                callbacks = [WandbCallback(), checkpoint, early_stopping, reduce_lr] # checkpoint, reduce_lr
                )

Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.11722, saving model to /content/gdrive/MyDrive/Projekte/deforestation/models/transfer learning #1
INFO:tensorflow:Assets written to: /content/gdrive/MyDrive/Projekte/deforestation/models/transfer learning #1/assets
Epoch 2/100

Epoch 00002: val_loss improved from 0.11722 to 0.11140, saving model to /content/gdrive/MyDrive/Projekte/deforestation/models/transfer learning #1
INFO:tensorflow:Assets written to: /content/gdrive/MyDrive/Projekte/deforestation/models/transfer learning #1/assets
Epoch 3/100

Epoch 00003: val_loss improved from 0.11140 to 0.10710, saving model to /content/gdrive/MyDrive/Projekte/deforestation/models/transfer learning #1
INFO:tensorflow:Assets written to: /content/gdrive/MyDrive/Projekte/deforestation/models/transfer learning #1/assets
Epoch 4/100

Epoch 00004: val_loss improved from 0.10710 to 0.10451, saving model to /content/gdrive/MyDrive/Projekte/deforestation/models/transfer learning #1
INFO:tensorfl

In [None]:
m.save("/content/gdrive/MyDrive/Projekte/deforestation/models/final_model")

# Finetuning

Finetuning: The model is trained with a very low learning rate with all layers unfrozen.

In [25]:
# from tensorflow.keras.models import load_model
# m = load_model("/content/gdrive/MyDrive/Projekte/deforestation/models/final_model", compile = False) # compile = False: Wegen "custom metric". Siehe https://github.com/jakeret/unet/issues/8



In [None]:
from tensorflow.keras.optimizers import Adam

if config.finetuning == True:

  base_model.trainable = True
  m.summary()

  m.compile(optimizer = Adam(1e-5), # low learning rate 
            loss = 'binary_crossentropy', 
            metrics = [F2Score, "AUC"])

  epochs = 10

  history = m.fit(train_generator,
                  steps_per_epoch = STEP_SIZE_TRAIN,
                  validation_data = valid_generator,
                  validation_steps = STEP_SIZE_VALID,
                  epochs = epochs,
                  class_weight = class_weight
                  )

# Finish run

In [28]:
run.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

In [29]:
m.save("/content/gdrive/MyDrive/Projekte/deforestation/models/final_model_finetuned")

INFO:tensorflow:Assets written to: /content/gdrive/MyDrive/Projekte/deforestation/models/final_model_finetuned_alldata/assets


# Evaluation

In [36]:
y_train, y_train_pred, y_val, y_val_pred, best_threshold = evaluate_model(m, history, train_generator, valid_generator, UNIQUE_LABELS)

100%|██████████| 8095/8095 [00:17<00:00, 451.29it/s]




  0%|          | 0/17 [00:00<?, ?it/s]


Get best thresholds...


100%|██████████| 17/17 [00:32<00:00,  1.91s/it]


# Prediction

In [37]:
submission = predict_on_testset(model = m, classes = train_generator.class_indices, threshold = best_threshold, transfer_learning = config.transfer_learning)

Found 61191 images belonging to 2 classes.


100%|██████████| 61191/61191 [00:19<00:00, 3142.60it/s]


# Submit to Kaggle

In [38]:
#from google.colab import files
#uploaded = files.upload() # upload kaggle.JSON

In [39]:
#!mv kaggle.json /content/gdrive/MyDrive/Kaggle

In [40]:
submission.to_csv("submissions/submission.csv")

In [41]:
!kaggle competitions submit -c planet-understanding-the-amazon-from-space -f submissions/submission.csv -m "training on all images"

100% 2.13M/2.13M [00:00<00:00, 2.74MB/s]
Successfully submitted to Planet: Understanding the Amazon from Space