# DeepForest Training Walkthrough (CPU/GPU)
  - To run on a GPU (e.g. on Colab):
      1. Select **Runtime** > **Change runtime type** and choose **GPU** as the hardware accelerator.

In [2]:
#install the package, on colab make sure to upgrade existing packages. This is not needed in a clean env.
! pip install --upgrade deepforest



In [3]:
# pip install git+https://github.com/weecology/DeepForest.git
! pip install numpy
! pip install huggingface-hub
! pip install supervision



In [4]:
#load the modules
import os
import time
import numpy as np
from deepforest import main
from deepforest import get_data
from deepforest import utilities
from deepforest import preprocess


In [5]:
# Convert the hand annotations from XML into the tabular (retinanet) format:
# one row per box with image_path, xmin, ymin, xmax, ymax and label columns.
# The get_data function is only needed when fetching sample package data.
YELL_xml = get_data("2019_YELL_2_528000_4978000_image_crop2.xml")
annotation = utilities.xml_to_annotations(YELL_xml)
# Last expression of the cell: preview the first rows of the converted table.
annotation.head()

Unnamed: 0,image_path,xmin,ymin,xmax,ymax,label
0,2019_YELL_2_528000_4978000_image_crop2.png,681,369,699,386,Tree
1,2019_YELL_2_528000_4978000_image_crop2.png,980,59,998,79,Tree
2,2019_YELL_2_528000_4978000_image_crop2.png,953,135,964,150,Tree
3,2019_YELL_2_528000_4978000_image_crop2.png,969,183,995,208,Tree
4,2019_YELL_2_528000_4978000_image_crop2.png,742,623,751,635,Tree


In [6]:
# Locate the image file that corresponds to the annotation file.
YELL_train = get_data("2019_YELL_2_528000_4978000_image_crop2.png")
# Directory that holds the sample image (first element of the head/tail split).
image_path = os.path.split(YELL_train)[0]
# Write the converted dataframe to disk, saved alongside the image.
train_example_csv = os.path.join(image_path, "train_example.csv")
annotation.to_csv(train_example_csv, index=False)

## Prepare Training and Validation Data
  - 75% Training Data
  - 25% Validation Data

In [7]:
# CSV holding the full-image annotations written in the previous step.
annotation_path = os.path.join(image_path, "train_example.csv")
# Cropped windows are saved under this directory, inside the current working directory.
crop_dir = os.path.join(os.getcwd(), "train_data_folder_tree")
# Cut the raster into windows (patch_size=400, patch_overlap=0.05) and get back
# the annotations expressed per crop (image_path values such as "..._crop2_1.png").
train_annotations = preprocess.split_raster(
    annotations_file=annotation_path,
    path_to_raster=YELL_train,
    patch_size=400,
    patch_overlap=0.05,
    base_dir=crop_dir,
)



In [8]:
# Split the image crops into training and validation sets. Normally these
# would be different tiles; splitting a single tile is just for the example.
image_paths = train_annotations.image_path.unique()

# Hold out 25% of the crops for validation.
# - replace=False: each crop is drawn at most once. Sampling with replacement
#   (the default of np.random.choice) can pick the same crop repeatedly, which
#   silently shrinks the validation set below 25%.
# - fixed seed: the split is identical on every Restart & Run All.
rng = np.random.default_rng(42)
valid_paths = rng.choice(image_paths, size=int(len(image_paths) * 0.25), replace=False)

valid_annotations = train_annotations.loc[train_annotations.image_path.isin(valid_paths)]
# NOTE: train_annotations is overwritten here, so re-running only this cell
# splits the already-reduced training set again; re-run the split_raster cell first.
train_annotations = train_annotations.loc[~train_annotations.image_path.isin(valid_paths)]

print(image_paths)

['2019_YELL_2_528000_4978000_image_crop2_1.png'
 '2019_YELL_2_528000_4978000_image_crop2_3.png'
 '2019_YELL_2_528000_4978000_image_crop2_4.png'
 '2019_YELL_2_528000_4978000_image_crop2_5.png'
 '2019_YELL_2_528000_4978000_image_crop2_6.png'
 '2019_YELL_2_528000_4978000_image_crop2_7.png'
 '2019_YELL_2_528000_4978000_image_crop2_8.png'
 '2019_YELL_2_528000_4978000_image_crop2_9.png'
 '2019_YELL_2_528000_4978000_image_crop2_10.png'
 '2019_YELL_2_528000_4978000_image_crop2_11.png'
 '2019_YELL_2_528000_4978000_image_crop2_12.png'
 '2019_YELL_2_528000_4978000_image_crop2_13.png'
 '2019_YELL_2_528000_4978000_image_crop2_14.png'
 '2019_YELL_2_528000_4978000_image_crop2_15.png'
 '2019_YELL_2_528000_4978000_image_crop2_16.png'
 '2019_YELL_2_528000_4978000_image_crop2_17.png'
 '2019_YELL_2_528000_4978000_image_crop2_18.png'
 '2019_YELL_2_528000_4978000_image_crop2_19.png'
 '2019_YELL_2_528000_4978000_image_crop2_20.png'
 '2019_YELL_2_528000_4978000_image_crop2_21.png'
 '2019_YELL_2_528000_4978000

In [9]:
# Report how many box annotations ended up in each split.
# (The former mid-cell `train_annotations.head()` was removed: only the last
# expression of a cell is displayed, so it never produced any output.)
print(f"There are {train_annotations.shape[0]} training crown annotations")
print(f"There are {valid_annotations.shape[0]} test crown annotations")

# Both CSV files go into crop_dir, i.e. the same location as the "base_dir"
# passed to split_raster above, next to the image crops.
annotations_file = os.path.join(crop_dir, "train.csv")
validation_file = os.path.join(crop_dir, "valid.csv")
# index=False only drops the DataFrame index column; the header row with the
# column names IS written.
train_annotations.to_csv(annotations_file, index=False)
valid_annotations.to_csv(validation_file, index=False)

There are 519 training crown annotations
There are 173 test crown annotations


In [10]:
# Show the path of the training annotations CSV written above.
annotations_file

'/home/christian/hnee/DeepForest/docs/examples/train_data_folder_tree/train.csv'

In [11]:
# Preview the first rows of the training annotations (per-crop coordinates).
train_annotations.head()

Unnamed: 0,image_path,xmin,ymin,xmax,ymax,label,geometry
28,2019_YELL_2_528000_4978000_image_crop2_1.png,195.0,115.0,238.0,158.0,Tree,"POLYGON ((195.000 115.000, 195.000 158.000, 23..."
29,2019_YELL_2_528000_4978000_image_crop2_3.png,336.0,152.0,383.0,189.0,Tree,"POLYGON ((336.000 152.000, 336.000 189.000, 38..."
477,2019_YELL_2_528000_4978000_image_crop2_4.png,371.0,50.0,400.0,110.0,Tree,"POLYGON ((371.000 50.000, 371.000 110.000, 400..."
232,2019_YELL_2_528000_4978000_image_crop2_4.png,311.0,91.0,348.0,135.0,Tree,"POLYGON ((311.000 91.000, 311.000 135.000, 348..."
231,2019_YELL_2_528000_4978000_image_crop2_4.png,272.0,101.0,312.0,148.0,Tree,"POLYGON ((272.000 101.000, 272.000 148.000, 31..."


In [12]:
# Preview the first rows of the held-out validation annotations.
valid_annotations.head()

Unnamed: 0,image_path,xmin,ymin,xmax,ymax,label,geometry
433,2019_YELL_2_528000_4978000_image_crop2_14.png,43.0,3.0,78.0,31.0,Tree,"POLYGON ((43.000 3.000, 43.000 31.000, 78.000 ..."
170,2019_YELL_2_528000_4978000_image_crop2_14.png,0.0,1.0,26.0,36.0,Tree,"POLYGON ((26.000 36.000, 26.000 1.000, 0.000 1..."
432,2019_YELL_2_528000_4978000_image_crop2_14.png,52.0,29.0,73.0,46.0,Tree,"POLYGON ((52.000 29.000, 52.000 46.000, 73.000..."
350,2019_YELL_2_528000_4978000_image_crop2_14.png,15.0,48.0,48.0,87.0,Tree,"POLYGON ((15.000 48.000, 15.000 87.000, 48.000..."
440,2019_YELL_2_528000_4978000_image_crop2_14.png,158.0,172.0,180.0,196.0,Tree,"POLYGON ((158.000 172.000, 158.000 196.000, 18..."


## Training & Evaluating Using GPU

In [13]:
# Show the path of the validation annotations CSV.
validation_file

'/home/christian/hnee/DeepForest/docs/examples/train_data_folder_tree/valid.csv'

In [14]:
# Show the path of the training annotations CSV (used for m.config["train"] below).
annotations_file

'/home/christian/hnee/DeepForest/docs/examples/train_data_folder_tree/train.csv'

In [18]:
# annotations_file = "/home/christian/hnee/DeepForest/docs/examples/train_data_folder/train.csv"
# validation_file = "/home/christian/hnee/DeepForest/docs/examples/train_data_folder/train.csv"

In [25]:
# Initialise the model and override the relevant entries of its config.

m = main.deepforest(label_dict={"Tree": 0})

m.config["batch_size"] = 20
# NOTE(review): this top-level "epochs" (30) disagrees with
# m.config["train"]["epochs"] (25) set below -- confirm which key the trainer
# actually reads and drop the other one.
m.config["epochs"] = 30

# Intended to move training to the GPU and use all available GPUs.
# NOTE(review): verify that the installed DeepForest version still reads "gpus".
m.config['gpus'] = '-1'
m.config["train"]["csv_file"] = annotations_file
m.config["train"]["root_dir"] = os.path.dirname(annotations_file)


m.config["score_thresh"] = 0.4
m.config["train"]['epochs'] = 25

# Validate on the held-out crops. This previously pointed at annotations_file
# (the training CSV), so the validation metrics only measured how well the
# model fits its own training data and valid.csv was never used.
m.config["validation"]["csv_file"] = validation_file
m.config["validation"]["root_dir"] = os.path.dirname(validation_file)

# Create the PyTorch Lightning trainer used for training.
m.create_trainer()
# Optionally start from the latest release model:
# m.use_release()

Reading config file: /home/christian/hnee/DeepForest/deepforest/data/deepforest_config.yml


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


In [1]:
""" FIXME this fails sometimes with UserWarning: Converting predictions to GeoDataFrame using geometry column

No predictions made

File ~/anaconda3/envs/DeepForest2/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py:167, in _call_lightning_module_hook(trainer, hook_name, pl_module, *args, **kwargs)
    164 pl_module._current_fx_name = hook_name
    166 with trainer.profiler.profile(f"[LightningModule]{pl_module.__class__.__name__}.{hook_name}"):
--> 167     output = fn(*args, **kwargs)
    169 # restore current_fx when nested context
    170 pl_module._current_fx_name = prev_fx_name

File ~/hnee/DeepForest/deepforest/main.py:654, in deepforest.on_validation_epoch_end(self)
    652 for key, value in results.items():
    653     if key in ["class_recall"]:
--> 654         for index, row in value.iterrows():
    655             self.log(
    656                 "{}_Recall".format(
    657                     self.numeric_to_label_dict[row["label"]]),
    658                 row["recall"])
    659             self.log(
    660                 "{}_Precision".format(
    661                     self.numeric_to_label_dict[row["label"]]),
    662                 row["precision"])

"""

"""
TODO: why does this always raise a weird error at epoch 20?



"""

# Run the training loop and report how long it took (wall-clock seconds).
fit_started = time.time()
m.trainer.fit(m)
elapsed_seconds = time.time() - fit_started
print(f"--- Training on GPU: {elapsed_seconds:.2f} seconds ---")

NameError: name 'time' is not defined

In [None]:
# Save the prediction results to a "pred_result" folder in the working directory.
save_dir = os.path.join(os.getcwd(), 'pred_result')
# exist_ok=True makes this cell safe to re-run without a try/except around mkdir.
os.makedirs(save_dir, exist_ok=True)
# Evaluate on the held-out validation crops. Evaluating on annotations_file
# (the training CSV), as before, reports how well the model memorised its
# training data rather than how well it generalises.
results = m.evaluate(
    validation_file,
    os.path.dirname(validation_file),
    iou_threshold=0.4,
    savedir=save_dir,
)

In [None]:
# Show the full evaluation output returned by m.evaluate.
results

In [None]:
# Box precision entry of the evaluation output.
results['box_precision']

In [None]:
# Box recall entry of the evaluation output.
results["box_recall"]

In [None]:
# The "results" entry of the evaluation output
# (presumably the per-box match table -- verify against the DeepForest docs).
results["results"]

In [None]:
# results["class_recall"] #useless with a single class

In [None]:
import torch

# Directory for the saved model artifacts. It is created explicitly so that
# both save calls below have an existing target directory, regardless of
# whether either of them creates missing folders on its own.
root_folder = os.path.join(os.getcwd(), "model")
os.makedirs(root_folder, exist_ok=True)

# Full trainer checkpoint; this is the file reloaded with load_from_checkpoint.
# NOTE(review): the file name says "epochs_10" while the config above sets
# 25 training epochs -- confirm the intended name.
m.trainer.save_checkpoint(
    os.path.join(root_folder, "checkpoint_epochs_10_cosine_lr_retinanet.pl")
)
# Bare model weights (state_dict only), saved separately from the checkpoint.
torch.save(m.model.state_dict(), os.path.join(root_folder, "weights_cosine_lr"))


In [None]:
# Restore the model from the checkpoint file saved under root_folder.
checkpoint_path = os.path.join(root_folder, "checkpoint_epochs_10_cosine_lr_retinanet.pl")
model = main.deepforest.load_from_checkpoint(checkpoint_path)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Resolve the sample raster through get_data so the path works on any machine.
# The previous hard-coded /home/... paths only existed on the author's computer,
# and only the last of the three assignments ever took effect.
# Other sample rasters that were tried here (assumed to ship with the package
# data as well -- verify before use):
#   get_data("2019_YELL_2_541000_4977000_image_crop.png")
#   get_data("2018_SJER_3_252000_4107000_image_477.tif")
raster_path = get_data("2019_YELL_2_528000_4978000_image_crop2.png")
# Predict over the whole raster using overlapping windows.
predicted_raster = model.predict_tile(
    raster_path, return_plot=True, patch_size=400, patch_overlap=0.25
)
plt.imshow(predicted_raster)
plt.show()