# Training

## Libraries

In [None]:
!pip install git+https://github.com/PatBall1/detectree2.git

In [None]:
import os, shutil, glob, time, json, random, yaml
from datetime import date, datetime
from pathlib import Path
from google.colab import drive
drive.mount('/content/drive')

from detectron2.engine import DefaultPredictor
from detectree2.preprocessing.tiling import tile_data_train, to_traintest_folders, tile_data
from detectree2.models.predict import predict_on_data
from detectree2.models.train import MyTrainer, setup_cfg, register_train_data, remove_registered_data, predictions_on_data, combine_dicts, get_tree_dicts, load_json_arr
from detectree2.models.outputs import project_to_geojson, stitch_crowns, clean_crowns, to_eval_geojson, clean_predictions
from detectree2.models.evaluation import site_f1_score2
from detectron2.utils.visualizer import Visualizer
from detectron2.evaluation.coco_evaluation import instances_to_coco_json

import cv2
import wandb
from PIL import Image
import rasterio
import rioxarray as rxr
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from detectree_addons import *

## Convert to COCO format

The images in all folders need to be converted to the COCO format using detectree2 built-in methods. `prepare_tiled_data_train` and `to_traintest_folders` must be run only once.

In [None]:
# Base directory for the site data; every other path in this notebook is
# built by appending to it.
site_path = "drive/Shareddrives/detectree2_Cambridge/data/Cambridge/"

# Tiling settings shared by the dataset cells below.
tilename = 'city_center'
tile_width = tile_height = 200
buffer = threshold = 0

### Small training dataset

In [None]:
# Input/output locations for the small training dataset.
small_crown_path = site_path + "crowns/tiles_0.25m_160_20_0_train_crowns.shp"
small_rgb_path = site_path + "rgb/"
small_data_name = 'Cambridge_25cm_2017_small'
small_tiles_dir = site_path + "tiles/"
# A previous `small_train_dir = site_path + "train_small/"` was overwritten by
# this line before ever being used, so only the effective value is kept.
# NOTE(review): the large dataset cell uses the same crowns file, tiles/ and
# train/ locations -- confirm whether a dedicated "train_small/" was intended.
small_train_dir = site_path + "train/"
small_test_dir = site_path + "test/"

# Load the RGB rasters found under small_rgb_path (helper from detectree_addons).
small_imgs = read_multiple_rgb(small_rgb_path)

# Read in crowns (then filter by an attribute if required) and reproject them
# to the CRS of the first image so crowns and imagery line up.
small_crowns = gpd.read_file(small_crown_path)
small_crowns = small_crowns.to_crs(small_imgs[0].crs.data)

# Uncomment to drop an earlier registration before re-running this cell:
# remove_registered_data(small_data_name)
register_train_data(small_train_dir, small_data_name, val_fold=5)

In [None]:
# One-off step (see the note above the parameters cell): tile the small
# dataset imagery together with its crowns, then split the tiles into
# train/test folders under site_path.
prepare_tiled_data_train(
    small_imgs,
    small_tiles_dir,
    tilename=tilename,
    buffer=buffer,
    tile_size=tile_width,
    crowns=small_crowns,
    threshold=threshold,
    dtype_bool=True,
)
to_traintest_folders(
    small_tiles_dir,
    site_path,
    test_frac=0.1,
    folds=5,
)

### Large training dataset

In [None]:
# Input/output locations for the large training dataset.
large_crown_path = site_path + "crowns/tiles_0.25m_160_20_0_train_crowns.shp"
large_rgb_path = site_path + "rgb/"
large_data_name = 'Cambridge_25cm_2017_large'
large_tiles_dir = site_path + "tiles/"
# A previous `large_train_dir = site_path + "train_large/"` was overwritten by
# this line before ever being used, so only the effective value is kept.
# NOTE(review): the small dataset cell uses the same crowns file, tiles/ and
# train/ locations -- confirm whether a dedicated "train_large/" was intended.
large_train_dir = site_path + "train/"
large_test_dir = site_path + "test/"

# Load the RGB rasters found under large_rgb_path (helper from detectree_addons).
large_imgs = read_multiple_rgb(large_rgb_path)

# Read in crowns (then filter by an attribute if required) and reproject them
# to the CRS of the first image so crowns and imagery line up.
large_crowns = gpd.read_file(large_crown_path)
large_crowns = large_crowns.to_crs(large_imgs[0].crs.data)

# Uncomment to drop an earlier registration before re-running this cell:
# remove_registered_data(large_data_name)
register_train_data(large_train_dir, large_data_name, val_fold=5)

In [None]:
# One-off step (see the note above the parameters cell): tile the large
# dataset imagery together with its crowns, then split the tiles into
# train/test folders under site_path.
prepare_tiled_data_train(
    large_imgs,
    large_tiles_dir,
    tilename=tilename,
    buffer=buffer,
    tile_size=tile_width,
    crowns=large_crowns,
    threshold=threshold,
    dtype_bool=True,
)
to_traintest_folders(
    large_tiles_dir,
    site_path,
    test_frac=0.1,
    folds=5,
)

### Testing dataset

In [None]:
# Input/output locations for the testing dataset.
test_dir = site_path + "test_large/"
test_crown_path = site_path + "crowns/tiles_0.25m_160_20_0_test_crowns.shp"
test_rgb_path = site_path + "rgb/"
test_tiles_dir = site_path + "tiles/"

# Read the imagery through this cell's own path variable so the cell does not
# depend on the large-dataset cell having been executed first.
test_imgs = read_multiple_rgb(test_rgb_path)

# Read in crowns (then filter by an attribute if required) and reproject the
# *test* crowns to the CRS of the first image. Reprojecting the training
# crowns here would silently discard the test crowns that were just loaded.
test_crowns = gpd.read_file(test_crown_path)
test_crowns = test_crowns.to_crs(test_imgs[0].crs.data)

In [None]:
# RUN ONLY ONCE
# Tile the test imagery together with the test crowns into test_tiles_dir.
prepare_tiled_data_train(
    test_imgs,
    test_tiles_dir,
    tilename=tilename,
    buffer=buffer,
    tile_size=tile_width,
    crowns=test_crowns,
    threshold=threshold,
    dtype_bool=True,
)