# First predict and select the images to be manually labeled/reviewed

In [None]:
# Dataset and models currently share one folder; keep the path in one place.
_PLAIUL_ROOT = '/mnt/c/Users/Vasile Rotaru/Downloads/plaiul-20220605T045443Z-001/plaiul'

DATASET_DIRECTORY = _PLAIUL_ROOT
MODELS_DIRECTORY = _PLAIUL_ROOT
ACTUAL_MODEL_FILENAME = 'deepforest_iter2'
# Execution environment switch; the cells below check for 'colab' and 'local'.
ENV = 'local'

# Manually label the selected samples via the LabelImg tool
In order to do this:
1. Sync the selected images from Google Drive to your computer and start LabelImg in that folder.
https://www.google.com/drive/download/
2. Manually label/review them via LabelImg.
This will simultaneously update the labels on Drive.
Go to the folder where the predictions were saved and run `labelImg` to open LabelImg in that folder, then start correcting the predictions.
3. When finished, go to next step: Train on new samples.

# Install dependencies... relevant in Colab

In [2]:
# Colab-only setup: nothing in this cell runs unless ENV is 'colab'.
if ENV == 'colab':
    # Fetch the project sources and install the requirements they list.
    !git clone https://github.com/ai-in-actiune/tree-counting-and-classification-in-images.git
    !pip install -r tree-counting-and-classification-in-images/requirements.txt
    # Upgrade/pin a few packages on top of the requirements install.
    # NOTE(review): the reason for these exact pins is not recorded here.
    !pip install --upgrade opencv-python setuptools==59.5.0 albumentations==1.0.3
    # Mount Google Drive at /content/drive.
    from google.colab import drive
    drive.mount('/content/drive')

# Train on the output from LabelImg
! Split into Train & Valid

In [None]:
# make sure they were manually corrected before running these cells
from pathlib import Path

# Each split has its own sub-folder of the dataset directory, and the merged
# annotations for that split are stored next to its files as labels.csv.
preds_path = Path(DATASET_DIRECTORY)
train_csv_path, valid_csv_path = (
    preds_path / split_name / 'labels.csv' for split_name in ('train', 'valid')
)

Prepare the csvs from the xmls

In [None]:
import os
# Directory to switch into for each known environment; any other ENV value
# falls back to the filesystem root.
workdir = {
    'colab': '/content/tree-counting-and-classification-in-images',
    'local': '..',
}.get(ENV, '/')
os.chdir(workdir)

In [None]:
from glob import glob
from tqdm import tqdm
import pandas as pd

from src.utils import xml_utils

def extract_labels_as_csvs(from_folder_path, to_file):
    """Merge every ``.xml`` annotation file of a folder into a single CSV.

    The folder is searched non-recursively for ``*.xml`` files, which are
    processed in sorted (deterministic) order. Each file is converted with
    ``xml_utils.xml_to_annotations`` and the results are concatenated.

    Args:
        from_folder_path: Folder (``str`` or ``Path``) holding the xml files.
        to_file: Destination handed to ``DataFrame.to_csv``; the index column
            is not written.

    Raises:
        ValueError: If the folder contains no ``.xml`` files. Nothing is
            written to ``to_file`` in that case.
    """
    path = f"{str(from_folder_path)}/*.xml"
    xmls_paths = sorted(glob(path))
    if not xmls_paths:
        # pd.concat([]) would fail with an opaque "No objects to concatenate";
        # fail early and name the folder so the cause is obvious.
        raise ValueError(
            f"No .xml annotation files found in '{from_folder_path}'; "
            f"nothing to write to '{to_file}'."
        )
    # presumably each call returns a DataFrame of bounding boxes for one
    # image -- verify against src.utils.xml_utils
    accumulator_bboxes_dfs = [
        xml_utils.xml_to_annotations(xml_path)
        for xml_path in tqdm(xmls_paths, desc="Converting xmls to csv for train eval")
    ]
    folder_bboxes_df = pd.concat(accumulator_bboxes_dfs)
    folder_bboxes_df.to_csv(to_file, index=False)


# Build labels.csv for each split from the xml files in that split's folder
# (train first, then valid).
for split_csv_path in (train_csv_path, valid_csv_path):
    extract_labels_as_csvs(split_csv_path.parent, split_csv_path)

Manually divide the manually tagged images into `train` and `valid` folders, then run the following cells.

In [None]:
# Show matplotlib figures inline in the notebook output.
%matplotlib inline
# (Re)load IPython's autoreload extension and switch it to mode 2, so edited
# project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [None]:
#load the modules
import os
import time
import numpy as np
from pathlib import Path

import torch
from deepforest import main 
# from deepforest import get_data
# from deepforest import utilities
# from deepforest import preprocess

In [None]:
# Create a DeepForest model and overwrite its weights with a previously saved
# state dict (relative path, so it is resolved against the current directory).
m = main.deepforest()
# alternative2
# m.use_release()
# alternative1
pretrained_state = torch.load('deepforest_p5863_r6672')
m.model.load_state_dict(pretrained_state)

# Top-level settings.
m.config["batch_size"] = 3
m.config["save-snapshot"] = False
m.config["gpus"] = 1

# Training split: annotations CSV plus the folder it sits in.
train_cfg = m.config["train"]
train_cfg['epochs'] = 33
train_cfg["csv_file"] = str(train_csv_path)
train_cfg["root_dir"] = str(train_csv_path.parent)

# Validation split, laid out the same way.
validation_cfg = m.config["validation"]
validation_cfg["csv_file"] = str(valid_csv_path)
validation_cfg["root_dir"] = str(valid_csv_path.parent)

In [None]:
# Build the trainer from the current config and fit the model.
m.create_trainer()
m.trainer.fit(m)

# Evaluate on the validation split configured above; kept as the last
# expression so the notebook displays the result.
validation_settings = m.config["validation"]
m.evaluate(
    csv_file=validation_settings["csv_file"],
    root_dir=validation_settings["root_dir"],
)

In [None]:
# uncomment and use when predicting on Plaiul Fagului
#predicted_raster = model.predict_tile(raster_path, return_plot = True, patch_size=400,patch_overlap=0.05)

In [None]:
# Output folder on the mounted Google Drive.
save_path = Path(
    '/content/drive/MyDrive/vork/ML/trees/tree-counting-and-classification-in-images/models/model1/'
)
checkpoint_file = save_path / "checkpoint.pl"
weights_file = save_path / 'deepforest_p5933_r6815'

# Save the full trainer checkpoint first, then only the model's state dict.
m.trainer.save_checkpoint(checkpoint_file)
torch.save(m.model.state_dict(), weights_file)

# Plaiul fagului

In [None]:
# rotate tif image in mac 30°
# sips -r 30 plaiul_2.tif -o plaiul_rotated30.tif

In [None]:
# import rasterio
# plaiul_sample = rasterio.open('/content/drive/MyDrive/vork/ML/trees/training/plaiul/plaiul_2.tif').read()
# plaiul_sample.shape

In [None]:
# Divide a large tile into smaller arrays. Each crop will be saved to file.
# Parameters:
# numpy_image – a numpy object to be used as a raster, usually opened from rasterio.open.read()
# path_to_raster – (str): Path to a tile that can be read by rasterio on disk
# annotations_file (str) – Path to annotations file (with column names) data in the format -> image_path, xmin, ymin, xmax, ymax, label
# base_dir (str) – Where to save the annotations and image crops relative to current working dir
# patch_size (int) – Maximum dimensions of square window
# patch_overlap (float) – Percent of overlap among windows 0->1
# allow_empty – If True, include images with no annotations to be included in the dataset
# image_name (str) – If numpy_image arg is used, what name to give the raster?
# Returns:	
# A pandas dataframe with annotations file for training.

from deepforest import preprocess
# All inputs and outputs of the split live under DATASET_DIRECTORY.
annotations_csv = f'{DATASET_DIRECTORY}/empty_annotations.csv'
rotated_raster = f'{DATASET_DIRECTORY}/plaiul_rotated30.tif'
crops_dir = f'{DATASET_DIRECTORY}/crops'

preprocess.split_raster(
    annotations_file=annotations_csv,
    path_to_raster=rotated_raster,
    base_dir=crops_dir,
    patch_size=400,
    # buffer percentage of patch_size: (patch_overlap * patch_size) should
    # equal the size of a tree
    patch_overlap=0.05,
    # allow empty crops, since Plaiul Fagului is not yet annotated
    allow_empty=True,
)  # image_name=None
