# Preliminaries

#### Imports

In [None]:
import glob
import math
import os
import threading
from contextlib import contextmanager
from xml.etree import ElementTree

import cv2
import numpy as np
import PIL

from google.colab import drive
from google.colab import output

#### Installing utilities

In [None]:
#install Go

!add-apt-repository ppa:longsleep/golang-backports -y
!apt update
!apt install golang-go
%env GOPATH=/root/go
!go get -u github.com/gopherdata/gophernotes
!cp ~/go/bin/gophernotes /usr/bin/
!mkdir /usr/local/share/jupyter/kernels/gophernotes
!cp ~/go/src/github.com/gopherdata/gophernotes/kernel/* \
       /usr/local/share/jupyter/kernels/gophernotes

# then refresh notebook!

In [None]:
#install AuGoment
! git clone https://github.com/lootag/ImageAuGomentationCLI.git
os.chdir('ImageAuGomentationCLI' + os.sep + 'src')
! make
os.chdir('/content')

In [None]:
! pip install mean_average_precision

#### Import Repository

In [None]:
# Clone the project repository; later cells import `windspeed.utils` and work inside the `windspeed` folder.
! git clone https://github.com/guglielmoG/windspeed.git

#### Definitions

In [None]:
# NOTE(review): the notebook uses cwd, join_path, convert_to_jpg, convert_annot_yolo,
# evaluate_model and predict_yolo without importing them by name; presumably they all
# come from this star import — TODO confirm and replace it with explicit imports.
from windspeed.utils import *

# Folder name of the cloned repository; project paths in the cells below are built from it.
repo = 'windspeed'
# Class labels passed to the annotation converter and to the model evaluation.
detection_classes = ['flag']

# TO_DELETE
Here we can keep legacy code that does not belong anywhere else but might be useful in the future.

In [None]:
# Legacy helper: rename every annotated image and its annotation to '<index>flag.*'.
with cwd('windspeed', 'flags'):
    # annotation files that have not been matched to an image yet
    unmatched = set(os.listdir('Annotations'))
    for index, picture in enumerate(os.listdir('images')):
        stem, extension = os.path.splitext(picture)
        old_xml = stem + '.xml'
        if not os.path.exists(join_path('Annotations', old_xml)):
            # no annotation: move the image out of 'images' into the current folder and report it
            os.rename(join_path('images', picture), picture)
            print(picture)
            continue
        os.rename(join_path('Annotations', old_xml), join_path('Annotations', str(index) + 'flag.xml'))
        os.rename(join_path('images', picture), join_path('images', str(index) + 'flag' + extension))
        unmatched.remove(old_xml)

    print('No images for ', unmatched)

In [None]:
# Used to debug augoment: runs it on one image at a time to identify the images on which it panics.
# Start from a clean checkout of the repository (discard local changes and untracked files).
with cwd(repo):
    ! git checkout .
    ! git clean -fd

with cwd(repo, 'data', 'flags'):
    # scratch folder with the Images/Annotations layout that augoment is run in
    os.mkdir('temp')
    os.mkdir(join_path('temp', 'Images'))
    os.mkdir(join_path('temp', 'Annotations'))
    c = 0
    for f in os.listdir('Images'):
        img_name,_ = os.path.splitext(f)
        # move the single image and its annotation into the scratch folder
        os.rename(join_path('Annotations', img_name+'.xml'), join_path('temp', 'Annotations', img_name+'.xml'))
        os.rename(join_path('Images', f), join_path('temp', 'Images', f))
        try:
            with cwd('temp'):
                # stdout goes to 'good', stderr to 'bad'
                ! augoment -exclusion_threshold=1 -batch_size=1 -blur=2 >good 2>bad
                # 'boh' holds the image name followed by any 'panic' lines found in stderr
                ! echo {f} > boh
                ! grep panic bad >> boh
                tt = ! grep panic bad
                if len(tt) != 0:
                    # despite its name, 'out_good' accumulates the reports of the images that panicked
                    ! cat boh >> out_good
                    ! echo "" >> out_good
        except Exception as e:
            print(f, str(e))
        finally:
            # the processed image and annotation are deleted, not moved back to their original folders
            os.remove(join_path('temp', 'Annotations', img_name+'.xml'))
            os.remove(join_path('temp', 'Images', f))
        # progress counter
        print(c)
        c += 1

# END_TO_DELETE

# Flag Detection

Folder structure:  
-train  
-valid  
-test  

Each folder contains the images and their annotations in separate subfolders (`Images` and `Annotations`).

## Data Preprocessing

### Folder Structure Creation
Split the data into training, validation and test sets, with approximately 75%, 15% and 10% of the images respectively.

In [None]:
# Seed numpy's global random generator, which the shuffle of the images below relies on.
np.random.seed(3456)

def make_augoment_dir(name):
    """Create the folder `name` with the `Images` and `Annotations` subfolders inside it."""
    os.mkdir(name)
    for subfolder in ('Images', 'Annotations'):
        os.mkdir(join_path(name, subfolder))
    
# Move every image and its annotation into the train / valid / test folders.
with cwd(repo, 'data', 'flags'):
    for split in ('train', 'valid', 'test'):
        make_augoment_dir(split)
    # os.listdir returns entries in arbitrary order; sort them so that the seeded
    # shuffle below produces the same split on every machine.
    imgs = sorted(os.listdir('Images'))
    n = len(imgs)
    #use random indexes to shuffle the images
    idx = np.arange(n)
    np.random.shuffle(idx)
    for i in range(n):
        img = imgs[idx[i]]
        img_name, _ = os.path.splitext(img)
        annot = img_name + '.xml'
        # first 75% of the shuffled images -> train, next 15% -> valid, the rest -> test
        if i < n * 0.75:
            split = 'train'
        elif i < n * 0.9:
            split = 'valid'
        else:
            split = 'test'
        os.rename(join_path('Images', img), join_path(split, 'Images', img))
        os.rename(join_path('Annotations', annot), join_path(split, 'Annotations', annot))
    # both source folders must be empty at this point, otherwise rmdir raises
    os.rmdir('Images')
    os.rmdir('Annotations')

### Augmentation
NB: to obtain several blur values for a given image (i.e. produce, say, 3 new blurred images out of every image, each with a different blur value), one needs to rename the previously created blurred images first, because otherwise they are overwritten.

Convert images to jpg for augoment to work properly, then run augoment.

In [None]:
convert_to_jpg(join_path(repo,'data','flags','train', 'Images'))
convert_to_jpg(join_path(repo,'data','flags','valid', 'Images'))
convert_to_jpg(join_path(repo,'data','flags','test', 'Images'))

with cwd(join_path(repo,'data','flags','train')):
    ! augoment -blur=2
    
with cwd(join_path(repo,'data','flags','valid')):
    ! augoment -blur=2

### YOLO Preprocessing

Clone the repo, and compile code with CUDA.

In [None]:
! git clone https://github.com/pjreddie/darknet.git

with cwd('darknet'):
    ! sed -i 's/GPU=0/GPU=1/' Makefile
    ! sed -i 's/CUDNN=0/CUDNN=1/' Makefile
    ! make
    
#copy needed files
os.rename(join_path('darknet', 'darknet'), join_path(repo, 'darknet'))
os.mkdir(join_path(repo, 'backup'))
os.mkdir(join_path(repo, 'results'))

Convert annotations from xml to YOLO format. Additionally, create the input txt files containing the image paths, as required by YOLO.

In [None]:
def collect_img_path(in_dir, out_path):
    """Write the paths of all non-`.txt` entries of `in_dir` to `out_path`, one per line.

    Args:
        in_dir: folder to scan (hidden entries are skipped by the `*` glob).
        out_path: text file to (over)write with the collected paths.

    The previous pattern `*[!.txt]` was a character class, not an extension filter:
    it dropped every entry whose last character is `.`, `t` or `x`. Entries are now
    filtered on the `.txt` extension and sorted so the output is deterministic.
    """
    paths = sorted(
        path for path in glob.glob(os.path.join(in_dir, '*'))
        if not path.lower().endswith('.txt')
    )
    with open(out_path, 'w') as w:
        w.write('\n'.join(paths))
        
# Convert the augmented xml annotations of the train and valid splits.
with cwd(repo, 'data', 'flags'):
    for entry in os.listdir():
        # only the 'train' and 'valid' folders are processed
        if entry not in ['train', 'valid'] or not os.path.isdir(entry):
            continue
        for annot_file in os.listdir(join_path(entry, 'AugmentedAnnotations')):
            convert_annot_yolo(
                join_path(entry, 'AugmentedAnnotations', annot_file),
                detection_classes,
                join_path(entry, 'AugmentedImages'),
            )

# Write one txt file per split listing the paths of its augmented images.
with cwd(repo):
    for split in ('train', 'valid'):
        collect_img_path(
            join_path('data', 'flags', split, 'AugmentedImages'),
            join_path('data', 'flags', split + '.txt'),
        )

The structure of _cfg/flag.data_ is as follows
```
  classes = 1
  train  = <path-to-data>/train.txt
  valid  = <path-to-data>/valid.txt
  names = <path-to-data>/flag.names
  backup = /mydrive/yolov3
```
_flag.names_ should contain the list of classes, one per line.  

Download weights for the classifier (Darknet-53)

In [None]:
! wget https://pjreddie.com/media/files/darknet53.conv.74
os.rename('darknet53.conv.74', join_path(repo,'cfg','darknet53.conv.74'))

Edit the cfg file for _yolo-v3_. One needs to set adequate `batch` (number of images per training step) and `subdivisions` (further subdivide the batch to speed up training) values according to the available hardware. Then update `classes` in the yolo layer to match the number of classes to be predicted. Finally, adjust the number of `filters` in the convolutional layer preceding the yolo layer to match the updated number of classes, according to the formula:
```
filters = (classes + 5)*3
```
The provided _yolov3.cfg_ file is already set up this way.  

### Training

In [None]:
drive.mount('/content/gdrive')
! ln -s /content/gdrive/My\ Drive/ /mydrive
! mkdir -p "/mydrive/yolov3"

In [None]:
def clean():
    output.clear()

#clean every minute, for 12 hours
for x in range(20,1*60*60*12,60):
    timer = threading.Timer(x, clean)
    timer.start()

with cwd(repo):
    ! chmod 755 darknet
    ! ./darknet detector train cfg/flag.data cfg/yolov3.cfg cfg/darknet53.conv.74 -dont_show

### Inference

We start by importing our trained model into OpenCV. To predict we can use `predict_yolo()`, which takes as input the network and an image, and returns the predicted bounding boxes. They are encoded with one bounding box per row, represented as `center_x, center_y, w, h, confidence`, where `center_x, center_y` are the pixel coordinates of the center of the bounding box, and `w` and `h` are half of its width and half of its height respectively. Finally, `confidence` is the prediction confidence of the network.

At this stage we are interested in assessing the out-of-sample quality of our model by computing the mAP. To do that we use a wrapper function that takes a generic model, a custom predict function for that model, the class labels and an image folder, and computes the metric of interest. As for the optional parameters, we ask to compute the mAP according to both the Pascal VOC and the COCO definitions. Finally, `net_input_w` and `net_input_h` refer to the image input size of YOLO, as specified in the config file.

In [None]:
! cp /mydrive/yolov3/yolov3.backup /content
os.rename('yolov3.backup', join_path(repo, 'cfg', 'volov3.weights'))

with cwd(repo, 'cfg'):
    net = cv2.dnn.readNet("volov3.weights", "yolov3.cfg")
layer_names = net.getLayerNames()
output_layers = net.getUnconnectedOutLayersNames()

mAP = evaluate_model(net, predict_yolo, detection_classes, mdl_type='detection', 
                     path=join_path(repo,'data','flags','test'),
                     mAP_type='both', net_input_w=608, net_input_h=608)