# Preliminaries

#### Imports

In [30]:
import os
import glob
import PIL
import numpy as np
import math
from xml.etree import ElementTree
from contextlib import contextmanager

#### Installing utilities

In [None]:
#install Go

!add-apt-repository ppa:longsleep/golang-backports -y
!apt update
!apt install golang-go
%env GOPATH=/root/go
!go get -u github.com/gopherdata/gophernotes
!cp ~/go/bin/gophernotes /usr/bin/
!mkdir /usr/local/share/jupyter/kernels/gophernotes
!cp ~/go/src/github.com/gopherdata/gophernotes/kernel/* \
       /usr/local/share/jupyter/kernels/gophernotes

# then refresh notebook!

In [None]:
#install AuGoment
! git clone https://github.com/lootag/ImageAuGomentationCLI.git
os.chdir('ImageAuGomentationCLI' + os.sep + 'src')
! make
os.chdir('/content')

#### Definitions

In [31]:
# name of the folder the project repository is cloned into
repo = 'windspeed'
# class names to detect; the position of a name in the list is its YOLO class id
detection_classes = ['flag']

def join_path(*parts):
    """Join path components into a single path.

    Thin wrapper around os.path.join: an empty leading component is ignored,
    so join_path('', 'x.txt') is the relative path 'x.txt' (joining with the
    separator would give the root-level '/x.txt'). As with os.path.join, an
    absolute component discards everything before it.

    Returns '' when called without arguments.
    """
    if not parts:
        return ''
    return os.path.join(*parts)

# Context manager for `with` blocks: the joined path becomes the working
# directory inside the block, and the previous one is restored on exit.
@contextmanager
def cwd(*parts):
    """Temporarily change the working directory to os.sep.join(parts).

    The directory that was current on entry is restored when the block ends,
    whether it finishes normally or raises.
    """
    previous_dir = os.getcwd()
    os.chdir(os.sep.join(parts))
    try:
        yield
    finally:
        os.chdir(previous_dir)

# TO_DELETE
We can put legacy code here that does not belong anywhere else but might be useful in the future.

In [None]:
#use to rename pictures
# Legacy one-off: inside windspeed/flags, every image that has an annotation is renamed,
# together with its annotation, to '<counter>flag' (the image keeps its extension).
with cwd('windspeed', 'flags'):
    # get all annots
    # names are removed from this set as soon as the matching image is found
    ann = set(os.listdir('Annotations'))
    c = 0
    for f in os.listdir('images'):
        img_name, ext = os.path.splitext(f)
        if os.path.exists(join_path('Annotations', img_name+'.xml')):
            # NOTE(review): if a file called '<c>flag...' already exists it is the rename
            # target here - confirm no such names are present before running
            os.rename(join_path('Annotations', img_name+'.xml'), join_path('Annotations', str(c) + 'flag.xml'))
            os.rename(join_path('images', f), join_path('images', str(c)+'flag'+ext))
            ann.remove(img_name+'.xml')
        else:
            # image without annotation: moved out of 'images' into the current folder and reported
            os.rename(join_path('images', f), f)
            print(f)
        # the counter advances for every image, annotated or not
        c += 1
            
    # whatever is left in the set is an annotation for which no image was found
    print('No images for ', ann)

In [None]:
#used to debug augoment: identifies the images for which it creates problems
# Runs augoment on one image at a time and records the images whose run reports a panic.
# discard local changes and untracked files in the repository before starting
with cwd(repo):
    ! git checkout .
    ! git clean -fd

with cwd(repo, 'data', 'flags'):
    # scratch folder with the Images/Annotations layout, holding a single sample at a time
    os.mkdir('temp')
    os.mkdir(join_path('temp', 'Images'))
    os.mkdir(join_path('temp', 'Annotations'))
    c = 0
    for f in os.listdir('Images'):
        img_name,_ = os.path.splitext(f)
        # move the image and its annotation into the scratch folder
        os.rename(join_path('Annotations', img_name+'.xml'), join_path('temp', 'Annotations', img_name+'.xml'))
        os.rename(join_path('Images', f), join_path('temp', 'Images', f))
        try:
            with cwd('temp'):
                # stdout is saved to 'good', stderr to 'bad'
                ! augoment -exclusion_threshold=1 -batch_size=1 -blur=2 >good 2>bad
                # 'boh' = file name followed by the 'panic' lines found in 'bad'
                ! echo {f} > boh
                ! grep panic bad >> boh
                # captured grep output: non-empty when a panic was reported for this image
                tt = ! grep panic bad
                if len(tt) != 0:
                    # append this record, plus a blank line, to the cumulative report 'out_good'
                    ! cat boh >> out_good
                    ! echo "" >> out_good
        except Exception as e:
            print(f, str(e))
        finally:
            # the sample is deleted, not moved back
            # NOTE(review): presumably the git checkout at the top restores the tracked files on the next run - confirm
            os.remove(join_path('temp', 'Annotations', img_name+'.xml'))
            os.remove(join_path('temp', 'Images', f))
        # progress: number of images processed so far
        print(c)
        c += 1

# END_TO_DELETE

# Flag Detection

Folder structure:  
-train  
-valid  
-test  

Each folder contains the images and the annotations in two separate subfolders, `Images` and `Annotations`.

In [None]:
# fetch the project repository; git clones it into a 'windspeed' folder in the current directory
! git clone https://github.com/guglielmoG/windspeed.git

## Data Preprocessing

### Folder Structure Creation
Split the images into training, validation and test sets, holding approximately 75%, 15% and 10% of the data respectively.

In [33]:
np.random.seed(3456)

def make_augoment_dir(name):
    """Create the `name/Images` and `name/Annotations` folders.

    Folders that already exist are left untouched, so the cell can be run
    again after a partial failure.
    """
    os.makedirs(join_path(name, 'Images'), exist_ok=True)
    os.makedirs(join_path(name, 'Annotations'), exist_ok=True)

with cwd(repo, 'data', 'flags'):
    for split in ('train', 'valid', 'test'):
        make_augoment_dir(split)
    # os.listdir returns entries in arbitrary order: sort them so that the
    # seeded shuffle below produces the same split on every machine
    imgs = sorted(os.listdir('Images'))
    n = len(imgs)
    #use random indexes to shuffle the images
    idx = np.arange(n)
    np.random.shuffle(idx)
    for i in range(n):
        img = imgs[idx[i]]
        img_name, _ = os.path.splitext(img)
        # first 75% of the shuffled images -> train, next 15% -> valid, rest -> test
        if i < n * 0.75:
            split = 'train'
        elif i < n * 0.9:
            split = 'valid'
        else:
            split = 'test'
        os.rename(join_path('Images', img), join_path(split, 'Images', img))
        os.rename(join_path('Annotations', img_name + '.xml'), join_path(split, 'Annotations', img_name + '.xml'))
    # both source folders must be empty at this point; rmdir fails otherwise
    os.rmdir('Images')
    os.rmdir('Annotations')

PermissionError: [WinError 5] Access is denied: 'Images'

### Augmentation
NB: to obtain several blurred versions of the same image (e.g. 3 new blurred images per original, each with a different blur value), the previously created blurred images must be renamed before running the augmentation again, otherwise they are overwritten.

In [None]:
def convert_to_jpg(path):
    """Save an RGB JPEG copy of the image at `path` in the same folder.

    The copy keeps the base name of the source and gets a '.jpg' extension.
    The source file itself is not removed.

    path: path of the image to convert.
    """
    # `import PIL` alone does not guarantee that the Image submodule is loaded
    from PIL import Image

    folder, file = os.path.split(path)
    img_name, _ = os.path.splitext(file)
    # os.path.join copes with an empty folder (bare file name); joining with
    # the separator would then point at the filesystem root
    target = os.path.join(folder, img_name + '.jpg')
    # the context manager closes the source file even if converting or saving fails
    with Image.open(path) as img:
        img.convert('RGB').save(target)

#Augment dataset
def augoment(path):
    files = os.listdir(path)
    if not ('Images' in files or 'Annotations' in path):
        raise Exception(1)

    #cast png to jpg
    pngs = glob.glob(join_path(path, 'Images/*.png'))
    pngs.extend(glob.glob(join_path(path, 'Images/*.PNG')))
    for png in pngs:
        convert_to_jpg(png)
        os.remove(png)

    #cast jpeg to jpg
    pngs = glob.glob(join_path(path, 'Images/*.jpeg'))
    for png in pngs:
        convert_to_jpg(png)
        os.remove(png)
        
    #augmentation
    with cwd(path):
        ! augoment -blur=2 

In [None]:
# augment the training and the validation split
for subset_name in ('train', 'valid'):
    augoment(join_path(repo, 'data', 'flags', subset_name))

### YOLO Preprocessing

Clone the repo, and compile code with CUDA.

In [None]:
! git clone https://github.com/pjreddie/darknet.git

with cwd('darknet'):
    ! sed -i 's/GPU=0/GPU=1/' Makefile
    ! sed -i 's/CUDNN=0/CUDNN=1/' Makefile
    ! make
    
#copy needed files
os.rename(join_path('darknet', 'darknet'), join_path(repo, 'darknet'))
os.mkdir(join_path(repo, 'backup'))
os.mkdir(join_path(repo, 'results'))

Define functions to convert XML annotations into the YOLO input format. NB: the label files must be stored in the same folder as the images.

In [6]:
#size: image size (width, height)
#box: box coordinates (xmin, xmax, ymin, ymax)
def convert(size, box):
    """Convert a pixel bounding box into normalized centre/size form.

    Returns (x, y, w, h): the box centre and the box width/height, with
    horizontal values divided by the image width and vertical values by the
    image height.
    """
    dw = 1./size[0]
    dh = 1./size[1]
    x = (box[0] + box[1])/2.0
    y = (box[2] + box[3])/2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    return (x,y,w,h)

#create YOLO label files
def convert_annotation(ann_path, outdir='', classes=None):
    """Write the YOLO label file for one XML annotation.

    ann_path: path of the XML annotation. It must contain a <size> element
        with <width> and <height>, and one <object> (with <name> and
        <bndbox>) per bounding box.
    outdir: folder receiving '<annotation base name>.txt'; '' means the
        current working directory.
    classes: list of class names, the position of a name being its class id.
        Defaults to the notebook-level `detection_classes`.

    One line 'class_id x y w h' is written per object of a known class.
    Objects of any other class are skipped with a warning. No file is created
    when the annotation holds no object of a known class.
    """
    if classes is None:
        classes = detection_classes
    img_name,_ = os.path.splitext(os.path.basename(ann_path))
    root = ElementTree.parse(ann_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    lines = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in classes:
            print('WARNING: skipped BBox of image %s with undefined class'%(img_name) , cls)
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = tuple(float(xmlbox.find(tag).text) for tag in ('xmin', 'xmax', 'ymin', 'ymax'))
        bb = convert((w,h), b)
        lines.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

    if lines:
        # written once, after the loop: reopening the file in 'w' mode for
        # every object would keep only the last bounding box.
        # os.path.join keeps the default outdir='' relative to the cwd.
        with open(os.path.join(outdir, img_name + ".txt"), 'w') as writer:
            writer.writelines(lines)

Create input txt files containing images paths.

In [None]:
def collect_img_path(in_dir, out_path):
    """Write the paths of the images found in `in_dir` to `out_path`, one per line.

    Every entry of `in_dir` whose extension is not '.txt' is listed; the
    '.txt' entries are the label files stored next to the images. Paths are
    written as `in_dir`/<name>, sorted, with no trailing newline.
    """
    # the glob pattern '*[!.txt]' is a character class: it only rejects names
    # whose LAST CHARACTER is '.', 't' or 'x', so compare the extension instead
    l = sorted(
        p for p in glob.glob(os.path.join(in_dir, '*'))
        if os.path.splitext(p)[1].lower() != '.txt'
    )
    with open(out_path, 'w') as w:
        w.write('\n'.join(l))

Combining everything

In [8]:
#converting annotations
with cwd(repo, 'data', 'flags'):
    for f in ('train', 'valid'): #the test split is not converted
        if not os.path.isdir(f):
            continue
        ann_dir = join_path(f, 'AugmentedAnnotations')
        # sorted for a reproducible order; entries that are not .xml files are
        # skipped, since they would make the XML parser fail
        for xml in sorted(os.listdir(ann_dir)):
            if xml.lower().endswith('.xml'):
                convert_annotation(join_path(ann_dir, xml), join_path(f, 'AugmentedImages'))

#creating input paths to images
with cwd(repo):
    # paths are collected relative to the repository root
    for f in ('train', 'valid'):
        collect_img_path(join_path('data','flags',f,'AugmentedImages'), join_path('data','flags',f + '.txt'))

'D:\\ME\\gugli\\Bocconi\\5\\1_sem\\comp_vision\\project\\C_lib'

The structure of _cfg/flag.data_ is as follows
```
  classes = 1
  train  = <path-to-data>/train.txt
  valid  = <path-to-data>/valid.txt
  names = <path-to-data>/flag.names
  backup = backup
```
_flag.names_ should contain the list of classes, one per line.  

Download weights for the classifier (Darknet-53)

In [11]:
! wget https://pjreddie.com/media/files/darknet53.conv.74
os.rename('darknet53.conv.74', join_path(repo,'cfg','darknet53.conv.74'))

Edit the cfg file for _yolo-v3_. Set adequate `batch` (number of images per training step) and `subdivisions` (number of chunks each batch is split into, so that it fits in GPU memory) values according to the available hardware. Then update `classes` in each yolo layer to match the number of classes to predict. Finally, adjust the number of `filters` in the convolutional layer immediately before each yolo layer to match the updated number of classes, according to the formula:
```
filters = (classes + 5)*3
```
The provided file _cfg/yolov3.cfg_ is already set up in this way.  

Finally, we can train the model.

In [None]:
# run the training from inside the repository, so the cfg/ paths below resolve
with cwd(repo):
    # make the darknet binary executable
    ! chmod 755 darknet
    # arguments: data description file, network cfg, initial weights file
    # NOTE(review): -dont_show presumably disables graphical output on a headless machine - confirm the flag is supported by this darknet build
    ! ./darknet detector train cfg/flag.data cfg/yolov3.cfg cfg/darknet53.conv.74 -dont_show