<a href="https://colab.research.google.com/github/asteinig4018/BirdWatcher/blob/master/makeModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Make a Model
The purpose of this notebook is to create a TFLite model for a Raspberry Pi that detects birds (ideally detecting whether any bird is present in an image rather than identifying each bird individually). I'll try to use the Caltech-UCSD Birds (CUB-200-2011) dataset. 

In [1]:
#in progress: gather requirements
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
import PIL
import PIL.Image
from tensorflow.keras.preprocessing import image_dataset_from_directory
import tensorflow_datasets as tfds

In [2]:
# Print the installed TensorFlow version so the environment is recorded alongside the outputs
print(tf.__version__)

2.7.0


# Build the Dataset

## Downloading the dataset:

In [3]:
#easy way but not compatible with yolo as far as I can tell
# bird_train_data = tfds.load('caltech_birds2011', split='train', shuffle_files=True)
# bird_test_data = tfds.load('caltech_birds2011', split='test', shuffle_files=False)

In [3]:
# Physically download the dataset archive from Google Drive with gdown
# (the recorded output shows it being saved as /content/CUB_200_2011.tgz)
!gdown https://drive.google.com/u/0/uc?id=1hbzc_P1FuxMkcabkgn9ZKinBwW683j45

Downloading...
From: https://drive.google.com/u/0/uc?id=1hbzc_P1FuxMkcabkgn9ZKinBwW683j45
To: /content/CUB_200_2011.tgz
100% 1.15G/1.15G [00:07<00:00, 152MB/s]


In [4]:
# Unpack the archive into the current working directory
# (later cells expect the result at /content/CUB_200_2011/)
!tar -xzf /content/CUB_200_2011.tgz

In [5]:
#add some more image manipulation packages
!pip install imagesize
import imagesize



In [6]:
#use a class to track data so we can create yolo specific data structures
class Yolo_Image:
  """One dataset image plus the metadata needed to write its YOLO label file.

  Tracks the image's file name and sub-directory, its pixel dimensions, a
  0-indexed class id, a train/test flag and a single bounding box stored in
  YOLO format (normalised centre x, centre y, width, height).
  """

  def __init__(self, filename, subdir, images_root="/content/CUB_200_2011/images"):
    """Record where the image lives and read its pixel dimensions.

    filename:    image file name, e.g. "Some_Bird_0001.jpg"
    subdir:      name of the folder under `images_root` that holds the file
    images_root: directory containing the per-class sub-folders
    """
    self.bb = list()
    self.filename = filename
    self.subdir = subdir
    self.images_root = images_root
    #print(self.filename)
    self.set_img_dim()

  def add_bb(self, bx, by, w, h):
    """Store the bounding box in YOLO format.

    bx, by: top-left corner of the box in pixels (numbers or numeric strings)
    w, h:   box width and height in pixels (numbers or numeric strings)

    YOLO labels describe a box by its *centre*, normalised by the image size,
    so the corner is shifted by half the box size before normalising.
    """
    box_w = float(w)
    box_h = float(h)
    self.bb = [(float(bx) + box_w / 2.0) / self.width,
               (float(by) + box_h / 2.0) / self.height,
               box_w / self.width,
               box_h / self.height]

  def get_bb(self):
    """Return the stored box as [x_center, y_center, width, height] (empty list if unset)."""
    return self.bb

  #get the 0 index class id
  def add_class(self, classname):
    """Store the class id, converting the 1-based id from the label file to 0-based."""
    self.classnum = int(classname) - 1

  def get_class(self):
    """Return the 0-based class id set by add_class."""
    return self.classnum

  def get_txt_filename(self):
    """Return the label file name: the image file name with its extension replaced by .txt."""
    return os.path.splitext(self.filename)[0] + ".txt"

  def get_jpg_filename(self):
    """Return the image file name as given to the constructor."""
    return self.filename

  #these we'll use to set whether they're part of the training or testing set
  def set_test_train(self, t_or_t):
    """Store the train/test flag as an int (callers treat 1 as training)."""
    self.tetr = int(t_or_t)
    #print(self.tetr)

  def get_test_train(self):
    """Return the train/test flag set by set_test_train."""
    return self.tetr

  def set_img_dim(self):
    """Read the image's width and height in pixels from disk.

    Raises ValueError when the dimensions are not positive, since they are
    used as divisors when normalising the bounding box.
    """
    img_path = os.path.join(self.images_root, self.subdir, self.filename)
    self.width, self.height = imagesize.get(img_path)
    if self.width <= 0 or self.height <= 0:
      raise ValueError("could not read image dimensions for " + img_path)



In [7]:
#create yolo classes for each image
# Every metadata file starts each row with the image id, so rows are matched to
# images by that id rather than by their position in the file. A blank line or
# a reordered file then fails loudly (KeyError) instead of silently mislabeling.

def _read_rows(path):
  """Return the whitespace-split fields of every non-blank line in `path`."""
  with open(path, 'r') as f:
    return [line.split() for line in f if line.strip()]

YImages = []          # images in file order; used by the cells below
yimages_by_id = {}    # image id (as text) -> Yolo_Image

for row in _read_rows('/content/CUB_200_2011/images.txt'):
  # row[1] is "<subdir>/<filename>"
  subdir, filename = row[1].split('/')[:2]
  yimg = Yolo_Image(filename, subdir)
  YImages.append(yimg)
  yimages_by_id[row[0]] = yimg

#now that we have them all indexed, let's add the classes
for row in _read_rows('/content/CUB_200_2011/image_class_labels.txt'):
  yimages_by_id[row[0]].add_class(row[1])

#get bounding boxes
for row in _read_rows('/content/CUB_200_2011/bounding_boxes.txt'):
  yimages_by_id[row[0]].add_bb(row[1], row[2], row[3], row[4])

#get whether images should be in test or train dataset
for row in _read_rows('/content/CUB_200_2011/train_test_split.txt'):
  yimages_by_id[row[0]].set_test_train(row[1])



In [8]:
#let's organize the pictures and labels
!mkdir /content/dataset-train
!mkdir /content/dataset-test
!mkdir /content/dataset-train/images
!mkdir /content/dataset-train/labels
!mkdir /content/dataset-test/images
!mkdir /content/dataset-test/labels

mkdir: cannot create directory ‘/content/dataset-train’: File exists
mkdir: cannot create directory ‘/content/dataset-test’: File exists
mkdir: cannot create directory ‘/content/dataset-train/images’: File exists
mkdir: cannot create directory ‘/content/dataset-train/labels’: File exists
mkdir: cannot create directory ‘/content/dataset-test/images’: File exists
mkdir: cannot create directory ‘/content/dataset-test/labels’: File exists


In [9]:
# Flatten the per-class sub-folders: move every .jpg into the training image folder.
# Images flagged as test data are moved on to dataset-test by the label-writing cell.
# NOTE: Yolo_Image.set_img_dim reads images from their original location under
# /content/CUB_200_2011/images/, so the Yolo_Image objects must be built before this runs.
!mv /content/CUB_200_2011/images/**/*.jpg /content/dataset-train/images/

In [10]:
#create label files and move testing data
import shutil

#need to create this folder
LOCATION_PREFIX_TRAIN_L = "/content/dataset-train/labels/"
LOCATION_PREFIX_TEST_L = "/content/dataset-test/labels/"

LOCATION_PREFIX_TRAIN_I = "/content/dataset-train/images/"
LOCATION_PREFIX_TEST_I = "/content/dataset-test/images/"

for yimg in YImages:
  if yimg.get_test_train() == 1:
    label_dir = LOCATION_PREFIX_TRAIN_L
  else:
    #it is a testing image
    label_dir = LOCATION_PREFIX_TEST_L
    #and let's move the corresponding image
    test_img_path = LOCATION_PREFIX_TEST_I + yimg.get_jpg_filename()
    # skip the move when a previous run of this cell already did it, so the
    # cell can be re-run; a file missing from both places still raises
    if not os.path.exists(test_img_path):
      shutil.move(LOCATION_PREFIX_TRAIN_I + yimg.get_jpg_filename(),
                  test_img_path)

  # one label row: "<class> <bb values...>", space separated, newline terminated
  fields = [str(yimg.get_class())] + [str(x) for x in yimg.get_bb()]
  # the with-block closes the file even if the write fails
  with open(label_dir + yimg.get_txt_filename(), 'w') as f:
    f.write(" ".join(fields) + "\n")

In [11]:
#get all the class names
with open('/content/CUB_200_2011/classes.txt','r') as f:
  lines = f.readlines()

# the second whitespace-separated field of each row is kept as the class name
classes = [row.split()[1] for row in lines]



In [12]:
#make the yaml
# NOTE(review): "val" points at the training images, so validation metrics
# reported during training are computed on data the model has already seen.
# Confirm this is intended, or point it at a held-out folder.
# The with-block closes the file even if one of the writes fails.
with open('/content/dataset.yaml', 'w') as f:
  f.write("train: /content/dataset-train/images\n")
  f.write("val: /content/dataset-train/images\n")
  f.write("test: /content/dataset-test/images\n\n")
  # nc = number of classes; names = the class-name list in list-literal form
  f.write("nc: "+ str(len(classes)) + "\n\n")
  f.write("names: " + str(classes) + "\n")

# Get YOLOv5 ver 6 

In [13]:
#clone YOLOv5 and 
!git clone https://github.com/ultralytics/yolov5  # clone repo
%cd yolov5
%pip install -qr requirements.txt
import torch
from IPython.display import Image, clear_output # for showing images

Cloning into 'yolov5'...
remote: Enumerating objects: 10671, done.[K
remote: Counting objects: 100% (22/22), done.[K
remote: Compressing objects: 100% (19/19), done.[K
remote: Total 10671 (delta 7), reused 10 (delta 3), pack-reused 10649[K
Receiving objects: 100% (10671/10671), 10.88 MiB | 10.95 MiB/s, done.
Resolving deltas: 100% (7361/7361), done.
/content/yolov5
[K     |████████████████████████████████| 596 kB 4.6 MB/s 
[?25h

## Actually do the training:


*   img = set our max image size, which works even though we have variable image sizes (the unused part of the square is padded with gray)
*   batch = this depends on the gpu/architecture/how we want to train it
*   epochs = how long we want to train it - longer is better up to a point
*   data = point to the yaml above
*   weights = which weights we want to start with
*   cache = we can turn this on so it is faster but it will also take more RAM



In [14]:

# Train starting from the yolov5n.pt weights on the dataset described by /content/dataset.yaml
!python train.py --img 640 --batch 16 --epochs 25 --data /content/dataset.yaml --weights yolov5n.pt

Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...
[34m[1mtrain: [0mweights=yolov5n.pt, cfg=, data=/content/dataset.yaml, hyp=data/hyps/hyp.scratch.yaml, epochs=25, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v6.0-212-gff8646c torch 1.10.0+cu111 CPU

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.1, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_p

In [None]:
# Start TensorBoard
# Launch after you have started training
# Logs are read from the "runs" folder, relative to the current working directory
%load_ext tensorboard
%tensorboard --logdir runs

## Test
Use our test dataset for this

In [None]:
!python test.py --data /content/daatset.yaml --weights TBD.pt

# Export 
Export the weights and network so we can use it on another device.

In [None]:
!python export.py --weights TBD.pt --include tflight onnx --simplify