# Loading data & packages 

In [1]:
import numpy as np 
import pandas as pd 
import os
import ast
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import shutil
import matplotlib.pyplot as plt 

In [2]:
data_path='/content/drive/My Drive/global-wheat-detection/'
for dirname, _, filenames in os.walk(data_path):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/content/drive/My Drive/global-wheat-detection/sample_submission.csv
/content/drive/My Drive/global-wheat-detection/train.csv
/content/drive/My Drive/global-wheat-detection/test/348a992bb.jpg
/content/drive/My Drive/global-wheat-detection/test/2fd875eaa.jpg
/content/drive/My Drive/global-wheat-detection/test/51b3e36ab.jpg
/content/drive/My Drive/global-wheat-detection/test/cc3532ff6.jpg
/content/drive/My Drive/global-wheat-detection/test/51f1be19e.jpg
/content/drive/My Drive/global-wheat-detection/test/53f253011.jpg
/content/drive/My Drive/global-wheat-detection/test/f5a1f0358.jpg
/content/drive/My Drive/global-wheat-detection/test/cb8d261a3.jpg
/content/drive/My Drive/global-wheat-detection/test/796707dd7.jpg
/content/drive/My Drive/global-wheat-detection/test/aac893a91.jpg
/content/drive/My Drive/global-wheat-detection/train/dc50efc2c.jpg
/content/drive/My Drive/global-wheat-detection/train/df53b43f1.jpg
/content/drive/My Drive/global-wheat-detection/train/e2a3ba358.jpg
/content/driv

In [3]:
df=pd.read_csv(os.path.join(data_path, 'train.csv'))
df.head()

Unnamed: 0,image_id,width,height,bbox,source
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1
3,b6ab77fd7,1024,1024,"[834.0, 95.0, 109.0, 107.0]",usask_1
4,b6ab77fd7,1024,1024,"[26.0, 144.0, 124.0, 117.0]",usask_1


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 147793 entries, 0 to 147792
Data columns (total 5 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   image_id  147793 non-null  object
 1   width     147793 non-null  int64 
 2   height    147793 non-null  int64 
 3   bbox      147793 non-null  object
 4   source    147793 non-null  object
dtypes: int64(2), object(3)
memory usage: 5.6+ MB


In [5]:
def crate_df(data_frame):
  #the bounding oxes are given in strings format ---> need to be formated as list 
  #the simplest way to convert str to list
  data_frame['bbox']=data_frame['bbox'].apply(ast.literal_eval)
  data_frame=data_frame.groupby('image_id')['bbox'].apply(list).reset_index(name='bboxes')
  df_train,df_val=train_test_split(data_frame, test_size=0.2,random_state=42,shuffle=True)
  df_train=df_train.reset_index(drop=True)
  df_val=df_val.reset_index(drop=True)
  return df_train,df_val

In [6]:
df_train,df_val=crate_df(df)

In [7]:
print('#######train data frame #########')
print(df_train)
print('#######validation data frame_df#########')
print(df_val)

#######train data frame #########
       image_id                                             bboxes
0     e99667355  [[614.0, 360.0, 63.0, 93.0], [195.0, 352.0, 12...
1     d03043abb  [[212.0, 852.0, 58.0, 87.0], [279.0, 861.0, 84...
2     d728e2fc2  [[62.0, 117.0, 105.0, 102.0], [0.0, 760.0, 61....
3     3733d0e53  [[849.0, 92.0, 142.0, 79.0], [707.0, 132.0, 95...
4     b7c9166b6  [[62.0, 665.0, 72.0, 142.0], [0.0, 951.0, 74.0...
...         ...                                                ...
2693  536ef8d03  [[483.0, 4.0, 144.0, 92.0], [50.0, 246.0, 150....
2694  572a469e9  [[458.0, 619.0, 109.0, 90.0], [825.0, 604.0, 8...
2695  63c4e8cda  [[84, 83, 190, 123], [260, 56, 181, 145], [7, ...
2696  444977945  [[542.0, 0.0, 87.0, 46.0], [529.0, 175.0, 90.0...
2697  f1144b38d  [[724.0, 17.0, 87.0, 67.0], [617.0, 812.0, 114...

[2698 rows x 2 columns]
#######validation data frame_df#########
      image_id                                             bboxes
0    f3c5e09ab  [[785.0, 41.0,

# Cloning the yolo v5 repo & preparing configurations & changes 

### cloning repo 

In [8]:
!git clone https://github.com/ultralytics/yolov5.git

Cloning into 'yolov5'...
remote: Enumerating objects: 2679, done.[K
remote: Total 2679 (delta 0), reused 0 (delta 0), pack-reused 2679[K
Receiving objects: 100% (2679/2679), 5.42 MiB | 1.19 MiB/s, done.
Resolving deltas: 100% (1771/1771), done.


now we need to format our data properly, YOLO expect data in a specific order and format for that we are going to create the following :

-create new directory : wheat_data

-inside wheat_data we create 2 new directorys images & labels 

-inside teise directorys we create on each one train & validation directorys

In [9]:
pwd

'/content'

In [10]:
cd /content/yolov5

/content/yolov5


In [11]:
ls

[0m[01;34mdata[0m/       hubconf.py  [01;34mmodels[0m/            sotabench.py  tutorial.ipynb
detect.py   [01;34minference[0m/  [01;32mREADME.md[0m*         test.py       [01;34mutils[0m/
Dockerfile  LICENSE     [01;32mrequirements.txt[0m*  train.py      [01;34mweights[0m/


### installing requirements 

In [12]:
!pip install -r requirements.txt

Collecting PyYAML>=5.3
[?25l  Downloading https://files.pythonhosted.org/packages/64/c2/b80047c7ac2478f9501676c988a5411ed5572f35d1beff9cae07d321512c/PyYAML-5.3.1.tar.gz (269kB)
[K     |████████████████████████████████| 276kB 2.7MB/s 
Building wheels for collected packages: PyYAML
  Building wheel for PyYAML (setup.py) ... [?25l[?25hdone
  Created wheel for PyYAML: filename=PyYAML-5.3.1-cp36-cp36m-linux_x86_64.whl size=44619 sha256=8b8111621c3ce76c249cfa815ef268677430d4141b0882fea98c40e3c936721a
  Stored in directory: /root/.cache/pip/wheels/a7/c1/ea/cf5bd31012e735dc1dfea3131a2d5eae7978b251083d6247bd
Successfully built PyYAML
Installing collected packages: PyYAML
  Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninstalled PyYAML-3.13
Successfully installed PyYAML-5.3.1


In [13]:
mkdir wheat_data

In [14]:
cd wheat_data/

/content/yolov5/wheat_data


In [15]:
mkdir images

In [16]:
mkdir labels

In [17]:
cd images

/content/yolov5/wheat_data/images


In [18]:
mkdir train

In [19]:
mkdir validation

In [20]:
cd ../labels

/content/yolov5/wheat_data/labels


In [21]:
mkdir train

In [22]:
mkdir validation

In [23]:
cd ../

/content/yolov5/wheat_data


### loading data in specific format

Another thing to mention is that the yolov5 expect the data in the form 
label, x_center, y_center, with ,hight

In [24]:
out_path='/content/yolov5/wheat_data/'
def proc_data(data, data_type='train'):
  #here we are going to iterate over the data columns
  for _,row in tqdm(data.iterrows() ,
                    total=len(data) ):
    image_name=row["image_id"]
    bounding_boxes=row['bboxes']
    yolo_data=[]
    for bbox in bounding_boxes:
      x=bbox[0]
      y=bbox[1]
      w=bbox[2]
      h=bbox[3]
      x_center=x+w/2
      y_center=y+h/2
      x_center/=1024.0
      y_center/=1024.0
      w/=1024.0
      h/=1024.0
      #now we append in the correct format in this part problem we have only one class 0
      yolo_data.append([0, x_center,y_center, w,h])
    yolo_data=np.array(yolo_data)
    np.savetxt(
        os.path.join(out_path,f"labels/{data_type}/{image_name}.txt ") ,yolo_data,fmt=["%d","%f","%f","%f","%f"] 
    )
    #copy img into the images folder 
    shutil.copyfile(
        os.path.join(data_path,f"train/{image_name}.jpg" ) , 
        os.path.join(out_path,f"images/{data_type}/{image_name}.jpg" ) 
    )

In [25]:
def create_yolo_data_prefered_format():
  proc_data(df_train, data_type='train')
  proc_data(df_val, data_type='validation')
create_yolo_data_prefered_format()

100%|██████████| 2698/2698 [30:45<00:00,  1.46it/s]
100%|██████████| 675/675 [07:40<00:00,  1.46it/s]


checking that did work 

In [26]:
cd /content/yolov5/wheat_data/

/content/yolov5/wheat_data


In [27]:
cd images

/content/yolov5/wheat_data/images


In [28]:
cd train

/content/yolov5/wheat_data/images/train


In [29]:
ls

005b0d8bb.jpg  38b19d4f5.jpg  69d37b0ba.jpg  9c0466c9f.jpg  ccd16cf61.jpg
006a994f7.jpg  38b259a9e.jpg  69e509038.jpg  9c05c8d56.jpg  ccd5d31da.jpg
00764ad5d.jpg  38dc44b85.jpg  69fc3d3ff.jpg  9c29fd766.jpg  cce5cb631.jpg
00b5fefed.jpg  38eb35232.jpg  6a0e4e8cf.jpg  9c527c135.jpg  ccfff0629.jpg
00b70a919.jpg  38ed976da.jpg  6a1ad8811.jpg  9c72bafb6.jpg  cd243b0b0.jpg
00e903abe.jpg  38fd197f0.jpg  6a2eac670.jpg  9c832432c.jpg  cd2578a38.jpg
00ea5e5ee.jpg  3918cb6de.jpg  6a4d4d9bd.jpg  9cadbf983.jpg  cdc68ad10.jpg
010b216d4.jpg  3940de6a5.jpg  6a614e72e.jpg  9cb0a5504.jpg  cdd3bc83b.jpg
010c93b99.jpg  39869da63.jpg  6a7cbd408.jpg  9d0eb23a0.jpg  ce2e2fe79.jpg
010dbcc8f.jpg  398708128.jpg  6a82e6e98.jpg  9d0f63de7.jpg  ce3999eb9.jpg
0114c88aa.jpg  399260323.jpg  6a8522f06.jpg  9d137bb37.jpg  ce7375bad.jpg
01189a3c3.jpg  399fce7aa.jpg  6a888dbcf.jpg  9d63ad423.jpg  ce78f7f47.jpg
0126b7d11.jpg  39ad9affa.jpg  6a8a08409.jpg  9d78cb5d0.jpg  ce803c694.jpg
01397a84c.jpg  39f0b1003.jpg  6a9534cf

In [30]:
cd ../

/content/yolov5/wheat_data/images


In [31]:
cd validation

/content/yolov5/wheat_data/images/validation


In [32]:
ls

00333207f.jpg  2c836cccb.jpg  614b1d8c5.jpg  9785e4e53.jpg  ca56c6d41.jpg
013669953.jpg  2ccb37ea9.jpg  61bcac952.jpg  98db75bde.jpg  cb3236ed5.jpg
015939012.jpg  2cffefd1a.jpg  6244395ed.jpg  9a1947520.jpg  cc4d168f6.jpg
027086635.jpg  2d06b68f0.jpg  62ec8d906.jpg  9a9f42302.jpg  ccf2bea3e.jpg
02992044c.jpg  2d1b6ce2a.jpg  63467d323.jpg  9ab4727a2.jpg  ccf5706ef.jpg
02d662fa8.jpg  2d72a5f04.jpg  63c658201.jpg  9ae3752f9.jpg  ce367f7d6.jpg
02e02e347.jpg  2d990708e.jpg  654a741f5.jpg  9b4ad2587.jpg  ce4124be0.jpg
02f0fe2ca.jpg  2eaac7a41.jpg  657f6bf98.jpg  9ba7810c6.jpg  ce69098f5.jpg
02fe8c28e.jpg  2ed94451a.jpg  667b4a999.jpg  9bbee18cc.jpg  cea58b404.jpg
038432e90.jpg  2f03889ab.jpg  668149d6b.jpg  9cd2ec689.jpg  ceb6406ab.jpg
03a177626.jpg  2f1c4be01.jpg  67e82d536.jpg  9ce6888c9.jpg  cf0401699.jpg
03ad2a35c.jpg  2f53b1a20.jpg  688e051dc.jpg  9d8f6d301.jpg  cf3a2a32d.jpg
03dd40d7b.jpg  2f9755f66.jpg  6895c563e.jpg  9ddb20382.jpg  cf770a944.jpg
041707ba8.jpg  3014e5cad.jpg  69257057

In [33]:
cd /content/yolov5/wheat_data/labels/train

/content/yolov5/wheat_data/labels/train


In [34]:
ls

'005b0d8bb.txt '  '45a13cbcd.txt '  '83995f53f.txt '  'c09caecc1.txt '
'006a994f7.txt '  '45a203e70.txt '  '83aae58fa.txt '  'c0a6307fa.txt '
'00764ad5d.txt '  '45a6506a3.txt '  '83b801148.txt '  'c0ad0793a.txt '
'00b5fefed.txt '  '45d452a61.txt '  '83bfee8fd.txt '  'c0bc8d9f2.txt '
'00b70a919.txt '  '461376287.txt '  '84053c39b.txt '  'c0d460d6a.txt '
'00e903abe.txt '  '461d55f4b.txt '  '8405c37aa.txt '  'c0e168cd3.txt '
'00ea5e5ee.txt '  '462d4d062.txt '  '8417e5762.txt '  'c0fa19bac.txt '
'010b216d4.txt '  '463a95dac.txt '  '84411af4b.txt '  'c1279ebc9.txt '
'010c93b99.txt '  '4646dcda7.txt '  '844d5bd76.txt '  'c14c1e300.txt '
'010dbcc8f.txt '  '4655fcc18.txt '  '8473a95f7.txt '  'c1577d6ff.txt '
'0114c88aa.txt '  '46630486d.txt '  '849061b2b.txt '  'c15a563d8.txt '
'01189a3c3.txt '  '466a4369a.txt '  '8498f5a6c.txt '  'c164bb5bb.txt '
'0126b7d11.txt '  '46727d887.txt '  '84a5579db.txt '  'c1973a2fe.txt '
'01397a84c.txt '  '4676009a9.txt '  '84a6298a3.txt '  'c1a61bc88.txt '
'013fd

In [35]:
!cat e99667355.txt 
#if the output of this cell is 
#/bin/bash: cat: command not found
#just go to the file in files exp and double click on it 
#and the file will pop up
#this is a ggogle drive issue 

cat: e99667355.txt: No such file or directory


the next step is to create a config file 
we will call it weat.yaml

In [36]:
cd /content/yolov5

/content/yolov5


### creating config file

In [37]:
!touch wheat.yaml

one can simply edit the config file:
there are 4 things to specify : the train images path along with the validation , the number of clases and the names of classes (here we have a single class)

train:wheat_data/images/train

val:/content/yolov5/wheat_data/

images/validation

nc:1

names=["wheat']



In [45]:
ls

[0m[01;34mdata[0m/       [01;34minference[0m/  [01;32mrequirements.txt[0m*  tutorial.ipynb  wheat.yaml
detect.py   LICENSE     sotabench.py       [01;34mutils[0m/
Dockerfile  [01;34mmodels[0m/     test.py            [01;34mweights[0m/
hubconf.py  [01;32mREADME.md[0m*  train.py           [01;34mwheat_data[0m/


In [46]:
!cat wheat.yaml

train:wheat_data/images/train

val:/content/yolov5/wheat_data/

images/validation

nc:1

names=["wheat']

now we can display the yolov5 arich

In [47]:
cd models

/content/yolov5/models


In [48]:
ls

common.py        export.py  __init__.py  yolov5l.yaml  yolov5s.yaml
experimental.py  [0m[01;34mhub[0m/       yolo.py      yolov5m.yaml  yolov5x.yaml


In [49]:
!cat yolov5s.yaml

# parameters
nc: 80  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.50  # layer channel multiple

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, BottleneckCSP, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, BottleneckCSP, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, BottleneckCSP, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 3, BottleneckCSP, [1024, False]],  # 9
  ]

# YOLOv5 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, BottleneckCSP, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-

# Training 

run the training command 

In [50]:
cd ..

/content/yolov5


we can excute the training by the following command 
```
python train.py --img<image_size> --batch<batch_size> --epochs <number of epochs> --data <name of the data config.yaml file> --cfg <name of the model config.yaml file> --name <our exp name>
```

if this cell return an AttributeError: 'str' object has no attribute 'get' upgrade the python kernel to 3.8 in google Colab
```
!wget -O mini.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_4.8.2-Linux-x86_64.sh
!chmod +x mini.sh
!bash ./mini.sh -b -f -p /usr/local

```
then reinstall requirements 
```
!pip install -r requirements.txt
```

In [54]:
!wget -O mini.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_4.8.2-Linux-x86_64.sh
!chmod +x mini.sh
!bash ./mini.sh -b -f -p /usr/local


--2020-10-18 22:01:11--  https://repo.anaconda.com/miniconda/Miniconda3-py38_4.8.2-Linux-x86_64.sh
Resolving repo.anaconda.com (repo.anaconda.com)... 104.16.130.3, 104.16.131.3, 2606:4700::6810:8303, ...
Connecting to repo.anaconda.com (repo.anaconda.com)|104.16.130.3|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 89817099 (86M) [application/x-sh]
Saving to: ‘mini.sh’


2020-10-18 22:01:12 (72.2 MB/s) - ‘mini.sh’ saved [89817099/89817099]

PREFIX=/usr/local
Unpacking payload ...
Collecting package metadata (current_repodata.json): - \ done
Solving environment: / - done

## Package Plan ##

  environment location: /usr/local

  added / updated specs:
    - _libgcc_mutex==0.1=main
    - asn1crypto==1.3.0=py38_0
    - ca-certificates==2020.1.1=0
    - certifi==2019.11.28=py38_0
    - cffi==1.14.0=py38h2e261b9_0
    - chardet==3.0.4=py38_1003
    - conda-package-handling==1.6.0=py38h7b6447c_0
    - conda==4.8.2=py38_0
    - cryptography==2.8=py38h1ba5d50_

In [56]:
!pip install -r requirements.txt

Collecting Cython
  Downloading Cython-0.29.21-cp38-cp38-manylinux1_x86_64.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 2.8 MB/s 
[?25hCollecting matplotlib>=3.2.2
  Downloading matplotlib-3.3.2-cp38-cp38-manylinux1_x86_64.whl (11.6 MB)
[K     |████████████████████████████████| 11.6 MB 6.9 MB/s 
[?25hCollecting numpy>=1.18.5
  Downloading numpy-1.19.2-cp38-cp38-manylinux2010_x86_64.whl (14.5 MB)
[K     |████████████████████████████████| 14.5 MB 25 kB/s 
[?25hCollecting opencv-python>=4.1.2
  Downloading opencv_python-4.4.0.44-cp38-cp38-manylinux2014_x86_64.whl (49.5 MB)
[K     |████████████████████████████████| 49.5 MB 291 bytes/s 
[?25hCollecting pillow
  Downloading Pillow-8.0.0-cp38-cp38-manylinux1_x86_64.whl (2.2 MB)
[K     |████████████████████████████████| 2.2 MB 76.3 MB/s 
[?25hCollecting PyYAML>=5.3
  Using cached PyYAML-5.3.1.tar.gz (269 kB)
Collecting scipy>=1.4.1
  Downloading scipy-1.5.3-cp38-cp38-manylinux1_x86_64.whl (25.8 MB)
[K     |█████████

In [57]:
!python train.py --img 1024 --batch 32 --epochs 100 --data wheat.yaml --cfg models/yolov5s.yaml --name wn

Traceback (most recent call last):
  File "train.py", line 23, in <module>
    import test  # import test.py to get mAP after each epoch
  File "/content/yolov5/test.py", line 13, in <module>
    from models.experimental import attempt_load
  File "/content/yolov5/models/experimental.py", line 7, in <module>
    from models.common import Conv, DWConv
  File "/content/yolov5/models/common.py", line 8, in <module>
    from utils.datasets import letterbox
  File "/content/yolov5/utils/datasets.py", line 17, in <module>
    from utils.general import xyxy2xywh, xywh2xyxy, torch_distributed_zero_first
  File "/content/yolov5/utils/general.py", line 17, in <module>
    import matplotlib.pyplot as plt
  File "/usr/local/lib/python3.8/site-packages/matplotlib/pyplot.py", line 2336, in <module>
    switch_backend(rcParams["backend"])
  File "/usr/local/lib/python3.8/site-packages/matplotlib/pyplot.py", line 276, in switch_backend
    class backend_mod(matplotlib.backend_bases._Backend):
  File "

In [None]:
#run tensorboard 
tensorboard --logdir=runs

Now we test the model:
copy the weights into a dir

In [None]:
cd /content/yolov5/

create a directory to store the best weights in it 

In [None]:
mkdir wheat_det_project_weights 

copy in this dir the best model weights 

In [None]:
!cp /content/yolov5/runs/exp0_wheat_model/weights/best.pt .

# wheat detection testing 

to test detection run the following command 
```
python detect.py --source <a single img /a folder> --weights <model_weights.pt>
```

In [None]:
python detect.py --source /content/drive/My Drive/global-wheat-detection/test --weights best.pt
#this command will generate inference images located in 
#/content/yolov5/inference/output

if we want to display detections in images 

In [None]:
def predict_obj_det(im_path):
  image=plt.imread(im_path)
  plt.imshow(image)
  plt.show()

In [None]:
predict_obj_det(im_path)