# YOLOv4 Data Mining Method

In [None]:
# !rm -r train/
# !rm -r test/
# !rm *.csv
# !rm *.txt
# !rm *.py
# !rm *.weights
# !rm -r darknet/

In [None]:
# Mount Google Drive at /content/drive; later cells read from and write to
# paths under drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

## Installing dependencies

In [None]:
# boto3 is not imported anywhere in this notebook; presumably it is required
# by the downloader.py script that a later cell fetches and runs — TODO confirm.
!pip3 install boto3

## Importing Libraries

In [None]:
import cv2, os
import pandas as pd

## Cloning & Staging YOLO Framework

In [None]:
!ls
!cd /content
!rm -fr darknet
!git clone https://github.com/AlexeyAB/darknet/
% cd darknet
!sed -i 's/OPENCV=0/OPENCV=1/g' Makefile
!sed -i 's/GPU=0/GPU=1/g' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/g' Makefile
!apt update
!apt-get install libopencv-dev

In [None]:
!make &> compile.log
%cd ..

In [None]:
# %cd ..

### Data Set Preparation
- Initializing variables
- Create Directory Structure
- Get annotations csv file 
- Randomly select images for people and boxes
- Prepare Image List
- Download Images
- Resize Images
- Generate Image annotation labels
- Create train.txt and valid.txt files
- Create obj.names and obj.data

In [None]:
# Initializing variables
# Each class entry is (class name, value matched against the LabelName column,
# class index).
# Single-class variant: classes = [('person', '/m/01g317', 0)]
classes = [
    ('box', '/m/025dyy', 0),
    ('person', '/m/01g317', 1),
]
partitions = ['train', 'validation', 'test']
img_folders = ['images']

# v0002: columns read when selecting images (0) and when building labels (1).
# Earlier variants, kept for reference:
#   v0001: cols_to_keep_0 = ['ImageID', 'LabelName', 'XMin', 'XMax', 'YMin','YMax']
#   cols_to_keep_1 = ['path', 'x1', 'y1',	'w', 'h', 'class']
cols_to_keep_0 = ['ImageID', 'LabelName']
cols_to_keep_1 = cols_to_keep_0 + ['XMin', 'XMax', 'YMin', 'YMax']

train_max_images = 6000  # 100
train_img_format = 'YOLO'  # 'Keras'
validation_size = 0.2

# Folders inside the darknet checkout that receive the prepared dataset.
data_path = 'darknet/data'
obj_path = data_path + '/obj'


In [None]:
# Creating Directory Structure
def _make_dir(path):
  """Create directory `path`, reporting instead of failing if it exists.

  Only FileExistsError is tolerated; any other failure (for example a
  missing parent directory) propagates instead of being reported as
  "exists".
  """
  try:
    os.mkdir(path)
  except FileExistsError:
    print("dir {} exists".format(path))


# One download folder per partition/folder pair, e.g. train_images.
for partition in partitions:
  for folder in img_folders:
    _make_dir("{}_{}".format(partition, folder))

# Destination of the resized images and their label files.
_make_dir('darknet/data/obj')

In [None]:
# Getting annotations files
# Bounding-box annotation CSVs for the train, validation and test partitions.
!curl -O https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-bbox.csv
!curl -O https://storage.googleapis.com/openimages/v5/validation-annotations-bbox.csv
!curl -O https://storage.googleapis.com/openimages/v5/test-annotations-bbox.csv
# Downloader script; a later cell runs it on the *_img_list.txt files.
!curl -O https://raw.githubusercontent.com/openimages/dataset/master/downloader.py 
# Rename so the train file follows the same '<partition>-annotations-bbox.csv'
# pattern as the other two; later cells build the file name from the partition.
!mv oidv6-train-annotations-bbox.csv train-annotations-bbox.csv

In [None]:
# df = pd.read_csv('train-annotations-bbox.csv')[cols_to_keep_0]
# df = df.loc[df['LabelName'].isin(['/m/025dyy','/m/01g317'])]
# df.head()

In [None]:
# df.loc[df['LabelName']=='/m/025dyy'].count()
# # len(df['ImageID'].unique())

In [None]:
# v002 - Creating Main Image Dataframe  train/test img_list and Pre staging Train_Images.txt list from Annotation Box
# Result: img_df with one row per selected image, and partition_info, a list
# of (partition, class name, number of images selected) tuples.
partition_info = []
class_frames = []

for partition in partitions:
  # Per-class cap: train_max_images for train, a validation_size share of it
  # for every other partition.
  max_images = train_max_images if partition == 'train' else round(train_max_images * validation_size)
  part_fname = '{}-annotations-bbox.csv'.format(partition)

  # usecols keeps the columns that are not needed out of memory.
  df = pd.read_csv(part_fname, usecols=cols_to_keep_0)[cols_to_keep_0]

  for cls in classes:
    # Filter by label first, then reduce to one row per image. De-duplicating
    # on ImageID before filtering would keep only each image's first
    # annotation row and so lose every image whose first box has another label.
    curr_df = df.loc[df['LabelName'] == cls[1]].drop_duplicates('ImageID', keep='first')

    if len(curr_df) > max_images:
      curr_df = curr_df.sample(max_images)

    partition_info.append((partition, cls[0], len(curr_df)))
    # 'class' is a Python keyword, hence the dict form of assign().
    class_frames.append(curr_df.assign(**{'partition': partition, 'class': cls[0]}))

# DataFrame.append was removed in pandas 2.0; concatenate all pieces once.
img_df = pd.concat(class_frames, ignore_index=True)

# An image can be selected for more than one class; keep its first entry only.
img_df.drop_duplicates('ImageID', keep='first', inplace=True)

# Image folder as darknet sees it, i.e. obj_path without the leading 'darknet/'.
obj_rel_dir = os.path.relpath(obj_path, 'darknet')
part_and_id = list(zip(img_df['partition'], img_df['ImageID']))

# Location of the downloaded file, e.g. train_images/<ImageID>.jpg.
img_df['path'] = [os.path.join('{}_images'.format(p), '{}.jpg'.format(i)) for p, i in part_and_id]
# '<partition>/<ImageID>' entries written to the *_img_list.txt download lists.
img_df['download_fmt'] = ['{}/{}'.format(p, i) for p, i in part_and_id]
img_df['obj_path'] = ['{}/{}.jpg'.format(obj_path, i) for _, i in part_and_id]
img_df['obj_rel_path'] = ['{}/{}.jpg'.format(obj_rel_dir, i) for _, i in part_and_id]
print('Image Dataframe ready!')
print(partition_info)
 

In [None]:
# Preview the first rows of the image DataFrame.
img_df.head()

In [None]:
 #v002 create list of image to download
# Writes one '<partition>_img_list.txt' per partition containing that
# partition's 'download_fmt' values, one per line, without index or header.
for partition in partitions:
  fname = "{}_img_list.txt".format(partition)
  download_entries = img_df.loc[img_df['partition'] == partition, 'download_fmt']
  download_entries.to_csv(fname, index=False, header=False)
  print("{} Ready".format(fname))


In [None]:
# Only if test & train folder are not present Uncomment and Run this cell
!python downloader.py train_img_list.txt --download_folder=train_images
!python downloader.py validation_img_list.txt --download_folder=validation_images
!python downloader.py test_img_list.txt --download_folder=test_images

!rm train_img_list.txt +
!rm validation_img_list.txt
!rm test_img_list.txt
!rm downloader.py

In [None]:
# Count the files in each partition's download folder.
!ls train_images | wc -l
!ls validation_images | wc -l
!ls test_images | wc -l

In [None]:
#v002 create annotations dataframe
# annot_df holds every bounding box of the selected classes for the selected
# train and validation images; the test partition contributes no rows.
class_labels = [c[1] for c in classes]
# Class index = position in `classes`.
class_index = {label: i for i, label in enumerate(class_labels)}

annot_frames = []
for partition in partitions:
  part_fname = '{}-annotations-bbox.csv'.format(partition)

  if partition != 'test':
    # usecols keeps the columns that are not needed out of memory.
    curr_df = pd.read_csv(part_fname, usecols=cols_to_keep_1)[cols_to_keep_1]

    imgid_lst = list(img_df.loc[img_df['partition'] == partition, 'ImageID'])
    keep = curr_df['ImageID'].isin(imgid_lst) & curr_df['LabelName'].isin(class_labels)

    # .copy() so the new column is added to an independent frame rather than
    # to a slice of the frame that was read from disk.
    curr_df = curr_df.loc[keep].copy()
    curr_df['partition'] = partition
    annot_frames.append(curr_df)

  # The annotation file is deleted for every partition, including test.
  os.remove(part_fname)

# DataFrame.append was removed in pandas 2.0; concatenate all pieces once.
annot_df = pd.concat(annot_frames, ignore_index=True)
annot_df['class'] = annot_df['LabelName'].map(class_index)

print('Annotations DataFrame Ready!')

In [None]:
# annot_df.loc[annot_df['LabelName'] == '/m/01g317']
# annot_df.loc[annot_df['LabelName'] == '/m/025dyy']
# '/m/01g317'
# '/m/025dyy'

In [None]:
#v002 resize images
size_ratio = 0.7

def resize_img(r, ratio):
  """Write a scaled copy of one image into obj_path.

  Args:
    r: row/mapping whose 'path' entry is the source image file.
    ratio: factor applied to both the height and the width.

  The copy keeps the source file name. An image that cannot be read is
  reported and skipped, so a single failed download does not abort the
  whole resize pass.
  """
  img_path = r['path']

  img = cv2.imread(img_path)
  if img is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    print('Skipping unreadable image {}'.format(img_path))
    return

  # Only the first two dimensions are needed; the channel count is irrelevant.
  h, w = img.shape[:2]
  h = int(h * ratio)
  w = int(w * ratio)

  _, img_name = os.path.split(img_path)
  img_path = os.path.join(obj_path, img_name)
  # cv2.resize takes the target size as (width, height).
  sml_img = cv2.resize(img, (w, h))
  cv2.imwrite(img_path, sml_img)

# Only the train and validation images are resized into obj_path.
resiz_df = img_df.loc[img_df['partition'].isin(['train', 'validation'])]
resiz_df.apply(lambda r: resize_img(r, size_ratio), axis=1)
print('Resize Ready')

In [None]:
# v002 create image label
def create_img_label(r):
  """Append one label line for annotation row `r` to that image's label file.

  The line is '<class> <x_center> <y_center> <width> <height>', computed from
  r['XMin'], r['XMax'], r['YMin'] and r['YMax'], and goes to
  '<obj_path>/<ImageID>.txt'.

  The file is opened in append mode so that several boxes of the same image
  accumulate in one file; as a consequence, processing the same row twice
  writes its line twice.
  """
  width = r['XMax'] - r['XMin']
  height = r['YMax'] - r['YMin']
  fmt = '{} {} {} {} {}'.format(r['class'], r['XMin'] + (width / 2), r['YMin'] + (height / 2), width, height)
  label_path = os.path.join(obj_path, '{}.txt'.format(r['ImageID']))
  # `with` closes the file even when the write raises.
  with open(label_path, 'a') as curr_file:
    curr_file.write('{}\n'.format(fmt))

# Write one label line per annotation row.
annot_df.apply(create_img_label, axis=1)
print('Image Labels Ready')

In [None]:
# v002 create train and valid txt files
# Each list holds the 'obj_rel_path' of every image of one partition, one per
# line, without index or header.
for partition_name, list_path in (('train', 'darknet/data/train.txt'),
                                  ('validation', 'darknet/data/valid.txt')):
  rel_paths = img_df.loc[img_df['partition'] == partition_name, 'obj_rel_path']
  rel_paths.to_csv(list_path, index=False, header=False)
print('train and valid txt files Ready')

In [None]:
# Creating obj.names
# One class name per line, in `classes` order. The file is opened with 'w'
# rather than 'a' so that re-running this cell rewrites it instead of
# appending a second copy of the names.
with open('darknet/data/obj.names', 'w') as file_objnames:
  file_objnames.writelines(cls[0] + '\n' for cls in classes)

In [None]:
# Creating obj.data
# Key/value file naming the class count, the train/valid lists, the names
# file and the backup folder; paths are relative to the darknet folder.
objdata_lines = [
    'classes = {}'.format(len(classes)),
    'train  = data/train.txt',
    'valid  = data/valid.txt',
    'names = data/obj.names',
    'backup = backup/',
]
text = '\n'.join(objdata_lines)
with open('darknet/data/obj.data', 'w') as file_objdata:
  file_objdata.write(text)

In [None]:
# Count the files in darknet/data/obj (the resized images and their label
# .txt files are both written there).
!ls darknet/data/obj | wc -l

## Train the data
- Get pre-trained weights
- Train the data

In [None]:
# Copy yolo-obj.cfg from Google Drive into darknet/cfg; the training cell
# passes it to darknet as ./cfg/yolo-obj.cfg.
!cp drive/MyDrive/Colab\ Notebooks/CV003/yolo-obj.cfg darknet/cfg

In [None]:
% cd darknet
!wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137

In [None]:
# Train with the dataset description (obj.data), the model config and the
# downloaded yolov4.conv.137 weights as the starting point.
# NOTE(review): -dont_show presumably suppresses darknet's display window,
# which a headless runtime cannot open — confirm against the darknet README.
!./darknet detector train ./data/obj.data ./cfg/yolo-obj.cfg ./yolov4.conv.137 -dont_show

In [None]:
!cp data/obj.data ../drive/MyDrive/Colab\ Notebooks/CV003
!cp data/obj.names ../drive/MyDrive/Colab\ Notebooks/CV003
!cp cfg/yolo-obj.cfg ../drive/MyDrive/Colab\ Notebooks/CV003
!cp backup/* ../drive/MyDrive/Colab\ Notebooks/CV003
!cp data/obj/* ../drive/MyDrive/Colab\ Notebooks/CV003/obj
!cp data/train.txt ../drive/MyDrive/Colab\ Notebooks/CV003
!cp data/valid.txt ../drive/MyDrive/Colab\ Notebooks/CV003

In [None]:
### predictions
# Run the detector on ../test03.jpeg using the weights file
# backup/yolo-obj_1000.weights.
# NOTE(review): -thresh 0.2 presumably sets the minimum confidence for a
# detection to be reported — confirm against the darknet README.
!./darknet detector test data/obj.data cfg/yolo-obj.cfg backup/yolo-obj_1000.weights ../test03.jpeg -thresh 0.2

In [None]:
# Copy a prediction image to Google Drive.
# NOTE(review): no cell shown here creates a file with this name; presumably
# the detector output was renamed by hand — confirm before running.
!cp predictions03_1_20.428t20.jpg ../drive/MyDrive/Colab\ Notebooks/Results_CV003

In [None]:
# Run darknet's `detector map` command on backup/yolo-obj_3000.weights.
# NOTE(review): presumably this reports mAP on the `valid` list named in
# obj.data — confirm against the darknet README.
!./darknet detector map data/obj.data cfg/yolo-obj.cfg backup/yolo-obj_3000.weights

In [None]:
#!wget https://pjreddie.com/media/files/yolov3.weights
#!./darknet detect cfg/yolov3.cfg yolov3.weights data/dog.jpg

In [None]:
#!nvidia-smi

In [None]:
#!git clone https://github.com/pjreddie/darknet.git
#!git clone https://github.com/AlexeyAB/darknet.git
# weights
# !wget https://pjreddie.com/media/files/darknet53.conv.74