# Препроцессинг данных в формат YOLO

In [1]:
!pip install -U wandb

Collecting wandb
  Downloading wandb-0.13.3-py2.py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m0m
Installing collected packages: wandb
  Attempting uninstall: wandb
    Found existing installation: wandb 0.12.21
    Uninstalling wandb-0.12.21:
      Successfully uninstalled wandb-0.12.21
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
allennlp 2.10.0 requires protobuf==3.20.0, but you have protobuf 3.19.4 which is incompatible.
allennlp 2.10.0 requires wandb<0.13.0,>=0.10.0, but you have wandb 0.13.3 which is incompatible.[0m[31m
[0mSuccessfully installed wandb-0.13.3
[0m

In [2]:
from kaggle_secrets import UserSecretsClient
import wandb

user_secrets = UserSecretsClient()
wandb_key = user_secrets.get_secret("wandb-key")

wandb.login(key=wandb_key)
# wandb.init(project="haha_start_find_bear", name="a_metric")

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [3]:
import os
import shutil
from tqdm import tqdm
import pandas as pd
from sklearn.model_selection import train_test_split

In [4]:
train = pd.read_csv('../input/haha-start-findbear/ХаХаСтарт_ Найди Пиво/train/train_annotations.csv')

In [5]:
category_names = ['Bier', 'Bier Maß', 'Weißbier', 'Cola', 'Wasser', 'Curry-Wurst', 'Weißwein',
                  'A-Schorle', 'Jägermeister', 'Pommes', 'Burger', 'Williamsbirne', 'Alm-Breze', 'Brotzeitkorb',
                  'Käsespätzle']

category_names = {k: v for v, k in enumerate(category_names)}

In [6]:
# Директории для сохранения файлов для обучения и валидации
!mkdir data_for_yolo
!mkdir data_for_yolo/data
!mkdir data_for_yolo/data/train
!mkdir data_for_yolo/data/val
!mkdir data_for_yolo/data/train/images
!mkdir data_for_yolo/data/train/labels
!mkdir data_for_yolo/data/val/images
!mkdir data_for_yolo/data/val/labels

In [None]:
# https://github.com/ultralytics/yolov5/issues/9194

In [83]:
yaml_content = f"""
train: ../data_for_yolo/data/train/images/
val: ../data_for_yolo/data/val/images/

# number of classes
nc: {len(category_names)}

# class names
names: {list(category_names.values())}
"""

with open('data_for_yolo/dataset.yaml', 'w') as f:
    f.write(yaml_content)

In [7]:
names_str = " \n ".join([f'{item[1]}: {item[0]}' for item in list(zip(category_names.keys(), category_names.values()))])
yaml_content = f"""
train: ../data_for_yolo/data/train/images/
val: ../data_for_yolo/data/val/images/

# number of classes
nc: {len(category_names)}

# class names
names: \n {names_str}
"""

with open('data_for_yolo/dataset.yaml', 'w') as f:
    f.write(yaml_content)

In [8]:
!cat ./data_for_yolo/dataset.yaml


train: ../data_for_yolo/data/train/images/
val: ../data_for_yolo/data/val/images/

# number of classes
nc: 15

# class names
names: 
 0: Bier 
 1: Bier Maß 
 2: Weißbier 
 3: Cola 
 4: Wasser 
 5: Curry-Wurst 
 6: Weißwein 
 7: A-Schorle 
 8: Jägermeister 
 9: Pommes 
 10: Burger 
 11: Williamsbirne 
 12: Alm-Breze 
 13: Brotzeitkorb 
 14: Käsespätzle


In [9]:
train_images_names, val_images_names = train_test_split(train.ImageID.values, 
                                                        test_size=0.1, 
                                                        random_state=1717)

print(len(train_images_names), len(val_images_names))

2426 270


In [10]:
label_s = []
for i, (_, LabelName, XMin, XMax, YMin, YMax) in train[train.ImageID == train_images_names[0]].iterrows():
    x_center = round((XMin + XMax) / 2, 3)
    y_center = round((YMin + YMax) / 2, 3)
    width = round(XMax - XMin, 3)
    height = round(YMax - YMin, 3)
    label_s.append(f'{category_names[LabelName]} {x_center} {y_center} {width} {height}')
label_s = '\n'.join(label_s)

In [11]:
print(label_s)

2 0.158 0.526 0.161 0.136
7 0.416 0.498 0.108 0.169
4 0.905 0.825 0.128 0.233


In [12]:
# yolov5 label format: class x_center y_center width height

# train
for train_image in tqdm(train_images_names):

    label_s = []
    for i, (_, LabelName, XMin, XMax, YMin, YMax) in train[train.ImageID == train_image].iterrows():
        x_center = round((XMin + XMax) / 2, 3)
        y_center = round((YMin + YMax) / 2, 3)
        width = round(XMax - XMin, 3)
        height = round(YMax - YMin, 3)
        label_s.append(f'{category_names[LabelName]} {x_center} {y_center} {width} {height}')
    label_s = '\n'.join(label_s)
    
    # labels
    label_file = train_image.replace('.jpg', '.txt')
    with open('data_for_yolo/data/train/labels/'+label_file, 'w') as f:
        f.write(label_s)

    # images 
    shutil.copy('../input/haha-start-findbear/ХаХаСтарт_ Найди Пиво/train/'+train_image, 'data_for_yolo/data/train/images/'+train_image)

# val
for val_image in tqdm(val_images_names):

    label_s = []
    for i, (_, LabelName, XMin, XMax, YMin, YMax) in train[train.ImageID == val_image].iterrows():
        x_center = round((XMin + XMax) / 2, 3)
        y_center = round((YMin + YMax) / 2, 3)
        width = round(XMax - XMin, 3)
        height = round(YMax - YMin, 3)
        label_s.append(f'{category_names[LabelName]} {x_center} {y_center} {width} {height}')
    label_s = '\n'.join(label_s)
    
    # labels
    label_file = val_image.replace('.jpg', '.txt')
    with open('data_for_yolo/data/val/labels/'+label_file, 'w') as f:
        f.write(label_s)

    # images 
    shutil.copy('../input/haha-start-findbear/ХаХаСтарт_ Найди Пиво/train/'+val_image, 'data_for_yolo/data/val/images/'+val_image)

100%|██████████| 2426/2426 [00:14<00:00, 165.14it/s]
100%|██████████| 270/270 [00:00<00:00, 286.08it/s]


In [13]:
# !ls ./data_for_yolo/data/val/labels/
# !cat ./data_for_yolo/data/val/labels/1526730197210_45.txt
!cat ./data_for_yolo/dataset.yaml


train: ../data_for_yolo/data/train/images/
val: ../data_for_yolo/data/val/images/

# number of classes
nc: 15

# class names
names: 
 0: Bier 
 1: Bier Maß 
 2: Weißbier 
 3: Cola 
 4: Wasser 
 5: Curry-Wurst 
 6: Weißwein 
 7: A-Schorle 
 8: Jägermeister 
 9: Pommes 
 10: Burger 
 11: Williamsbirne 
 12: Alm-Breze 
 13: Brotzeitkorb 
 14: Käsespätzle


# Обучение YOLO

In [14]:
!git clone https://github.com/ultralytics/yolov5.git
!pip install -r ./yolov5/requirements.txt

Cloning into 'yolov5'...
remote: Enumerating objects: 12398, done.[K
remote: Counting objects: 100% (106/106), done.[K
remote: Compressing objects: 100% (84/84), done.[K
remote: Total 12398 (delta 56), reused 44 (delta 22), pack-reused 12292[K
Receiving objects: 100% (12398/12398), 12.71 MiB | 8.42 MiB/s, done.
Resolving deltas: 100% (8525/8525), done.
Collecting thop>=0.1.1
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop
Successfully installed thop-0.1.1.post2209072238
[0m

In [None]:
# !nvidia-smi

In [16]:
# !cd yolov5 && python train.py --img 640 --batch 16 --epochs 10 --data ../data_for_yolo/dataset.yaml --weights yolov5s.pt
# !cd yolov5 && python train.py --img 640 --batch 32 --epochs 28 --data ../data_for_yolo/dataset.yaml --weights yolov5s.pt
!cd yolov5 && python train.py --img 1280 --batch 16 --epochs 30 --data ../data_for_yolo/dataset.yaml --weights yolov5s.pt

[34m[1mwandb[0m: Currently logged in as: [33mdimka11[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=../data_for_yolo/dataset.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=30, batch_size=16, imgsz=1280, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v6.2-128-ge8a9c5a Python-3.7.12 torch-1.11.0 CUDA:0 (Tesla P100-PCIE-16GB, 16281MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_

Yolov7

In [24]:
!git clone https://github.com/WongKinYiu/yolov7.git
!pip install -r ./yolov7/requirements.txt

Cloning into 'yolov7'...
remote: Enumerating objects: 959, done.[K
remote: Counting objects: 100% (2/2), done.[K
remote: Compressing objects: 100% (2/2), done.[K
remote: Total 959 (delta 0), reused 0 (delta 0), pack-reused 957[K
Receiving objects: 100% (959/959), 68.16 MiB | 28.70 MiB/s, done.
Resolving deltas: 100% (499/499), done.
[0m

yolov7 train

In [None]:
# import wandb
# wandb.login(key='6cecf29ed31ae96db4daf5c7f6f093bfae2f5de5')

In [None]:
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7_training.pt

In [79]:
!rm -r  ./data_for_yolo/data/train/labels.cache
!rm -r  ./data_for_yolo/data/val/labels.cache

In [80]:
!pip uninstall --y wandb # !wandb disabled

Found existing installation: wandb 0.13.3
Uninstalling wandb-0.13.3:
  Successfully uninstalled wandb-0.13.3
[0m

In [None]:
# !cd yolov7 && python train.py --workers 8 --device 0 --batch-size 16 --epochs 15 --data ../data_for_yolo/dataset.yaml --img 640 640 --cfg cfg/training/yolov7.yaml --weights '../yolov7_training.pt' --name yolov7-custom --hyp data/hyp.scratch.custom.yaml
!cd yolov7 && python train.py --workers 8 --device 0 --batch-size 16 --epochs 20 --data ../data_for_yolo/dataset.yaml --img 640 640 --cfg cfg/training/yolov7.yaml --weights '../../input/best-yolov7-15-epochs/yolov7_30_epochs.pt' --name yolov7-custom --hyp data/hyp.scratch.custom.yaml

# Создание файла с результатами на тестовых данных

In [None]:
# !cp ./yolov7/runs/train/yolov7-custom3/weights/best.pt ./

In [None]:
# --augment --conf --iou

In [25]:
!cd yolov5 && python detect.py --augment --img 1280 --source '../../input/haha-start-findbear/ХаХаСтарт_ Найди Пиво/test/' --weights runs/train/exp2/weights/best.pt --save-txt --save-conf

[34m[1mdetect: [0mweights=['runs/train/exp2/weights/best.pt'], source=../../input/haha-start-findbear/ХаХаСтарт_ Найди Пиво/test/, data=data/coco128.yaml, imgsz=[1280, 1280], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=True, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v6.2-128-ge8a9c5a Python-3.7.12 torch-1.11.0 CUDA:0 (Tesla P100-PCIE-16GB, 16281MiB)

Fusing layers... 
Model summary: 157 layers, 7050580 parameters, 0 gradients, 15.9 GFLOPs
image 1/85 /kaggle/input/haha-start-findbear/ХаХаСтарт_ Найди Пиво/test/1526680458785_20.jpg: 736x1280 1 Weißbier, 31.7ms
image 2/85 /kaggle/input/haha-start-findbear/ХаХаСтарт_ Найди Пиво/test/1526731061177_20.jpg: 736x1280 1 Käsespätzle, 28.7ms
image 3/85 /kaggle/input/haha-start-find

yolov7 inference

In [None]:
# !cd yolov7 && python detect.py --weights '../../input/best-yolov7-15-epochs/yolov7_30_epochs.pt' --augment  --agnostic --img-size 640 --iou 0.05 --source '../../input/haha-start-findbear/ХаХаСтарт_ Найди Пиво/test/' --save-txt --save-conf 

In [None]:
# !cd yolov7 && python detect.py --weights '../../input/best-yolov7-15-epochs/yolov7_30_epochs.pt' --augment  --agnostic --conf 0.15 --iou 0.3 --img-size 1920 --source '../../input/haha-start-findbear/ХаХаСтарт_ Найди Пиво/test/' --save-txt --save-conf 

In [69]:
# !rm predict.zip
# !zip -q predict.zip ./yolov7/runs/detect/exp15/*

In [19]:
def get_soliton_labels_df(path_to_txt_folder):
    simple_solution = []
    for detection_file in os.listdir(path_to_txt_folder):
        img_name = detection_file.split('.')[0] + '.jpg'
        with open(path_to_txt_folder + detection_file, 'r') as f:
            data = f.read()
            data = [i for i in data.split('\n') if i != '']
        for line in data:
            val = [float(i) for i in line.split()]
            cls, xywh, conf = val[0], val[1:5], val[5]
            center_x, center_y, width, height = xywh
            xmin = center_x - (width / 2)
            xmax = center_x + (width / 2)
            ymin = center_y - (height / 2)
            ymax = center_y + (height / 2)
            simple_solution.append([img_name, cls, conf, xmin, xmax, ymin, ymax])
    return simple_solution

In [26]:
simple_solution = get_soliton_labels_df('./yolov5/runs/detect/exp2/labels/')
simple_solution = pd.DataFrame(simple_solution, columns=['ImageID', 'LabelName', 'Conf', 'XMin', 'XMax', 'YMin', 'YMax'])

In [27]:
category_names = ['Bier', 'Bier Maß', 'Weißbier', 'Cola', 'Wasser', 'Curry-Wurst', 'Weißwein',
                  'A-Schorle', 'Jägermeister', 'Pommes', 'Burger', 'Williamsbirne', 'Alm-Breze', 'Brotzeitkorb',
                  'Käsespätzle']

simple_solution.LabelName = simple_solution.LabelName.apply(lambda x: category_names[int(x)])

In [28]:
simple_solution

Unnamed: 0,ImageID,LabelName,Conf,XMin,XMax,YMin,YMax
0,1527002513396_70.jpg,Bier,0.575752,0.386458,0.511458,0.384260,0.546296
1,1527002513396_70.jpg,Bier Maß,0.890441,0.385417,0.510417,0.384260,0.546296
2,1527002513396_70.jpg,Wasser,0.908396,0.313021,0.411979,0.381481,0.510185
3,1527002513396_70.jpg,Bier Maß,0.911868,0.416145,0.539062,0.501852,0.682408
4,1526744530681_45.jpg,Bier Maß,0.315206,0.422396,0.518750,0.405555,0.533333
...,...,...,...,...,...,...,...
263,1527420381430_70.jpg,Käsespätzle,0.890284,0.495312,0.633854,0.402778,0.740741
264,1527419523464_45.jpg,Brotzeitkorb,0.274996,0.111980,0.331251,0.452778,0.757408
265,1527419523464_45.jpg,Burger,0.298842,0.469271,0.591145,0.395370,0.646296
266,1527419523464_45.jpg,Pommes,0.307199,0.111459,0.330730,0.465741,0.775925


In [29]:
simple_solution.to_csv('./solution.csv', index=False)

In [30]:
from IPython.display import FileLink
FileLink(r'./solution.csv')

In [35]:
!zip -r predict.zip ./yolov5/runs/detect/

  adding: yolov5/runs/detect/ (stored 0%)
  adding: yolov5/runs/detect/exp2/ (stored 0%)
  adding: yolov5/runs/detect/exp2/1526824273726_20.jpg (deflated 0%)
  adding: yolov5/runs/detect/exp2/1526753194360_45.jpg (deflated 0%)
  adding: yolov5/runs/detect/exp2/1527181781609_70.jpg (deflated 0%)
  adding: yolov5/runs/detect/exp2/1526744530681_45.jpg (deflated 0%)
  adding: yolov5/runs/detect/exp2/1527344526047_20.jpg (deflated 0%)
  adding: yolov5/runs/detect/exp2/1527335004395_20.jpg (deflated 0%)
  adding: yolov5/runs/detect/exp2/1527441356645_45.jpg (deflated 0%)
  adding: yolov5/runs/detect/exp2/1527354299678_20.jpg (deflated 0%)
  adding: yolov5/runs/detect/exp2/1527008159184_20.jpg (deflated 0%)
  adding: yolov5/runs/detect/exp2/1526898802361_70.jpg (deflated 0%)
  adding: yolov5/runs/detect/exp2/1527425397244_20.jpg (deflated 0%)
  adding: yolov5/runs/detect/exp2/1527440396680_20.jpg (deflated 0%)
  adding: yolov5/runs/detect/exp2/1526908651202_20.jpg (deflated 0%)
  adding: yolo