In [None]:
!pip install map-boxes

In [None]:
import os
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split
from map_boxes import mean_average_precision_for_boxes
import random
import re
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as immg
from PIL import Image
%matplotlib inline

In [None]:
!git clone https://github.com/ultralytics/yolov5.git

In [None]:
!pip install -r ./yolov5/requirements.txt

In [None]:
# Number of entries in the test images directory.
len(os.listdir('../input/tatarstan/test_dataset_test/test/images'))

In [None]:
# Number of entries in the train images directory.
len(os.listdir('../input/tatarstan/train_dataset_train/train/images'))

In [None]:
# Distribution of train label files, keyed by the part of the file name after the last underscore
# (presumably the class name plus extension — confirm against the dataset).
pd.Series(os.listdir('../input/tatarstan/train_dataset_train/train/labels')).map(
    lambda label_file: label_file.rsplit('_', 1)[-1]
).value_counts()

In [None]:
# Create the training dataset layout in YOLO format:
# data_for_yolo/data/{images,labels}/{train,test}. exist_ok makes the cell re-runnable.
for kind in ('images', 'labels'):
    for split in ('train', 'test'):
        os.makedirs(os.path.join('data_for_yolo', 'data', kind, split), exist_ok=True)

In [None]:
# Contents for dataset.yaml.
# The entries of `names` are positional: index i names class id i, so they follow the
# ids that class2num writes into the label files (car=0, head=1, face=2, human=3, carplate=4).

yaml_content = """
train: ./data_for_yolo/data/images/train/
val: ./data_for_yolo/data/images/test/

# number of classes
nc: 5

# class names
names: ['car', 'head', 'face', 'human', 'carplate']
"""

In [None]:
# Working directories: correct_labels holds the re-indexed per-class label files,
# full_labels holds the merged per-image label files. exist_ok makes the cell re-runnable.
for work_dir in ('correct_labels', 'full_labels'):
    os.makedirs(work_dir, exist_ok=True)

In [None]:
# Copy every per-class label file into ./correct_labels, replacing the first field of each
# row with the numeric class id looked up from the class name embedded in the file name.
class2num = {'car': 0, 'head': 1, 'face': 2, 'human': 3, 'carplate': 4}
for label_file in os.listdir('../input/tatarstan/train_dataset_train/train/labels'):
    class_id = class2num[re.search(r'(?<=M_)(.*)(?=..txt)', label_file)[0]]
    row_prefix = str(class_id) + ' '
    with open(f'../input/tatarstan/train_dataset_train/train/labels/{label_file}', "r") as src, \
            open(f'./correct_labels/{label_file}', "w") as dst:
        # Everything after the first space-separated field is kept as it was.
        dst.writelines(row_prefix + ' '.join(row.split(' ')[1:]) for row in src)

In [None]:
# Group the per-class label file names by image id, where the id is the file name
# up to and including its last 'M'.
viewed = {}
for file_name in os.listdir('../input/tatarstan/train_dataset_train/train/labels'):
    image_id = re.search(r'(.*)(?<=M)', file_name)[0]
    viewed.setdefault(image_id, []).append(file_name)

In [None]:
# Merge the per-class label files of every image into one file ./full_labels/<image id>.txt.
for image_id, class_files in viewed.items():
    with open(f'./full_labels/{image_id}.txt', 'w') as outfile:
        for fname in class_files:
            with open(f'./correct_labels/{fname}') as infile:
                content = infile.read()
            outfile.write(content)
            # Keep rows of consecutive files on separate lines even when a file
            # does not end with a newline.
            if content and not content.endswith('\n'):
                outfile.write('\n')

In [None]:
 # Preview the first three merged label files: the file name followed by each of its rows.
 for merged_name in os.listdir('./full_labels')[:3]:
     with open(f'./full_labels/{merged_name}', "r") as merged_file:
         print(merged_name)
         for row in merged_file:
             print(row)

In [None]:
# Sorted so the list, and with it the seeded train/validation split, does not depend on
# the arbitrary order in which os.listdir returned the label files.
image_names = sorted(viewed)

In [None]:
# Hold out 20% of the image ids for validation; random_state fixes the shuffle seed.
train_image_names, val_image_names = train_test_split(image_names, test_size=0.2, random_state=22)
print(len(train_image_names), len(val_image_names))

In [None]:
def _copy_split(image_ids, split):
    """Copy the merged label file and the image of every id into one split folder.

    Files are located by exact name (``<id>.txt`` and ``<id>.jpg``), so an id that
    happens to be a substring of another id cannot pull in the other image's labels.
    Ids whose label file or image does not exist are skipped for that file.

    Args:
        image_ids: iterable of image ids (file names without extension).
        split: sub-folder name under data_for_yolo/data/{labels,images}.
    """
    for image_id in image_ids:
        # labels
        label_name = image_id + '.txt'
        label_src = './full_labels/' + label_name
        if os.path.isfile(label_src):
            shutil.copy(label_src, f'./data_for_yolo/data/labels/{split}/' + label_name)
        # images
        image_src = '../input/tatarstan/train_dataset_train/train/images/' + image_id + '.jpg'
        if os.path.isfile(image_src):
            shutil.copy(image_src, f'./data_for_yolo/data/images/{split}/')


# train
_copy_split(train_image_names, 'train')

# val (stored in the "test" folders, which dataset.yaml uses as the validation set)
_copy_split(val_image_names, 'test')

In [None]:
# Sanity check: number of images copied into the validation ("test") and train folders.
print(len(os.listdir('./data_for_yolo/data/images/test')))
print(len(os.listdir('./data_for_yolo/data/images/train')))

In [None]:
# Sanity check: number of label files copied into the validation ("test") and train folders.
print(len(os.listdir('./data_for_yolo/data/labels/test')))
print(len(os.listdir('./data_for_yolo/data/labels/train')))

In [None]:
# Training

In [None]:
# Show the GPU(s) visible to this session.
!nvidia-smi

In [None]:
# Turn off Weights & Biases logging.
!wandb off

In [None]:
# Put the dataset.yaml shipped with the input data next to the prepared data, then move the
# whole folder into the cloned repository, where the later commands expect it
# (./yolov5/data_for_yolo).
# NOTE(review): the yaml_content string defined earlier is never written to disk; the yaml
# actually used is this copy from the input dataset — confirm the two agree.
# NOTE(review): this cell is not re-runnable as is — after the move ./data_for_yolo is gone.
shutil.copy('../input/tatarstan/dataset.yaml', './data_for_yolo')
shutil.move('./data_for_yolo', './yolov5')

In [None]:
# #train
# !python ./yolov5/train.py --img 1280 --batch 20 --epochs 65 --data ./yolov5/data_for_yolo/dataset.yaml --weights yolov5s.pt --cfg ./yolov5/models/hub/yolov5s.yaml --name yolov5s_results --cache
# !python ./yolov5/train.py --img 1280 --batch 10 --epochs 40 --data ./yolov5/data_for_yolo/dataset.yaml --weights yolov5m6.pt --cfg ./yolov5/models/hub/yolov5m6.yaml --name yolov5m6_results --cache
# !python ./yolov5/train.py --img 1980 --batch 9 --epochs 60 --data ./yolov5/data_for_yolo/dataset.yaml --weights yolov5s6.pt --cfg ./yolov5/models/hub/yolov5s6.yaml --name yolov5s6_results --cache

In [None]:
# Run detection on the held-out validation images, passing three weight files to --weights.
# --save-txt and --save-conf make detect.py write per-image label files that include confidences.
# NOTE(review): the results are read later from ./yolov5/runs/detect/exp, which presumably
# requires this to be the first detect run in a fresh clone — confirm.
!python3 ./yolov5/detect.py --weights ../input/tatarstan/yolov5s6_1980_60epochs_9batch.pt ../input/tatarstan/weights/yolov5m6_1280_40epochs_10batch_0.598046.pt ../input/tatarstan/weights/yolov5s_1280_65epochs_20batch_0.569434.pt --img 1980 --conf 0.25 --source ./yolov5/data_for_yolo/data/images/test --save-txt --save-conf

In [None]:
def get_soliton_labels_df(path_to_txt_folder):
  """Read detection files from a folder and convert the boxes to corner form.

  Every non-blank line of a file is parsed as whitespace-separated numbers
  ``class x_center y_center width height confidence``.

  Args:
    path_to_txt_folder: directory holding one detection text file per image;
      a trailing path separator is optional.

  Returns:
    A list of ``[image_name, class, confidence, xmin, xmax, ymin, ymax]`` rows.
    ``image_name`` is the detection file name with its extension replaced by
    ``.jpg`` and ``class`` is a float. Files are processed in sorted name order.
  """
  simple_solution = []
  for detection_file in sorted(os.listdir(path_to_txt_folder)):
    # splitext drops only the final extension, so dots inside the image name survive.
    img_name = os.path.splitext(detection_file)[0] + '.jpg'
    with open(os.path.join(path_to_txt_folder, detection_file), 'r') as f:
      rows = [row for row in f.read().split('\n') if row.strip()]
    for row in rows:
      val = [float(i) for i in row.split()]
      cls, xywh, conf = val[0], val[1:5], val[5]
      center_x, center_y, width, height = xywh
      # Centre/size form -> min/max corners.
      xmin = center_x - (width / 2)
      xmax = center_x + (width / 2)
      ymin = center_y - (height / 2)
      ymax = center_y + (height / 2)
      simple_solution.append([img_name, cls, conf, xmin, xmax, ymin, ymax])
  return simple_solution

In [None]:
# Validation-set detections as a DataFrame with one row per predicted box.
simple_solution = pd.DataFrame(
    get_soliton_labels_df('./yolov5/runs/detect/exp/labels/'),
    columns=['ImageID', 'LabelName', 'Conf', 'XMin', 'XMax', 'YMin', 'YMax'],
)

In [None]:
def get_test_labels_df(path_to_txt_folder):
  """Read ground-truth label files from a folder and convert the boxes to corner form.

  Every non-blank line of a file must hold exactly five whitespace-separated
  numbers: ``class x_center y_center width height``.

  Args:
    path_to_txt_folder: directory holding one label text file per image;
      a trailing path separator is optional.

  Returns:
    A list of ``[image_name, class, xmin, xmax, ymin, ymax]`` rows.
    ``image_name`` is the label file name with its extension replaced by
    ``.jpg`` and ``class`` is a float. Files are processed in sorted name order.

  Raises:
    ValueError: if a line does not contain exactly five numbers.
  """
  simple_solution = []
  for detection_file in sorted(os.listdir(path_to_txt_folder)):
    # splitext drops only the final extension, so dots inside the image name survive.
    img_name = os.path.splitext(detection_file)[0] + '.jpg'
    with open(os.path.join(path_to_txt_folder, detection_file), 'r') as f:
      rows = [row for row in f.read().split('\n') if row.strip()]
    for row in rows:
      val = [float(i) for i in row.split()]
      cls, center_x, center_y, width, height = val
      # Centre/size form -> min/max corners.
      xmin = center_x - (width / 2)
      xmax = center_x + (width / 2)
      ymin = center_y - (height / 2)
      ymax = center_y + (height / 2)
      simple_solution.append([img_name, cls, xmin, xmax, ymin, ymax])
  return simple_solution

In [None]:
# Ground-truth boxes of the validation split as a DataFrame with one row per box.
test_labels = pd.DataFrame(
    get_test_labels_df('./yolov5/data_for_yolo/data/labels/test/'),
    columns=['ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax'],
)

In [None]:
# Score the validation detections against the ground-truth boxes at an IoU threshold of 0.5.
mean_ap, average_precisions = mean_average_precision_for_boxes(test_labels, simple_solution, iou_threshold=0.5, verbose=False)

In [None]:
# Display the mean average precision on the validation split.
mean_ap

In [None]:
def plot_image(image_name, data, path):
    """Show one image with its bounding boxes drawn as red rectangles.

    Args:
        image_name: file name of the image inside ``path``; also the key of the
            group looked up in ``data``.
        data: grouped box table (e.g. ``boxes.groupby('ImageID')``) whose groups
            have the columns ``XMin``, ``XMax``, ``YMin``, ``YMax``. The values
            are scaled by the image width/height, i.e. treated as fractions of
            the image size.
        path: directory that contains the image.

    Raises:
        KeyError: if ``data`` has no group for ``image_name``.
    """
    # Use the grouping that was passed in rather than a notebook-level global.
    image_group = data.get_group(image_name)
    bbox = image_group.loc[:, ['XMin', 'XMax', 'YMin', 'YMax']]
    img = immg.imread(os.path.join(path, image_name))
    # The pixel array is rows x columns (x channels), so height comes first.
    hf, wf = img.shape[:2]
    fig, ax = plt.subplots(figsize=(18, 10))
    ax.imshow(img, cmap='binary')
    for i in range(len(bbox)):
        box = bbox.iloc[i].values
        # Fractional corners -> top-left pixel position plus pixel width/height.
        x, y, w, h = box[0]*wf, box[2]*hf, (box[1]-box[0])*wf, (box[3]-box[2])*hf
        rect = matplotlib.patches.Rectangle((x, y), w, h, linewidth=1, edgecolor='r', facecolor='none',)
        ax.add_patch(rect)
    plt.show()

In [None]:
# path = './yolov5/data_for_yolo/data/images/test/'
# df_grp = test_labels.groupby(['ImageID'])
# for img_name in os.listdir(path): 
#     plot_image(img_name, df_grp, path)

In [None]:
# Run detection on the competition test images, passing the same three weight files to --weights.
# --save-txt and --save-conf make detect.py write per-image label files that include confidences.
# NOTE(review): the results are read later from yolov5/runs/detect/exp2, which presumably
# requires this to be the second detect run in the clone — confirm.
!python3 yolov5/detect.py --weights ../input/tatarstan/yolov5s6_1980_60epochs_9batch.pt ../input/tatarstan/weights/yolov5m6_1280_40epochs_10batch_0.598046.pt ../input/tatarstan/weights/yolov5s_1280_65epochs_20batch_0.569434.pt --img 1980 --conf 0.25 --source ../input/tatarstan/test_dataset_test/test/images --save-txt --save-conf

In [None]:
def get_soliton_labels_df_simple_solution(path_to_txt_folder):
  """Read detection files from a folder and build submission rows in corner form.

  Every non-blank line of a file is parsed as whitespace-separated numbers
  ``class x_center y_center width height confidence``.

  Args:
    path_to_txt_folder: directory holding one detection text file per image;
      a trailing path separator is optional.

  Returns:
    A list of ``[image_name, class, confidence, xmin, xmax, ymin, ymax]`` rows.
    ``image_name`` is the detection file name with its extension replaced by
    ``.jpg`` and ``class`` is an int. Files are processed in sorted name order.
  """
  simple_solution = []
  for detection_file in sorted(os.listdir(path_to_txt_folder)):
    # splitext drops only the final extension, so dots inside the image name survive.
    img_name = os.path.splitext(detection_file)[0] + '.jpg'
    with open(os.path.join(path_to_txt_folder, detection_file), 'r') as f:
      rows = [row for row in f.read().split('\n') if row.strip()]
    for row in rows:
      val = [float(i) for i in row.split()]
      cls, xywh, conf = val[0], val[1:5], val[5]
      center_x, center_y, width, height = xywh
      # Centre/size form -> min/max corners.
      xmin = center_x - (width / 2)
      xmax = center_x + (width / 2)
      ymin = center_y - (height / 2)
      ymax = center_y + (height / 2)
      simple_solution.append([img_name, int(cls), conf, xmin, xmax, ymin, ymax])
  return simple_solution

In [None]:
# Test-set detections as a DataFrame with one row per predicted box.
simple_solution = pd.DataFrame(
    get_soliton_labels_df_simple_solution('yolov5/runs/detect/exp2/labels/'),
    columns=['ImageID', 'LabelName', 'Conf', 'XMin', 'XMax', 'YMin', 'YMax'],
)

In [None]:
# Write the detections to a ';'-separated CSV without the index column.
simple_solution.to_csv("sample_solution.csv", sep=';', index=False)

In [None]:
# Read the saved file back to inspect what was written.
df = pd.read_csv("sample_solution.csv", sep=';', index_col=None)

In [None]:
# Number of predicted boxes per class id.
df['LabelName'].value_counts()

In [None]:
# Display the reloaded predictions.
df