<a href="https://colab.research.google.com/github/Yyalexx/detecting-beer/blob/master/model_training_ipynb%22.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/gdrive; every dataset path used below lives under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import os 
import shutil 
import glob 
from tqdm import tqdm
from PIL import Image
from sklearn.model_selection import train_test_split
import torch

# Обработка данных

In [3]:
# Define every path used by the notebook and create the working folders for the resized data.

# Project root on Google Drive
path_to_files = "/content/gdrive/MyDrive/VIZIT/YOLO5"

# Source images and their label files
path_to_images = os.path.join(path_to_files, 'all_images_4_YOLO5/')
path_to_labels = os.path.join(path_to_files, 'all_labels_4_YOLO5/')

# Destination for the resized images and for the labels copied alongside them
path_to_resized_images = os.path.join(path_to_files, 'resized_images/')
path_to_resized_labels = os.path.join(path_to_files, 'resized_labels/')

# Create the destination folders on first run; folders that already exist are left untouched
for resized_dir in (path_to_resized_images, path_to_resized_labels):
    if not os.path.exists(resized_dir):
        os.mkdir(resized_dir)

In [4]:
# Resize an image (honouring its EXIF orientation) and stage it with its label file in the resized folders
def scale_image_label(input_image_path,
                      width=640,
                      height=640,
                      path_to_files=path_to_files,
                      path_to_resized_images=path_to_resized_images,
                      path_to_resized_labels=path_to_resized_labels
                      ):
    """
    Bring an image to width x height, rotating it according to its EXIF
    orientation tag, save it under the same file name in
    path_to_resized_images, and copy its label file to path_to_resized_labels.

    Images without EXIF data, or with an orientation other than 3, 6 or 8,
    are resized without rotation.

      Args:
        input_image_path (string): path to the source image
        width (int): width of the processed image
        height (int): height of the processed image
        path_to_files (string): project root; labels are read from its
            'all_labels_4_YOLO5/' subfolder
        path_to_resized_images (string): folder that receives the resized image
        path_to_resized_labels (string): folder that receives the label file

      Returns:
        None

      Raises:
        FileNotFoundError: if the image or its label file does not exist
    """
    # Counter-clockwise rotation (degrees) that undoes EXIF orientations 3, 6 and 8
    rotate_values = {3: 180, 6: 270, 8: 90}
    image_name = os.path.basename(input_image_path)

    with Image.open(input_image_path) as original_image:
        # getexif() is the public accessor and yields an empty mapping when the
        # file carries no EXIF data, so .get() is always safe here
        orientation = original_image.getexif().get(0x112)
        if orientation in rotate_values:
            # Rotate before resizing and let the canvas follow the rotation, so the
            # final resize always produces exactly width x height
            oriented_image = original_image.rotate(rotate_values[orientation], expand=True)
        else:
            oriented_image = original_image
        img = oriented_image.resize((width, height), Image.LANCZOS)
        img.save(os.path.join(path_to_resized_images, image_name))

    # NOTE(review): the label file is copied unchanged, which assumes the boxes were
    # annotated on the EXIF-oriented view of the image - TODO confirm
    label_name = os.path.splitext(image_name)[0] + '.txt'
    input_label_path = os.path.join(path_to_files, 'all_labels_4_YOLO5/', label_name)
    shutil.copy(input_label_path, path_to_resized_labels)

In [5]:
# Resize every source .jpg image and stage it together with its label file
source_image_paths = glob.glob(os.path.join(path_to_images, '*.jpg'))
for img_name in tqdm(source_image_paths):
    scale_image_label(img_name)

100%|██████████| 530/530 [04:20<00:00,  2.04it/s]


In [6]:
# Split the resized data into train / val / test (90% / 5% / 5%)
rnd_seed = 123
all_imgs_pathes = sorted(glob.glob(os.path.join(path_to_resized_images, '*.jpg')))

# Derive every label path from its image path instead of globbing the label folder
# separately: two independently sorted listings only line up by position while the
# folders hold exactly matching files, and any stray or missing label would silently
# pair images with the wrong annotations.
all_lbls_pathes = [
    os.path.join(path_to_resized_labels,
                 os.path.splitext(os.path.basename(img_path))[0] + '.txt')
    for img_path in all_imgs_pathes
]
missing_lbls = [lbl_path for lbl_path in all_lbls_pathes if not os.path.exists(lbl_path)]
if missing_lbls:
    raise FileNotFoundError(
        f'{len(missing_lbls)} resized image(s) have no label file, e.g. {missing_lbls[0]}'
    )

# The split is done on positions, which index both path lists above
indices = list(range(len(all_imgs_pathes)))

# First hold out 10% of the samples, then halve the hold-out into val and test
df_train_inds, df_val_test_inds = train_test_split(indices, test_size=0.1,
                                                   random_state=rnd_seed)
df_val_inds, df_test_inds = train_test_split(df_val_test_inds, test_size=0.5,
                                             random_state=rnd_seed)

In [7]:
# Report how many samples ended up in each split
n_train, n_val, n_test = len(df_train_inds), len(df_val_inds), len(df_test_inds)
print(f'Размеры выборок:\ntrain - {n_train}\nval - {n_val}\ntest - {n_test}')

Размеры выборок:
train - 477
val - 26
test - 27


In [8]:
# Build the folder layout for YOLOv5: dataset/{images,labels}/{train,valid,test}
def _recreate_dir(dir_path):
    """Remove dir_path with all of its contents if it exists, then create it empty."""
    if os.path.exists(dir_path):
        shutil.rmtree(dir_path)
    os.makedirs(dir_path)


dataset_root = os.path.join(path_to_files, 'dataset')

train_imgs_path = os.path.join(dataset_root, 'images', 'train')
train_lbls_path = os.path.join(dataset_root, 'labels', 'train')

valid_imgs_path = os.path.join(dataset_root, 'images', 'valid')
valid_lbls_path = os.path.join(dataset_root, 'labels', 'valid')

test_imgs_path = os.path.join(dataset_root, 'images', 'test')
test_lbls_path = os.path.join(dataset_root, 'labels', 'test')

# Only the six split folders are wiped; anything else under dataset/ is left alone
for split_dir in (train_imgs_path, train_lbls_path,
                  valid_imgs_path, valid_lbls_path,
                  test_imgs_path, test_lbls_path):
    _recreate_dir(split_dir)

In [9]:
# Move every (image, label) pair out of the resized folders into the folders of its split
split_destinations = (
    (df_train_inds, train_imgs_path, train_lbls_path),
    (df_val_inds, valid_imgs_path, valid_lbls_path),
    (df_test_inds, test_imgs_path, test_lbls_path),
)

for split_inds, imgs_dir, lbls_dir in split_destinations:
    for sample_ind in split_inds:
        shutil.move(all_imgs_pathes[sample_ind], imgs_dir)
        shutil.move(all_lbls_pathes[sample_ind], lbls_dir)

# Обучение модели

In [10]:
# Clone the YOLOv5 repository and install its dependencies.
# NOTE(review): the clone is not pinned to a tag or commit; the recorded run reports
# v7.0-66-g9650f16 - consider checking out that revision for reproducibility.
!git clone https://github.com/ultralytics/yolov5 
# Switch into the clone: the later cells call train.py and read ./runs relative to it.
%cd yolov5
# Install the packages listed in the repository's requirements.txt (-q: quiet output).
%pip install -qr requirements.txt

# Report the torch version and the name of GPU 0, or 'CPU' when CUDA is not available.
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")


Cloning into 'yolov5'...
remote: Enumerating objects: 14983, done.[K
remote: Counting objects: 100% (47/47), done.[K
remote: Compressing objects: 100% (39/39), done.[K
remote: Total 14983 (delta 19), reused 21 (delta 8), pack-reused 14936[K
Receiving objects: 100% (14983/14983), 13.97 MiB | 28.72 MiB/s, done.
Resolving deltas: 100% (10283/10283), done.
/content/yolov5
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.0/184.0 KB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 KB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m39.1 MB/s[0m eta [36m0:00:00[0m
[?25hSetup complete. Using torch 1.13.0+cu116 (Tesla T4)


In [11]:
# Train the model: yolov5m.yaml architecture starting from the yolov5m.pt weights,
# image size 640 (the size the images were resized to above), batch size 32, 300 epochs,
# on the dataset described by dataset/data.yaml on Google Drive.
# The training log reports --cache as cache=ram and --workers 2 as workers=2.
!python train.py --img 640 --cfg yolov5m.yaml --batch 32 --epochs 300 --data /content/gdrive/MyDrive/VIZIT/YOLO5/dataset/data.yaml --weights yolov5m.pt --cache --workers 2

[34m[1mtrain: [0mweights=yolov5m.pt, cfg=yolov5m.yaml, data=/content/gdrive/MyDrive/VIZIT/YOLO5/dataset/data.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=300, batch_size=32, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=2, project=runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-66-g9650f16 Python-3.8.16 torch-1.13.0+cu116 CUDA:0 (Tesla T4, 15110MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj

In [None]:
# Download the trained model weights to the local machine
from google.colab import files

# The training log shows name=exp with exist_ok=False, so a repeated training run
# writes to a new runs/train/exp* folder instead of overwriting runs/train/exp.
# Pick the most recently written best.pt so that a re-run never downloads stale weights.
best_weights_candidates = sorted(glob.glob('./runs/train/exp*/weights/best.pt'),
                                 key=os.path.getmtime)
if not best_weights_candidates:
    raise FileNotFoundError('No best.pt found under ./runs/train/exp*/weights - run the training cell first')
files.download(best_weights_candidates[-1])

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Новый раздел