Thanks to: https://www.kaggle.com/awsaf49/vinbigdata-cxr-ad-yolov5-14-class-train  
Adjust the hyperparameters from hyp.scratch.yaml.  
Thanks to [adrielcabral](https://www.kaggle.com/adrielcabral) for hyperparameters: 
https://www.kaggle.com/c/vinbigdata-chest-xray-abnormalities-detection/discussion/222707

# Setup

In [None]:
# Upgrade seaborn in the notebook environment; it is imported in the next cell.
!pip install --upgrade seaborn

In [None]:
# General imports.
import yaml
from glob import glob
import shutil, os
from IPython.display import Image, clear_output  # Display images.

import numpy as np, pandas as pd
import seaborn as sns

# Specific imports.
from os import listdir
from os.path import isfile  # Not used.

from tqdm.notebook import tqdm

import matplotlib.pyplot as plt

import torch
from sklearn.model_selection import GroupKFold

# Getting Lists of Image-Level Train and Val Paths to 512 x 512 Scans in PNG Format

In [None]:
# Fold number compared against train_df.fold further down: rows in this fold
# become the validation files, all other rows the training files.
fold = 0

In [None]:
# Load the image-level training annotations and preview the first rows.
image_level_csv = '../input/siim-covid19-detection/train_image_level.csv'
train_df = pd.read_csv(image_level_csv)
train_df.head()

In [None]:
from sklearn.model_selection import GroupKFold

# Assign every row to one of 5 folds, grouping by StudyInstanceUID so that
# rows from the same study always land in the same fold.
gkf = GroupKFold(n_splits=5)
train_df['fold'] = -1  # Placeholder; each row is overwritten in the loop below.

# The loop variable is deliberately NOT named `fold`: that name holds the
# validation fold selected earlier in the notebook, and reusing it here would
# leave it equal to 4 (the last split index) once the loop finishes.
# `val_idx` holds positional indices; `.loc` is correct here because train_df
# still has the default RangeIndex produced by read_csv.
for fold_idx, (_train_idx, val_idx) in enumerate(
        gkf.split(train_df, groups=train_df.StudyInstanceUID.tolist())):
    train_df.loc[val_idx, 'fold'] = fold_idx

In [None]:
# Preview the first rows, now including the `fold` column.
train_df.head()

In [None]:
# Derive each row's PNG path from its image id, then preview the result.
png_dir = '/kaggle/input/siimcovid19-512-img-png-600-study-png/image/'
train_df['image_path'] = png_dir + train_df['id'] + '.png'
train_df.head()

In [None]:
# Unique image paths per split: rows whose fold equals `fold` are validation,
# every other row is training.
in_val_fold = train_df['fold'] == fold
val_files = list(train_df.loc[in_val_fold, 'image_path'].unique())
train_files = list(train_df.loc[~in_val_fold, 'image_path'].unique())

# (n_train, n_val, n_total)
len(train_files), len(val_files), len(train_files) + len(val_files)

In [None]:
# Peek at the first five validation image paths.
val_files[:5]

# Copying Files to Working Directory in YOLOv5 Format

In [None]:
# Lay the dataset out as siim-cov19/{labels,images}/{train,val}, then copy each
# image together with the label file that shares its image id.
dataset_root = '/kaggle/working/siim-cov19'
label_dir = '/kaggle/input/siim-covid-19-yolo-txt'

for kind in ('labels', 'images'):
    for split in ('train', 'val'):
        os.makedirs(f'{dataset_root}/{kind}/{split}', exist_ok=True)

for split, split_files in (('train', train_files), ('val', val_files)):
    for file in tqdm(split_files):
        shutil.copy(file, f'{dataset_root}/images/{split}')
        # Image id = file name up to its first dot (e.g. "000a312787f2").
        filename = file.split('/')[-1].split('.')[0]
        shutil.copy(os.path.join(label_dir, filename + '.txt'), f'{dataset_root}/labels/{split}')

# Get Class Name

In [None]:
# Class names written to the `names` entry of the dataset yaml (a single class).
# NOTE(review): unclear why the name carries a "0. " prefix — confirm whether
# it is intentional before renaming.
classes = ['0. opacity']

# YOLOv5

In [None]:
# Write the files that describe the dataset to YOLOv5: train.txt / val.txt
# (one image path per line) and a data yaml holding those two paths, the
# number of classes and the class names.
cwd = '/kaggle/working/'

for split in ('train', 'val'):
    with open(os.path.join(cwd, f'{split}.txt'), 'w') as f:
        for path in glob(f'/kaggle/working/siim-cov19/images/{split}/*'):
            f.write(path + '\n')

data = dict(
    train = os.path.join(cwd, 'train.txt'),
    val   = os.path.join(cwd, 'val.txt'),
    nc    = len(classes),  # Kept in sync with `names` instead of hard-coding 1.
    names = classes,
)

yaml_path = os.path.join(cwd, 'siim-cov19.yaml')
with open(yaml_path, 'w') as out_file:
    yaml.dump(data, out_file, default_flow_style=False)

# Echo the yaml back as a sanity check; `with` makes sure the handle is closed.
with open(yaml_path, 'r') as in_file:
    print('\nyaml:')
    print(in_file.read())

In [None]:
# ref: https://www.kaggle.com/ultralytics/yolov5

# An alternative to copying the YOLOv5 from the "yolov5-official-v31-dataset" directory:
# !git clone https://github.com/ultralytics/yolov5  # clone repo
# %cd yolov5
# %pip install -qr requirements.txt # install dependencies

# copytree() raises if the destination already exists, so skip the copy when
# this cell is re-run and the repo is already in place.
yolov5_dir = '/kaggle/working/yolov5'
if not os.path.isdir(yolov5_dir):
    shutil.copytree('/kaggle/input/yolov5-official-v31-dataset/yolov5', yolov5_dir)

# train.py, detect.py and the relative runs/... paths used later are resolved
# against this directory.
os.chdir(yolov5_dir)

clear_output()
device = torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'
print('Setup complete. Using torch %s %s' % (torch.__version__, device))

## Pretrained Checkpoints:

| Model | AP<sup>val</sup> | AP<sup>test</sup> | AP<sub>50</sub> | Speed<sub>GPU</sub> | FPS<sub>GPU</sub> || params | FLOPS |
|---------- |------ |------ |------ | -------- | ------| ------ |------  |  :------: |
| [YOLOv5s](https://github.com/ultralytics/yolov5/releases/tag/v3.0)    | 37.0     | 37.0     | 56.2     | **2.4ms** | **416** || 7.5M   | 13.2B
| [YOLOv5m](https://github.com/ultralytics/yolov5/releases/tag/v3.0)    | 44.3     | 44.3     | 63.2     | 3.4ms     | 294     || 21.8M  | 39.4B
| [YOLOv5l](https://github.com/ultralytics/yolov5/releases/tag/v3.0)    | 47.7     | 47.7     | 66.5     | 4.4ms     | 227     || 47.8M  | 88.1B
| [YOLOv5x](https://github.com/ultralytics/yolov5/releases/tag/v3.0)    | **49.2** | **49.2** | **67.7** | 6.9ms     | 145     || 89.0M  | 166.4B
| | | | | | || |
| [YOLOv5x](https://github.com/ultralytics/yolov5/releases/tag/v3.0) + TTA|**50.8**| **50.8** | **68.9** | 25.5ms    | 39      || 89.0M  | 354.3B
| | | | | | || |
| [YOLOv3-SPP](https://github.com/ultralytics/yolov5/releases/tag/v3.0) | 45.6     | 45.5     | 65.2     | 4.5ms     | 222     || 63.0M  | 118.0B

# Selecting Models
In this notebook I'm using `v5x`. To select a different model, replace `--weights yolov5x.pt` in the training command below with one of the following option sets:
* `v5s` : `--cfg models/yolov5s.yaml --weights yolov5s.pt`
* `v5m` : `--cfg models/yolov5m.yaml --weights yolov5m.pt`
* `v5l` : `--cfg models/yolov5l.yaml --weights yolov5l.pt`
* `v5x` : `--cfg models/yolov5x.yaml --weights yolov5x.pt`

# Train

In [None]:
# Train YOLOv5 starting from the yolov5x.pt weights on the dataset described by
# /kaggle/working/siim-cov19.yaml: --img 512, --batch 24, --epochs 35, with the
# hyperparameter file passed via --hyp and --cache enabled.
# WANDB_MODE="dryrun" is set for this command only (presumably to keep
# Weights & Biases from syncing the run online — confirm).
# Reference invocation on coco128, kept for comparison:
# !WANDB_MODE="dryrun" python train.py --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt --nosave --cache 
!WANDB_MODE="dryrun" python train.py --img 512 --batch 24 --epochs 35 --data /kaggle/working/siim-cov19.yaml --hyp /kaggle/input/yolov5-1-yaml/hyp.scratch.yaml --weights yolov5x.pt --cache

# Class Distribution

In [None]:
# Display the label correlogram image from the training run, without axes.
correlogram = plt.imread('runs/train/exp/labels_correlogram.jpg')
plt.figure(figsize=(20, 20))
plt.axis('off')
plt.imshow(correlogram);

In [None]:
# Display the labels summary image from the training run, without axes.
labels_plot = plt.imread('runs/train/exp/labels.jpg')
plt.figure(figsize=(20, 20))
plt.axis('off')
plt.imshow(labels_plot);

# Batch Image

In [None]:
import matplotlib.pyplot as plt

# Show the first three training-batch preview images from the run directory,
# one figure each. Because the cell ends in a loop rather than a bare
# expression, no AxesImage repr is echoed below the figures.
for batch_idx in range(3):
    plt.figure(figsize=(15, 15))
    plt.imshow(plt.imread(f'runs/train/exp/train_batch{batch_idx}.jpg'))

# GT Vs Pred

In [None]:
# 3x2 grid of the first three test_batch images: the `_labels` image in the
# left column and the matching `_pred` image in the right column. Each panel
# is titled with its file path and has its ticks removed.
fig, ax = plt.subplots(3, 2, figsize=(10, 15), constrained_layout=True)
for row, axes_row in enumerate(ax):
    for panel, suffix in zip(axes_row, ('labels', 'pred')):
        image_file = f'runs/train/exp/test_batch{row}_{suffix}.jpg'
        panel.imshow(plt.imread(image_file))
        panel.set_xticks([])
        panel.set_yticks([])
        panel.set_title(image_file, fontsize=12)

# (Loss, mAP) vs Epoch

In [None]:
# Display the results.png summary image from the training run, without axes.
results_plot = plt.imread('runs/train/exp/results.png')
plt.figure(figsize=(30, 15))
plt.axis('off')
plt.imshow(results_plot);

# Confusion Matrix

In [None]:
# Display the confusion-matrix image from the training run, without axes.
confusion_plot = plt.imread('runs/train/exp/confusion_matrix.png')
plt.figure(figsize=(30, 15))
plt.axis('off')
plt.imshow(confusion_plot);

# Inference

In [None]:
# Run detect.py with the best.pt weights of the training run on the validation
# images, passing --img 512, --conf 0.1, --iou 0.5 and --exist-ok.
# The trailing backslashes continue one single command across lines, so no
# comment can be placed between them.
!python detect.py --weights 'runs/train/exp/weights/best.pt'\
--img 512\
--conf 0.1\
--iou 0.5\
--source /kaggle/working/siim-cov19/images/val\
--exist-ok

# Inference Plot

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import random
import cv2
from glob import glob
from tqdm import tqdm

# Plot three grids of randomly chosen files from the detection output directory.
row = 4
col = 4

files = glob('runs/detect/exp/*')
# random.sample() raises ValueError when asked for more items than exist, so
# cap the sample at the number of available files.
n_show = min(len(files), row * col)

if not files:
    print('No files found in runs/detect/exp; nothing to plot.')
else:
    for _ in range(3):
        grid_files = random.sample(files, n_show)
        images = []
        for image_path in tqdm(grid_files):
            bgr = cv2.imread(image_path)
            if bgr is None:
                # cv2.imread returns None for anything it cannot decode.
                continue
            # OpenCV loads BGR; matplotlib expects RGB.
            images.append(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))

        fig = plt.figure(figsize=(col * 5, row * 5))
        grid = ImageGrid(fig, 111,  # similar to subplot(111)
                         nrows_ncols=(row, col),  # (rows, columns) of the axes grid
                         axes_pad=0.05,  # pad between axes in inch.
                         )

        # Strip ticks from every cell so unfilled cells stay clean too.
        for ax in grid:
            ax.set_xticks([])
            ax.set_yticks([])

        # Iterating over the grid returns the Axes.
        for ax, im in zip(grid, images):
            ax.imshow(im)
        plt.show()

In [None]:
# Clean up: remove the copied dataset and the detection outputs, then delete
# every PNG/JPG found (recursively) under the training run directory.
for stale_dir in ('/kaggle/working/siim-cov19', 'runs/detect'):
    shutil.rmtree(stale_dir)

for pattern in ('runs/train/exp/**/*.png', 'runs/train/exp/**/*.jpg'):
    for file in glob(pattern, recursive=True):
        os.remove(file)

In [None]:
import os
from zipfile import ZipFile
import shutil

# print(os.listdir('/kaggle/input/'))
# print(os.path.getsize('/kaggle/input/siim-covid19-detection'))

# Bundle selected /kaggle/input datasets: one tar per dataset, then all tars
# collected into kaggle_input.zip.
directories = ['siim-covid19-efnb7-train-fold0-5-2class', 'kerasapplications', 'yolov5-official-v31-dataset', 'pydicom-conda-helper', 'siim-covid19-efnb7-train-study', 'efficientnet-keras-source-code', 'siim-cov19-yolov5-train']

# Build the tars under /kaggle/working explicitly. A bare base name is resolved
# against the current working directory, which an earlier cell changes to
# /kaggle/working/yolov5, so the zip step would look in the wrong place.
output_dir = '/kaggle/working/'
tar_paths = [
    # make_archive returns the path of the archive it created.
    shutil.make_archive(output_dir + directory, 'tar', '/kaggle/input/' + directory)
    for directory in directories
]

# The context manager closes (and thereby finalises) the zip even if a write fails.
with ZipFile('kaggle_input.zip', 'w') as zipObj:
    for tar_path in tar_paths:
        zipObj.write(tar_path)