# Fine-Tuning Pipeline

In [1]:
from pathlib import Path
import json
import numpy as np
from yolov7.utils.data_preparation import copy_split, create_data_yaml
import shutil
import os

## Split Training and Test Data

In [2]:
# Build three reproducible train/test splits (10% test each, pairwise-disjoint
# test sets) over the sorted image/label pairs.

# Paths to image and label data (for loading the KITTI dataset within Kaggle)
base_dir = Path.cwd().parent.parent / "input"
print(f"Base Directory : {base_dir}")
img_path = base_dir / "images" / "data_object_image_2" / "training" / "image_2"
label_path = base_dir / "labels"

# Loading the classes present in the dataset (class name -> integer id)
with open(base_dir / "classes.json", "r") as f:
    classes = json.load(f)
print(f"Classes : {classes}")

# Sorting images and labels so the i-th image lines up with the i-th label
ims = sorted(img_path.glob("*"))
labels = sorted(label_path.glob("*"))

# zip() silently drops the tail of the longer list and pairs purely by position,
# so check the alignment explicitly instead of trusting the sort order.
assert len(ims) == len(labels), (
    f"Found {len(ims)} images in {img_path} but {len(labels)} labels in {label_path}"
)
misaligned = [(im.name, lb.name) for im, lb in zip(ims, labels) if im.stem != lb.stem]
assert not misaligned, (
    f"{len(misaligned)} image/label pairs do not share a file stem, e.g. {misaligned[:1]}"
)
pairs = list(zip(ims, labels))

# Dataset shuffle for randomized train/test split
seed = 42  # For reproducibility
random_state = np.random.RandomState(seed)
random_state.shuffle(pairs)

# Calculating the test size (10%)
n_splits = 3
test_size = int(0.1 * len(pairs))
# A wrong base_dir yields zero pairs; without this check every later assert
# would pass trivially on empty splits.
assert test_size > 0, f"Not enough image/label pairs ({len(pairs)}) to build a test set"
splits = {}

# Creating the distinct splits: split k uses the k-th consecutive window of
# the shuffled pairs as its test set and everything else for training.
for i in range(n_splits):
    start, stop = i * test_size, (i + 1) * test_size
    # Select the test set for this split
    test_set = pairs[start:stop]
    # Select the training set (remaining data)
    train_set = pairs[:start] + pairs[stop:]
    splits[f"split{i + 1}"] = {"train": train_set, "test": test_set}

# Verifying sizes of each split (local names so `test_size` is not clobbered)
for key, value in splits.items():
    n_train = len(value["train"])
    n_test = len(value["test"])
    print(f"{key} - Train Size: {n_train}, Test Size: {n_test}")
    assert n_train + n_test == len(
        pairs
    ), "Train and test sizes do not add up to total pairs"

# Verifying distinct test sets
for i in range(n_splits):
    for j in range(i + 1, n_splits):
        assert not set(splits[f"split{i + 1}"]["test"]).intersection(
            set(splits[f"split{j + 1}"]["test"])
        ), f"Test sets for split{i + 1} and split{j + 1} overlap"


Base Directory : D:\dev-workspace\IRO\input
Classes : {'Car': 0, 'Pedestrian': 1, 'Van': 2, 'Cyclist': 3, 'Truck': 4, 'Misc': 5, 'Tram': 6, 'Person_sitting': 7}
split1 - Train Size: 6733, Test Size: 748
split2 - Train Size: 6733, Test Size: 748
split3 - Train Size: 6733, Test Size: 748


## Training

### Data Preparation

In [3]:
# Absolute locations of the folders the selected train / test split is copied into
train_path, test_path = (Path(rel).resolve() for rel in ('../train', '../test'))

# Emit ./data/kitti.yaml, the file later handed to the training script via --data
create_data_yaml(train_path, test_path, classes, output_path='./data/kitti.yaml')

### Training One Split

In [4]:
# Start from a clean slate: drop leftover cache files first ...
for cache_file in ("../train.cache", "../test.cache"):
    if os.path.isfile(cache_file):
        os.remove(cache_file)

# ... then delete whatever a previous run copied into the train / test folders
for split_dir in (train_path, test_path):
    if split_dir.exists():
        shutil.rmtree(split_dir)

In [None]:
# Training portion of the active split; swap in one of the alternatives to use another split
train_split = splits["split1"]["train"]
# train_split = splits["split2"]["train"]
# train_split = splits["split3"]["train"]

# Copy the selected training files into the training folder
copy_split(train_split, train_path)

In [25]:
# Test portion of the active split. Keep the split number identical to the one
# chosen in the training cell: each split's training set contains the other
# splits' test images, so mixing split numbers evaluates on training data.
test_split = splits['split1']['test']
# test_split = splits['split2']['test']
# test_split = splits['split3']['test']

# Copy test dataset to a designated folder
copy_split(test_split, test_path)

100%|██████████| 748/748 [00:09<00:00, 76.32it/s]


In [28]:
# Set WANDB_DISABLED in the kernel environment; the `!python` child process below inherits it
%env WANDB_DISABLED=True
# Run training script for the active split (yolov7-tiny config and weights, 2 epochs, batch size 8);
# `--name` is the run name and should match the split copied into the train / test folders
!python ../train.py --workers 4 --device 0 --batch-size 8 --img 640 640 --cfg cfg/training/yolov7-tiny.yaml --epochs 2 --data ./data/kitti.yaml --weights ../yolov7-tiny.pt --name split_1 
# Alternatives for the other splits: same command as above, only `--name` differs
# !python ../train.py --workers 4 --device 0 --batch-size 8 --img 640 640 --cfg cfg/training/yolov7-tiny.yaml --epochs 2 --data ./data/kitti.yaml --weights ../yolov7-tiny.pt --name split_2 
# !python ../train.py --workers 4 --device 0 --batch-size 8 --img 640 640 --cfg cfg/training/yolov7-tiny.yaml --epochs 2 --data ./data/kitti.yaml --weights ../yolov7-tiny.pt --name split_3 

env: WANDB_DISABLED=True
[34m[1mwandb: [0mInstall Weights & Biases for YOLOR logging with 'pip install wandb' (recommended)

YOLOR  d7c15b5 torch 2.4.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4095.5MB)

Namespace(weights='yolov7-tiny.pt', cfg='cfg/training/yolov7-tiny.yaml', data='./data/kitti.yaml', hyp='data/hyp.scratch.p5.yaml', epochs=2, batch_size=8, img_size=[640, 640], rect=False, resume=False, nosave=False, notest=False, noautoanchor=False, evolve=False, bucket='', cache_images=False, image_weights=False, device='0', multi_scale=False, single_cls=False, adam=False, sync_bn=False, local_rank=-1, workers=4, project='runs/train', entity=None, name='split_1', exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, upload_dataset=False, bbox_interval=-1, save_period=-1, artifact_alias='latest', freeze=[0], v5_metric=False, world_size=1, global_rank=-1, save_dir='runs\\train\\split_17', total_batch_size=8)
[34m[1mtensorboard: [0mStart with 'tensorboard --logdir runs/train', view at http://localhost:6006/
[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.1, momentum=0.937, weight_decay=0.



[34m[1mautoanchor: [0mAnalyzing anchors... anchors/target = 4.81, Best Possible Recall (BPR) = 0.9993
                 all         748        4223       0.355       0.197       0.174      0.0714
                 all         748        4223        0.52       0.307       0.314       0.156
                 Car         748        3037       0.606       0.828       0.818       0.439
          Pedestrian         748         441       0.476       0.542        0.48       0.196
                 Van         748         295       0.433       0.197       0.248       0.137
             Cyclist         748         174        0.33       0.264       0.239      0.0975
               Truck         748         104        0.43       0.327       0.347        0.21
                Misc         748          91       0.247       0.011      0.0549      0.0247
                Tram         748          56       0.634       0.286       0.323        0.14
      Person_sitting         748          25           1