# Preparation

## Mount Drive

In [1]:
# Mount Google Drive at /content/drive so the dataset folders used below are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Go to working dir

In [2]:
%cd /content/drive/MyDrive/Binus/MTI/DL/PTL_component/furnas_dataset_v0.07

/content/drive/MyDrive/Binus/MTI/DL/PTL_component/furnas_dataset_v0.07


In [3]:
!ls

all  data  imgs  test  test.csv  train	train.csv  utils


# Resize dataset image and label

## Resize Image

In [None]:
from PIL import Image
import os


def resize_images(image_dir, output_dir, target_resolution):
  """Resize every PNG/JPEG image in ``image_dir`` and save it under ``output_dir``.

  Args:
    image_dir: Directory scanned (non-recursively) for ``.png``, ``.jpg`` and
      ``.jpeg`` files; the extension check is case-insensitive.
    output_dir: Directory that receives the resized copies under their
      original file names. It is created if it does not exist yet.
    target_resolution: Size tuple handed unchanged to ``Image.resize``.

  Failures are reported per file and do not stop the loop, so one unreadable
  image cannot abort a long batch.
  """
  # Create the destination up front so a missing folder does not turn every
  # save into a per-file error.
  os.makedirs(output_dir, exist_ok=True)

  # Iterate through files in the directory
  for filename in os.listdir(image_dir):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
      continue

    img_path = os.path.join(image_dir, filename)
    output_path = os.path.join(output_dir, filename)
    try:
      # Image.open keeps the underlying file open; the context manager closes
      # it after each image so handles do not pile up over a large dataset.
      with Image.open(img_path) as img:
        img_resized = img.resize(target_resolution)
        img_resized.save(output_path)
      print(f"Resized and saved: {filename}")
    except Exception as e:
      # Deliberate best-effort: report the file and continue with the next one.
      print(f"Error processing {filename}: {e}")


In [None]:
# Source folders whose images are merged into one resized pool
image_dirs = ['train/images', 'test/images']
output_dir = 'all/images'

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Desired resolution, passed unchanged to resize_images
target_resolution = (640, 640)

# Resize each source folder into the shared output directory. A file name that
# occurs in both folders is written twice, so the folder processed last wins.
for image_dir in image_dirs:
  print(f"Resizing images in directory: {image_dir}")
  resize_images(image_dir, output_dir, target_resolution)
  print(f"Resizing completed for directory: {image_dir}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Resized and saved: LTRVDCDD1_T0001.ts-5217#011.jpg
Resized and saved: LTFIGIVP2_T0001.ts-3524#034.jpg
Resized and saved: LTSMAGUP1_UHSMA.ts-17112#055.jpg
Resized and saved: LTFIGIVP3_LTFIGIVP4_T0118.ts-14852#042.jpg
Resized and saved: LTFIGIVP2_T0294.ts-14622#061.jpg
Resized and saved: LTFIGIBN1_LTFIGIBN2_T0714.ts-11858#056.jpg
Resized and saved: LTFIGIBN3_LTFIGIBN4_T0402#00:25:58.jpg
Resized and saved: LTFIGIVP1_P1#00:19:34.jpg
Resized and saved: LTCMSRNS2_T0001#00:28:33.jpg
Resized and saved: LTGUPMIC_T0369_T0607.ts-2#024.jpg
Resized and saved: LTIABIVP1_T0170.ts-3135#001.jpg
Resized and saved: LTFIGIVP1_T0649.ts-3313#032.jpg
Resized and saved: LTBDTSBA2_T0049#00:20:07.jpg
Resized and saved: LTFIGIVP3_LTFIGIVP4_T0200.ts-14913#017.jpg
Resized and saved: LTFIGIVP2_T0294.ts-14609#017.jpg
Resized and saved: LTBSLPRS.ts-656#058.jpg
Resized and saved: LTFIGIBN1_LTFIGIBN2_T1606_SEIBN#00:07:08.jpg
Resized and saved: LTFIGIBN3_L

## Resize Label

In [None]:
import shutil

def resize_labels(label_dir, output_dir, original_resolution, target_resolution):
    """Write YOLO label files for the resized images into ``output_dir``.

    YOLO rows are ``class x_center y_center width height`` where the four box
    values are fractions of the image width/height. Resizing an image, even
    with a different aspect ratio, leaves those fractions unchanged, so the
    box values are only clamped to ``[0, 1]`` and re-formatted with six
    decimals. Multiplying them by ``target / original`` (as this function used
    to do) shrinks every box and moves it towards the top-left corner.

    Args:
        label_dir: Directory scanned (non-recursively) for ``.txt`` files.
        output_dir: Existing directory that receives the converted files under
            the same names.
        original_resolution: Resolution of the source images. Accepted for
            backward compatibility; normalized coordinates do not need it.
        target_resolution: Resolution of the resized images. Accepted for
            backward compatibility; normalized coordinates do not need it.

    Rows with fewer than five fields are copied through unchanged. Fields after
    the fifth are dropped. Errors are reported per file and do not stop the
    batch.
    """
    # Iterate through files in the directory
    for filename in os.listdir(label_dir):
        if not filename.lower().endswith('.txt'):  # Assuming labels are in .txt format
            continue

        label_path = os.path.join(label_dir, filename)
        output_path = os.path.join(output_dir, filename)

        try:
            # Convert the whole file before opening the output: a malformed row
            # then cannot leave a truncated label file behind, and running with
            # label_dir == output_dir no longer empties the input first.
            converted = []
            with open(label_path, 'r') as f_in:
                for line in f_in:
                    parts = line.strip().split()
                    if len(parts) >= 5:  # YOLO format: class x_center y_center width height
                        class_id = parts[0]
                        # Keep the normalized values; only guard the valid range.
                        box = [max(0.0, min(1.0, float(value))) for value in parts[1:5]]
                        formatted = " ".join(f"{value:.6f}" for value in box)
                        converted.append(f"{class_id} {formatted}\n")
                    else:
                        # If the line doesn't match the expected format, just copy it
                        converted.append(line)

            with open(output_path, 'w') as f_out:
                f_out.writelines(converted)

            print(f"Resized and saved: {filename}")

        except Exception as e:
            # Deliberate best-effort: report the file and continue with the next one.
            print(f"Error processing {filename}: {e}")


In [None]:
# Directories containing the original labels; both are merged into one folder
label_dirs = ['train/labels', 'test/labels']
output_label_dir = 'all/labels'

# Create output directory if it doesn't exist
os.makedirs(output_label_dir, exist_ok=True)

# Original resolution of the images (You need to know this or extract it)
# NOTE(review): assumes every source image is 1280x720 — confirm against the
# actual files before relying on this value.
original_resolution = (1280, 720) # Replace with the actual original resolution

# Desired resolution (same as the resized images)
target_resolution = (640, 640)

# Convert the labels of each source folder into the shared output directory
for label_dir in label_dirs:
    print(f"Resizing labels in directory: {label_dir}")
    resize_labels(label_dir, output_label_dir, original_resolution, target_resolution)
    print(f"Resizing completed for directory: {label_dir}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Resized and saved: LTBSLPRS.ts-656#017.txt
Resized and saved: LTFIGIBN1_LTFIGIBN2_T0674.ts-13225#054.txt
Resized and saved: LTIBNTPR1_LTIBNTPR2_T0112.ts-1483#029.txt
Resized and saved: LTFIGIBN1_LTFIGIBN2_T1587.ts-3937#033.txt
Resized and saved: LTFIGIBN1_LTFIGIBN2_T0674.ts-13223#051.txt
Resized and saved: LTIABIVP1_T0040#00:03:51.txt
Resized and saved: LTADRVDP.ts-3098#025.txt
Resized and saved: LTGUPMIC_T0369_T0607.ts-181#044.txt
Resized and saved: LTIBNTPR1_LTIBNTPR2_T0112.ts-1483#007.txt
Resized and saved: LTIABIVP1_T0051.ts-12260#010.txt
Resized and saved: LTFIGIVP3_T0556.ts-13418#036.txt
Resized and saved: LTIABIVP2_T0129.ts-3168#037.txt
Resized and saved: LTFIGIVP3_T0556.ts-13418#015.txt
Resized and saved: LTRVDBPX1_T0429_LTRVDBPX2_T0431.ts-5341#015.txt
Resized and saved: LTFIGIBN1_LTFIGIBN2_T0674.ts-13223#008.txt
Resized and saved: LTFIGIBN3_LTFIGIBN4_T0834.ts-12142#063.txt
Resized and saved: LTFIGIBN3_LTFIGIBN4_T

In [None]:
# import shutil

# def copy_labels(source_dir, target_dir):
#   # Copy files from source_dir to target_dir
#   for filename in os.listdir(source_dir):
#       source_path = os.path.join(source_dir, filename)
#       target_path = os.path.join(target_dir, filename)
#       if os.path.isfile(source_path):
#           shutil.copy2(source_path, target_path)
#           print(f"Copied: {filename}")

In [None]:
# source_dirs = ['train/labels', 'test/labels']
# target_dir = 'all/labels'


# # Create target directory if it doesn't exist
# os.makedirs(target_dir, exist_ok=True)

# # copy labels
# for source_dir in source_dirs:
#   print(f"Copy labels in directory: {image_dir}")
#   copy_labels(source_dir, target_dir)
#   print(f"Copy completed for directory: {image_dir}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Copied: LTBSLPRS.ts-656#017.txt
Copied: LTFIGIBN1_LTFIGIBN2_T0674.ts-13225#054.txt
Copied: LTIBNTPR1_LTIBNTPR2_T0112.ts-1483#029.txt
Copied: LTFIGIBN1_LTFIGIBN2_T1587.ts-3937#033.txt
Copied: LTFIGIBN1_LTFIGIBN2_T0674.ts-13223#051.txt
Copied: LTIABIVP1_T0040#00:03:51.txt
Copied: LTADRVDP.ts-3098#025.txt
Copied: LTGUPMIC_T0369_T0607.ts-181#044.txt
Copied: LTIBNTPR1_LTIBNTPR2_T0112.ts-1483#007.txt
Copied: LTIABIVP1_T0051.ts-12260#010.txt
Copied: LTFIGIVP3_T0556.ts-13418#036.txt
Copied: LTIABIVP2_T0129.ts-3168#037.txt
Copied: LTFIGIVP3_T0556.ts-13418#015.txt
Copied: LTRVDBPX1_T0429_LTRVDBPX2_T0431.ts-5341#015.txt
Copied: LTFIGIBN1_LTFIGIBN2_T0674.ts-13223#008.txt
Copied: LTFIGIBN3_LTFIGIBN4_T0834.ts-12142#063.txt
Copied: LTFIGIBN3_LTFIGIBN4_T0402#00:32:10.txt
Copied: LTFIGIVP3_LTFIGIVP4_T0118.ts-14858#069.txt
Copied: LTFIGIBN1_LTFIGIBN2_T1606_SEIBN#00:07:02.txt
Copied: LTRVDROD_T0575_LTCTMPQE_T0452.ts-5294#041.txt
Copied: LTR

# Split Dataset

## Make directory

In [4]:
%cd /content/drive/MyDrive/Binus/MTI/DL/PTL_component/furnas_dataset_v0.07

/content/drive/MyDrive/Binus/MTI/DL/PTL_component/furnas_dataset_v0.07


## Collect Information

In [5]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Root of the merged dataset that is split below
data_dir = 'all' # Assuming your images are in all/images and labels in all/labels

# Output directories for the three splits, relative to the working directory
train_dir = 'split_dataset/train'
val_dir = 'split_dataset/val'
test_dir = 'split_dataset/test'

# Create an images/ and a labels/ subfolder per split.
# exist_ok=True makes this cell safe to re-run.
os.makedirs(os.path.join(train_dir, 'images'), exist_ok=True)
os.makedirs(os.path.join(train_dir, 'labels'), exist_ok=True)
os.makedirs(os.path.join(val_dir, 'images'), exist_ok=True)
os.makedirs(os.path.join(val_dir, 'labels'), exist_ok=True)
os.makedirs(os.path.join(test_dir, 'images'), exist_ok=True)
os.makedirs(os.path.join(test_dir, 'labels'), exist_ok=True)

In [6]:
# Get list of image files.
# os.listdir returns entries in arbitrary order, so the names are sorted: the
# seeded train/val/test split further down only reproduces the same partition
# when the input order is stable across runs and machines.
image_files = sorted(
    f for f in os.listdir(os.path.join(data_dir, 'images'))
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

In [7]:
# Pair every image with the label file expected to describe it
data_pairs = []
for img_file in image_files:
    # Label files are assumed to share the image's base name with a .txt extension
    base_name = os.path.splitext(img_file)[0]
    label_file = base_name + '.txt'
    data_pairs.append((img_file, label_file))

In [8]:
# Function to get class distribution from a label file
def get_classes_from_label(label_filepath):
    """Return the class IDs found in one label file, in file order.

    The class ID is taken to be the first whitespace-separated field of each
    non-blank line and is converted with ``int``. A path that does not exist
    yields an empty list.
    """
    if not os.path.exists(label_filepath):
        return []

    with open(label_filepath, 'r') as label_handle:
        tokenized_rows = (row.strip().split() for row in label_handle)
        # Blank lines split into an empty list and are skipped.
        return [int(tokens[0]) for tokens in tokenized_rows if tokens]

In [9]:
# Collect every class ID from every label file (one entry per annotation row).
# NOTE(review): all_classes is only read by the commented-out, non-stratified
# fallback cell further down; the active split cells do not use it.
all_classes = []
for _, label_file in data_pairs:
    label_filepath = os.path.join(data_dir, 'labels', label_file)
    all_classes.extend(get_classes_from_label(label_filepath))

KeyboardInterrupt: 

In [10]:
# Fraction of the whole dataset held out for testing
test_size = 0.15
# Validation is carved out of the remaining 85 %, so 0.15 / 0.85 makes it
# 15 % of the whole dataset as well (roughly a 70/15/15 split overall).
val_size = 0.15/(1-test_size)

In [11]:
test_size, val_size

(0.15, 0.17647058823529413)

## Split Dataset

In [13]:
# Build one stratification key per (image, label) pair.
# train_test_split needs a single label per sample, so the class ID of the
# first object in each label file is used. For images that contain several
# classes this does not balance every class perfectly, but it is a common
# starting point; a more robust scheme would encode every class present in a
# file (for example as dummy variables).
stratify_labels = []
for _, label_file in data_pairs:
    label_filepath = os.path.join(data_dir, 'labels', label_file)
    classes_in_file = get_classes_from_label(label_filepath)
    # -1 marks files without any annotation; it must not collide with a real class ID
    stratify_labels.append(classes_in_file[0] if classes_in_file else -1)

In [14]:
# Convert to numpy arrays so both can be indexed with the index arrays
# produced by train_test_split in the next cell
import numpy as np
stratify_labels = np.array(stratify_labels)
# NOTE: this rebinds data_pairs from a list of tuples to an array of strings
data_pairs = np.array(data_pairs) # Convert data_pairs to numpy array as well

In [15]:
# Split data_pairs into training and testing with stratification.
# The split is done on row indices so the same indices can select both the
# pairs and their stratification labels.
train_val_pairs_indices, test_pairs_indices = train_test_split(
    np.arange(len(data_pairs)),
    test_size=test_size,
    random_state=42,
    stratify=stratify_labels
)
train_val_pairs = data_pairs[train_val_pairs_indices].tolist()
test_pairs = data_pairs[test_pairs_indices].tolist()
# Labels of the remaining samples, in the same order as train_val_pairs
train_val_stratify_labels = stratify_labels[train_val_pairs_indices]


# Split training+validation into training and validation with stratification.
# These indices are positions within train_val_pairs, not within data_pairs.
train_pairs_indices, val_pairs_indices = train_test_split(
    np.arange(len(train_val_pairs)),
    test_size=val_size,
    random_state=42,
    stratify=train_val_stratify_labels
)
train_pairs = np.array(train_val_pairs)[train_pairs_indices].tolist()
val_pairs = np.array(train_val_pairs)[val_pairs_indices].tolist()

In [12]:
# # If there are no labels or no classes, proceed without stratification
# if not all_classes:
#     print("Warning: No label information found for stratification. Splitting without stratification.")
#     # Split data_pairs into training and testing
#     train_val_pairs, test_pairs = train_test_split(data_pairs, test_size=test_size, random_state=42)
#     # Split training+validation into training and validation
#     train_pairs, val_pairs = train_test_split(train_val_pairs, test_size=val_size, random_state=42)
# else:
#     # Create a list of labels corresponding to the data pairs for stratification
#     # We need to assign a "label" to each data pair for stratification.
#     # A simple approach is to use the class ID of the first object in the label file.
#     # If a file has multiple classes, this might not be perfect for perfect balance across all classes,
#     # but it's a common starting point for stratification with multi-object images.
#     # A more robust approach might involve creating dummy variables for each class present in a file.
#     stratify_labels = []
#     for _, label_file in data_pairs:
#         label_filepath = os.path.join(data_dir, 'labels', label_file)
#         classes_in_file = get_classes_from_label(label_filepath)
#         if classes_in_file:
#             # Use the first class ID in the file for stratification
#             stratify_labels.append(classes_in_file[0])
#         else:
#             # Assign a placeholder for files with no labels (if any)
#             stratify_labels.append(-1) # Use a value that doesn't conflict with your actual class IDs

#     # Convert to numpy array for train_test_split
#     import numpy as np
#     stratify_labels = np.array(stratify_labels)
#     data_pairs = np.array(data_pairs) # Convert data_pairs to numpy array as well

#     # Split data_pairs into training and testing with stratification
#     train_val_pairs_indices, test_pairs_indices = train_test_split(
#         np.arange(len(data_pairs)),
#         test_size=test_size,
#         random_state=42,
#         stratify=stratify_labels
#     )
#     train_val_pairs = data_pairs[train_val_pairs_indices].tolist()
#     test_pairs = data_pairs[test_pairs_indices].tolist()
#     train_val_stratify_labels = stratify_labels[train_val_pairs_indices]


#     # Split training+validation into training and validation with stratification
#     train_pairs_indices, val_pairs_indices = train_test_split(
#         np.arange(len(train_val_pairs)),
#         test_size=val_size,
#         random_state=42,
#         stratify=train_val_stratify_labels
#     )
#     train_pairs = np.array(train_val_pairs)[train_pairs_indices].tolist()
#     val_pairs = np.array(train_val_pairs)[val_pairs_indices].tolist()


KeyboardInterrupt: 

In [16]:
# Function to copy files
def copy_files(file_pairs, source_img_dir, source_label_dir, target_img_dir, target_label_dir):
    """Copy each (image, label) pair into the target image and label folders.

    For every pair the image is handled first, then the label. A source file
    that does not exist is skipped silently, so a pair may end up with only
    one of its two files in the target folders. Copies use ``shutil.copy2``.
    """
    for img_file, label_file in file_pairs:
        transfers = (
            (source_img_dir, target_img_dir, img_file),
            (source_label_dir, target_label_dir, label_file),
        )
        for src_dir, dst_dir, name in transfers:
            src_path = os.path.join(src_dir, name)
            if os.path.exists(src_path):
                shutil.copy2(src_path, os.path.join(dst_dir, name))

In [None]:
# Copy files to their respective directories.
# Every split reads from the merged pool (data_dir) and writes into its own
# images/ and labels/ subfolders.
print("Copying training files...")
copy_files(train_pairs, os.path.join(data_dir, 'images'), os.path.join(data_dir, 'labels'), os.path.join(train_dir, 'images'), os.path.join(train_dir, 'labels'))

print("Copying validation files...")
copy_files(val_pairs, os.path.join(data_dir, 'images'), os.path.join(data_dir, 'labels'), os.path.join(val_dir, 'images'), os.path.join(val_dir, 'labels'))

print("Copying testing files...")
copy_files(test_pairs, os.path.join(data_dir, 'images'), os.path.join(data_dir, 'labels'), os.path.join(test_dir, 'images'), os.path.join(test_dir, 'labels'))

Copying training files...
Copying validation files...


In [None]:
# Report how many (image, label) pairs were assigned to each split.
# The counts come from the in-memory lists, not from the files on disk.
print("Dataset splitting completed.")
print(f"Training set size: {len(train_pairs)} images")
print(f"Validation set size: {len(val_pairs)} images")
print(f"Testing set size: {len(test_pairs)} images")

In [None]:
# Function to get class distribution from a directory of label files
def get_class_distribution(label_dir):
    """Count annotation rows per class ID over all ``.txt`` files in ``label_dir``.

    Returns a dict mapping the integer class ID (first field of each non-blank
    row) to the number of rows carrying it. A missing directory yields an
    empty dict. A file that fails to read or parse is reported and skipped
    from the failing row onwards; rows counted before the failure are kept.
    """
    class_counts = {}
    if os.path.exists(label_dir):
        label_names = [name for name in os.listdir(label_dir) if name.lower().endswith('.txt')]
        for filename in label_names:
            try:
                with open(os.path.join(label_dir, filename), 'r') as label_handle:
                    for row in label_handle:
                        tokens = row.strip().split()
                        if not tokens:
                            continue
                        class_id = int(tokens[0])
                        class_counts[class_id] = class_counts.get(class_id, 0) + 1
            except Exception as e:
                print(f"Error reading {filename}: {e}")
    return class_counts



In [None]:
# Get and print class distribution for each split.
# Counts are read back from the label files written to the split folders, so
# they reflect what is on disk rather than the in-memory pair lists.
print("\nClass Distribution after Split:")

train_class_counts = get_class_distribution(os.path.join(train_dir, 'labels'))
print("\nTraining Set Class Distribution:")
if train_class_counts:
    # Sorted by class ID for a stable, comparable listing across splits
    for class_id, count in sorted(train_class_counts.items()):
        print(f"Class {class_id}: {count}")
else:
    print("No labels found in the training set.")

val_class_counts = get_class_distribution(os.path.join(val_dir, 'labels'))
print("\nValidation Set Class Distribution:")
if val_class_counts:
    for class_id, count in sorted(val_class_counts.items()):
        print(f"Class {class_id}: {count}")
else:
    print("No labels found in the validation set.")

test_class_counts = get_class_distribution(os.path.join(test_dir, 'labels'))
print("\nTesting Set Class Distribution:")
if test_class_counts:
    for class_id, count in sorted(test_class_counts.items()):
        print(f"Class {class_id}: {count}")
else:
    print("No labels found in the testing set.")

# Install Library

## Requirements

In [None]:
%cd /content/drive/MyDrive/Binus/MTI/DL/PTL_component/yolov7

/content/drive/MyDrive/Binus/MTI/DL/PTL_component/yolov7


In [None]:
!pip install -r requirements.txt

Collecting numpy<1.24.0,>=1.18.5 (from -r requirements.txt (line 5))
  Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Collecting protobuf<4.21.3 (from -r requirements.txt (line 14))
  Downloading protobuf-4.21.2-cp37-abi3-manylinux2014_x86_64.whl.metadata (540 bytes)
Collecting thop (from -r requirements.txt (line 36))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch!=1.12.0,>=1.7.0->-r requirements.txt (line 11))
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch!=1.12.0,>=1.7.0->-r requirements.txt (line 11))
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch!=1.12.0,>=1.7.0->-r requirements.txt (line 11))
  Downloading nvidia_cuda_cupti_cu12-

## Wandb

In [None]:
# Install wandb for experiment tracking (optional but recommended for grid search)
!pip install wandb



In [None]:
import wandb

# Log in to Weights & Biases before any run is created
wandb.login()

<IPython.core.display.Javascript object>

# Train Model

In [None]:
# prompt: train yolov7-tiny model with grid search and early stop
import yaml
import wandb
import subprocess
import json

# Define your dataset configuration file path
data_yaml_path = '/content/drive/MyDrive/Binus/MTI/DL/PTL_component/furnas_dataset_v0.07/split_dataset/furnas_dataset.yaml' # Make sure this path is correct and the file exists

# Define the base configuration for yolov7-tiny.
# The YOLOv7 checkout this notebook works in is PTL_component/yolov7 (see the
# %cd in the "Requirements" section). The dataset folder listing at the top of
# the notebook shows no yolov7/ subfolder, so the model config is taken from
# the checkout rather than from inside the dataset folder.
config_file = '/content/drive/MyDrive/Binus/MTI/DL/PTL_component/yolov7/cfg/training/yolov7-tiny.yaml' # Adjust path if necessary

In [None]:
# Define the grid search parameters.
# Each key maps to the list of candidate values for that option. Only
# batch_size has more than one candidate here, so the grid yields two runs;
# add values to any list (or a learning-rate option) to widen the search.
grid_params = {
    'batch_size': [16, 32],
    'epochs': [50], # Keep epochs relatively low for grid search iterations
    'img_size': [640],
    'cfg': [config_file],
    'data': [data_yaml_path],
    'weights': [''], # Start training from scratch or a pretrained tiny model
    'hyp': ['data/hyp.tiny.yaml'], # Use the default hyperparameter file for tiny (path relative to the working directory)
    'name': ['yolov7-tiny-grid-search'],
    'cache-images': [True],
    'patience': [10] # Early stopping patience
}

In [None]:
# Initialize Wandb: start a run in the 'yolov7-tiny-ptl' project under the given entity
wandb.init(project='yolov7-tiny-ptl', entity='is-irfan-moh-binus-university', job_type="train_model")

In [None]:
# Function to train a single model configuration
def train_model(params):
    """Run one ``train.py`` job for a single grid-search configuration.

    Args:
        params: Mapping with the keys ``img_size``, ``batch_size``, ``epochs``,
            ``data``, ``cfg``, ``weights``, ``hyp``, ``name`` and ``patience``.
            The optional boolean ``cache-images`` decides whether
            ``--cache-images`` is passed. It defaults to ``True`` so callers
            that omit the key keep the previous behaviour, where the flag was
            always sent.

    Returns:
        The exit code of the training process (``0`` on success), or ``None``
        when the process could not be started or its output not collected.
        Earlier versions always returned ``None``; callers that ignore the
        result are unaffected.
    """
    print("\n" + "="*50)
    print("Starting training with parameters:")
    print(json.dumps(params, indent=2))
    print("="*50 + "\n")

    # Construct the training command; train.py is resolved relative to the
    # current working directory.
    command = [
        'python', 'train.py',
        '--img-size', str(params['img_size']),
        '--batch-size', str(params['batch_size']),
        '--epochs', str(params['epochs']),
        '--data', params['data'],
        '--cfg', params['cfg'],
        '--weights', params['weights'],
        '--hyp', params['hyp'],
        '--name', params['name'],
    ]

    # Honour the grid value instead of always caching images.
    if params.get('cache-images', True):
        command.append('--cache-images')

    command.extend(['--patience', str(params['patience'])])

    # Add --sync-bn if needed (for multi-GPU training, not typically needed in Colab)
    # command.append('--sync-bn')

    # Add --device if you want to specify GPU or CPU
    command.extend(['--device', '0']) # Use GPU 0
    # command.extend(['--device', 'cpu']) # Use CPU

    # Add --bbox_interval and --image_weights if using wandb and logging specific things
    # NOTE(review): confirm that the train.py in use defines --patience and
    # spells --image_weights / --bbox_interval exactly like this; an option the
    # script does not know makes the run exit immediately with a usage error.
    command.extend(['--bbox_interval', '1', '--image_weights'])

    try:
        # Execute the training command; communicate() blocks until the process
        # exits and returns its complete output in one piece.
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = process.communicate()

        # errors='replace' keeps the log readable when it contains bytes that
        # are not valid UTF-8, instead of discarding the whole output.
        print("STDOUT:")
        print(stdout.decode('utf-8', errors='replace'))
        print("STDERR:")
        print(stderr.decode('utf-8', errors='replace'))

        if process.returncode != 0:
            print(f"Training failed with return code {process.returncode}")
        else:
            print("Training completed successfully.")
        return process.returncode

    except Exception as e:
        print(f"An error occurred during training: {e}")
        return None


In [None]:
# Simple grid search implementation
# This is a basic implementation. For more complex grid searches,
# consider libraries like sklearn.model_selection.ParameterGrid
# or tools integrated with experiment trackers like Wandb Sweeps.
from itertools import product

# Generate combinations of parameters.
# The keys are listed explicitly because each combination is a plain tuple
# that is later unpacked by position, so this order is part of the contract.
param_combinations = list(product(*(
    grid_params[key]
    for key in (
        'batch_size', 'epochs', 'img_size', 'cfg', 'data',
        'weights', 'hyp', 'name', 'cache-images', 'patience',
    )
)))

print(f"Total grid search runs: {len(param_combinations)}")

In [None]:
# Run training for each combination
for i, combo in enumerate(param_combinations):
    # Rebuild the keyword mapping from the positional tuple; the key order
    # mirrors the order used when the combinations were generated.
    params = dict(zip(
        ('batch_size', 'epochs', 'img_size', 'cfg', 'data',
         'weights', 'hyp', 'name', 'cache-images', 'patience'),
        combo,
    ))
    # Append index to name for unique runs
    params['name'] = f"{combo[7]}_{i}"
    train_model(params)

print("\n" + "="*50)
print("Grid search completed.")
print("="*50)

# You can now analyze the results in the 'runs/train' directory or on Wandb if used.
# Look for the experiment directories named 'yolov7-tiny-grid-search_X'

# Evaluation

In [None]:
# prompt: evaluate model with test dataset

# After training is complete, evaluate the best model on the test set.

# Find the path to the best trained weights.
# This typically involves looking in the `runs/train` directory for the run
# with the best performance on the validation set (e.g., highest mAP@0.5).
# You'll need to identify the best run and the path to its 'best.pt' weights file.

# Example (you need to replace with the actual path):
# Assuming the best model from the grid search was run 0 and saved to runs/train/yolov7-tiny-grid-search_0/weights/best.pt
# Training is launched from the YOLOv7 checkout at PTL_component/yolov7 (see
# the %cd in the "Requirements" section), so the run folders are expected
# under that checkout's runs/train directory, not under the dataset folder.
# The "_0" run suffix is still a placeholder: pick the best run and adjust it.
best_weights_path = '/content/drive/MyDrive/Binus/MTI/DL/PTL_component/yolov7/runs/train/yolov7-tiny-grid-search_0/weights/best.pt' # **UPDATE THIS PATH**

# Define the path to the test dataset images and labels
test_data_yaml_path = '/content/drive/MyDrive/Binus/MTI/DL/PTL_component/furnas_dataset_v0.07/split_dataset/furnas_dataset.yaml' # Use the same data.yaml, it should point to train, val, and test paths

# Define the image size used during training
img_size_for_eval = 640 # Should match the img_size used for the best training run

# Define the confidence and IoU thresholds for evaluation
# These are common values, you might adjust them based on your requirements
conf_thres = 0.001  # Object confidence threshold (lower to detect more objects)
iou_thres = 0.65    # IoU threshold for NMS and evaluation metrics (higher for stricter matching)

# Announce which weights and dataset config the evaluation uses
print("\n" + "="*50)
print("Starting model evaluation on the test set...")
print(f"Using weights: {best_weights_path}")
print(f"Using data config: {test_data_yaml_path}")
print("="*50 + "\n")

# Construct the evaluation command (using detect.py or test.py if available)
# YOLOv7 typically uses `test.py` for evaluation on a dataset split.
# If you want to detect on individual images and visualize, use `detect.py`.
# We'll use `test.py` for standard evaluation metrics (mAP).
# test.py is resolved relative to the current working directory.

eval_command = [
    'python', 'test.py',
    '--img-size', str(img_size_for_eval),
    '--data', test_data_yaml_path,
    '--weights', best_weights_path,
    '--batch-size', '32', # Use a batch size that fits in memory
    '--conf-thres', str(conf_thres),
    '--iou-thres', str(iou_thres),
    '--task', 'test', # Specify that we are evaluating the 'test' split
    '--name', 'yolov7-tiny-test-eval' # Name for the evaluation run directory
]

# Add --device if you want to specify GPU or CPU
eval_command.extend(['--device', '0']) # Use GPU 0
# eval_command.extend(['--device', 'cpu']) # Use CPU

try:
    # Execute the evaluation command; communicate() blocks until the process
    # exits, so nothing is shown while the evaluation is still running.
    process = subprocess.Popen(eval_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()

    print("STDOUT:")
    print(stdout.decode('utf-8'))
    print("STDERR:")
    print(stderr.decode('utf-8'))

    if process.returncode != 0:
        print(f"Evaluation failed with return code {process.returncode}")
    else:
        print("Evaluation completed successfully.")
        # The evaluation results (mAP, precision, recall, etc.) will be printed to stdout
        # and saved in the 'runs/test/yolov7-tiny-test-eval' directory.

except Exception as e:
    print(f"An error occurred during evaluation: {e}")

# This closing banner is printed whether or not the evaluation succeeded;
# rely on the success/failure message above for the actual outcome.
print("\n" + "="*50)
print("Evaluation completed.")
print("Check the output above and the 'runs/test' directory for results.")
print("="*50)

# Note: The path to the best weights (`best_weights_path`) is a placeholder.
# You need to manually identify the best training run from your grid search results
# (e.g., by checking the validation metrics printed during training or on Wandb)
# and update `best_weights_path` to point to the `weights/best.pt` file of that specific run.
# You might need to list the directories in `yolov7/runs/train` to find the correct path.
```