# 1. Imports

In [None]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import numpy as np
from tqdm import tqdm


# 2. Merging datasets

## Seaship

In [None]:
# Seaship image folders (test and train splits) that are merged into one pool
source = 'sea_ship/seaship.v1i.yolov8/test/images'
source_1 = 'sea_ship/seaship.v1i.yolov8/train/images'
destination = 'merged_dataset/images'

# os.rename fails when the target folder does not exist, so create it first
# (no-op when it is already there)
os.makedirs(destination, exist_ok=True)

# Move every file of each split into the merged folder.
# NOTE: files are moved, not copied - after a successful run the source
# folders are empty, so re-running this cell moves nothing.
for src_dir in (source, source_1):
    for f in os.listdir(src_dir):
        src_path = os.path.join(src_dir, f)
        dst_path = os.path.join(destination, f)
        os.rename(src_path, dst_path)


In [None]:
# Seaship label folders (test and train splits) that are merged into one pool
source_labels = 'sea_ship/seaship.v1i.yolov8/test/labels'
source_1_labels = 'sea_ship/seaship.v1i.yolov8/train/labels'
destination = 'merged_dataset/labels'

# os.rename fails when the target folder does not exist, so create it first
# (no-op when it is already there)
os.makedirs(destination, exist_ok=True)

# Move every label file of each split into the merged folder
for src_dir in (source_labels, source_1_labels):
    for f in os.listdir(src_dir):
        src_path = os.path.join(src_dir, f)
        dst_path = os.path.join(destination, f)
        os.rename(src_path, dst_path)

## Singapore Maritime Dataset

### Replacing numbering of the labels

In [None]:
# Singapore Maritime class ids 0-8 are shifted to 6-14 so that they do not
# collide with ids 0-5 in the merged class list
class_mapping = {0: 6, 1: 7, 2: 8, 3: 9, 4: 10, 5: 11, 6: 12, 7: 13, 8: 14}

dataset_root = "Singapore maritime.v5i.yolov8"
# The label-moving cell further down reads <split>/labels_new for all three
# splits, so every split has to be remapped here (not only "valid")
splits = ("train", "valid", "test")


def remap_label_line(line, mapping):
    """Rewrite the leading class id of one YOLO label line.

    Parameters:
        line: raw text line, ``"<class> <x> <y> <w> <h>"`` with any whitespace.
        mapping: dict of old class id -> new class id; ids that are not in
            the dict are kept unchanged.

    Returns:
        The rewritten line (fields joined by single spaces, terminated by a
        newline), or None when the line is blank.

    Raises:
        ValueError: when the first field is not an integer.
    """
    parts = line.strip().split()
    if not parts:
        return None
    old_class = int(parts[0])
    parts[0] = str(mapping.get(old_class, old_class))
    return ' '.join(parts) + '\n'


for split in splits:
    input_dir = os.path.join(dataset_root, split, "labels")
    output_dir = os.path.join(dataset_root, split, "labels_new")

    if not os.path.isdir(input_dir):
        # An export may lack a split; report it instead of stopping the whole remap
        print(f"Skipping missing label folder: {input_dir}")
        continue

    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(input_dir):
        if not filename.endswith('.txt'):
            continue
        # The originals stay untouched; remapped copies go to labels_new
        with open(os.path.join(input_dir, filename), 'r') as f_in, \
             open(os.path.join(output_dir, filename), 'w') as f_out:
            for line in f_in:
                new_line = remap_label_line(line, class_mapping)
                if new_line is not None:
                    f_out.write(new_line)

### Moving the images and labels to the desired location

In [None]:
# Singapore Maritime image folders (all three splits) merged into the shared pool
source = "Singapore maritime.v5i.yolov8/train/images"
source_1 = "Singapore maritime.v5i.yolov8/valid/images"
source_2 = "Singapore maritime.v5i.yolov8/test/images"
destination = "merged_dataset/images"

# os.rename fails when the target folder does not exist, so create it first
# (no-op when it is already there)
os.makedirs(destination, exist_ok=True)

# Move every file of each split into the merged folder
for src_dir in (source, source_1, source_2):
    for f in os.listdir(src_dir):
        src_path = os.path.join(src_dir, f)
        dst_path = os.path.join(destination, f)
        os.rename(src_path, dst_path)


In [None]:
# Remapped Singapore Maritime label folders (labels_new) for all three splits.
# These folders must have been produced by the class-id remapping step first.
source = "Singapore maritime.v5i.yolov8/train/labels_new"
source_1 = "Singapore maritime.v5i.yolov8/valid/labels_new"
source_2 = "Singapore maritime.v5i.yolov8/test/labels_new"
destination = "merged_dataset/labels"

# os.rename fails when the target folder does not exist, so create it first
# (no-op when it is already there)
os.makedirs(destination, exist_ok=True)

# Move every label file of each split into the merged folder
for src_dir in (source, source_1, source_2):
    for f in os.listdir(src_dir):
        src_path = os.path.join(src_dir, f)
        dst_path = os.path.join(destination, f)
        os.rename(src_path, dst_path)

# 3. Data exploration

## Building up the dataframe w/ current files

In [None]:
import glob
import os
import pandas as pd

# Folder holding one YOLO-format .txt label file per image
labels_dir = 'merged_dataset/labels'
files = sorted(glob.glob(os.path.join(labels_dir, '*.txt')))

# One dict per bounding box; a label file without boxes contributes a single all-NA row
rows = []
for fp in files:
    basename = os.path.basename(fp)
    # the image is assumed to share the label's stem and to be a .jpg
    image_name = os.path.splitext(basename)[0] + '.jpg'

    # keep only lines that still contain something after stripping whitespace
    with open(fp, 'r', encoding='utf-8') as f:
        lines = [stripped for stripped in (raw.strip() for raw in f) if stripped]

    if not lines:
        rows.append({
            'label_file': basename,
            'image_file': image_name,
            'class': pd.NA,
            'x': pd.NA,
            'y': pd.NA,
            'w': pd.NA,
            'h': pd.NA
        })
        continue

    for ln in lines:
        # YOLO format: class x_center y_center width height
        parts = ln.split()

        # class id: NA when the first field is not an integer
        try:
            cls = int(parts[0])
        except ValueError:
            cls = pd.NA

        # coordinates: NA unless four parseable floats follow the class id
        coords = [pd.NA] * 4
        if len(parts) >= 5:
            try:
                coords = [float(field) for field in parts[1:5]]
            except ValueError:
                coords = [pd.NA] * 4

        x, y, w, h = coords
        rows.append({
            'label_file': basename,
            'image_file': image_name,
            'class': cls,
            'x': x,
            'y': y,
            'w': w,
            'h': h,
        })

if files:
    # build dataframe from parsed rows
    labels_df = pd.DataFrame(rows)
else:
    # nothing to parse: keep the expected columns so later cells still work
    print('No label files found in', labels_dir)
    labels_df = pd.DataFrame(columns=['label_file', 'image_file', 'class', 'x', 'y', 'w', 'h'])

# Summary statistics (rows with an NA class count towards "empty label files")
total_files = len(files)
if labels_df.empty:
    total_boxes = 0
    empty_label_files = 0
else:
    has_class = labels_df['class'].notna()
    total_boxes = int(has_class.sum())
    empty_label_files = int(labels_df.loc[labels_df['class'].isna(), 'label_file'].nunique())
print(f'Total label files scanned: {total_files}')
print(f'Total bounding boxes (rows): {total_boxes}')
print(f'Empty label files: {empty_label_files}')

# show a sample preview
labels_df.head(10)

## Summing up and plotting the class distribution

In [None]:
# Class id -> display name, read from merged_dataset/data.yaml when it exists
class_names = {}
data_yaml_path = 'merged_dataset/data.yaml'
if not os.path.exists(data_yaml_path):
    print(f'No data.yaml at {data_yaml_path}; falling back to numeric class IDs')
else:
    try:
        # Preferred route: a real YAML parser
        import yaml
        with open(data_yaml_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
        names = data.get('names') if isinstance(data, dict) else None
        if isinstance(names, dict):
            # keys may be strings; convert to ints
            class_names = {int(key): value for key, value in names.items()}
        elif isinstance(names, list):
            class_names = dict(enumerate(names))
    except Exception:
        # Fallback (yaml missing or parsing failed): hand-parse the 'names' entry
        try:
            with open(data_yaml_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()
            names_list = []
            in_names = False
            for ln in lines:
                s = ln.strip()
                if s.startswith('names:'):
                    rest = s.partition('names:')[2].strip()
                    if not rest.startswith('['):
                        # block style: the entries follow on "- item" lines
                        in_names = True
                        continue
                    # inline list: evaluate it as a Python literal
                    import ast
                    try:
                        names_list = ast.literal_eval(rest)
                    except Exception:
                        names_list = []
                    break
                if not in_names:
                    continue
                if not s.startswith('-'):
                    # first non-item line ends the block
                    break
                names_list.append(s.lstrip('-').strip().strip('"'))
            class_names = dict(enumerate(names_list))
        except Exception:
            class_names = {}

# Plot only when labels_df exists and holds at least one row with a class
has_boxes = (
    'labels_df' in globals()
    and not labels_df.empty
    and not labels_df['class'].dropna().empty
)
if has_boxes:
    counts = labels_df['class'].dropna().astype(int).value_counts().sort_index()
    idx = list(counts.index)
    vals = counts.values
    # use the class name when known, otherwise the numeric id as text
    x_labels = [class_names.get(class_id, str(class_id)) for class_id in idx]

    plt.figure(figsize=(12,6))
    bars = plt.bar(x_labels, vals, color='tab:blue')
    plt.title('Class Distribution')
    plt.ylabel('Number of bounding boxes')
    plt.xlabel('Class')
    plt.xticks(rotation=45, ha='right')
    plt.grid(axis='y', linestyle='--', alpha=0.6)

    # write each count just above its bar
    for bar in bars:
        bar_top = bar.get_height()
        bar_middle = bar.get_x() + bar.get_width() / 2
        plt.annotate(f'{int(bar_top)}', xy=(bar_middle, bar_top), xytext=(0, 3), textcoords='offset points', ha='center', va='bottom', fontsize=9)

    plt.tight_layout()
    plt.show()
else:
    print('No labeled bounding boxes to plot.')


## Creating video with ground truth

In [None]:
# Paths - adjust these to your setup
images_dir = "merged_dataset/images"
labels_dir = "merged_dataset/labels"
output_video = "ground_truth_video_singapore_maritime.mp4"

# Class names dictionary (adjust based on your dataset)
class_names = {
    0: "bulk cargo carrier",
    1: "container ship",
    2: "fishing boat",
    3: "general cargo ship",
    4: "ore carrier",
    5: "passenger ship",
    6: "Boat",
    7: "Buoy",
    8: "Ferry",
    9: "Flying bird-plane",
    10: "Kayak",
    11: "Other",
    12: "Sail boat",
    13: "Speed boat",
    14: "Vessel-ship",
}


def yolo_box_to_pixel_corners(x_center, y_center, box_width, box_height, img_width, img_height):
    """Convert one normalised YOLO box into integer pixel corners.

    Parameters:
        x_center, y_center, box_width, box_height: box in YOLO format, i.e.
            fractions of the image width/height.
        img_width, img_height: size in pixels of the image the box belongs to.

    Returns:
        Tuple ``(x1, y1, x2, y2)``: top-left and bottom-right corners in pixels.
    """
    x_center_px = int(x_center * img_width)
    y_center_px = int(y_center * img_height)
    box_width_px = int(box_width * img_width)
    box_height_px = int(box_height * img_height)

    x1 = int(x_center_px - box_width_px / 2)
    y1 = int(y_center_px - box_height_px / 2)
    x2 = int(x_center_px + box_width_px / 2)
    y2 = int(y_center_px + box_height_px / 2)
    return x1, y1, x2, y2


# Filter only Singapore Maritime dataset images (they start with 'MVI')
image_files = sorted(glob.glob(os.path.join(images_dir, "MVI*.jpg")))
# Try other extensions if needed
if not image_files:
    image_files = sorted(glob.glob(os.path.join(images_dir, "MVI*.png")))

# Check if we found any images
if not image_files:
    print("No Singapore Maritime dataset images found!")
else:
    # The first image fixes the frame size of the whole video
    first_img = cv2.imread(image_files[0])
    if first_img is None:
        raise RuntimeError(f"Could not read first image: {image_files[0]}")
    height, width = first_img.shape[:2]

    # Define the output video writer using mp4v codec, 30fps
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video, fourcc, 30.0, (width, height))
    if not out.isOpened():
        raise RuntimeError(f"Could not open video writer for {output_video}")

    # Process each image
    print(f"Processing {len(image_files)} images...")

    try:
        for idx, img_path in enumerate(image_files):
            # Print progress message
            if (idx + 1) % 10 == 0:
                print(f"Processed {idx + 1}/{len(image_files)} images")

            # Read the image
            img = cv2.imread(img_path)

            # Skip if image couldn't be loaded
            if img is None:
                print(f"Warning: Could not load {img_path}")
                continue

            # Boxes are normalised to THIS image, so scale with its own size
            # rather than with the size of the first image
            img_height, img_width = img.shape[:2]

            # Get corresponding label file
            img_name = os.path.splitext(os.path.basename(img_path))[0]
            label_path = os.path.join(labels_dir, f"{img_name}.txt")

            # Draw all bounding boxes for this image
            if os.path.exists(label_path):
                with open(label_path, 'r') as f:
                    for line in f:
                        parts = line.strip().split()

                        # Skip lines without a full "class x y w h" record
                        if len(parts) < 5:
                            continue

                        class_id = int(parts[0])
                        x_center, y_center, box_width, box_height = map(
                            float, parts[1:5]
                        )

                        x1, y1, x2, y2 = yolo_box_to_pixel_corners(
                            x_center, y_center, box_width, box_height,
                            img_width, img_height,
                        )

                        # Draw rectangle (green color)
                        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

                        # Get class name if available from the class_names dict
                        class_label = class_names.get(class_id, f"Class_{class_id}")

                        # Put class name text above the box
                        cv2.putText(
                            img, class_label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2
                        )

            # The writer was opened for (width, height); bring any frame of a
            # different size to that size before writing it
            if (img_width, img_height) != (width, height):
                img = cv2.resize(img, (width, height))

            # Write frame to video
            out.write(img)
    finally:
        # Release the video writer even when a frame fails, so the file is closed
        out.release()
    #print(f"Video saved to {output_video}")
    #print(f"Total frames: {len(image_files)}")

In [None]:
# List every class id together with its display name
for class_id, class_label in class_names.items():
    print(f"Class ID: {class_id}, Class Name: {class_label}")

In [None]:
# Drop boxes of classes 7, 9 and 11 and rows that carry no class at all.
# .copy() gives an independent frame, so the assignment below is not a
# chained assignment on a slice.
excluded_classes = [7, 9, 11]
keep_mask = labels_df['class'].notna() & ~labels_df['class'].isin(excluded_classes)
labels_df = labels_df.loc[keep_mask].copy()

# Collapse the remaining ids into two groups: 0 (ids 0-5 and 14) and 1 (ids 6, 8, 10, 12, 13).
# Assign the result back instead of using inplace=True on labels_df['class'],
# which operates on a temporary Series and can leave labels_df unchanged.
# NOTE(review): this only changes the dataframe; the .txt label files on disk
# keep their original class ids - confirm whether they must be rewritten too.
labels_df['class'] = labels_df['class'].replace({4:0, 5:0, 1:0, 0:0, 3:0, 2:0, 14:0, 6:1, 13:1, 8:1, 12:1, 10:1})

In [None]:
# Keep only rows with a class, excluding classes 7, 9 and 11
# (.copy() so the assignment below works on an independent frame)
excluded_classes = [7, 9, 11]
keep_mask = labels_df['class'].notna() & ~labels_df['class'].isin(excluded_classes)
labels_df = labels_df.loc[keep_mask].copy()

# Merge class 1 into class 0. The result is assigned back because an inplace
# replace on labels_df['class'] may act on a temporary Series and be lost.
labels_df['class'] = labels_df['class'].replace({1:0})

# Image preparation for the training

## Dividing data into train and test datasets

In [None]:
# Split by IMAGE, not by bounding-box row: labels_df has one row per box, and
# the copy step works on unique image_file values, so a row-level split would
# put the same image into several splits (train/test leakage).
image_ids = pd.Series(labels_df['image_file'].unique())

# 70 % of the images go to training
train_images = image_ids.sample(frac=0.7, random_state=42)
holdout_images = image_ids.drop(train_images.index)

# The remaining 30 % is divided into validation (33 % of it) and test (the rest)
valid_images = holdout_images.sample(frac=0.33, random_state=42)
test_images = holdout_images.drop(valid_images.index)

# Every box follows its image into exactly one split
train_set = labels_df[labels_df['image_file'].isin(train_images)]
valid_set = labels_df[labels_df['image_file'].isin(valid_images)]
test_set = labels_df[labels_df['image_file'].isin(test_images)]

train_set.shape, test_set.shape, valid_set.shape

In [None]:
# Bare expression: the notebook displays the validation split as the cell output
valid_set

## Creating folders for train, test, valid splits

In [None]:
import shutil

In [None]:
# Copy each split's images and label files from merged_dataset into
# final_dataset/<split>/{images,labels}.
# NOTE(review): label files are copied as they are on disk; class changes made
# only in labels_df are not reflected in them - confirm this is intended.
split_frames = {'train': train_set, 'valid': valid_set, 'test': test_set}

for split_name, split_df in split_frames.items():
    # (sub-folder name, dataframe column holding the file names)
    for kind, column in (('images', 'image_file'), ('labels', 'label_file')):
        src_dir = os.path.join('merged_dataset', kind)
        dst_dir = os.path.join('final_dataset', split_name, kind)
        # create the target folder once per split instead of once per file
        os.makedirs(dst_dir, exist_ok=True)

        for file_name in split_df[column].unique():
            # copy2 keeps the file metadata along with the content
            shutil.copy2(os.path.join(src_dir, file_name), os.path.join(dst_dir, file_name))

In [None]:
import gc
# Run a full garbage-collection pass
gc.collect()
# Read the collector's current thresholds; the bare `t` displays them as the cell output
t = gc.get_threshold()
t

## Training the yolo model w/out augmentation

In [None]:
import albumentations as A

# Augmentation pipeline, assembled from themed groups and applied in this order.
# NOTE(review): train_transform is not referenced by the training cells visible
# in this notebook - confirm where (or whether) it is used.

# --- Geometry ---
geometry_ops = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.15),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.15, rotate_limit=10, border_mode=0, p=0.7),
]

# --- Noise & blur ---
noise_ops = [
    A.GaussNoise(var_limit=(10, 50), p=0.4),
    A.MotionBlur(blur_limit=5, p=0.2),
]

# --- Weather / bridge shadow simulation ---
weather_ops = [
    A.RandomShadow(shadow_roi=(0, 0.4, 1, 1), num_shadows_limit=(1, 2), shadow_dimension=5, p=0.25),
    A.RandomFog(fog_coef_lower=0.1, fog_coef_upper=0.3, p=0.12),
]

# --- Color ---
color_ops = [
    A.RandomBrightnessContrast(brightness_limit=0.25, contrast_limit=0.3, p=0.6),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=15, p=0.4),
]

# --- Compression ---
compression_ops = [
    A.ImageCompression(quality_lower=30, quality_upper=100, p=0.4),
]

# Boxes are passed in YOLO format together with a parallel 'labels' list
yolo_bbox_params = A.BboxParams(
    format='yolo',
    min_area=16,
    min_visibility=0.1,
    label_fields=['labels'],
)

train_transform = A.Compose(
    geometry_ops + noise_ops + weather_ops + color_ops + compression_ops,
    bbox_params=yolo_bbox_params,
)


In [None]:
from ultralytics import YOLO

# Load a COCO-pretrained YOLOv8n model
model = YOLO("yolov8n.pt")

# Display model information (optional)
model.info()

# Train on the dataset described by final_dataset/data.yaml: 10 epochs, 640 px images
results = model.train(data="final_dataset/data.yaml", epochs=10, imgsz=640)
# Bare expression: shows the training results object as the cell output
results

In [None]:
import gc
# Run a garbage-collection pass before the fine-tuning step
gc.collect()

## Fine tuning

In [None]:
from ultralytics import YOLO

# Load the model (best weights of the first training run)
# NOTE(review): absolute, machine-specific path - the cell only runs where this file exists
model = YOLO(r"C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\runs\detect\train\weights\best.pt")

# Training with more aggressive augmentation for difficult lighting
results = model.train(
    data="fine_set/data.yaml",
    epochs=15,          # Short fine-tuning
    imgsz=1280,         # High resolution (good for small details on the bridge)

    # --- PHOTOMETRIC AUGMENTATIONS (key for sunlight) ---
    hsv_h=0.015,        # Hue shift (subtle)
    hsv_s=0.7,          # Saturation shift (aggressive - sunlight changes colours)
    hsv_v=0.6,          # Brightness shift (aggressive - simulates shadow and overexposure)

    # --- GEOMETRIC AUGMENTATIONS ---
    degrees=5.0,        # Slight rotations (the camera may sway in the wind)
    translate=0.1,      # Image translation
    scale=0.5,          # Scaling (important when the object is sometimes close, sometimes far)
    fliplr=0.5,         # Horizontal mirror flip (almost always worthwhile)

    # --- SPECIAL ---
    mosaic=1.0,         # Enabled (teaches context)
    mixup=0.1,          # Blending images on top of each other (optional, may help with dense scenes/details)
    batch = -1,        # -1 presumably lets Ultralytics choose the batch size automatically - confirm against its docs
)

New https://pypi.org/project/ultralytics/8.3.242 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.235  Python-3.12.5 torch-2.4.1+cu124 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=fine_set/data.yaml, degrees=5.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=15, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.6, imgsz=1280, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.1, mode=train, model=C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\runs\detec

## Trying it out on the previously recorded video

In [None]:
from ultralytics import YOLO

# 1. Load your trained YOLO model
model = YOLO(
    r"C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\runs\detect\train2\weights\best.pt"
)

# 2. Run prediction on an image
results = model.predict(
    source=r"C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\sprogo_first_colab_frames\15_12_2025\frames_sb1\frame_20251215_104955.jpg",
    save=True,           # save the annotated output image under runs/detect/
    show=False,          # set to True only if you want a popup window
    conf=0.4,           # minimum confidence for a detection to be kept
    iou=0,            # NOTE(review): 0 is not a typical NMS IoU threshold; with 0 any overlap presumably suppresses the lower-scoring box - confirm this is intended
)
# 3. Display results
results[0].show()  # show results for the first image


image 1/1 C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\sprogo_first_colab_frames\15_12_2025\frames_sb1\frame_20251215_104955.jpg: 736x1280 1 ship, 46.5ms
Speed: 9.4ms preprocess, 46.5ms inference, 2.1ms postprocess per image at shape (1, 3, 736, 1280)
Results saved to [1mC:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\runs\detect\predict4[0m


In [None]:
# 1. Load your trained YOLO model
model = YOLO(
     r"C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\runs\detect\train\weights\best.pt"
)

# 2. Run prediction on the video.
# stream=True makes predict() return a generator that yields one result per
# frame; without it every frame's result is kept in RAM (Ultralytics prints an
# out-of-memory warning for long videos).
results = model.predict(
    source=r"C:\Users\szymo\OneDrive\Wideo\Clipchamp\Video Project\Exports\Video Project.mp4",
    save=True,           # save the annotated output under runs/detect/
    show=False,          # set to True only if you want a popup window
    conf=0.4,           # minimum confidence for a detection to be kept
    iou=0,            # NMS IoU threshold (0: any overlap suppresses the lower-scoring box)
    stream=True,
)

# The generator is lazy: it has to be consumed for the frames to be processed
# and saved. Only the first frame's result is kept for display.
first_result = None
for frame_result in results:
    if first_result is None:
        first_result = frame_result

# 3. Display the result for the first frame
if first_result is not None:
    first_result.show()


inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/8364) C:\Users\szymo\OneDrive\Wideo\Clipchamp\Video Project\Exports\Video Project.mp4: 736x1280 (no detections), 41.6ms
video 1/1 (frame 2/8364) C:\Users\szymo\OneDrive\Wideo\Clipchamp\Video Project\Exports\Video Project.mp4: 736x1280 (no detections), 38.1ms
video 1/1 (frame 3/8364) C:\Users\szymo\OneDrive\Wideo\Clipchamp\Video Project\Exports\Video Project.mp4: 736x1280 1 ship, 37.2ms
video 1/1 (frame 4/8364) C:\Users\szymo\OneDrive\Wideo\Clipc

In [None]:
# Drop the reference to the prediction results, then run a garbage-collection pass
del results
import gc
gc.collect()

44855

## Trying the code on the photos from the whole day

In [None]:
import os
import cv2
import numpy as np
from ultralytics import YOLO
from pathlib import Path

# 1. Load your trained YOLO model
# NOTE(review): absolute, machine-specific paths below - adjust before running elsewhere
model = YOLO(r"C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\runs\detect\train2\weights\best.pt")

# 2. Define paths: frames are read from input_folder, annotated copies go to output_folder
input_folder = r"C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\sprogo_first_colab_frames\30_12_2025\frames_sb1"
output_folder = r"C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\sprogo_first_colab_frames\30_12_2025\frames_sb1_detect"

# 3. Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# 4. Define water region as bottom 60% of image height
def create_water_mask(img_shape):
    """Build a uint8 mask that marks the bottom 60% of the image rows as water.

    Parameters:
        img_shape: image shape whose first two entries are (height, width).

    Returns:
        A (height, width) uint8 array: 255 from the row at 40% of the height
        downwards, 0 above it.
    """
    rows, cols = img_shape[:2]

    # the water region begins 40% of the way down from the top edge
    first_water_row = int(rows * 0.4)

    water = np.zeros((rows, cols), dtype=np.uint8)
    water[first_water_row:, :] = 255
    return water

def filter_detections_in_roi(boxes, mask):
    """Return the indices of detections whose box centre lies inside the mask.

    Parameters:
        boxes: iterable of detections; each must expose ``xyxy[0]`` with
            ``.cpu().numpy()`` giving (x1, y1, x2, y2).
        mask: 2-D array indexed as ``mask[row, column]``; values > 0 mark the
            region of interest.

    Returns:
        List of positions (into ``boxes``) of the detections to keep. A
        detection whose centre falls outside the mask is dropped.
    """
    filtered_indices = []

    for idx, box in enumerate(boxes):
        # Get bounding box coordinates
        x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())

        # Calculate center point of bounding box
        center_x = (x1 + x2) // 2
        center_y = (y1 + y2) // 2

        # A centre on or beyond the border would raise IndexError, and a
        # negative one would silently wrap around, so check the bounds first
        if not (0 <= center_y < mask.shape[0] and 0 <= center_x < mask.shape[1]):
            continue

        # Keep the detection when its centre is in the water mask
        if mask[center_y, center_x] > 0:
            filtered_indices.append(idx)

    return filtered_indices

# 5. Collect every file in the input folder that has an image extension
image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp']
image_files = []
for entry in os.listdir(input_folder):
    if os.path.splitext(entry)[1].lower() in image_extensions:
        image_files.append(entry)

print(f"Found {len(image_files)} images to process")

# 6. Detect on each image and save only those with a detection in the water region
for img_file in image_files:
    img_path = os.path.join(input_folder, img_file)

    # The image is loaded here only to learn its size for the mask
    img = cv2.imread(img_path)
    if img is None:
        print(f"Could not read: {img_file}")
        continue

    # Water mask (bottom 60% of the image)
    water_mask = create_water_mask(img.shape)

    # Run prediction on the full image
    results = model.predict(source=img_path, save=False, show=False, conf=0.5, iou=0.5)
    frame_result = results[0]

    # Nothing detected at all: move on without a message
    if len(frame_result.boxes) == 0:
        continue

    valid_indices = filter_detections_in_roi(frame_result.boxes, water_mask)
    if not valid_indices:
        print(f"Skipped: {img_file} (no detections in water region)")
        continue

    # Keep only the detections in the water region, then save the annotated image
    frame_result.boxes = frame_result.boxes[valid_indices]
    output_path = os.path.join(output_folder, img_file)
    frame_result.save(filename=output_path)
    print(f"Saved: {img_file} ({len(valid_indices)} detections in water)")

print(f"\nProcessing complete! Detected images saved to: {output_folder}")
print("Analyzing bottom 60% of each image (water region)")

Found 1529 images to process

image 1/1 C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\sprogo_first_colab_frames\30_12_2025\frames_sb1\frame_20251230_080627.jpg: 736x1280 (no detections), 43.7ms
Speed: 6.0ms preprocess, 43.7ms inference, 0.5ms postprocess per image at shape (1, 3, 736, 1280)

image 1/1 C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\sprogo_first_colab_frames\30_12_2025\frames_sb1\frame_20251230_080643.jpg: 736x1280 (no detections), 30.5ms
Speed: 6.0ms preprocess, 30.5ms inference, 0.7ms postprocess per image at shape (1, 3, 736, 1280)

image 1/1 C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\sprogo_first_colab_frames\30_12_2025\frames_sb1\frame_20251230_080659.jpg: 736x1280 (no detections), 27.9ms
Speed: 6.1ms preprocess, 27.9ms inference, 0.6ms postprocess per image at shape (1, 3, 736, 1280)

image 1/1 C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\sprogo_first_colab_frames\30_12_2025\frames_sb1\frame

## Running it on the livestream

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from datetime import datetime
import os
from IPython.display import display, Image, clear_output
import ipywidgets as widgets
from threading import Thread
import time

# --- 1. CONFIGURATION ---
# Path to the model weights
model_path = r"C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\runs\detect\train2\weights\best.pt"
model = YOLO(model_path)

# NOTE(review): the stream URL contains what looks like an access token - consider
# reading it from an environment variable instead of committing it; confirm.
CAMERA_SOURCE = "https://stream.sob.m-dn.net/live/sb1/vKVhWPO2ysiYNGrNfA+Krw1/stream.m3u8?plain=true"
CONFIDENCE_THRESHOLD = 0.5   # passed to model.predict as conf
IOU_THRESHOLD = 0.0          # passed to model.predict as iou
WATER_REGION_PERCENT = 0.6   # fraction of the frame height, measured from the bottom, treated as water
PROCESS_EVERY_N_FRAMES = 1   # run detection on every N-th loop iteration
SHOW_WATER_LINE = False      # draw the upper border of the water region on the frame

# --- AUTO-SAVE CONFIGURATION ---
SAVE_DETECTIONS = True
OUTPUT_FOLDER = r"C:\Users\szymo\Desktop\DTU\3rd_semester\Individual_project_demo\live_detections"
SAVE_COOLDOWN = 2.0  # Minimum number of seconds between two auto-saved frames

if SAVE_DETECTIONS:
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# --- 2. CAMERA CLASS FOR LAG-FREE CAPTURE ---
class CameraStream:
    """Background reader around ``cv2.VideoCapture`` that keeps only the newest frame.

    A daemon thread keeps calling ``stream.read()`` and overwrites ``frame``,
    so ``read()`` returns the most recently grabbed frame without blocking.

    Attributes:
        stream: the underlying ``cv2.VideoCapture``.
        ret: success flag of the last ``stream.read()`` call.
        frame: frame returned by the last ``stream.read()`` call.
        stopped: set to True to ask the reader thread to finish.
    """

    def __init__(self, src=0):
        """Open the capture for ``src`` and grab a first frame."""
        self.stream = cv2.VideoCapture(src)
        self.stream.set(cv2.CAP_PROP_BUFFERSIZE, 1)
        (self.ret, self.frame) = self.stream.read()
        self.stopped = False
        self._thread = None  # reader thread, created by start()

    def start(self):
        """Start the background reader thread and return ``self`` for chaining."""
        self._thread = Thread(target=self.update, args=(), daemon=True)
        self._thread.start()
        return self

    def update(self):
        """Reader loop: keep replacing ``ret``/``frame`` until ``stopped`` is set."""
        while not self.stopped:
            (self.ret, self.frame) = self.stream.read()
            time.sleep(0.005)

    def read(self):
        """Return the most recently grabbed frame."""
        return self.frame

    def stop(self):
        """Stop the reader thread and release the capture.

        The thread is joined before the capture is released, so the capture is
        not torn down while the reader is still inside ``stream.read()``. The
        join is bounded, so a read that hangs cannot block the caller forever.
        """
        self.stopped = True
        worker = getattr(self, "_thread", None)
        if worker is not None and worker.is_alive():
            worker.join(timeout=2.0)
        self.stream.release()

# --- 3. HELPER FUNCTIONS ---
def create_water_mask(frame_shape):
    """Build the water-region mask for a frame.

    The bottom WATER_REGION_PERCENT of the frame height is treated as water.

    Parameters:
        frame_shape: frame shape whose first two entries are (height, width).

    Returns:
        Tuple ``(mask, water_start_y)``: a (height, width) uint8 array that is
        255 from row ``water_start_y`` downwards and 0 above, and that row index.
    """
    rows, cols = frame_shape[:2]
    first_water_row = int(rows * (1 - WATER_REGION_PERCENT))
    water = np.zeros((rows, cols), dtype=np.uint8)
    water[first_water_row:, :] = 255
    return water, first_water_row

def filter_detections_in_roi(boxes, mask):
    """Return the indices of detections whose box centre lies inside the mask.

    Parameters:
        boxes: iterable of detections; each must expose ``xyxy[0]`` with
            ``.cpu().numpy()`` giving (x1, y1, x2, y2).
        mask: 2-D array indexed as ``mask[row, column]``; values > 0 mark the
            region of interest.

    Returns:
        List of positions (into ``boxes``) of the detections to keep; a
        detection whose centre falls outside the mask bounds is dropped.
    """
    kept = []
    for position, detection in enumerate(boxes):
        left, top, right, bottom = (int(v) for v in detection.xyxy[0].cpu().numpy())
        mid_x = (left + right) // 2
        mid_y = (top + bottom) // 2

        # centres outside the mask are ignored rather than indexed
        inside = 0 <= mid_y < mask.shape[0] and 0 <= mid_x < mask.shape[1]
        if inside and mask[mid_y, mid_x] > 0:
            kept.append(position)
    return kept

def draw_water_line(frame, water_start_y):
    """Draw the upper border of the water region and its caption onto ``frame``.

    Parameters:
        frame: image array that is drawn on in place.
        water_start_y: row at which the horizontal line is drawn.
    """
    line_color = (0, 255, 255)
    left_end = (0, water_start_y)
    right_end = (frame.shape[1], water_start_y)
    cv2.line(frame, left_end, right_end, line_color, 2)

    # caption placed just below the line
    caption_origin = (10, water_start_y + 25)
    cv2.putText(frame, "Water Region", caption_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.7, line_color, 2)

# --- 4. INTERFACE ---
# Start/stop buttons, a status line, and an image widget that shows the annotated frames
start_button = widgets.Button(description="Start Auto-Detect", button_style='success')
stop_button = widgets.Button(description="Stop Camera", button_style='danger')
status_label = widgets.Label(value="Ready")
image_widget = widgets.Image(format='jpeg', width=800, height=600)

display(widgets.HBox([start_button, stop_button]))
display(status_label)
display(image_widget)

# Shared state: `running` keeps the processing loop alive; `video_stream` is set by that loop
running = False
video_stream = None

def start_camera(b):
    """Button callback: launch the processing loop in a daemon thread.

    Does nothing when a loop is already running. ``b`` is the clicked button
    and is not used.
    """
    global running
    if running:
        return
    running = True
    status_label.value = "Starting..."
    worker = Thread(target=main_processing_loop, daemon=True)
    worker.start()

def stop_camera(b):
    """Button callback: ask the processing loop to finish.

    ``b`` is the clicked button and is not used. The status text is written
    BEFORE the flag is cleared: the loop writes its own final status when it
    sees ``running == False``, and that message must not be overwritten by
    "Stopping...".
    """
    global running
    status_label.value = "Stopping..."
    running = False

# Connect the buttons to their callbacks
start_button.on_click(start_camera)
stop_button.on_click(stop_camera)

# --- 5. MAIN LOOP WITH AUTO-SAVE ---
def main_processing_loop():
    """Read frames from the camera, detect, draw, display and auto-save.

    Runs until the global ``running`` flag becomes False. Detections whose
    centre is outside the water mask are discarded. When at least one
    detection is on screen and SAVE_COOLDOWN seconds have passed since the
    last save, the annotated frame is written to OUTPUT_FOLDER (only if
    SAVE_DETECTIONS is set).

    On every exit path (normal stop, missing first frame, exception) the
    stream is stopped, ``running`` is reset so the start button works again,
    and the final status is written to ``status_label``.
    """
    global running, video_stream

    final_status = "Stopped"
    video_stream = None
    try:
        video_stream = CameraStream(src=CAMERA_SOURCE).start()
        time.sleep(1.0)  # camera warm-up

        first_frame = video_stream.read()
        if first_frame is None:
            final_status = "Error: Camera returned None"
            return

        # The mask is built once, from the size of the first frame
        water_mask, water_line_y = create_water_mask(first_frame.shape)

        frame_counter = 0
        cached_boxes = []

        # Time of the last auto-save, used for the cooldown
        last_save_time = 0

        while running:
            frame = video_stream.read()
            if frame is None:
                # no frame available: wait briefly instead of spinning at full speed
                time.sleep(0.01)
                continue

            frame_counter += 1
            annotated_frame = frame.copy()

            # --- DETECTION ---
            if frame_counter % PROCESS_EVERY_N_FRAMES == 0:
                results = model.predict(source=frame, save=False, show=False, conf=CONFIDENCE_THRESHOLD, iou=IOU_THRESHOLD, verbose=False)

                cached_boxes = []
                if len(results[0].boxes) > 0:
                    valid_indices = filter_detections_in_roi(results[0].boxes, water_mask)
                    for idx in valid_indices:
                        box = results[0].boxes[idx]
                        coords = tuple(map(int, box.xyxy[0].cpu().numpy()))
                        conf = float(box.conf[0])
                        cls = int(box.cls[0])
                        cached_boxes.append((coords, conf, cls))

            # --- DRAWING ---
            detections_now = len(cached_boxes)  # number of objects shown on screen right now

            for (coords, conf, cls) in cached_boxes:
                x1, y1, x2, y2 = coords
                cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                label = f"{model.names[cls]} {conf:.2f}"
                cv2.putText(annotated_frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            if SHOW_WATER_LINE:
                draw_water_line(annotated_frame, water_line_y)

            # Info overlay: red while the cooldown is active, green when a save is possible
            info_text = f"Auto-Save Mode | Detections: {detections_now}"
            if time.time() - last_save_time < SAVE_COOLDOWN:
                color = (0, 0, 255)
            else:
                color = (0, 255, 0)

            cv2.putText(annotated_frame, info_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

            timestamp_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            cv2.putText(annotated_frame, timestamp_str, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

            # --- AUTO SAVE LOGIC ---
            # Save when something is detected AND the cooldown has elapsed
            current_time = time.time()
            if SAVE_DETECTIONS and detections_now > 0 and (current_time - last_save_time > SAVE_COOLDOWN):
                ts_filename = datetime.now().strftime("%Y%m%d_%H%M%S_%f")  # %f appends microseconds for uniqueness
                save_path = os.path.join(OUTPUT_FOLDER, f"auto_detect_{ts_filename}.jpg")
                cv2.imwrite(save_path, annotated_frame)

                last_save_time = current_time  # restart the cooldown
                status_label.value = f"AUTO-SAVED: {save_path}"

            # Display: push the JPEG-encoded frame to the widget
            encoded_ok, buffer = cv2.imencode('.jpg', annotated_frame)
            if encoded_ok:
                image_widget.value = buffer.tobytes()

            time.sleep(0.001)
    except Exception as exc:
        # show the failure in the UI, then let the traceback surface as usual
        final_status = f"Error: {exc}"
        raise
    finally:
        running = False
        if video_stream is not None:
            video_stream.stop()
        status_label.value = final_status

# Banner printed when the cell is run; the second line (in Polish) announces that
# photos are saved automatically every SAVE_COOLDOWN seconds while an object is detected
print("=== YOLO Auto-Save Mode ===")
print(f"Zdjęcia będą zapisywane automatycznie co {SAVE_COOLDOWN}s, gdy wykryto obiekt.")

HBox(children=(Button(button_style='success', description='Start Auto-Detect', style=ButtonStyle()), Button(bu…

Label(value='Ready')

Image(value=b'', format='jpeg', height='600', width='800')

=== YOLO Auto-Save Mode ===
Zdjęcia będą zapisywane automatycznie co 2.0s, gdy wykryto obiekt.
