In [None]:
!pip install ultralytics

In [18]:
!pip install imageio[ffmpeg]

Collecting imageio-ffmpeg (from imageio[ffmpeg])
  Obtaining dependency information for imageio-ffmpeg from https://files.pythonhosted.org/packages/a9/1c/1b9c72bf839def47626436ea5ebaf643404f7850482c5fafd71a3deeaa94/imageio_ffmpeg-0.5.1-py3-none-win_amd64.whl.metadata
  Downloading imageio_ffmpeg-0.5.1-py3-none-win_amd64.whl.metadata (1.6 kB)
Downloading imageio_ffmpeg-0.5.1-py3-none-win_amd64.whl (22.6 MB)
   ---------------------------------------- 0.0/22.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/22.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/22.6 MB 991.0 kB/s eta 0:00:23
   ---------------------------------------- 0.1/22.6 MB 1.3 MB/s eta 0:00:18
   ---------------------------------------- 0.2/22.6 MB 1.4 MB/s eta 0:00:17
    --------------------------------------- 0.4/22.6 MB 2.4 MB/s eta 0:00:10
   - -------------------------------------- 0.7/22.6 MB 3.1 MB/s eta 0:00:08
   -- ------------------------------------- 1.6/22.6 MB 5.6 MB/

In [16]:
import os
# Base name (without extension) of the clip being processed; the dataset,
# video and JSON paths in this notebook are all derived from it.
name_video = "DJI_20240308110454_0007_V_1"

def list_files(directory):
    """Return the names of the regular files directly inside ``directory``.

    Sub-directories are excluded and the listing is not recursive. Names are
    returned sorted so the result does not depend on the order in which the
    operating system enumerates the directory.

    Args:
        directory: Path of the directory to inspect.

    Returns:
        A sorted list of file names (not full paths).

    Raises:
        OSError: If ``directory`` does not exist or cannot be read. The error
            is propagated instead of being returned as a string, so a caller
            that takes ``len()`` of the result can never end up counting the
            characters of an error message.
    """
    return sorted(
        entry
        for entry in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, entry))
    )

def list_directories(path='.'):
    """Return the names of the immediate sub-directories of ``path``.

    Only entries for which ``os.path.isdir`` is true are kept; the order is
    whatever ``os.listdir`` yields.
    """
    subdirectories = []
    for entry in os.listdir(path):
        candidate = os.path.join(path, entry)
        if os.path.isdir(candidate):
            subdirectories.append(entry)
    return subdirectories

# Directory that holds the validation frames of the selected clip.
# The path is absolute and specific to the author's machine.
path_mot_dataset = f"C:/Users/dnnxl/Documents/GitHub/drone-sort/dataset/{name_video}/valid/images"
#files_in_directory = [os.path.join(path_mot_dataset, file) for file in list_files(path_mot_dataset)]

In [17]:
# Build the frame paths in numeric order (frame_00001.png, frame_00002.png, ...),
# one path per file found in the dataset directory.
files_sorted = [
    os.path.join(path_mot_dataset, f"frame_{i:05d}.png")
    for i in range(1, len(list_files(path_mot_dataset)) + 1)
]

In [18]:
import cv2

def convert_video_fps(input_video_path, output_video_path, desired_fps):
    """Re-encode a video at ``desired_fps`` by dropping or repeating frames.

    The source is read frame by frame. A running time budget grows by one
    source-frame duration per frame read and shrinks by one output-frame
    duration per frame written, so the output keeps (approximately) the same
    wall-clock length as the input.

    Args:
        input_video_path: Path of the video to read.
        output_video_path: Path of the video to write.
        desired_fps: Target frame rate; must be positive.

    Raises:
        ValueError: If ``desired_fps`` or the frame rate reported by the
            source metadata is not positive.

    Prints the number of frames written and returns ``None``.
    """
    # Validate before touching any file: a non-positive rate would either
    # divide by zero or make the budget loop below never terminate.
    if desired_fps <= 0:
        raise ValueError(f"desired_fps must be positive, got {desired_fps!r}")

    reader = imageio.get_reader(input_video_path, 'ffmpeg')
    try:
        input_fps = reader.get_meta_data()['fps']
        if input_fps <= 0:
            raise ValueError(f"Source video reports a non-positive fps: {input_fps!r}")

        # Duration of one frame at the source and at the target frame rate.
        time_per_frame_input = 1.0 / input_fps
        time_per_frame_output = 1.0 / desired_fps

        writer = imageio.get_writer(output_video_path, fps=desired_fps)
        try:
            frame_count = 0
            accumulated_time = 0.0
            for frame in reader:
                accumulated_time += time_per_frame_input
                # `while` (not `if`): when the target rate is higher than the
                # source rate a single source frame has to be emitted several
                # times. When down-sampling this loop runs at most once per
                # frame, i.e. exactly like a plain `if`.
                while accumulated_time >= time_per_frame_output:
                    writer.append_data(frame)
                    accumulated_time -= time_per_frame_output
                    frame_count += 1
        finally:
            # Always finalise the output file, even if reading/encoding failed.
            writer.close()
    finally:
        # Always release the reader, even on error.
        reader.close()

    print(f"Total frames written: {frame_count}")

import imageio

def get_video_fps(video_path):
    """Return the frame rate stored under ``'fps'`` in the video's metadata.

    Args:
        video_path: Path of the video to inspect.

    Returns:
        The ``'fps'`` value reported by the reader's metadata.
    """
    reader = imageio.get_reader(video_path, 'ffmpeg')
    try:
        return reader.get_meta_data()['fps']
    finally:
        # Release the reader even when the metadata lookup raises.
        reader.close()

def get_total_frames(video_path):
    """Return the number of frames in a video, as counted by the reader.

    Args:
        video_path: Path of the video to inspect.

    Returns:
        The value returned by the reader's ``count_frames()``.
    """
    reader = imageio.get_reader(video_path, 'ffmpeg')
    try:
        return reader.count_frames()
    finally:
        # Release the reader even when counting fails.
        reader.close()

In [19]:
# Example usage: resample the source clip to 25 fps.
input_video = f"C:/Users/dnnxl/Downloads/{name_video}.mp4"  # Source clip (absolute, machine-specific path)
output_video = f"{name_video}.mp4"  # Resampled copy, written relative to the current working directory
convert_video_fps(input_video, output_video, 25)



Total frames written: 762


In [20]:
# Frame count of the source clip (shown as the cell output).
get_total_frames(input_video)

915

In [21]:
# Frame count of the resampled clip (shown as the cell output).
get_total_frames(output_video)

762

In [22]:
from ultralytics import YOLO, RTDETR


# Other weight files, kept here for reference (presumably alternative
# fine-tuned checkpoints for different camera set-ups — confirm before use):
# 45 degrees
# "C:/Users/dnnxl/Documents/GitHub/drone-sort/weights/yolo11nCombined@fine-tuning/best.pt"
# Vertical, line 2
# "C:/Users/dnnxl/Documents/GitHub/drone-sort/weights/yolo11xLine2Vertical@fine-tuning/best_model.pt"
# Vertical, line 1
# "C:/Users/dnnxl/Documents/GitHub/drone-sort/weights/yolo11xLine1Vertical@fine-tuning/best.pt"

# Detector used by the tracking cells in this notebook.
model = YOLO("C:/Users/dnnxl/Documents/GitHub/drone-sort/weights/yolo11n_DJI_0008_V_and_0010_V_2@fine-tuning/best.pt")

In [25]:
import cv2 
import os 

from collections import defaultdict
from typing import List, Dict, Union  

def get_video_duration(path):
    """Return ``(frame_count, duration)`` for the video at ``path``.

    The frame count comes from ``get_total_frames`` and the duration is that
    count divided by the frame rate from ``get_video_fps``.

    Args:
        path: Path of the video file.

    Returns:
        Tuple ``(frame_count, duration)`` where ``duration`` is
        ``frame_count / fps``.

    Raises:
        ValueError: If ``path`` does not exist, or the video reports a
            non-positive frame rate (which would make the duration undefined).
    """
    if not os.path.exists(path):
        raise ValueError("Video file not found: %s" % path)
    # No cv2.VideoCapture is opened here: the previous handle was never read
    # from and never released.
    fps = get_video_fps(path)
    if fps <= 0:
        raise ValueError("Video reports a non-positive frame rate: %s" % path)
    frame_count = get_total_frames(path)
    return frame_count, frame_count / fps

def predict_regions(
    files_in_directory,
    path,
    conf=0.50,
    iou=0.70,
    tracker_name="C:/Users/dnnxl/Documents/GitHub/drone-sort/scripts/botsort.yaml",
):
    """Track objects across a sequence of frame images and build regions.

    Each image is loaded with ``cv2.imread`` and passed to ``model.track``;
    the first result of every call is collected and the whole list is handed
    to ``create_video_rectangles``.

    Args:
        files_in_directory: Iterable of image paths, in playback order.
        path: Path of the video the frames belong to; forwarded to
            ``create_video_rectangles``.
        conf: Confidence threshold forwarded to ``model.track``.
        iou: IoU threshold forwarded to ``model.track``.
        tracker_name: Tracker configuration file forwarded to ``model.track``.

    Returns:
        Whatever ``create_video_rectangles(results, path)`` returns.

    Raises:
        ValueError: If one of the images cannot be read.
    """
    results = []
    for file in files_in_directory:
        frame = cv2.imread(file)
        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising; fail loudly here rather than handing None to the
        # tracker, and rather than skipping (which would shift frame numbers).
        if frame is None:
            raise ValueError(f"Could not read frame image: {file}")
        # persist=True is passed on every call so tracking is expected to
        # continue from the previous frame (see the Ultralytics tracking docs).
        result = model.track(
            frame, conf=conf, iou=iou, tracker=tracker_name, persist=True,
        )
        results.append(result[0])
    return create_video_rectangles(results, path)

def create_video_rectangles(results, path):
    """Group per-frame tracked boxes into one ``videorectangle`` region per track.

    Args:
        results: Sequence of per-frame result objects, in playback order. Each
            must expose ``.boxes`` with ``is_track``, ``id``, ``conf`` and
            ``xywhn``. Frames whose ``boxes.is_track`` is falsy contribute no
            boxes but still advance the frame counter.
        path: Path of the video; used only to obtain the frame count and the
            duration through ``get_video_duration``.

    Returns:
        A list with one region dict per track id. Each region carries the
        video's ``frameCount`` and ``duration``, the track's box ``sequence``
        (after ``process_lifespans_enabled``), the label ``"Pineapple"`` and
        the highest box score of the track.

    Notes:
        Box coordinates are derived from ``boxes.xywhn``, which is treated as
        normalized centre-x, centre-y, width, height: the centre is shifted to
        the top-left corner and everything is scaled by 100.
        NOTE(review): the region layout looks like Label Studio's
        video-rectangle format — confirm against the labeling configuration.
    """
    label = "Pineapple"  # single class exported by this notebook
    frames_count, duration = get_video_duration(path)
    # Seconds covered by one frame; guarded so a video that reports zero
    # frames cannot trigger a division by zero.
    seconds_per_frame = duration / frames_count if frames_count else 0.0

    tracks = defaultdict(list)
    # Frame numbers in the output are 1-based.
    for frame_number, result in enumerate(results, start=1):
        data = result.boxes
        if not data.is_track:
            continue

        for i, track_id in enumerate(data.id.tolist()):
            x, y, w, h = data.xywhn[i].tolist()
            tracks[track_id].append({
                "frame": frame_number,
                "enabled": True,
                "rotation": 0,
                "x": (x - w / 2) * 100,
                "y": (y - h / 2) * 100,
                "width": w * 100,
                "height": h * 100,
                "time": frame_number * seconds_per_frame,
                "score": float(data.conf[i]),
            })

    regions = []
    for sequence in tracks.values():
        # Mark where the track's lifespan is interrupted / ends.
        sequence = process_lifespans_enabled(sequence)
        regions.append({
            "from_name": "box",
            "to_name": "video",
            "type": "videorectangle",
            "value": {
                "frameCount": frames_count,
                "duration": duration,
                "sequence": sequence,
                "labels": [label],
            },
            # A track is scored by its most confident box.
            "score": max(box["score"] for box in sequence),
            "origin": "manual",
        })
    return regions

def process_lifespans_enabled(sequence: List[Dict]) -> List[Dict]:
    """Mark the boxes after which a track's lifespan line must stop.

    Walks consecutive pairs of boxes; whenever the ``"frame"`` values of two
    neighbours differ by more than one, the earlier box gets
    ``"enabled": False``. The last box of the sequence is always set to
    ``"enabled": False``.

    Args:
        sequence: Boxes of one track, ordered by frame. Each box is a dict
            with at least a ``"frame"`` key. The list is modified in place.

    Returns:
        The same list object that was passed in. An empty list is returned
        unchanged instead of raising ``IndexError``.
    """
    if not sequence:
        return sequence

    for previous, current in zip(sequence, sequence[1:]):
        if current["frame"] - previous["frame"] > 1:
            # Gap detected: the lifespan ends at the box before the gap.
            previous["enabled"] = False
    sequence[-1]["enabled"] = False
    return sequence

In [26]:
# Run tracking over the ordered frame images; the resampled video is passed
# so that the frame count and duration can be read from it.
predictions = predict_regions(files_sorted, output_video)


0: 384x640 (no detections), 163.9ms
Speed: 51.8ms preprocess, 163.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 91.5ms
Speed: 0.0ms preprocess, 91.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 121.0ms
Speed: 0.0ms preprocess, 121.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 82.7ms
Speed: 3.7ms preprocess, 82.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 83.5ms
Speed: 2.2ms preprocess, 83.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 96.6ms
Speed: 0.0ms preprocess, 96.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 176.8ms
Speed: 0.0ms preprocess, 176.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 140.0ms
Speed: 3.0ms preprocess, 

In [16]:
import json 

# Wrap the regions in a one-element "predictions" list and dump the payload as
# indented JSON into the current working directory, named after the clip.
filename = f"{name_video}.json"
prediction_entry = {"model_version": "v1", "result": predictions}
json_to_save = {"predictions": [prediction_entry]}
with open(filename, "w") as json_file:
    json_file.write(json.dumps(json_to_save, indent=4))

In [16]:
#path_video = "C:/Users/dnnxl/Downloads/DJI_20240308105544_0002_V_1.mp4"
#predictions = predict_regions(path_video)

video 1/1 (frame 34/392) C:\Users\dnnxl\Downloads\DJI_20240308105544_0002_V_1.mp4: 384x640 10 pineapples, 119.3ms
video 1/1 (frame 35/392) C:\Users\dnnxl\Downloads\DJI_20240308105544_0002_V_1.mp4: 384x640 10 pineapples, 118.4ms
video 1/1 (frame 36/392) C:\Users\dnnxl\Downloads\DJI_20240308105544_0002_V_1.mp4: 384x640 10 pineapples, 119.6ms
video 1/1 (frame 37/392) C:\Users\dnnxl\Downloads\DJI_20240308105544_0002_V_1.mp4: 384x640 12 pineapples, 155.2ms
video 1/1 (frame 38/392) C:\Users\dnnxl\Downloads\DJI_20240308105544_0002_V_1.mp4: 384x640 13 pineapples, 117.0ms
video 1/1 (frame 39/392) C:\Users\dnnxl\Downloads\DJI_20240308105544_0002_V_1.mp4: 384x640 12 pineapples, 108.5ms
video 1/1 (frame 40/392) C:\Users\dnnxl\Downloads\DJI_20240308105544_0002_V_1.mp4: 384x640 12 pineapples, 135.1ms
video 1/1 (frame 41/392) C:\Users\dnnxl\Downloads\DJI_20240308105544_0002_V_1.mp4: 384x640 12 pineapples, 137.5ms
video 1/1 (frame 42/392) C:\Users\dnnxl\Downloads\DJI_20240308105544_0002_V_1.mp4: 384x6

KeyboardInterrupt: 

In [None]:
import cv2
import re
from collections import defaultdict

import numpy as np
# Rows collected in MOT column order:
# frame, id, bb_left, bb_top, bb_width, bb_height, conf, -1, -1, -1
track_history = []
frame_name_pattern = re.compile(r'frame_(\d+)\.png')

# Iterate over the ordered frame list built earlier (`files_sorted`); the name
# `files_in_directory` is never assigned anywhere in this notebook.
for file in files_sorted:
    frame = cv2.imread(file)
    # cv2.imread returns None instead of raising when a file cannot be read.
    if frame is None:
        raise ValueError(f"Could not read frame image: {file}")
    results = model.track(frame, persist=True, tracker="botsort.yaml")

    # No track ids on this frame: nothing to record.
    if results[0].boxes.id is None:
        continue

    # The frame number is taken from the file name. Fail explicitly on an
    # unexpected name instead of silently reusing the previous frame's id.
    match = frame_name_pattern.search(file)
    if match is None:
        raise ValueError(f"Cannot extract a frame number from: {file}")
    frame_id = int(match.group(1))

    for result in results:
        for box in result.boxes:
            left, top, right, bottom = box.xyxy[0].tolist()  # Convert from tensor to list
            # Stored as an int so ids are written as "3" rather than "3.0".
            track_id = int(box.id.item())
            conf = box.conf.item()  # Confidence score
            track_history.append(
                (frame_id, track_id, left, top, right - left, bottom - top, conf, -1, -1, -1)
            )

In [None]:
# Display the collected rows (the whole list is rendered as the cell output).
track_history

In [75]:
# Serialise tracking rows to a comma-separated text file.
def write_mot_format(data, filename="botsort_mot_.txt"):
    """Write every entry of ``data`` as one comma-separated line of ``filename``.

    The file is opened in write mode, so existing content is replaced. Each
    entry is unpacked into a ten-slot template: it must provide at least ten
    positional fields, and only the first ten are written.
    """
    row_template = ",".join(["{}"] * 10) + "\n"
    with open(filename, "w") as handle:
        handle.writelines(row_template.format(*row) for row in data)

# Write the collected rows to the function's default output file.
write_mot_format(track_history)

In [None]:
import cv2
import re
from collections import defaultdict

import numpy as np
# Rows collected as: frame, id, x, y, w, h, 1, -1, -1, -1
# NOTE(review): x and y are the box centre (taken from `boxes.xywh`) and the
# confidence column is a constant 1. Tools that read MOT files usually expect
# the top-left corner in these columns — confirm this layout is intended.
track_history = []
frame_name_pattern = re.compile(r'frame_(\d+)\.png')

# Iterate over the ordered frame list built earlier (`files_sorted`); the name
# `files_in_directory` is never assigned anywhere in this notebook.
for file in files_sorted:
    frame = cv2.imread(file)
    # cv2.imread returns None instead of raising when a file cannot be read.
    if frame is None:
        raise ValueError(f"Could not read frame image: {file}")
    results = model.track(frame, persist=True, tracker="botsort.yaml")

    frame_boxes = results[0].boxes
    # No track ids on this frame: nothing to record.
    if frame_boxes.id is None:
        continue

    # The frame number depends only on the file, so it is parsed once per
    # frame. Fail explicitly on an unexpected name instead of silently reusing
    # the previous frame's id.
    match = frame_name_pattern.search(file)
    if match is None:
        raise ValueError(f"Cannot extract a frame number from: {file}")
    frame_id = int(match.group(1))

    boxes = frame_boxes.xywh.cpu()
    track_ids = frame_boxes.id.int().cpu().tolist()
    for box, track_id in zip(boxes, track_ids):
        x, y, w, h = box
        track_history.append(
            (frame_id, track_id, float(x), float(y), float(w), float(h), 1, -1, -1, -1)
        )  # x, y center point

In [None]:
# Dump tracking rows as comma-separated text, one row per line.
def write_mot_format(data, filename="botsort_mot.txt"):
    """Write every entry of ``data`` as one comma-separated line of ``filename``.

    The target file is overwritten. Entries are unpacked into ten ``{}``
    slots, so each must supply at least ten positional fields; anything past
    the tenth is ignored.
    """
    field_slots = ",".join("{}" for _ in range(10))
    with open(filename, "w") as out:
        for record in data:
            out.write(field_slots.format(*record) + "\n")

# Write the collected rows to the function's default output file.
write_mot_format(track_history)

In [None]:
# Display the collected rows (the whole list is rendered as the cell output).
track_history