<a href="https://colab.research.google.com/github/guy-meld-ai/meld-ml/blob/main/MELD_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.4.10-py3-none-any.whl.metadata (38 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.4.10-py3-none-any.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.18-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.4.10 ultralytics-thop-2.0.18


In [1]:
# Clone the project repo into ./meld-ml; a later cell loads
# /content/meld-ml/yolo_v26n_Dog.pt, presumably from this checkout.
!git clone https://github.com/guy-meld-ai/meld-ml.git

Cloning into 'meld-ml'...
remote: Enumerating objects: 3, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (2/2), done.[K
remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (3/3), 4.58 MiB | 17.78 MiB/s, done.


In [104]:
import cv2
import numpy as np
import json
import os
import base64
from ultralytics import YOLO
import sys
from google.colab import userdata
from datetime import datetime, timedelta

# Build the module-level OpenAI `client` used for the LLM frame descriptions.
# The API key is read from the Colab secret named 'OPEN_AI' and mirrored into the
# OPENAI_API_KEY environment variable before the client is constructed.
try:
    from openai import OpenAI
    api_key = userdata.get('OPEN_AI')
    if not api_key:
        # Fail here with a clear message instead of on the first API call.
        raise ValueError("Colab secret 'OPEN_AI' is empty")
    os.environ['OPENAI_API_KEY'] = api_key
    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
    print("OpenAI client initialized.")
except ImportError:
    print("Error: 'openai' library not found. Please run 'pip install openai'")
    sys.exit(1)
except Exception as e:
    print(f"Error initializing OpenAI client: {e}")
    print("Please make sure the Colab secret 'OPEN_AI' is set to a valid OpenAI API key.")
    sys.exit(1)

class FrameQualityFilter:
    """Rejects frames that are badly exposed, low-contrast, blurry, or static.

    The grayscale version of the last frame that passed the exposure, contrast and
    blur checks is kept in ``prev_gray`` and used as the reference for the motion check.
    """

    def __init__(self, min_mean=20, max_mean=235, blur_threshold=60, std_threshold=5,
                 motion_threshold=0.005, failure_ratio_limit=0.5):
        """
        Args:
            min_mean: Lowest acceptable mean gray level (below -> "Bad Exposure").
            max_mean: Highest acceptable mean gray level (above -> "Bad Exposure").
            blur_threshold: Minimum variance of the Laplacian (below -> "Blurry").
            std_threshold: Minimum gray-level standard deviation (below -> "Low Contrast").
            motion_threshold: Minimum fraction of pixels that must change versus the
                previous accepted frame (below -> "Static").
            failure_ratio_limit: Stored on the instance; not read by this class.
        """
        self.min_mean = min_mean
        self.max_mean = max_mean
        self.blur_threshold = blur_threshold
        self.std_threshold = std_threshold
        self.motion_threshold = motion_threshold
        self.failure_ratio_limit = failure_ratio_limit
        self.prev_gray = None

    def check_quality(self, frame):
        """Return ``(ok, reason)`` for a BGR frame.

        ``ok`` is False with a reason of "Empty", "Bad Exposure (...)",
        "Low Contrast (...)", "Blurry (...)" or "Static (...)"; otherwise
        ``(True, "Valid")``. Frames rejected for exposure, contrast or blur do not
        replace the motion reference; static frames do.
        """
        if frame is None:
            return False, "Empty"

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        mean_val, std_val = np.mean(gray), np.std(gray)

        if mean_val < self.min_mean or mean_val > self.max_mean:
            return False, f"Bad Exposure ({mean_val:.1f})"
        if std_val < self.std_threshold:
            return False, f"Low Contrast ({std_val:.1f})"

        blur_score = cv2.Laplacian(gray, cv2.CV_64F).var()
        if blur_score < self.blur_threshold:
            return False, f"Blurry ({blur_score:.1f})"

        is_moving, reason = True, "Valid"
        # Only diff against a reference of the same resolution: the filter instance can
        # outlive a video, and cv2.absdiff raises on mismatched shapes. A frame with a
        # new resolution is treated as valid and becomes the new reference.
        if self.prev_gray is not None and self.prev_gray.shape == gray.shape:
            diff = cv2.absdiff(self.prev_gray, gray)
            # Pixels whose gray level moved by more than 25 count as changed.
            _, thresh = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY)
            changed_ratio = np.count_nonzero(thresh) / float(gray.size)
            if changed_ratio < self.motion_threshold:
                is_moving, reason = False, f"Static ({changed_ratio*100:.3f}% change)"

        self.prev_gray = gray
        return is_moving, reason

class DogBehaviorPipeline:
    """Runs frames through a quality filter, a YOLO detector and, when triggered, an LLM.

    State is kept on the instance across frames and chunks: the last confirmed label,
    the candidate label being stabilized, the number of frames since the last LLM call,
    the last YOLO detection and the number of consecutive static frames that were "Kept".
    """

    def __init__(self, model_path, llm_model, quality_filter, question = "What the dog is doing?", heartbeat_interval=10, stability_threshold=2,
                 iou_threshold=0.3, max_consecutive_kept=10):
        """
        Args:
            model_path: Path handed to ``YOLO(...)``.
            llm_model: Model name passed to the OpenAI Responses call.
            quality_filter: Object exposing ``check_quality(frame) -> (ok, reason)``.
            question: User prompt sent to the LLM together with the frame.
            heartbeat_interval: Number of Valid/Kept frames without an LLM call after
                which an LLM call is forced.
            stability_threshold: Count a new candidate label must reach before it
                replaces the confirmed label (1 is used for a first detection).
            iou_threshold: When the best box overlaps the previous detection's box by
                less than this IoU, the frame is relabelled "walk" and the LLM is triggered.
            max_consecutive_kept: After this many consecutive "Kept" static frames, the
                next static frame is re-run through YOLO instead of being kept.
        """
        self.model = YOLO(model_path)
        self.llm_model = llm_model
        self.filter = quality_filter
        self.question = question
        self.heartbeat_interval = heartbeat_interval
        self.stability_threshold = stability_threshold
        self.iou_threshold = iou_threshold
        self.max_consecutive_kept = max_consecutive_kept

        self.last_confirmed_label = None
        self.current_candidate = None
        self.stability_counter = 0
        self.frames_since_llm = 0

        # State for repetition
        self.last_yolo_output = None
        self.last_llm_description = None

        # Safety: Track consecutive static 'Kept' frames
        self.consecutive_kept_count = 0

    def get_best_detection(self, results):
        """Return the highest-confidence box of the first result as a dict, or None.

        The dict has keys ``bbox`` ([x1, y1, x2, y2]), ``conf`` (rounded to 3 places)
        and ``label`` (looked up in ``self.model.names``). Only the first element of
        ``results`` is inspected.
        """
        for r in results:
            if len(r.boxes) == 0:
                return None
            idx = int(r.boxes.conf.argmax())
            return {
                "bbox": r.boxes.xyxy[idx].tolist(),
                "conf": round(float(r.boxes.conf[idx]), 3),
                "label": self.model.names[int(r.boxes.cls[idx])]
            }
        return None

    def calculate_iou(self, box1, box2):
        """Intersection-over-union of two ``[x1, y1, x2, y2]`` boxes; 0 when the union is empty."""
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])

        # Clamp at zero so disjoint boxes contribute no overlap.
        intersection = max(0, x2 - x1) * max(0, y2 - y1)
        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

        union = area1 + area2 - intersection
        if union == 0:
            return 0
        return intersection / union

    def analyze_frame_with_llm(self, frame_data):
        """JPEG-encode the frame, send it with ``self.question`` to the LLM, return its text.

        Uses the module-level ``client``. Failures are not raised: a string starting
        with "Error analyzing frame:" is returned instead.
        """
        ok, buffer = cv2.imencode('.jpg', frame_data)
        if not ok:
            # imencode reports failure through its flag rather than an exception.
            return "Error analyzing frame: JPEG encoding failed"
        base64_image = base64.b64encode(buffer).decode('utf-8')
        data_url = f"data:image/jpeg;base64,{base64_image}"

        try:
            response = client.responses.create(
                model=self.llm_model,
                input=[
                    {
                        "role": "system",
                        "content": "You are an expert canine behaviorist. Answer with objective information only, make no guesses or assumptions."
                    },
                    {
                        "role": "user",
                        "content": [
                            {"type": "input_text", "text": self.question},
                            {"type": "input_image", "image_url": data_url, "detail": "high"}
                        ]
                    }
                ],
                text={
                    "verbosity": "low"
                }
            )
            return response.output_text
        except Exception as e:
            return f"Error analyzing frame: {e}"

    def process_chunk(self, frame_array, chunk_start_dt, fps):
        """Process one chunk of frames and return ``(frame_entries, dropped_ratio)``.

        Args:
            frame_array: Sequence of frames for this chunk.
            chunk_start_dt: ``datetime`` of the chunk start; frame ``i`` is stamped
                ``chunk_start_dt + i / fps``.
            fps: Rate at which the frames in ``frame_array`` were sampled.

        Returns:
            A list with one dict per frame (``timestamp``, ``status`` of "Valid",
            "Kept" or "Dropped", ``yolo_output``, ``llm_description`` and, for dropped
            frames, ``reason``) and the fraction of frames that were dropped.
            An empty ``frame_array`` yields ``([], 0.0)``.
        """
        # Nothing sampled for this chunk: avoid dividing by zero in the drop ratio.
        if len(frame_array) == 0:
            return [], 0.0

        chunk_data = []
        dropped = 0

        for i, frame in enumerate(frame_array):
            offset_seconds = i / fps
            current_dt = chunk_start_dt + timedelta(seconds=offset_seconds)
            timestamp_str = current_dt.strftime("%H:%M:%S.%f")[:-4] # HH:MM:SS.mm

            is_valid, reason = self.filter.check_quality(frame)

            # --- Force re-check on first frame of chunk ---
            if i == 0 and not is_valid and "Static" in reason:
                is_valid = True
                reason = "Chunk Start Recheck"
            # ----------------------------------------------

            # --- Force re-check if static safety limit reached ---
            if not is_valid and "Static" in reason and self.consecutive_kept_count >= self.max_consecutive_kept:
                is_valid = True
                reason = "Static Safety Recheck"
            # ----------------------------------------------

            frame_entry = {
                "timestamp": timestamp_str,
                "status": "Valid",
                "yolo_output": None,
                "llm_description": None
            }

            trigger_llm = False

            if is_valid:
                results = self.model(frame, verbose=False)
                best = self.get_best_detection(results)
                current_label = best['label'] if best else "no_dog"
                frame_entry["yolo_output"] = best

                # --- IoU Trigger Check ---
                # A large jump of the box relative to the previous detection is treated
                # as movement: the label is overridden (also in frame_entry, which
                # shares the same dict) and the LLM is triggered.
                if best and self.last_yolo_output:
                    iou = self.calculate_iou(best['bbox'], self.last_yolo_output['bbox'])
                    if iou < self.iou_threshold:
                        trigger_llm = True
                        best['label'] = "walk"
                        current_label = "walk"
                # -------------------------

                # Update state
                self.last_yolo_output = best
                self.consecutive_kept_count = 0

                if current_label != self.last_confirmed_label:
                    if current_label == self.current_candidate:
                        self.stability_counter += 1
                    else:
                        self.current_candidate = current_label
                        self.stability_counter = 1

                    # Check for immediate trigger condition (Initial detection)
                    is_initial = (self.last_confirmed_label is None or self.last_confirmed_label == "no_dog") and current_label != "no_dog"
                    threshold = 1 if is_initial else self.stability_threshold

                    if self.stability_counter >= threshold:
                        self.last_confirmed_label = current_label
                        trigger_llm = True

            elif "Static" in reason and self.last_yolo_output is not None:
                # Increment safety counter
                self.consecutive_kept_count += 1

                # Special handling for Static frames to repeat behavior
                frame_entry["status"] = "Kept"
                frame_entry["yolo_output"] = self.last_yolo_output

            else:
                frame_entry["status"] = "Dropped"
                frame_entry["reason"] = reason
                dropped += 1

            # --- Common LLM Logic for Valid and Kept frames ---
            if frame_entry["status"] in ["Valid", "Kept"]:
                self.frames_since_llm += 1

                # Heartbeat check
                if self.frames_since_llm >= self.heartbeat_interval:
                    trigger_llm = True

                # Only call LLM if trigger is Active AND we have a valid YOLO detection
                if trigger_llm and frame_entry["yolo_output"] is not None:
                    desc = self.analyze_frame_with_llm(frame)
                    frame_entry["llm_description"] = desc
                    self.last_llm_description = desc
                    self.frames_since_llm = 0
            # --------------------------------------------------

            chunk_data.append(frame_entry)

        return chunk_data, (dropped / len(frame_array))

def run_video_to_json(video_path, pipeline, output_dir="behavior_analysis_output", chunk_seconds=30, target_fps=None, video_start_time="00:00:00"):
    """Run ``pipeline`` over a video in fixed-length chunks, writing one JSON file per chunk.

    Args:
        video_path: Path passed to ``cv2.VideoCapture``.
        pipeline: Object exposing ``process_chunk(frames, chunk_start_dt, fps)`` that
            returns ``(frame_entries, fail_rate)``.
        output_dir: Directory for the per-chunk JSON files (created if missing).
        chunk_seconds: Chunk length in seconds of video time.
        target_fps: Approximate sampling rate; ``None`` or a non-positive value
            processes every frame.
        video_start_time: "HH:MM:SS" time of day assigned to the first frame; an
            unparsable value falls back to 00:00:00 with a warning.

    Each file is named after the chunk's start time ("HH-MM-SS.json") and holds
    ``chunk_id``, ``start_time``, ``quality_fail_rate``, ``processing_fps`` and
    ``frames``. The capture is released even if processing raises.
    """
    os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Say so explicitly instead of silently reporting a finished workflow.
            print(f"Error: could not open video: {video_path}")
            return

        video_fps = cap.get(cv2.CAP_PROP_FPS)
        if video_fps <= 0: video_fps = 30

        # Parse start time
        try:
            start_dt = datetime.strptime(video_start_time, "%H:%M:%S")
        except ValueError:
            print("Warning: Invalid start time format. Using 00:00:00")
            start_dt = datetime.strptime("00:00:00", "%H:%M:%S")

        # Calculate step for frame skipping
        step = 1
        processing_fps = video_fps
        if target_fps is not None and target_fps > 0:
            step = max(1, int(round(video_fps / target_fps)))
            processing_fps = video_fps / step
            print(f"Processing at ~{processing_fps:.2f} FPS (Video FPS: {video_fps:.2f}, Step: {step})")

        frames_per_chunk_video = int(video_fps * chunk_seconds)
        chunk_index = 0

        while cap.isOpened():
            frames_to_process = []
            frames_read_count = 0

            # Collect frames covering the duration of 'chunk_seconds'
            while frames_read_count < frames_per_chunk_video:
                ret, frame = cap.read()
                if not ret: break

                # Maintain continuity of step count across chunks relative to global grid
                global_frame_idx = (chunk_index * frames_per_chunk_video) + frames_read_count

                if global_frame_idx % step == 0:
                    frames_to_process.append(frame)

                frames_read_count += 1

            if frames_read_count == 0: break

            # Calculate current chunk start time (Time of Day)
            chunk_offset_seconds = chunk_index * chunk_seconds
            current_chunk_dt = start_dt + timedelta(seconds=chunk_offset_seconds)

            print(f"--> Processing Chunk {chunk_index} (Time: {current_chunk_dt.strftime('%H:%M:%S')})...")

            if frames_to_process:
                results, fail_rate = pipeline.process_chunk(frames_to_process, current_chunk_dt, processing_fps)
            else:
                # A short trailing chunk can read frames without any of them landing
                # on the sampling grid; record it as an empty chunk.
                results, fail_rate = [], 0.0

            # Include start time in filename
            time_label = current_chunk_dt.strftime("%H-%M-%S")
            chunk_filename = os.path.join(output_dir, f"{time_label}.json")

            output_payload = {
                "chunk_id": chunk_index,
                "start_time": current_chunk_dt.strftime("%H:%M:%S"),
                "quality_fail_rate": round(fail_rate, 3),
                "processing_fps": round(processing_fps, 2),
                "frames": results
            }

            with open(chunk_filename, 'w') as f:
                json.dump(output_payload, f, indent=4)

            chunk_index += 1
    finally:
        cap.release()

    print("Workflow finished.")

OpenAI client initialized.


In [None]:
# Frame quality gate with default thresholds. failure_ratio_limit is stored on the
# filter but FrameQualityFilter never reads it.
filter_obj = FrameQualityFilter(failure_ratio_limit=0.5)
# Pipeline wiring: YOLO weights from /content/meld-ml, "gpt-5.1" as the LLM model name.
# heartbeat_interval=15 forces an LLM call once 15 Valid/Kept frames pass without one;
# stability_threshold=2 is the count a changed label must reach before it is confirmed.
pipe = DogBehaviorPipeline("/content/meld-ml/yolo_v26n_Dog.pt", "gpt-5.1", filter_obj, question = "What the dog is doing?", heartbeat_interval=15, stability_threshold=2)

In [None]:
# Process the video in 30-second chunks, sampling roughly one frame per second, and
# write one JSON file per chunk into output_dir.
# NOTE(review): both paths live under /content/drive, which assumes Google Drive is
# already mounted; no mount cell is visible in this notebook — confirm.
run_video_to_json(
    "/content/drive/MyDrive/RingXMELD/Product/POC/Videos/DONE/Annika Bremhorst/vid8.mp4",
    pipe,
    output_dir="/content/drive/MyDrive/RingXMELD/AI/Experiments/video_run",
    chunk_seconds=30,
    target_fps=1,
    video_start_time="00:00:00")

In [110]:
import os
import glob

# Aggregate the per-chunk JSON files written by run_video_to_json and print summary
# statistics plus the timeline of LLM descriptions.
# Relies on `json` and `np` imported in an earlier cell.
output_dir = "/content/drive/MyDrive/RingXMELD/AI/Experiments/video_run"
# Chunk files are named HH-MM-SS.json, so a lexical sort is also chronological.
json_files = sorted(glob.glob(os.path.join(output_dir, "*.json")))

# Running totals across all chunk files.
total_frames = 0
dog_frames = 0        # frames that carry a yolo_output
kept_frames = 0       # frames with status 'Kept'
confidences = []      # yolo_output['conf'] of every frame that has a detection
timeline = []         # {"time", "description"} for every frame with an llm_description
behavior_counts = {}  # yolo label -> number of frames

print(f"Found {len(json_files)} chunk files in {output_dir}.")

for jf in json_files:
    # A failure while reading one file is reported and the remaining files are still
    # processed; frames counted before the failure stay in the totals.
    try:
        with open(jf, 'r') as f:
            data = json.load(f)

        for frame in data.get('frames', []):
            total_frames += 1

            if frame.get('status') == 'Kept':
                kept_frames += 1

            # Check for YOLO detection
            if frame.get('yolo_output'):
                dog_frames += 1
                confidences.append(frame['yolo_output']['conf'])

                # Aggregate Behavior Counts
                label = frame['yolo_output']['label']
                behavior_counts[label] = behavior_counts.get(label, 0) + 1

            # Check for LLM description
            if frame.get('llm_description'):
                desc = frame['llm_description']
                # Simple check to filter out obvious API errors from the timeline display if desired
                # if "Error" not in desc:
                timeline.append({
                     "time": frame['timestamp'],
                     "description": desc
                 })
    except Exception as e:
        print(f"Error reading {jf}: {e}")

# --- Display Statistics ---
avg_conf = np.mean(confidences) if confidences else 0
# Frames without a yolo_output; this includes 'Dropped' frames, not only valid frames
# where nothing was detected.
none_count = total_frames - dog_frames

print("\n" + "="*40)
print("       DOG BEHAVIOR ANALYTICS       ")
print("="*40)
print(f"Total Frames Processed: {total_frames}")
print(f"Frames with Dog Detected: {dog_frames} ({ (dog_frames/total_frames*100) if total_frames else 0:.1f}%)")
print(f"Frames with 'Kept' Status: {kept_frames}")
print(f"Average Confidence Score: {avg_conf:.3f}")

print("-" * 20)
print("YOLO Behavior Distribution (Including 'None'):")
# Most frequent label first.
sorted_behaviors = sorted(behavior_counts.items(), key=lambda x: x[1], reverse=True)

# Percentages relative to all frames.
if total_frames > 0:
    for label, count in sorted_behaviors:
        pct = (count / total_frames) * 100
        print(f"  - {label}: {count} ({pct:.1f}%)")

    pct_none = (none_count / total_frames) * 100
    print(f"  - None (No Detection): {none_count} ({pct_none:.1f}%)")
else:
    print("  No frames processed.")

print("-" * 20)
print("YOLO Behavior Distribution (Excluding 'None'):")
# Percentages relative to frames that have a detection.
if dog_frames > 0:
    for label, count in sorted_behaviors:
        pct = (count / dog_frames) * 100
        print(f"  - {label}: {count} ({pct:.1f}%)")
else:
    print("  No dog behaviors detected.")

print("="*40)

# --- Display Timeline ---
print("\nBehavior Timeline:")
print("-"*40)
if not timeline:
    print("No behavior descriptions recorded.")
else:
    for event in timeline:
        # Adjusted format to handle string timestamps (HH:MM:SS.mm)
        print(f"[{event['time']}] {event['description']}")
print("-"*40)

Running analysis verification (IoU override)...
Found 5 chunk files in /content/drive/MyDrive/RingXMELD/AI/Experiments/video_run.

       DOG BEHAVIOR ANALYTICS       
Total Frames Processed: 123
Frames with Dog Detected: 21 (17.1%)
Frames with 'Kept' Status: 2
Average Confidence Score: 0.644
--------------------
YOLO Behavior Distribution (Including 'None'):
  - unknown: 9 (7.3%)
  - sniff: 7 (5.7%)
  - walk: 3 (2.4%)
  - stand: 2 (1.6%)
  - None (No Detection): 102 (82.9%)
--------------------
YOLO Behavior Distribution (Excluding 'None'):
  - unknown: 9 (42.9%)
  - sniff: 7 (33.3%)
  - walk: 3 (14.3%)
  - stand: 2 (9.5%)

Behavior Timeline:
----------------------------------------
[00:00:51.01] The dog is standing on the rug, head lowered toward the floor, appearing to sniff or investigate the area in front of its front paws.
[00:00:57.02] The dog is standing or sitting still near the bottom center of the image, facing toward the room.
[00:01:08.00] The dog is standing near the left