# üê± Chat Clip Classifier

In [1]:
# üì¶ Installation des d√©pendances
!pip install opencv-python moviepy torch torchvision ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to c:\users\nacer\appdata\local\temp\pip-req-build-ujhwb6br
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'


  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git 'C:\Users\nacer\AppData\Local\Temp\pip-req-build-ujhwb6br'


In [5]:
# üì¶ Imports n√©cessaires
import os
import torch
import clip
from PIL import Image
import cv2
import subprocess


In [23]:
# üè∑Ô∏è D√©finition des comportements du chat (labels CLIP)
labels = [
    "a cat playing with a toy",
    "a cat sleeping curled up",
    "a cat eating from a bowl",
    "a cat grooming itself",
    "a cat walking around",
    "a cat jumping",
    "a cat looking at the camera",
    "a cat hiding under furniture",
    "a cat running fast",
    "a cat sitting still",
    "a cat meowing with its mouth open",
    "a cat interacting affectionately with a human"
]
for label in labels:
    folder = f"clips_output/{label.replace(' ', '_')}"
    os.makedirs(folder, exist_ok=True)

In [25]:
# üß† Chargement du mod√®le CLIP
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
text_tokens = clip.tokenize(labels).to(device)


In [27]:
# üéûÔ∏è Extraction des frames d‚Äôune vid√©o
def extract_frames(video_path, output_folder, frame_rate=1):
    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    interval = int(fps * frame_rate)
    count = 0
    img_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if count % interval == 0:
            frame_path = os.path.join(output_folder, f"frame_{img_count}.jpg")
            cv2.imwrite(frame_path, frame)
            img_count += 1
        count += 1
    cap.release()


In [29]:
# üè∑Ô∏è Classification d‚Äôune frame avec CLIP
def classify_frame(image_path):
    image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)
    with torch.no_grad():
        logits_per_image, _ = model(image, text_tokens)
        probs = logits_per_image.softmax(dim=-1).cpu().numpy()
    return labels[probs.argmax()], probs.max()


In [31]:
# ‚úÇÔ∏è D√©coupe d‚Äôun extrait vid√©o avec ffmpeg
def extract_clip(video_path, start_time, end_time, output_path):
    cmd = [
        "ffmpeg", "-i", video_path,
        "-ss", str(start_time),
        "-to", str(end_time),
        "-c", "copy", output_path
    ]
    subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)


In [33]:
# üß© Traitement complet d'une vid√©o (extraction + classification + d√©coupe)
def process_video(video_path):
    basename = os.path.basename(video_path).split('.')[0]
    frames_folder = f"extracted_frames/{basename}"
    extract_frames(video_path, frames_folder)

    frame_files = sorted(os.listdir(frames_folder))
    if not frame_files:
        print(f"‚ö†Ô∏è Aucune frame extraite pour {basename}, vid√©o ignor√©e.")
        return

    timestamps = []
    for i, filename in enumerate(frame_files):
        label, prob = classify_frame(os.path.join(frames_folder, filename))
        timestamps.append((i, label))

    grouped = []
    current_label = timestamps[0][1]
    start = timestamps[0][0]
    for i in range(1, len(timestamps)):
        if timestamps[i][1] != current_label:
            grouped.append((current_label, start, timestamps[i-1][0]))
            current_label = timestamps[i][1]
            start = timestamps[i][0]
    grouped.append((current_label, start, timestamps[-1][0]))

    for idx, (label, start_frame, end_frame) in enumerate(grouped):
        duration = end_frame - start_frame + 1  # approx en secondes si frame_rate=1

        if duration < 2:
            print(f"‚è© Clip ignor√© ({label}) ‚Äì dur√©e trop courte : {duration} sec")
            continue  # on passe au suivant

        output_name = f"clips_output/{label.replace(' ', '_')}/{basename}_{label.replace(' ', '_')}_{idx}.mp4"
        extract_clip(video_path, start_frame, end_frame + 1, output_name)



In [35]:
# üîÅ Traitement automatique de toutes les vid√©os .mp4 et .mov
for video in os.listdir("input_videos"):
    if video.lower().endswith((".mp4", ".mov")):
        print(f"‚ñ∂Ô∏è Traitement de : {video}")
        process_video(f"input_videos/{video}")


‚ñ∂Ô∏è Traitement de : 003248DE-A118-43DC-8A99-FB8468499114.mp4
‚è© Clip ignor√© (a cat looking at the camera) ‚Äì dur√©e trop courte : 1 sec
‚è© Clip ignor√© (a cat hiding under furniture) ‚Äì dur√©e trop courte : 1 sec
‚è© Clip ignor√© (a cat sleeping curled up) ‚Äì dur√©e trop courte : 1 sec
‚è© Clip ignor√© (a cat hiding under furniture) ‚Äì dur√©e trop courte : 1 sec
‚è© Clip ignor√© (a cat looking at the camera) ‚Äì dur√©e trop courte : 1 sec
‚ñ∂Ô∏è Traitement de : 0DB5E928-8DDF-484C-9F49-960776FB3F83.mp4
‚è© Clip ignor√© (a cat grooming itself) ‚Äì dur√©e trop courte : 1 sec
‚è© Clip ignor√© (a cat grooming itself) ‚Äì dur√©e trop courte : 1 sec
‚è© Clip ignor√© (a cat interacting affectionately with a human) ‚Äì dur√©e trop courte : 1 sec
‚è© Clip ignor√© (a cat grooming itself) ‚Äì dur√©e trop courte : 1 sec
‚ñ∂Ô∏è Traitement de : 0FFB4727-0795-48F8-8335-1B196F4F3962.mp4
‚è© Clip ignor√© (a cat grooming itself) ‚Äì dur√©e trop courte : 1 sec
‚è© Clip ignor√© (a cat sleeping cu

In [1]:
import os
import cv2

input_dir = "input_videos"
output_dir = "clips_output"

# üì¶ 1. Nombre de vid√©os brutes
raw_videos = [f for f in os.listdir(input_dir) if f.lower().endswith((".mp4", ".mov"))]
nb_raw_videos = len(raw_videos)

# üìÅ 2. Parcours des clips g√©n√©r√©s
total_clips = 0
total_duration_sec = 0
clips_per_category = {}

for category in os.listdir(output_dir):
    cat_path = os.path.join(output_dir, category)
    if os.path.isdir(cat_path):
        clips = [f for f in os.listdir(cat_path) if f.endswith(".mp4")]
        clips_per_category[category] = len(clips)
        total_clips += len(clips)
        
        # Calculer la dur√©e totale
        for clip in clips:
            clip_path = os.path.join(cat_path, clip)
            cap = cv2.VideoCapture(clip_path)
            fps = cap.get(cv2.CAP_PROP_FPS)
            frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
            duration = frame_count / fps if fps > 0 else 0
            total_duration_sec += duration
            cap.release()

# ‚è±Ô∏è 3. Moyenne dur√©e clip
avg_duration = total_duration_sec / total_clips if total_clips > 0 else 0

# üìä R√©sum√©
print("üìä Statistiques du traitement vid√©o :\n")
print(f"üé• Vid√©os brutes trait√©es : {nb_raw_videos}")
print(f"üé¨ Clips g√©n√©r√©s : {total_clips}")
print(f"‚è±Ô∏è Dur√©e moyenne par clip : {avg_duration:.1f} sec")
print("üóÇÔ∏è R√©partition par cat√©gorie :")
for cat, count in clips_per_category.items():
    print(f"   - {cat} : {count} clips")


üìä Statistiques du traitement vid√©o :

üé• Vid√©os brutes trait√©es : 358
üé¨ Clips g√©n√©r√©s : 1157
‚è±Ô∏è Dur√©e moyenne par clip : 3.0 sec
üóÇÔ∏è R√©partition par cat√©gorie :
   - a_cat_eating_from_a_bowl : 57 clips
   - a_cat_grooming_itself : 248 clips
   - a_cat_hiding_under_furniture : 146 clips
   - a_cat_interacting_affectionately_with_a_human : 160 clips
   - a_cat_jumping : 113 clips
   - a_cat_looking_at_the_camera : 44 clips
   - a_cat_meowing_with_its_mouth_open : 55 clips
   - a_cat_playing_with_a_toy : 151 clips
   - a_cat_running_fast : 56 clips
   - a_cat_sitting_still : 6 clips
   - a_cat_sleeping_curled_up : 74 clips
   - a_cat_walking_around : 47 clips
