In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
import tensorflow_hub as hub
import yt_dlp as youtube_dl
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

# Directory where download_video_segment writes the downloaded video and the
# extracted segment.
# NOTE(review): this is a *relative* path, so it resolves against the current
# working directory. Kaggle's writable directory is usually "/kaggle/working";
# confirm whether a leading slash was intended.
VIDEO_SAVE_PATH = "kaggle/working/videos"



In [None]:

def download_video_segment(video_id, start_time, end_time):
    """Download a YouTube video with yt-dlp and cut one segment out of it.

    The full video is saved to ``VIDEO_SAVE_PATH/<video_id>.mp4``, the span
    ``[start_time, end_time]`` is extracted to
    ``VIDEO_SAVE_PATH/<video_id>_segment.mp4``, and the full download is
    removed afterwards (whether or not extraction succeeded).

    Args:
        video_id: YouTube video id (the ``v=`` URL parameter).
        start_time: Segment start, passed through to ``ffmpeg_extract_subclip``.
        end_time: Segment end, passed through to ``ffmpeg_extract_subclip``.

    Returns:
        Path of the extracted segment file, or ``None`` if any step failed
        (the error is printed, not raised).
    """
    video_path = os.path.join(VIDEO_SAVE_PATH, f"{video_id}.mp4")
    segment_path = os.path.join(VIDEO_SAVE_PATH, f"{video_id}_segment.mp4")
    try:
        # Make sure the target directory exists before anything writes to it.
        os.makedirs(VIDEO_SAVE_PATH, exist_ok=True)

        # Configure yt-dlp options
        ydl_opts = {
            'format': 'bestvideo+bestaudio/best',  # Download best video and audio
            # Separate video/audio streams get merged; without this the merged
            # container may be .mkv/.webm and the hard-coded .mp4 path below
            # would not exist.
            'merge_output_format': 'mp4',
            'outtmpl': os.path.join(VIDEO_SAVE_PATH, f"{video_id}.%(ext)s"),  # Set save path template
            'noplaylist': True,  # Don't download playlists
            'quiet': False,  # Show logs
        }

        # Download the video using yt-dlp
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([f'https://www.youtube.com/watch?v={video_id}'])

        if not os.path.exists(video_path):
            raise FileNotFoundError(
                f"expected downloaded file not found: {video_path}"
            )

        # The output path is passed positionally because the keyword name
        # differs between MoviePy releases (targetname vs. outputfile).
        ffmpeg_extract_subclip(video_path, start_time, end_time, segment_path)
        return segment_path
    except Exception as e:
        print(f"Error downloading video {video_id}: {e}")
        return None
    finally:
        # Best-effort cleanup of the full download, also on failure, so that
        # failed runs do not leave large files behind.
        if os.path.exists(video_path):
            try:
                os.remove(video_path)
            except OSError as cleanup_error:
                print(f"Could not remove {video_path}: {cleanup_error}")


def load_i3d_model():
    """Create a frozen TensorFlow Hub layer for the pretrained I3D module.

    Returns:
        A ``hub.KerasLayer`` wrapping the module at
        ``https://tfhub.dev/deepmind/i3d-kinetics-400/1``, built with
        ``trainable=False`` and ``input_shape=(None, 224, 224, 3)``.
    """
    return hub.KerasLayer(
        "https://tfhub.dev/deepmind/i3d-kinetics-400/1",
        trainable=False,
        input_shape=(None, 224, 224, 3),
    )


def preprocess_video(video_path, num_frames=16, frame_size=(224, 224)):
    """Sample evenly spaced frames from a video and prepare them for I3D.

    Args:
        video_path: Path of the video file to read with OpenCV.
        num_frames: Number of frame positions to sample, evenly spaced between
            the first and last frame (default 16).
        frame_size: ``(width, height)`` each frame is resized to
            (default ``(224, 224)``).

    Returns:
        A ``float32`` array of shape ``(n, height, width, 3)`` holding RGB
        frames divided by 255.0. ``n`` is ``num_frames`` unless a read fails
        part-way, in which case sampling stops early. If the video cannot be
        opened or reports no frames, ``n`` is 0.
    """
    frames = []
    video = cv2.VideoCapture(video_path)
    try:
        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

        # An unopened capture reports a non-positive frame count; skip sampling
        # instead of seeking to meaningless indices.
        if video.isOpened() and frame_count > 0:
            frame_indices = np.linspace(0, frame_count - 1, num=num_frames, dtype=int)

            for idx in frame_indices:
                video.set(cv2.CAP_PROP_POS_FRAMES, int(idx))  # Jump to the specific frame
                ret, frame = video.read()
                if not ret:
                    break
                frame_resized = cv2.resize(frame, frame_size)  # Resize for model input
                # OpenCV decodes to BGR; the model expects RGB channel order.
                frames.append(cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB))
    finally:
        # Always free the decoder / file handle, even if reading raised.
        video.release()

    if not frames:
        # Keep a well-formed, empty (n, H, W, C) array so callers can check
        # ``len(frames)`` rather than receiving a shape-(0,) array.
        width, height = frame_size
        return np.empty((0, height, width, 3), dtype=np.float32)

    # Scale pixel values by 1/255 and ensure the dtype the model expects.
    return (np.array(frames) / 255.0).astype(np.float32)


def predict_action(video_path, model):
    """Run ``model`` on the preprocessed frames of one video.

    Args:
        video_path: Path of the video file, forwarded to ``preprocess_video``.
        model: Callable (e.g. the layer from ``load_i3d_model``) applied to the
            batched frames.

    Returns:
        Whatever ``model`` returns for the single-video batch.
    """
    clip = preprocess_video(video_path)
    # Prepend a batch axis so the model receives a batch of one clip.
    batch = clip[np.newaxis, ...]
    return model(batch)

In [2]:
#Running example video

video_id = '-5KQ66BBWC4'
clip_duration = 2  # length of the extracted clip, same unit as timestamp
timestamp = 922  # position in the video the clip is centred on
start_time = max(0, timestamp - clip_duration // 2)  # Ensure non-negative start time
end_time = start_time + clip_duration
video_path = download_video_segment(video_id, start_time, end_time)
if video_path:
    # Only report success when a segment path was actually returned;
    # download_video_segment returns None on failure.
    print("video saved")
    model = load_i3d_model()
    prediction = predict_action(video_path, model)
    print(f"Predicted Action: {prediction}")
else:
    print(f"Video {video_id} could not be downloaded; skipping prediction.")