In [None]:
!pip install youtube_dl

In [None]:
!python3 -m pip install -U yt-dlp

In [None]:
!pip install opencv-python

In [None]:
!pip install torch

In [None]:
!pip install librosa

In [None]:
!pip install numpy

In [None]:
!pip install torchvision

In [None]:
!pip install ffmpeg

In [None]:
# Error fix: in youtube.py (youtube_dl's YouTube extractor), change the 'uploader_id' entry to:
#   'uploader_id': self._search_regex(r'/(?:channel/|user/|@)([^/?&#]+)', owner_profile_url, 'uploader id', default=None),

In [None]:
#youtube_ids = ['lJiyXBRglgs','q7tqLBK5HTQ','-8zo9XKvnEs','ktnmaWrxCww']

In [None]:
import cv2
import youtube_dl
import torch
import os
import librosa
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

def download_video(video_url, output_path):
    """Download a video (height capped at 480) with youtube_dl.

    Args:
        video_url: URL handed to ``YoutubeDL.download``.
        output_path: Value used as youtube_dl's ``outtmpl`` output template.

    Returns:
        None. The download is performed for its side effect on disk.
    """
    ydl_opts = {
        'format': 'bestvideo[height<=480]+bestaudio/best[height<=480]',
        'outtmpl': output_path,
    }

    # The format selector may pick separate video and audio streams that get
    # merged afterwards. Without an explicit merge container youtube_dl chooses
    # one itself and adjusts the file extension, so the result may not end up
    # at ``output_path``. Pin the container to the requested extension when it
    # is one youtube_dl documents for merging; otherwise keep the defaults.
    merge_containers = ('mkv', 'mp4', 'ogg', 'webm', 'flv')
    container = os.path.splitext(output_path)[1].lstrip('.')
    if container in merge_containers:
        ydl_opts['merge_output_format'] = container

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video_url])

def extract_central_frame(video_path, output_path):
    """Save the middle frame of a video as an image file.

    Args:
        video_path: Path of the video opened with ``cv2.VideoCapture``.
        output_path: Destination passed to ``cv2.imwrite``.

    Returns:
        True when a frame was read and ``cv2.imwrite`` reported success,
        False otherwise. A status message is printed in both cases.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Guard against a negative reported frame count so the seek target
        # is never below the first frame.
        central_frame_index = max(total_frames // 2, 0)

        cap.set(cv2.CAP_PROP_POS_FRAMES, central_frame_index)
        ret, frame = cap.read()

        # Only claim success when the write itself succeeded; the write is
        # skipped entirely when no frame could be read.
        saved = bool(ret) and bool(cv2.imwrite(output_path, frame))
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()

    if saved:
        print(f"Central frame extracted and saved to {output_path}")
    else:
        print("Failed to extract central frame")

    return saved

def extract_audio(video_url, output_path):
    """Download the audio of a video and convert it to WAV via youtube_dl.

    Args:
        video_url: URL handed to ``YoutubeDL.download``.
        output_path: Target path. A trailing ``.wav`` is swapped for
            youtube_dl's ``%(ext)s`` placeholder in the output template;
            any other path is used as the template unchanged.

    Returns:
        None. The download and conversion happen for their side effects.
    """
    wav_suffix = '.wav'
    # Rewrite only the trailing suffix: replacing every ".wav" occurrence
    # would also mangle directory names or stems that contain ".wav".
    if output_path.endswith(wav_suffix):
        outtmpl = output_path[:-len(wav_suffix)] + '.%(ext)s'
    else:
        outtmpl = output_path

    ydl_opts = {
        'format': 'bestaudio/best',
        # The raw download keeps its own extension; the post-processor below
        # is configured to produce the WAV file afterwards.
        'outtmpl': outtmpl,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video_url])

class YouTubeDataset(Dataset):
    """Dataset that yields the central frame and the audio of YouTube videos.

    Every item is produced on demand in ``__getitem__``: the audio and the
    video are downloaded into temporary files in the current working
    directory, loaded into memory, and the temporary files are removed again.
    """

    def __init__(self, video_ids_file, transform=None):
        """Read the video IDs and remember the optional frame transform.

        Args:
            video_ids_file: Path to a text file with one video ID per line.
            transform: Optional callable applied to the RGB frame.
        """
        self.video_ids = self.load_video_ids(video_ids_file)
        self.transform = transform

    def load_video_ids(self, video_ids_file):
        """Return the non-empty, whitespace-stripped lines of the file.

        Blank lines (for example a trailing newline at the end of the file)
        are skipped so they do not turn into empty video IDs.
        """
        with open(video_ids_file, 'r') as file:
            stripped_lines = (line.strip() for line in file)
            return [video_id for video_id in stripped_lines if video_id]

    def __len__(self):
        """Return the number of video IDs."""
        return len(self.video_ids)

    def __getitem__(self, idx):
        """Download video ``idx`` and return its central frame and audio.

        Returns:
            A dict with keys ``'frame'`` (the RGB frame, after ``transform``
            if one was given) and ``'audio'`` (the signal returned by
            ``librosa.load`` at the file's native sampling rate).
        """
        video_id = self.video_ids[idx]
        video_url = f'https://youtu.be/{video_id}'
        video_output_path = f'temp_video_{video_id}.mp4'
        frame_output_path = f'output_frame_{video_id}.jpg'
        audio_output_path = f'output_audio_{video_id}.wav'

        try:
            # Extract audio and read it back
            extract_audio(video_url, audio_output_path)
            audio, _ = librosa.load(audio_output_path, sr=None)

            # Download video and extract central frame
            download_video(video_url, video_output_path)
            extract_central_frame(video_output_path, frame_output_path)

            # Read the frame; OpenCV loads BGR, convert to RGB
            frame = cv2.imread(frame_output_path)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Apply transforms if specified
            if self.transform:
                frame = self.transform(frame)
        finally:
            # Clean up temporary files even when a step above failed; a file
            # that was never created is simply skipped.
            for temp_path in (video_output_path, frame_output_path, audio_output_path):
                try:
                    os.remove(temp_path)
                except FileNotFoundError:
                    pass

        return {'frame': frame, 'audio': audio}

In [None]:
#video_id = 'lJiyXBRglgs'
#video_url = f'https://youtu.be/{video_id}'
#video_output_path = f'temp_video_{video_id}.mp4'
#frame_output_path = f'output_frame_{video_id}.jpg'
#audio_output_path = f'output_audio_{video_id}.wav'
#download_video(video_url, video_output_path)
#extract_central_frame(video_output_path, frame_output_path)
#extract_audio(video_url, audio_output_path)

In [None]:
# Path to the dataset text file (read by YouTubeDataset, one video ID per line).
video_ids_file = "dataset_file.txt"


def collate_samples(samples):
    """Group per-sample dicts into one dict of lists, without stacking.

    DataLoader's default collate function stacks the samples of a batch into
    a single tensor, which raises when the samples differ in size. Audio
    loaded from different videos (and, presumably, frames of videos with
    different resolutions) need not share a shape, so each batch keeps them
    as lists instead.

    Args:
        samples: List of dicts with ``'frame'`` and ``'audio'`` entries, as
            returned by ``YouTubeDataset.__getitem__``.

    Returns:
        A dict with the same two keys, each mapping to a list holding one
        entry per sample.
    """
    return {
        'frame': [sample['frame'] for sample in samples],
        'audio': [sample['audio'] for sample in samples],
    }


transform = transforms.Compose([transforms.ToTensor()])  # Add more transforms if needed
dataset = YouTubeDataset(video_ids_file, transform=transform)
dataloader = DataLoader(
    dataset,
    batch_size=4,  # Adjust batch size
    shuffle=True,
    collate_fn=collate_samples,
)

# Iterate over the dataloader
for batch in dataloader:
    # frames and audios are lists with one entry per sample in the batch
    frames, audios = batch['frame'], batch['audio']
    # Process the batch and pass it to your model