In [None]:
import os
import pandas as pd
import yt_dlp
import librosa
import re
from concurrent.futures import ThreadPoolExecutor

In [None]:
def download_audio_from_youtube(youtube_url, output_folder,
                                ffmpeg_location='/opt/homebrew/bin/ffmpeg'):
    """Download the audio of a YouTube video and store it as an MP3 file.

    yt-dlp fetches the best available audio stream and its
    ``FFmpegExtractAudio`` post-processor converts it to MP3 with a
    preferred quality of 192. The output file is named after the video
    title and written into ``output_folder``.

    Args:
        youtube_url: URL of the video to download.
        output_folder: Directory that receives the resulting file.
        ffmpeg_location: Path handed to yt-dlp as ``ffmpeg_location``.
            Defaults to the Homebrew path this function previously
            hard-coded. When the path is empty or does not exist, the
            option is left out so yt-dlp searches for ffmpeg itself.

    Raises:
        Any error raised by yt-dlp during download or post-processing
        propagates to the caller.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': os.path.join(output_folder, '%(title)s.%(ext)s'),
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'quiet': True,
        'no_warnings': True,
        'prefer_ffmpeg': True,
        'keepvideo': False,
    }
    # Pin the ffmpeg location only when it is really there. Passing a
    # non-existent path would stop yt-dlp from finding ffmpeg at all, which
    # breaks the MP3 extraction on machines without a Homebrew install.
    if ffmpeg_location and os.path.exists(ffmpeg_location):
        ydl_opts['ffmpeg_location'] = ffmpeg_location

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([youtube_url])
        
def process_row(row, output_folder):
    """Download the audio referenced by a single CSV row.

    Args:
        row: Mapping-like record that is indexed with the 'Youtube Link'
            key to obtain the video URL.
        output_folder: Directory forwarded to the downloader as the
            destination for the audio file.
    """
    download_audio_from_youtube(row['Youtube Link'], output_folder)


def process_csv_and_download_audio(csv_path, output_folder, max_workers=20):
    """Download the audio for every YouTube link listed in a CSV file.

    The CSV is read with pandas, rows without a value in the
    'Youtube Link' column are dropped, and each remaining row is handed
    to ``process_row`` on a thread pool.

    Args:
        csv_path: Path of the CSV file; it must have a 'Youtube Link'
            column.
        output_folder: Destination directory; created if it is missing.
        max_workers: Number of worker threads used for the downloads.
            Defaults to 20, the value that was previously hard-coded.

    Raises:
        KeyError: If the CSV has no 'Youtube Link' column (raised by
            ``DataFrame.dropna``).
        Exception: The first exception raised by a worker is re-raised
            here once the pool has finished the remaining tasks.
    """
    os.makedirs(output_folder, exist_ok=True)
    links = pd.read_csv(csv_path).dropna(subset=['Youtube Link'])

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_row, row, output_folder)
            for _, row in links.iterrows()
        ]
        # result() re-raises whatever the worker raised, so a failed
        # download is not silently lost.
        for future in futures:
            future.result()
            
# Main execution: replace both placeholder paths below before running this cell.
csv_file_path = r"specify path"  # Path to your dataset (a CSV with a 'Youtube Link' column)
output_folder = r"specify path"  # Folder to save downloaded audio

# Runs immediately when this cell is executed: downloads every linked video's audio.
process_csv_and_download_audio(csv_file_path, output_folder)