## Configuration Setup

In [1]:
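# Install the dependencies from the venv terminal (not from inside this notebook).
# The MP3 cell uses yt-dlp's FFmpegExtractAudio post-processor, so FFmpeg must
# also be installed on the machine.

#pip install yt-dlp
#pip install "numpy<2.0"
#pip install librosa torch torchaudio transformers requests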

In [3]:
import os
import yt_dlp

# Video used to exercise the download cells below.
TEST_LINK = "https://www.youtube.com/watch?v=w4sJoZ9D1YM"

# Directory that receives every downloaded file.
OUTPUT_DIR = "downloads"
# Original (misspelled) name, kept as an alias because the download cells
# still build their output templates from it.
OUTPUR_DIR = OUTPUT_DIR

OUTPUT_VIDEO = "video.mp4"  # file name used for the video download
OUTPUT_AUDIO = "audio"      # base name used for the audio download (no extension)

# Create the directory from the constant rather than a repeated literal, so the
# folder that exists is always the folder the download cells write into.
os.makedirs(OUTPUT_DIR, exist_ok=True)

## Download Video

In [None]:
def report_video_progress(status):
    """Print the download percentage carried by a yt-dlp progress update.

    Args:
        status: the dict yt-dlp passes to each progress hook.

    The hook reads ``_percent_str`` with ``.get`` instead of indexing: an
    exception raised inside a hook interrupts the download, so an update that
    arrives without that key is skipped rather than failing with ``KeyError``.
    """
    percent = status.get('_percent_str')
    if percent is not None:
        print(f"Downloading: {percent} complete")


ydl_opts = {
    'format': 'best',  # Download best quality
    # NOTE(review): 'best' does not constrain the container, while the output
    # name is fixed to OUTPUT_VIDEO ("video.mp4") -- confirm the downloaded
    # file is really MP4, or restrict the format selector.
    'outtmpl': f"{OUTPUR_DIR}/{OUTPUT_VIDEO}",
    'noplaylist': True,  # Download single video, not playlist
    'progress_hooks': [report_video_progress],
}

# The context manager releases the downloader's resources when the block exits.
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download([TEST_LINK])

print("Download complete!")

## Download Audio as .mp3

In [4]:
def report_audio_progress(status):
    """Print the download percentage carried by a yt-dlp progress update.

    Args:
        status: the dict yt-dlp passes to each progress hook.

    The hook reads ``_percent_str`` with ``.get`` instead of indexing: an
    exception raised inside a hook interrupts the download, so an update that
    arrives without that key is skipped rather than failing with ``KeyError``.
    """
    percent = status.get('_percent_str')
    if percent is not None:
        print(f"Downloading: {percent} complete")


ydl_opts = {
    'format': 'bestaudio/best',  # Select best audio format
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',  # Extract audio using FFmpeg
        'preferredcodec': 'mp3',      # Convert to MP3
        'preferredquality': '192',    # Set quality (kbps)
    }],
    # The template has no extension; the transcription cell later reads the
    # converted result as "downloads/audio.mp3".
    'outtmpl': f"{OUTPUR_DIR}/{OUTPUT_AUDIO}",  # Output file name
    'noplaylist': True,               # Download single video, not playlist
    'progress_hooks': [report_audio_progress],
}

# The context manager releases the downloader's resources when the block exits.
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download([TEST_LINK])

print("Audio download complete!")

[youtube] Extracting URL: https://www.youtube.com/watch?v=w4sJoZ9D1YM
[youtube] w4sJoZ9D1YM: Downloading webpage
[youtube] w4sJoZ9D1YM: Downloading tv client config
[youtube] w4sJoZ9D1YM: Downloading player 69f581a5
[youtube] w4sJoZ9D1YM: Downloading tv player API JSON
[youtube] w4sJoZ9D1YM: Downloading ios player API JSON


  File "/Users/hissain/git/github/youtuber-debunked/.venv/lib/python3.11/site-packages/yt_dlp/cache.py", line 43, in store
    write_json_file({'yt-dlp_version': __version__, 'data': data}, fn)
  File "/Users/hissain/git/github/youtuber-debunked/.venv/lib/python3.11/site-packages/yt_dlp/utils/_utils.py", line 190, in write_json_file
    tf = tempfile.NamedTemporaryFile(
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/tempfile.py", line 563, in NamedTemporaryFile
    file = _io.open(dir, mode, buffering=buffering,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/tempfile.py", line 560, in opener
    fd, name = _mkstemp_inner(dir, prefix, suffix, flags, output_type)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/tempfile.py", line 256, in _mkstemp_inner

[youtube] w4sJoZ9D1YM: Downloading m3u8 information
[info] w4sJoZ9D1YM: Downloading 1 format(s): 251
[download] Destination: downloads/audio
[download]   0.0% of    2.69MiB at  746.18KiB/s ETA 00:03Downloading:   0.0% complete
[download]   0.1% of    2.69MiB at  994.07KiB/s ETA 00:02Downloading:   0.1% complete
[download]   0.3% of    2.69MiB at    1.61MiB/s ETA 00:01Downloading:   0.3% complete
[download]   0.5% of    2.69MiB at    2.61MiB/s ETA 00:01Downloading:   0.5% complete
[download]   1.1% of    2.69MiB at    4.15MiB/s ETA 00:00Downloading:   1.1% complete
[download]   2.3% of    2.69MiB at    6.89MiB/s ETA 00:00Downloading:   2.3% complete
[download]   4.6% of    2.69MiB at    7.24MiB/s ETA 00:00Downloading:   4.6% complete
[download]   9.3% of    2.69MiB at    7.55MiB/s ETA 00:00Downloading:   9.3% complete
[download]  18.5% of    2.69MiB at    9.08MiB/s ETA 00:00Downloading:  18.5% complete
[download]  37.1% of    2.69MiB at   10.78MiB/s ETA 00:00Downloading:  37.1% complete

## Using BanglaASR

In [5]:
import os
import librosa
import torch
import torchaudio
import numpy as np
import requests

from transformers import WhisperTokenizer, WhisperProcessor, WhisperFeatureExtractor, WhisperForConditionalGeneration

# Configurations

# Input audio and output transcript locations
local_mp3_path = "downloads/audio.mp3"
local_output_path = "downloads/transcription.txt"

# Windowing: the target rate is handed to the resampler and the feature
# extractor; each window covers chunk_duration seconds of audio.
sampling_rate_target = 16000
chunk_duration = 30  # Seconds per chunk

# Checkpoint identifier and the device inference runs on (GPU when available)
model_path = "bangla-speech-processing/BanglaASR"
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load Whisper components (all from the same checkpoint identifier)
processor = WhisperProcessor.from_pretrained(model_path)
tokenizer = WhisperTokenizer.from_pretrained(model_path)
feature_extractor = WhisperFeatureExtractor.from_pretrained(model_path)

# Build the model, then move its weights onto the selected device
model = WhisperForConditionalGeneration.from_pretrained(model_path)
model = model.to(device)

# Load audio file: torchaudio returns the waveform tensor and its sample rate
speech_array, sampling_rate = torchaudio.load(local_mp3_path)

# Convert to NumPy, downmix to mono and resample.
# Averaging the channels keeps speech that is present in any channel; taking
# only index 0 would silently drop every other channel of a stereo file.
# assumes the loaded tensor is laid out as (channels, samples) -- TODO confirm
speech_array = librosa.to_mono(speech_array.numpy())
speech_array = librosa.resample(speech_array, orig_sr=sampling_rate, target_sr=sampling_rate_target)

# Split into fixed-length windows
chunk_size = sampling_rate_target * chunk_duration  # Number of samples per chunk
total_samples = len(speech_array)
num_chunks = int(np.ceil(total_samples / chunk_size))

transcriptions = []

# Step through the signal one window at a time; slicing clamps the final
# window to the end of the array, so the last chunk may be shorter.
for chunk_number, offset in enumerate(range(0, total_samples, chunk_size), start=1):
    print(f"Processing chunk {chunk_number} of {num_chunks}...")

    samples = speech_array[offset:offset + chunk_size]
    extracted = feature_extractor(samples, sampling_rate=sampling_rate_target, return_tensors="pt")
    input_features = extracted.input_features.to(device)

    # Generate token ids for this window and decode the first sequence to text
    predicted_ids = model.generate(input_features)[0]
    transcriptions.append(processor.decode(predicted_ids, skip_special_tokens=True))

# Join the per-chunk texts into one space-separated transcript
full_transcription = " ".join(transcriptions)

# Persist the transcript as UTF-8 text, then echo it in the cell output
with open(local_output_path, mode="w", encoding="utf-8") as transcript_file:
    transcript_file.write(full_transcription)

print(full_transcription)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Processing chunk 1 of 7...
Processing chunk 2 of 7...
Processing chunk 3 of 7...
Processing chunk 4 of 7...
Processing chunk 5 of 7...
Processing chunk 6 of 7...
Processing chunk 7 of 7...
বিষয়মার্যার ভোররাতে নিজের ভেটিভাইট ফেসভুক পেইচ থেকেকে ওই স্টেটাসে হাসনাত দাবি করেন যে ক্যান্টনমেন্ট থেকে আওয়ামী লিককে পুনরবাসনের পরিকল্পনা করা হচ্ছে,ওই স্টেটাসে তিনি দাবি করেন, তিনি সহ আরো কয়েকজন ছাত্রণে সমযোতার বিনিময বিষয়টিকে রিফান্ড আওয়ামী লীগ নামে নতুন একটি শরযন্ত্র হিসাবে বর্ণনা করেছেন, তিনি দাবি করেন নতুন নেতৃত্বের অধীনে আওয়ামী লীগকে রাজনীতিতে ফিরিয়ানার পরিকল্পনা চলছে। এতোমধ্যে একাধিক রাজনৈতিক দলকেও একই পুনরবাচনে রাজ্যেতাসে।াহাপ� বিএনপির জ্যেষ্ঠ যুগ্ম মহাসচিব রুহল কবির রিজভি শুক্রবার ঢাকার একনুষ্ঠানে বলেছেন, হত্যা ও লুটপাটের সঙ্গে জড়িত নয় এমন কারও নেতৃত্বে আওয়ামী লীগের রাজনীতিতে কোনো বাধানী। যে লোক, আওয়ামীলীগীতে নৃত্যাশ বেন, সেযুটি আপরাধ না করে, সেযুটি ছাত্র হত্যা না করে, সেয়োদি কোনো অর্থলোপাঠ না করে, টাকা পাচার না করে, এরকম লোক দিদ্য নৃত্যাশে, তাদের আপত্তি থাকবেন।  দলটির আমির শফিক র