# Downloading Audio with the RSS Feed

## Notebook Setup

In [1]:
# Importing the necessary Python libraries
import json
import os
import time

import feedparser
import mlx_whisper
import requests
import yaml

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Resolving the RSS feed URL: the local keys file takes precedence,
# the WATERCOOLER_RSS_FEED environment variable is the fallback
if not os.path.exists('../keys/watercooler_rss_feed.yaml'):
    # Evaluates to None when the variable is not set
    wc_rss_feed_url = os.environ.get('WATERCOOLER_RSS_FEED')
else:
    with open('../keys/watercooler_rss_feed.yaml', 'r') as key_file:
        rss_settings = yaml.safe_load(key_file)
    wc_rss_feed_url = rss_settings['WATERCOOLER_RSS_FEED']

In [3]:
# Instantiating the object representing the RSS feed by parsing wc_rss_feed_url;
# later cells index the result like a dict (e.g. wc_rss_feed['entries']).
# NOTE(review): wc_rss_feed_url is None when neither the keys file nor the
# WATERCOOLER_RSS_FEED environment variable is present — confirm how parse behaves then.
wc_rss_feed = feedparser.parse(wc_rss_feed_url)

In [4]:
# Count of entries in the parsed feed (shown as the cell output)
len(wc_rss_feed['entries'])

364

In [5]:
# Download the audio linked from the feed entry at index 1 and save it as test.mp3.
# NOTE(review): assumes the entry's link at index 1 is the audio file — confirm against the feed.
audio_url = wc_rss_feed['entries'][1]['links'][1]['href']

# The context manager releases the streamed connection even if writing fails;
# the timeout keeps a stalled server from hanging the kernel indefinitely.
with requests.get(audio_url, stream = True, timeout = 60) as response:
    # Fail loudly on a 4xx/5xx status instead of saving an error page as test.mp3
    response.raise_for_status()

    with open('test.mp3', 'wb') as f:
        for chunk in response.iter_content(chunk_size = 1024):
            f.write(chunk)

In [None]:
# Benchmark each Whisper model on test.mp3: transcribe, time the call, and
# save the transcript under results/. A saved transcript doubles as the cache
# marker, so re-running the cell only processes models without one.
model_types = [
    'whisper-tiny',
    'whisper-base-mlx',
    'whisper-small-mlx',
    'whisper-medium-mlx',
    'whisper-large-v3-mlx',
    'whisper-large-v3-turbo'
]

# Created up front so a long transcription is never lost to a missing directory
os.makedirs("results", exist_ok=True)

# Parallel lists: runtimes[i] is the elapsed seconds for timed_models[i].
# Skipped (cached) models appear in neither list, which keeps every runtime
# paired with the model that actually produced it.
timed_models = []
runtimes = []

for model_type in model_types:

    if os.path.exists(f"results/results_{model_type}.txt"):
        continue

    # perf_counter is monotonic, so the interval is unaffected by system clock changes.
    # NOTE(review): the interval also covers any model download that transcribe
    # performs on first use — confirm whether that should be excluded.
    start_time = time.perf_counter()

    text = mlx_whisper.transcribe('test.mp3', path_or_hf_repo=f'mlx-community/{model_type}')['text']

    end_time = time.perf_counter()
    runtime = end_time - start_time
    timed_models.append(model_type)
    runtimes.append(runtime)

    # Explicit encoding so non-ASCII transcript text is written the same way everywhere
    with open(f"results/results_{model_type}.txt", "w", encoding="utf-8") as file:
        file.write(text)

# Append mode creates the file when missing and otherwise adds only the models
# timed in this run, so measurements from a partial re-run are not discarded.
with open("results/runtimes.txt", "a") as file:
    for model_type, runtime in zip(timed_models, runtimes):
        file.write(f"{model_type}: {runtime}\n")

Fetching 4 files: 100%|██████████| 4/4 [00:00<00:00, 76959.71it/s]
Fetching 4 files: 100%|██████████| 4/4 [00:00<00:00, 129055.51it/s]
Fetching 4 files: 100%|██████████| 4/4 [00:07<00:00,  1.79s/it]
Fetching 4 files: 100%|██████████| 4/4 [00:15<00:00,  3.91s/it]
Fetching 4 files: 100%|██████████| 4/4 [00:00<00:00, 31242.49it/s]
