In [None]:
!pip install openai-whisper
!brew install ffmpeg  # For macOS (if using Homebrew)

In [1]:
import csv
import os
import random
import subprocess
import time
from datetime import datetime

import whisper  # OpenAI Whisper for Speech-to-Text

# 📌 Live streams to sample, as (display name, stream URL) pairs.
# The order is significant: the recording loop rotates through the stations
# in exactly this sequence.
radio_stations = dict([
    ("BBC World News", "http://stream.live.vc.bbcmedia.co.uk/bbc_world_service"),
    ("NPR News", "https://npr-ice.streamguys1.com/live.mp3"),
    ("Classic FM", "http://media-ice.musicradio.com/ClassicFMMP3"),
    ("Jazz FM", "http://media-ice.musicradio.com/JazzFMMP3"),
    ("Radio Paradise", "http://stream-dc1.radioparadise.com/aac-320"),
    ("Smooth Radio", "http://media-ice.musicradio.com/SmoothUKMP3"),
])

# 📁 Audio clips and the metadata index share one folder; transcripts get their own.
output_dir, text_dir = "RadioStream_30", "Transcriptions"
metadata_file = os.path.join(output_dir, "metadata.csv")

# 📌 Create both folders up front (a folder that already exists is left as is).
for _folder in (output_dir, text_dir):
    os.makedirs(_folder, exist_ok=True)

# 📌 Speech-to-text model, loaded once here and reused for every clip.
# "small" or "base" are the recommended checkpoint sizes when speed matters.
model_size = "small"
whisper_model = whisper.load_model(model_size)

# 📌 Recording plan — tune the run here
NUM_RECORDINGS = 30
MIN_DURATION_S = 30    # shortest clip, in seconds (inclusive)
MAX_DURATION_S = 90    # longest clip, in seconds (inclusive)
FFMPEG_GRACE_S = 60    # wall-clock slack on top of the clip length before FFmpeg is killed
PAUSE_BETWEEN_S = 2    # small delay between recordings


def _record_stream(stream_url, duration, audio_filepath):
    """Capture `duration` seconds of `stream_url` into `audio_filepath` as MP3 via FFmpeg.

    Returns True when FFmpeg exits with status 0 and leaves a non-empty file.
    Returns False (after printing the reason) when FFmpeg fails, produces no
    audio, or runs longer than `duration + FFMPEG_GRACE_S` seconds.

    Raises FileNotFoundError if the `ffmpeg` executable is not installed; that
    is deliberately not caught, because no recording can succeed without it.
    """
    command = [
        "ffmpeg",
        "-loglevel", "error",  # keep stderr limited to real errors so it is worth showing
        "-i", stream_url,
        "-t", str(duration),
        "-acodec", "mp3",
        "-y", audio_filepath,  # Overwrite if exists
    ]
    try:
        result = subprocess.run(
            command,
            stdin=subprocess.DEVNULL,   # FFmpeg must never wait on the notebook's stdin
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            timeout=duration + FFMPEG_GRACE_S,  # a stalled stream must not hang the run
        )
    except subprocess.TimeoutExpired:
        print(f"⚠️ FFmpeg timed out after {duration + FFMPEG_GRACE_S}s; skipping this clip.")
        return False

    has_audio = os.path.isfile(audio_filepath) and os.path.getsize(audio_filepath) > 0
    if result.returncode != 0 or not has_audio:
        # Stream URLs go stale; surface FFmpeg's own message instead of hiding it.
        reason = result.stderr.decode("utf-8", errors="replace").strip()[-300:]
        print(f"⚠️ FFmpeg failed (exit code {result.returncode}): {reason or 'no audio was written'}")
        return False
    return True


# Build the rotation once; dict order defines which station each iteration uses.
stations = list(radio_stations.items())
if not stations:
    raise ValueError("radio_stations is empty: there is nothing to record.")

failed_recordings = 0

# 📌 Open metadata CSV file (rewritten from scratch on every run)
with open(metadata_file, "w", newline="", encoding="utf-8") as csvfile:
    fieldnames = ["Station", "File Name", "Timestamp", "Duration (s)", "Transcription File"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # 🔄 Record NUM_RECORDINGS audio files
    for i in range(NUM_RECORDINGS):
        station_name, stream_url = stations[i % len(stations)]  # Cycle through stations
        duration = random.randint(MIN_DURATION_S, MAX_DURATION_S)  # Random duration between 30-90s
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

        # The audio file and its transcript share one stem so they are easy to pair up.
        base_name = f"{station_name.replace(' ', '_')}_{timestamp}"
        audio_filename = f"{base_name}.mp3"
        audio_filepath = os.path.join(output_dir, audio_filename)

        print(f"🎙️ Recording {station_name} for {duration}s...")

        # 🎵 Use FFmpeg to record audio stream
        if _record_stream(stream_url, duration, audio_filepath):
            # 🎤 Convert audio to text using Whisper
            print(f"📝 Transcribing {audio_filename}...")
            transcription = whisper_model.transcribe(audio_filepath)["text"]

            # 📜 Save transcription
            text_filename = f"{base_name}.txt"
            text_filepath = os.path.join(text_dir, text_filename)
            with open(text_filepath, "w", encoding="utf-8") as textfile:
                textfile.write(transcription)

            # 📝 Store metadata
            writer.writerow({
                "Station": station_name,
                "File Name": audio_filename,
                "Timestamp": timestamp,
                "Duration (s)": duration,
                "Transcription File": text_filename
            })
            # Flush per row so completed clips stay indexed even if the kernel is killed mid-run.
            csvfile.flush()

            print("✅ Done! Audio & transcription saved.\n")
        else:
            # One unreachable station should not abort the remaining recordings.
            failed_recordings += 1
            print(f"⏭️ Skipped {station_name}; no metadata row written.\n")

        time.sleep(PAUSE_BETWEEN_S)  # Small delay between recordings

if failed_recordings:
    print(f"🎉 Recording & Transcription Complete! "
          f"{NUM_RECORDINGS - failed_recordings} of {NUM_RECORDINGS} clips saved; "
          f"{failed_recordings} skipped.")
else:
    print("🎉 Recording & Transcription Complete! All files are saved.")
