In [None]:
import os

# Set WANDB_DISABLED for this process (presumably to keep Weights & Biases
# logging off in libraries that honour the variable — TODO confirm it is needed).
os.environ.update(WANDB_DISABLED="true")

In [1]:
# Install required packages (run this cell once)
!pip install diffusers transformers accelerate scipy torch --upgrade



In [2]:
import random
from diffusers import AudioLDMPipeline
import torch
import scipy.io.wavfile as wavfile

In [3]:
# Load the AudioLDM pipeline (diffusion-based text-to-audio model)
# Pick the device first so the weights can be loaded in a matching precision:
# float16 halves GPU memory use, but half-precision inference on CPU is
# generally unsupported or very slow, so the CPU path uses float32.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32
pipe = AudioLDMPipeline.from_pretrained("cvssp/audioldm-s-full-v2", torch_dtype=torch_dtype)
pipe = pipe.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

In [4]:
# Predefined text prompts: listed as examples by menu option 2 and sampled at
# random by menu option 3 ("Surprise me").
surprise_prompts = [
    "The sound of a gentle breeze rustling through leaves",
    "A cartoon explosion with a quirky boing",
    "A futuristic robot beep and glitch effect",
    "A mysterious door creaking open in a dark hallway",
    "An otherworldly chime with echoing synths"
]

In [5]:
def generate_audio_ldm(prompt, output_filename="output.wav", audio_length=5.0):
    """Generate audio for a text prompt with the loaded pipeline and save it as WAV.

    Args:
        prompt: Text description passed to the pipeline.
        output_filename: Path of the WAV file to write.
        audio_length: Requested clip duration in seconds (forwarded as
            ``audio_length_in_s``).

    Returns:
        None. The first generated clip is written to ``output_filename`` and
        progress messages are printed.
    """
    print("Generating audio for prompt:", prompt)
    # Generate audio using AudioLDM with specified inference steps and duration
    result = pipe(prompt, num_inference_steps=50, audio_length_in_s=audio_length)
    audio = result.audios[0]
    # The waveform comes out of the pipeline's vocoder, so the vocoder config is
    # where the output sampling rate lives. The previous lookup
    # (unet config "sample_rate", default 24000) never found a value and wrote
    # every file as 24 kHz, which makes 16 kHz AudioLDM output play too fast and
    # too high. 16000 Hz is kept as the fallback if the attribute is unavailable.
    vocoder_config = getattr(getattr(pipe, "vocoder", None), "config", None)
    sample_rate = int(getattr(vocoder_config, "sampling_rate", 16000))
    wavfile.write(output_filename, sample_rate, audio)
    print(f"Audio saved as {output_filename}\n")

In [None]:
def _read_audio_length(prompt_text, default=5.0):
    """Ask for a clip length in seconds, re-prompting until the answer is usable.

    Args:
        prompt_text: Text shown to the user by ``input``.
        default: Value returned when the user just presses Enter.

    Returns:
        A positive, finite float number of seconds.
    """
    while True:
        raw = input(prompt_text).strip()
        if not raw:
            return default
        try:
            seconds = float(raw)
        except ValueError:
            print("Invalid length. Please enter a number such as 5 or 2.5.\n")
            continue
        # float() also accepts "nan" and "inf"; the chained comparison rejects
        # both together with zero and negative durations.
        if 0 < seconds < float("inf"):
            return seconds
        print("Length must be a positive number of seconds.\n")


def main():
    """Run the interactive text menu for generating audio clips.

    Loops until the user picks option 4. Option 1 asks for a description,
    an output filename and a length; option 2 lists the predefined prompts;
    option 3 picks one of them at random and saves the result to
    ``surprise.wav``. Invalid menu choices, empty descriptions and malformed
    lengths are reported and re-asked instead of raising.
    """
    while True:
        print("Choose an option:")
        print("1. Generate audio by typing a description")
        print("2. See example prompts")
        print("3. Surprise me with a random prompt")
        print("4. Exit")
        choice = input("Enter your choice (1-4): ").strip()

        if choice == "1":
            desc = input("Enter your sound description: ").strip()
            if not desc:
                # An empty prompt gives the model nothing to condition on.
                print("Description cannot be empty. Please try again.\n")
                continue
            filename = input("Enter filename (default: output.wav): ").strip() or "output.wav"
            audio_length = _read_audio_length("Enter sound length in seconds (default: 5): ")
            generate_audio_ldm(desc, filename, audio_length)
        elif choice == "2":
            print("\nExample prompts:")
            for prompt in surprise_prompts:
                print(" -", prompt)
            print("")
        elif choice == "3":
            random_prompt = random.choice(surprise_prompts)
            print("\nSurprise prompt:", random_prompt)
            audio_length = _read_audio_length(
                "Enter sound length in seconds for surprise prompt (default: 5): "
            )
            generate_audio_ldm(random_prompt, "surprise.wav", audio_length)
        elif choice == "4":
            print("Exiting...")
            break
        else:
            print("Invalid choice. Please try again.\n")

In [7]:
# Start the interactive menu when this cell (or the exported script) is run
# directly. Note that main() blocks on input() until option 4 is chosen.
if __name__ == "__main__":
    main()

Choose an option:
1. Generate audio by typing a description
2. See example prompts
3. Surprise me with a random prompt
4. Exit
Enter your choice (1-4): 3

Surprise prompt: A cartoon explosion with a quirky boing
Enter sound length in seconds for surprise prompt (default: 5): 
Generating audio for prompt: A cartoon explosion with a quirky boing


  0%|          | 0/50 [00:00<?, ?it/s]

Audio saved as surprise.wav

Choose an option:
1. Generate audio by typing a description
2. See example prompts
3. Surprise me with a random prompt
4. Exit
Enter your choice (1-4): 4
Exiting...
