<a href="https://colab.research.google.com/github/aalokok/postcards-from-my-jungle/blob/master/soundANDphrase.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Stable Audio
https://huggingface.co/stabilityai/stable-audio-open-1.0




In [None]:
# Install required packages
!pip install diffusers transformers accelerate scipy

import torch
from diffusers import AudioLDMPipeline
from IPython.display import Audio, display
import scipy.io.wavfile
import numpy as np
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Initialize the model
model_id = "cvssp/audioldm-s-full-v2"

# Pick the device first so the weight precision can follow it: float16 halves
# GPU memory use, but half precision is poorly supported on CPU, so fall back
# to float32 when no GPU is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

pipe = AudioLDMPipeline.from_pretrained(model_id, torch_dtype=torch_dtype)
pipe = pipe.to(device)

class AudioGenerator:
    """Render text prompts to 16-bit WAV files with a text-to-audio pipeline.

    The wrapped ``pipe`` is called with a prompt and must return an object
    whose ``.audios[0]`` is a float waveform (as the AudioLDM pipeline used
    in this notebook does).
    """

    # Sample rate (Hz) written to the WAV header and used for notebook playback.
    SAMPLE_RATE = 16000
    # Number of denoising steps requested for every clip.
    NUM_INFERENCE_STEPS = 20

    def __init__(self, pipe):
        self.pipe = pipe

    @staticmethod
    def _to_int16(audio):
        """Convert a float waveform to 16-bit PCM, saturating out-of-range samples."""
        samples = np.asarray(audio, dtype=np.float32)
        # Without clipping, a sample just outside [-1, 1] overflows int16 and
        # wraps to the opposite extreme, which is heard as a loud click.
        return (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)

    def generate_sound(self, prompt, negative_prompt, duration, output_filename):
        """
        Generate a sound from a text description, save it and play it inline.

        Args:
            prompt (str): Description of the desired sound
            negative_prompt (str): What to avoid in the generation
            duration (int): Requested length in seconds; passed to the pipeline
                as ``audio_length_in_s`` (the pipeline may adjust it)
            output_filename (str): Where to save the generated audio

        Returns:
            pathlib.Path: The path of the WAV file that was written.
        """
        # Generate audio
        audio = self.pipe(
            prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=self.NUM_INFERENCE_STEPS,
            audio_length_in_s=duration,
        ).audios[0]

        # Convert to 16-bit PCM and save, creating the target folder if needed
        audio_norm = self._to_int16(audio)
        output_path = Path(output_filename)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        scipy.io.wavfile.write(output_filename, rate=self.SAMPLE_RATE, data=audio_norm)

        # Display in notebook
        display(Audio(audio_norm, rate=self.SAMPLE_RATE))
        return output_path

# Create output directory (parents=True so this also works when /content
# does not exist yet, e.g. outside a Colab runtime)
output_dir = Path("/content/generated_sounds")
output_dir.mkdir(parents=True, exist_ok=True)

# Initialize generator
generator = AudioGenerator(pipe)

# Catalogue of fictional creatures. Each entry carries the display name, the
# text prompt describing its call, a negative prompt listing what to avoid,
# and the requested clip length in seconds.
animals = [
    dict(
        name="Crystal Chirper",
        prompt="high-pitched crystalline chirping sound, magical tinkling bells sequence, ethereal and pure tones evolving over time, harmonic overtones with rhythmic patterns, gentle sparkles and crystal resonances, fantasy creature long vocalization, melodic progression",
        negative_prompt="harsh noise, mechanical sounds, distortion, low frequency rumble, static, human voice, sudden changes, aggressive sounds",
        duration=10,
    ),
    dict(
        name="Bioluminescent Florasinger",
        prompt="organic humming and pulsing sounds, flowing plant-like resonances, gentle bioluminescent vibrations, living crystal harmonics, fluid water-like tones mixed with soft organic pulses, growing vine movements, natural rhythm like breathing flowers, mystical garden sounds",
        negative_prompt="bird calls, chirping, mechanical sounds, electronic tones, urban noise, distortion, synthetic effects, harsh frequencies, artificial reverb, processed sounds, whistling, tweets",
        duration=15,
    ),
    dict(
        name="Cloud Whale",
        prompt="ethereal whale-like song with complex melodic structure, extended wind chimes and atmospheric whooshes, heavenly chorus with dynamic changes, sky-high resonance developing over time, mystical creature calls with emotional journey, peaceful floating soundscape",
        negative_prompt="underwater bubbles, ocean sounds, traffic noise, industrial sounds, human speech, percussion, choppy editing, mechanical noises",
        duration=20,
    ),
    dict(
        name="Forest Sprite",
        prompt="playful melodic trills evolving through a forest atmosphere, wood percussion and sustained leaf rustle, fairy-like giggles with musical development, organic forest sounds with temporal progression, magical creature extended vocalizations, enchanted woodland journey",
        negative_prompt="urban noise, mechanical sounds, electronic beeps, artificial effects, human voices, heavy bass, harsh transitions, synthetic textures",
        duration=12,
    ),
]

# Render every creature in turn. A failure for one animal is reported and the
# loop simply carries on with the next entry.
for animal in animals:
    print(f"\nGenerating sound for {animal['name']}...")

    # File name is the animal's name lower-cased with spaces turned into underscores.
    slug = animal['name'].lower().replace(' ', '_')
    output_file = output_dir / f"{slug}.wav"

    try:
        path = generator.generate_sound(
            animal['prompt'],
            animal['negative_prompt'],
            animal['duration'],
            str(output_file),
        )
    except Exception as e:
        print(f"Failed to generate sound for {animal['name']}: {e}")
    else:
        print(f"Successfully generated and saved to {path}")

print("\nSound generation complete!")

# Create zip file of all generated sounds
# (`!` is an IPython shell escape: the system `zip` tool recursively packs
# the output folder into /content/animal_sounds.zip)
!zip -r /content/animal_sounds.zip /content/generated_sounds/
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime — this cell will fail elsewhere; confirm before reusing it.
from google.colab import files
# Hand the archive to Colab's download helper.
files.download('/content/animal_sounds.zip')

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.wh

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

model_index.json:   0%|          | 0.00/462 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

text_encoder%2Fconfig.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

tokenizer%2Fspecial_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

tokenizer%2Ftokenizer_config.json:   0%|          | 0.00/424 [00:00<?, ?B/s]

tokenizer%2Ftokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

scheduler%2Fscheduler_config.json:   0%|          | 0.00/439 [00:00<?, ?B/s]

tokenizer%2Fmerges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer%2Fvocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/501M [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/740M [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/222M [00:00<?, ?B/s]

vocoder%2Fconfig.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

vae%2Fconfig.json:   0%|          | 0.00/534 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/221M [00:00<?, ?B/s]

unet%2Fconfig.json:   0%|          | 0.00/1.30k [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]


Generating sound for Crystal Chirper...


  0%|          | 0/20 [00:00<?, ?it/s]

Successfully generated and saved to /content/generated_sounds/crystal_chirper.wav

Generating sound for Bioluminescent Florasinger...


  0%|          | 0/20 [00:00<?, ?it/s]

Successfully generated and saved to /content/generated_sounds/bioluminescent_florasinger.wav

Generating sound for Cloud Whale...


  0%|          | 0/20 [00:00<?, ?it/s]

Successfully generated and saved to /content/generated_sounds/cloud_whale.wav

Generating sound for Forest Sprite...


  0%|          | 0/20 [00:00<?, ?it/s]

Successfully generated and saved to /content/generated_sounds/forest_sprite.wav

Sound generation complete!
  adding: content/generated_sounds/ (stored 0%)
  adding: content/generated_sounds/cloud_whale.wav (deflated 14%)
  adding: content/generated_sounds/forest_sprite.wav (deflated 32%)
  adding: content/generated_sounds/bioluminescent_florasinger.wav (deflated 13%)
  adding: content/generated_sounds/crystal_chirper.wav (deflated 13%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
!pip install torchsde
import torch
import soundfile as sf
from diffusers import StableAudioPipeline
from IPython.display import Audio  # Import the Audio module for playback

# Load Stable Audio Open with half-precision weights and place it on the GPU.
# NOTE: this rebinds `pipe`, replacing the AudioLDM pipeline from the first cell.
pipe = StableAudioPipeline.from_pretrained(
    "stabilityai/stable-audio-open-1.0",
    torch_dtype=torch.float16,
).to("cuda")

# CUDA random generator handed to the pipeline. No manual_seed() call is made
# here, so the generator keeps whatever initial seed torch gives it.
# NOTE: this rebinds `generator`, which held the AudioGenerator instance
# created in the first cell.
generator = torch.Generator("cuda")

# Text conditioning: what to produce and what to steer away from.
prompt = "Bossa nova"
negative_prompt = "Low quality."

# Run the diffusion; three waveforms are requested for the single prompt.
audio = pipe(
    prompt,
    negative_prompt=negative_prompt,
    num_inference_steps=200,
    audio_end_in_s=21.0,
    num_waveforms_per_prompt=3,
    generator=generator,
).audios

# Only the first waveform is kept: transpose it, convert it to a float32
# NumPy array on the CPU, and write it at the VAE's sampling rate.
output = audio[0].T.float().cpu().numpy()
sf.write("output.wav", output, pipe.vae.sampling_rate)

# As the last expression of the cell, this renders an inline audio player.
Audio("output.wav")

Collecting torchsde
  Downloading torchsde-0.2.6-py3-none-any.whl.metadata (5.3 kB)
Collecting trampoline>=0.1.2 (from torchsde)
  Downloading trampoline-0.1.2-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.6.0->torchsde)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.6.0->torchsde)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.6.0->torchsde)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.6.0->torchsde)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.6.0->torchsde)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-man

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

model_index.json:   0%|          | 0.00/498 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

projection_model%2Fconfig.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/1.59M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.85G [00:00<?, ?B/s]

tokenizer%2Fspecial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

text_encoder%2Fconfig.json:   0%|          | 0.00/1.50k [00:00<?, ?B/s]

scheduler%2Fscheduler_config.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

tokenizer%2Ftokenizer_config.json:   0%|          | 0.00/20.8k [00:00<?, ?B/s]

tokenizer%2Ftokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/624M [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/4.23G [00:00<?, ?B/s]

vae%2Fconfig.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

transformer%2Fconfig.json:   0%|          | 0.00/391 [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

  WeightNorm.apply(module, name, dim)


  0%|          | 0/200 [00:00<?, ?it/s]



In [None]:
import openai
import json
import time

class PhraseGenerator:
    """Invent words of a fictional animal language with an OpenAI chat model.

    Each entry of ``self.meanings`` is sent to the chat completions endpoint
    and the returned word is paired with its meaning.

    Args:
        api_key (str): OpenAI API key used to build the client.
        model (str): Chat model to query. Defaults to ``"gpt-4"``.
        request_delay (float): Seconds to wait between consecutive requests.
            Defaults to ``0.5``; ``0`` disables the pause.
    """

    def __init__(self, api_key, *, model="gpt-4", request_delay=0.5):
        self.client = openai.OpenAI(api_key=api_key)
        self.model = model
        self.request_delay = request_delay
        self.meanings = [
            "greeting - friendly welcome",
            "danger warning - apex predator spotted",
            "food location - abundant berries found",
            "territorial claim - marking boundaries",
            "mating call - seeking partner",
            "celebration - successful hunt",
            "group movement - migration time",
            "weather alert - storm approaching",
            "rest time - safe shelter found",
            "play invitation - social bonding",
            "submission - conflict resolution",
            "dominance - leadership assertion",
            "pain/distress - injury signal",
            "joy/excitement - new discovery",
            "affection - parent-child bonding",
            "hunting coordination - group strategy"
        ]

    def generate_phrase(self, meaning):
        """Ask the model for one made-up word expressing ``meaning``.

        Returns:
            str | None: The word with surrounding whitespace stripped, or
            ``None`` when the request fails or the reply carries no text.
            Failures are printed rather than raised so that one bad request
            does not abort a whole batch.
        """
        prompt = f"""Create a single made-up word for an animal language that means "{meaning}".
        Word must be 2-4 syllables, use Latin characters plus ā, ē, ī, ō, ū.
        Return only the word itself."""

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a linguistic expert. Respond only with the constructed word."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.7,
                max_tokens=50
            )
            content = response.choices[0].message.content
        except Exception as e:
            # Best-effort boundary: report and let the caller skip this meaning.
            print(f"Error: {e}")
            return None

        if content is None:
            # Handle an empty reply explicitly instead of relying on an
            # AttributeError from None.strip() being swallowed above.
            print(f"Error: model returned no text for {meaning!r}")
            return None
        return content.strip()

    def generate_all_phrases(self):
        """Generate a word for every meaning.

        Returns:
            list[dict]: ``{"phrase": ..., "meaning": ...}`` entries in the
            order of ``self.meanings``; meanings whose request failed or
            produced an empty word are left out.
        """
        phrases = []
        last_index = len(self.meanings) - 1

        for index, meaning in enumerate(self.meanings):
            phrase = self.generate_phrase(meaning)
            if phrase:
                phrases.append({
                    "phrase": phrase,
                    "meaning": meaning
                })
            # Pause after every request, successful or not: a failed call
            # (e.g. a rate-limit error) is exactly when backing off matters.
            # Nothing follows the final request, so no pause is needed there.
            if index < last_index and self.request_delay > 0:
                time.sleep(self.request_delay)

        return phrases

def main():
    """Generate a word for every meaning and save the result to phrases.json.

    The API key is taken from the ``OPENAI_API_KEY`` environment variable when
    it is set; otherwise it is requested with a hidden prompt.
    """
    # Local imports keep this fix self-contained within the cell.
    import getpass
    import os

    # Never read the key with input(): in a notebook the typed value is echoed
    # into the cell output and gets saved (and shared) with the .ipynb file.
    api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("Enter your OpenAI API key: ")
    generator = PhraseGenerator(api_key.strip())
    phrases = generator.generate_all_phrases()

    # ensure_ascii=False keeps the macron vowels readable in the saved file.
    with open('phrases.json', 'w', encoding='utf-8') as f:
        json.dump(phrases, f, ensure_ascii=False, indent=2)
    print("Phrases have been saved to phrases.json")

if __name__ == "__main__":
    main()

Enter your OpenAI API key: [REDACTED — an API key was captured in this cell output; revoke that key and clear the output before sharing the notebook]
Phrases have been saved to phrases.json
