# AudioLDM

## Install Git LFS

In [None]:
!curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
!sudo apt-get install git-lfs

## Clone the model from Hugging Face

In [None]:
!git clone https://huggingface.co/haoheliu/AudioLDM-S-Full
!rm AudioLDM-S-Full/audioldm-s-full AudioLDM-S-Full/audioldm-s-full.ckpt

## Install AudioLDM

In [None]:
!git clone https://github.com/jkrukowski/AudioLDM.git && git checkout -t origin/removed-model-download
!pip install -e AudioLDM/

In [None]:
import sys
# Add the cloned checkout to the import search path for this kernel session.
# NOTE(review): presumably needed because the editable install above is not
# picked up until the kernel restarts - confirm.
sys.path.append('./AudioLDM/')

In [None]:
import numpy as np
import random
import uuid
import soundfile as sf
from audioldm import text_to_audio, style_transfer, build_model, latent_diffusion

# Build the model once from the locally cloned checkpoint; text2audio() and
# styleaudio() below both pass this same instance to the library.
audioldm = build_model(ckpt_path='./AudioLDM-S-Full/audioldm-s-full.ckpt')

def text2audio(text, duration, guidance_scale, random_seed, n_candidates, steps):
  """Generate audio for a text prompt using the module-level ``audioldm`` model.

  Args:
    text: Prompt describing the audio to generate.
    duration: Forwarded as ``duration`` (presumably seconds - confirm).
    guidance_scale: Forwarded as ``guidance_scale``.
    random_seed: Forwarded as ``seed``.
    n_candidates: Forwarded as ``n_candidate_gen_per_text`` after conversion
      to ``int``.
    steps: Forwarded as ``ddim_steps``.

  Returns:
    Whatever ``text_to_audio`` returns, except that a result of length 1 is
    unwrapped to its only element.
  """
  waveform = text_to_audio(
    audioldm,
    text,
    # Passed by keyword so the call does not depend on which parameter
    # occupies the third positional slot of text_to_audio.
    seed=random_seed,
    duration=duration,
    guidance_scale=guidance_scale,
    ddim_steps=steps,
    n_candidate_gen_per_text=int(n_candidates),
  )
  # Unwrap a single-element result so callers get the item itself.
  if len(waveform) == 1:
    waveform = waveform[0]
  return waveform

def styleaudio(text, duration, audio_path, strength, guidance_scale, random_seed, steps):
  """Run ``style_transfer`` with the module-level ``audioldm`` model.

  ``text``, ``audio_path``, ``strength`` and ``random_seed`` are forwarded
  positionally, in that order, after the model; ``duration``,
  ``guidance_scale`` and ``steps`` (as ``ddim_steps``) are forwarded by
  keyword.

  Returns:
    The ``style_transfer`` result, unwrapped to its only element when the
    result has length 1.
  """
  result = style_transfer(
    audioldm, text, audio_path, strength, random_seed,
    duration=duration, guidance_scale=guidance_scale, ddim_steps=steps,
  )
  # A single-element result is handed back as the element itself.
  if len(result) == 1:
    return result[0]
  return result

In [None]:
# Named `prompt` rather than `input` so the builtin input() is not shadowed for
# the rest of the kernel session.
prompt = "blend of haunting soundscapes and minimalist electronic music. The resulting sound would be characterized by manipulated and processed samples, beats, and textures, combined with atmospheric and nostalgic elements. Repetitive loops, glitch elements, and sound decay would create a mesmerizing and hypnotic effect. The focus would be on sound design, textures, and timbres, while incorporating elements of emotion and memory. The resulting music would be experimental, dreamy, and immersive, offering a unique and captivating listening experience."
# A fresh seed per run; it is embedded in the output filename below so the seed
# used for a given file is recorded.
seed = random.randint(0, 10_000_000)
generated_audio = text2audio(
  prompt,
  duration=25,
  guidance_scale=10,
  random_seed=seed,
  n_candidates=10,
  steps=500,
)
# 16000 is the sample rate handed to soundfile.
# NOTE(review): presumably the model's output rate - confirm.
sf.write(f'{uuid.uuid4()}_{seed}.wav', generated_audio.T, 16000, subtype='PCM_24')