<a href="https://colab.research.google.com/github/hj245668/DL/blob/main/1211_MusicGen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

🎵 Re-me Music Generator - Google Colab


Cell 1: Setup & Installation

In [None]:
# ============================================
# INSTALL DEPENDENCIES
# ============================================

print("📦 Installing packages (takes ~1 minute)...")
!pip install -q transformers==4.45.0 accelerate scipy

print("✅ Installation complete!")

📦 Installing packages (takes ~1 minute)...
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m59.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m104.1 MB/s[0m eta [36m0:00:00[0m
[?25h✅ Installation complete!


Cell 2: Import & Load Model

In [None]:
# ============================================
# LOAD MUSICGEN MODEL
# ============================================

import scipy.io.wavfile
import torch
from IPython.display import Audio, display
from transformers import AutoProcessor, MusicgenForConditionalGeneration

print("📦 Loading MusicGen model (takes 2-3 minutes)...")

# Decide on the device up front so the model can be placed on it right after loading.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Processor and model are both loaded from the same checkpoint name.
processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small").to(device)

print(f"✅ Model loaded on {device}!")
if device == 'cuda':
    # Report which GPU (index 0) is in use.
    print(f"   GPU: {torch.cuda.get_device_name(0)}")

📦 Loading MusicGen model (takes 2-3 minutes)...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/275 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/2.36G [00:00<?, ?B/s]

  self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False)


generation_config.json:   0%|          | 0.00/224 [00:00<?, ?B/s]

✅ Model loaded on cpu!


Cell 3: Project Configuration

In [None]:
# ============================================
# RE-ME CONFIGURATION
# ============================================

# Song metadata, musical style and lyrical theme for this project.
config = dict(
    song=dict(
        title='re-me',
        version='v1',
        duration=30,  # seconds
    ),
    style=dict(
        genre='soul',
        mood='melancholic, introspective',
        tempo='medium-slow',
        instruments='piano, soft strings, subtle percussion',
    ),
    lyrics_theme='remembrance, nostalgia, emotional reflection',
)

# Build prompt by filling the template from the style section
prompt = "{genre} music, {mood} mood, {tempo} tempo, featuring {instruments}".format(
    **config['style']
)

# Echo the project summary and the final prompt
print("🎵 Project: {title} {version}".format(**config['song']))
print("   Duration: {duration}s".format(**config['song']))
print("\n📝 Prompt:")
print("   " + prompt)

🎵 Project: re-me v1
   Duration: 30s

📝 Prompt:
   soul music, melancholic, introspective mood, medium-slow tempo, featuring piano, soft strings, subtle percussion


Cell 4: Generation Function

In [None]:
# ============================================
# MUSIC GENERATION FUNCTION
# ============================================

def generate_music(prompt, duration=30, filename="output", guidance_scale=3.0):
    """
    Generate music from a text prompt and save it as a WAV file.

    Uses the notebook-level `processor`, `model` and `device` objects, so the
    model-loading cell must have been run first.

    Args:
        prompt: Text description of the music
        duration: Requested length in seconds (must be greater than 0)
        filename: Output filename; ".wav" is appended unless it is already there
        guidance_scale: How closely to follow prompt (1-5, higher = stricter)

    Returns:
        Tuple (output_path, audio_np, sample_rate): path of the written WAV
        file, the generated samples as a NumPy array, and the sample rate in Hz.

    Raises:
        ValueError: If duration is not greater than 0
    """
    # Fail fast, before any (slow) model work is started.
    if duration <= 0:
        raise ValueError(f"duration must be greater than 0 seconds, got {duration!r}")

    # Avoid producing "name.wav.wav" when the caller already included the extension.
    name = str(filename)
    output_path = name if name.lower().endswith(".wav") else f"{name}.wav"

    print(f"\n🎵 Generating '{output_path}'...")
    print(f"   Duration: {duration}s")
    print(f"   Guidance: {guidance_scale}")
    if duration > 30:
        # NOTE(review): the transformers MusicGen docs describe a ~30 s
        # generation limit -- confirm for this checkpoint.
        print("   ⚠️ Requests above ~30s may exceed MusicGen's documented limit; quality may degrade")

    # Prepare inputs
    inputs = processor(
        text=[prompt],
        padding=True,
        return_tensors="pt",
    ).to(device)

    # Calculate tokens from the codec's own frame rate; fall back to the
    # ~50 tokens per second that MusicGen uses if the config does not expose it.
    audio_cfg = model.config.audio_encoder
    frame_rate = getattr(audio_cfg, "frame_rate", None) or 50
    max_tokens = max(1, int(duration * frame_rate))

    # Generate
    print("   🎼 Generating audio...")
    with torch.no_grad():
        audio = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=True,
            guidance_scale=guidance_scale,
            temperature=1.0,
            top_k=250
        )

    # Convert to numpy: first item, first channel
    # (presumably laid out as batch, channels, samples -- TODO confirm)
    audio_np = audio[0, 0].cpu().numpy()
    sample_rate = audio_cfg.sampling_rate

    # Save
    scipy.io.wavfile.write(output_path, sample_rate, audio_np)

    actual_duration = len(audio_np) / sample_rate

    print(f"✅ Complete!")
    print(f"   File: {output_path}")
    print(f"   Sample rate: {sample_rate} Hz")
    print(f"   Actual duration: {actual_duration:.1f}s")

    return output_path, audio_np, sample_rate

print("✅ Generation function ready!")

✅ Generation function ready!


Cell 5: Generate Your Track

In [None]:
# ============================================
# GENERATE RE-ME V1
# ============================================

# Output file stem is "<title>_<version>", taken from the project config.
filename = "{title}_{version}".format(**config['song'])

# The returned path, samples and sample rate are kept as notebook globals;
# later cells read `output_path` and `sample_rate`.
output_path, audio_data, sample_rate = generate_music(
    prompt,
    config['song']['duration'],
    filename,
    3.0,
)

print("\n🎉 Generation complete!")


🎵 Generating 're-me_v1.wav'...
   Duration: 30s
   Guidance: 3.0
   🎼 Generating audio...




KeyboardInterrupt: 

In [None]:
# ============================================
# PREVIEW YOUR TRACK
# ============================================
# Renders an inline audio player for the file at `output_path`, i.e. whatever
# generation step last assigned that global, using the `sample_rate` global.

print("🎧 Listen to your track:")
display(Audio(output_path, rate=sample_rate))

Cell 7: Download File

In [None]:
# ============================================
# DOWNLOAD TO YOUR COMPUTER
# ============================================
# Requires the Google Colab runtime: `google.colab` is imported below.

from google.colab import files

print("📥 Downloading file...")
# Hands the file at the current `output_path` to Colab's download helper.
files.download(output_path)
print(f"✅ Downloaded: {output_path}")

Cell 8 (Optional): Generate Multiple Variations

In [None]:
# ============================================
# GENERATE VARIATIONS
# ============================================

# One entry per alternative take: output file stem, text prompt and guidance
# scale. An optional 'duration' key (seconds) overrides the 30 s default.
variations = [
    {
        'name': 'reme_v1_bright',
        'prompt': 'soul music, uplifting and hopeful mood, medium tempo, piano and strings',
        'guidance': 3.5
    },
    {
        'name': 'reme_v1_dark',
        'prompt': 'soul music, deep melancholic mood, slow tempo, piano and cello',
        'guidance': 4.0
    },
    {
        'name': 'reme_v1_ambient',
        'prompt': 'ambient soul music, introspective and ethereal, slow tempo, soft piano and pads',
        'guidance': 2.5
    }
]

print("🎵 Generating variations...\n")

# Collect the generated paths in their own list rather than re-binding the
# global `output_path`, which the preview and download cells read for the main
# track. Indexing the result also avoids assigning to `_`, which IPython uses
# for the last cell output.
variation_paths = []
for var in variations:
    variation_path = generate_music(
        prompt=var['prompt'],
        duration=var.get('duration', 30),
        filename=var['name'],
        guidance_scale=var['guidance']
    )[0]
    variation_paths.append(variation_path)
    print(f"   ✅ {var['name']}.wav")
    print()

print("🎉 All variations complete!")

Cell 9 (Optional): Batch Download All Files

In [None]:
# ============================================
# DOWNLOAD ALL GENERATED FILES
# ============================================

import glob
from google.colab import files

# Every .wav file in the current working directory, in the order glob returns them.
wav_files = glob.glob("*.wav")

print("📥 Downloading {} files...".format(len(wav_files)))
for wav_name in wav_files:
    # One download per file, each confirmed on its own line.
    files.download(wav_name)
    print("   ✅ " + wav_name)

print("✅ All files downloaded!")

**메모:** GPU에서 돌려야겠다. CPU에서는 30초 분량 생성이 너무 느려서 중단함 (위 Cell 5의 `KeyboardInterrupt` 참고).

# CPU version

지금 CPU라서:

AudioCraft 전체 설치(의존성 포함)는 절대 불가 — 아래 셀에서는 `--no-deps`로 패키지 본체만 설치함

singing 모델 절대 불가

medium/melody 모델 불가

대신:

MusicGen-small은 가능

Instrumental BGM 생성은 가능

이후 ElevenLabs에서 보컬 추가 가능

In [None]:
# Clone the AudioCraft repository into the current working directory.
# NOTE(review): re-running this cell will likely fail or nest a second clone,
# because the target directory already exists after the first run -- confirm.
!git clone https://github.com/facebookresearch/audiocraft.git
# `%cd` changes the kernel's working directory for every later cell, so
# relative paths used afterwards (e.g. the .wav outputs) resolve inside
# /content/audiocraft.
%cd /content/audiocraft
# Editable install of the package only; `--no-deps` skips its dependencies.
# NOTE(review): none of the visible cells import audiocraft -- confirm it is needed.
!pip install -e . --no-deps


Cloning into 'audiocraft'...
remote: Enumerating objects: 2011, done.[K
remote: Counting objects: 100% (14/14), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 2011 (delta 10), reused 8 (delta 8), pack-reused 1997 (from 2)[K
Receiving objects: 100% (2011/2011), 24.72 MiB | 19.79 MiB/s, done.
Resolving deltas: 100% (1193/1193), done.
/content/audiocraft
Obtaining file:///content/audiocraft
  Preparing metadata (setup.py) ... [?25l[?25hdone
Installing collected packages: audiocraft
  Running setup.py develop for audiocraft
Successfully installed audiocraft-1.4.0a2


In [None]:
!pip install transformers accelerate einops soundfile




In [None]:
import soundfile as sf
import torch
from transformers import AutoProcessor, MusicgenForConditionalGeneration

# Load the processor first, then the model, both from the same checkpoint.
# No device placement happens here, so the model stays where from_pretrained puts it.
processor, model = (
    loader.from_pretrained("facebook/musicgen-small")
    for loader in (AutoProcessor, MusicgenForConditionalGeneration)
)


In [None]:
# First draft of the CPU prompt; `prompt` and `inputs` are assigned again by the following cell.
prompt = "a soulful emotional ballad with warm piano, strings, and cinematic atmosphere"

# Encode the single prompt with the processor, returning PyTorch tensors.
inputs = processor(
    text=[prompt],
    padding=True,
    return_tensors="pt",
)


In [None]:
# Text description of the instrumental, assembled from two fragments.
prompt = "".join((
    "a soulful emotional ballad with warm piano, lush strings, ",
    "soft drums and an atmospheric cinematic rise. heartfelt and nostalgic.",
))

# Encode the single prompt with the processor, returning PyTorch tensors.
inputs = processor(text=[prompt], padding=True, return_tensors="pt")


In [None]:
# Generate up to 1024 new tokens for the prepared prompt; no_grad matches the
# other generation cells in this notebook.
with torch.no_grad():
    audio_values = model.generate(**inputs, max_new_tokens=1024)

# Write the first clip's first channel. The sample rate is read from the model
# config instead of being hard-coded, so the WAV header always matches the
# audio this checkpoint produces.
sf.write(
    "reme_cpu_output.wav",
    audio_values[0, 0].cpu().numpy(),
    model.config.audio_encoder.sampling_rate,
)


In [None]:
# Requires the Google Colab runtime. Passes the clip written by the previous
# cell to Colab's download helper; the path is relative to the current working directory.
from google.colab import files
files.download("reme_cpu_output.wav")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Long-form description of the "heavenly" instrumental, one sentence per fragment.
prompt = "".join((
    "A bright, heavenly and emotional ballad instrumental. ",
    "Warm, shimmering grand piano playing gentle uplifting chords. ",
    "Light, airy strings and soft high violins that feel like sunlight in the sky. ",
    "Soft ethereal pads and subtle choir creating an angelic, floating atmosphere. ",
    "A gentle deep bass for warmth, and very soft brushed drums that rise slowly. ",
    "The music should feel hopeful, comforting, peaceful and radiant, ",
    "like standing in soft morning light after a long night. ",
    "Very cinematic, tender, and full of gentle joy and healing.",
))

# Encode the single prompt with the processor, returning PyTorch tensors.
inputs = processor(text=[prompt], padding=True, return_tensors="pt")


In [None]:
# Generate audio for the prompt encoded in `inputs`, without tracking gradients.
with torch.no_grad():
    audio_values = model.generate(
        **inputs,
        max_new_tokens=1300,   # MusicGen emits ~50 tokens per second of audio, so this is roughly 26 s (the original note said 14-17 s)
    )


In [None]:
# Save the first clip's first channel. The sample rate comes from the model
# config rather than a hard-coded 32000, so the WAV header always matches the
# audio this checkpoint produces.
output_path = "re-me_instrumental_heavenly_cpu.wav"
sf.write(
    output_path,
    audio_values[0, 0].cpu().numpy(),
    model.config.audio_encoder.sampling_rate,
)
print("Saved:", output_path)


Saved: re-me_instrumental_heavenly_cpu.wav


In [None]:
# Requires the Google Colab runtime. Passes the saved instrumental to Colab's
# download helper; the file name is spelled out here rather than read from `output_path`.
from google.colab import files
files.download("re-me_instrumental_heavenly_cpu.wav")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>