In [1]:
# Settings used in GenerateAudio.py

# Duration label -> value later passed to model.generate as max_new_tokens.
# NOTE(review): the keys look like clip lengths in seconds - confirm.
audio_lengths = {
    "5": 256,
    "7": 384,
    "10": 512,
    "20": 1024,
    "30": 1536,
    "40": 2048,
    "50": 2560,
    "60": 3072,
}

# Model size key -> [filename_ext, model_dir, model_name]
model_dict = {
    "S": ["gc_s", "small_model", "MusicGen Small"],
    "M": ["gc_m", "medium_model", "MusicGen Medium"],
    "L": ["gc_l", "large_model", "MusicGen Large"],
}

# Audio / output configuration
sample_rate = 32000
output_dir = "dataset"
csv_output_filename = "AcousticMusic.csv"
csv_columns = [
    "Path",
    "TrueCaption",
    "Generated",
    "Model",
    "Duration",
    "Sample Rate",
]
generated = 1  # value written to the "Generated" CSV column
prompts_filename = "prompts.txt"

# Selections for this run
duration = "7"
model_type = "M"

# Values derived from the selections above
audio_length = audio_lengths[duration]
filename_ext, model_dir, model_name = model_dict[model_type]

In [4]:
# Imports
import os
import torch
import soundfile as sf
import numpy as np
import pandas as pd
from transformers import MusicgenForConditionalGeneration, AutoProcessor
import warnings
import logging

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Function to load the saved model and processor from a directory
def load_model(model_dir):
    """Load the MusicGen model and its processor.

    Args:
        model_dir: Value forwarded to ``from_pretrained`` for both the
            model and the processor.

    Returns:
        A ``(model, processor)`` tuple.

    Raises:
        Exception: Any error raised while loading is printed and then
            re-raised unchanged.

    Note:
        The progress message reads the module-level ``model_name``.
    """
    try:
        print(f"Loading *{model_name}* model...")
        model = MusicgenForConditionalGeneration.from_pretrained(model_dir)
        processor = AutoProcessor.from_pretrained(model_dir)
    except Exception as e:
        print(f"Error loading model:\n{e}")
        # Re-raise so the caller sees the real failure; falling through to the
        # return would hit an UnboundLocalError on the unassigned names instead.
        raise
    print("Model loaded succesfully.")
    return model, processor


In [6]:
def get_prompts(filename):
    """Read the prompts file and return its lines.

    Args:
        filename: Path of the text file to read.

    Returns:
        The list produced by ``readlines()``; each entry keeps its trailing
        newline and blank lines are included.

    Raises:
        Exception: Any error raised while opening or reading the file is
            printed and then re-raised unchanged.
    """
    try:
        with open(filename, 'r') as file:
            prompts = file.readlines()
    except Exception as e:
        print(f"Error reading prompts file:\n{e}")
        # Re-raise: continuing would fail on the unassigned ``prompts`` below
        # and hide the real cause.
        raise
    print(f"{len(prompts)} prompts found.")
    return prompts

In [7]:
def save_audio(output_filename, waveform, csv_data, prompt, model_type):
    """Write the waveform to disk and build the CSV row describing it.

    Args:
        output_filename: Destination path handed to ``sf.write``.
        waveform: Audio samples to write.
        csv_data: Returned unchanged if saving fails.
        prompt: Text stored in the row's caption position.
        model_type: Accepted but not used by this function.

    Returns:
        On success, a one-element list holding the row
        ``[path, prompt, generated, model_name, duration, sample_rate]``;
        on failure, the ``csv_data`` argument as passed in.

    Note:
        Reads the module-level ``sample_rate``, ``generated``,
        ``model_name`` and ``duration``.
    """
    try:
        sf.write(output_filename, waveform, sample_rate)
        print(f"Music generated and saved as: '{output_filename}'.")

        # Column order: Path | TrueCaption | Generated | Model | Duration | Sample Rate
        row = [output_filename, prompt, generated, model_name, duration, sample_rate]
        result = [row]
    except Exception as err:
        print(f"Error saving audio:\n{err}")
        result = csv_data

    return result

In [8]:
def generate_unique_filename(base_filename):
    """Return a path inside ``output_dir`` that does not exist yet.

    ``base_filename`` is tried first. If it is taken, candidates of the form
    ``MG{filename_ext}_{n}.wav`` are tried for n = 1, 2, 3, ... until an
    unused path is found, so existing files are never overwritten.

    Args:
        base_filename: Preferred file name (without directory).

    Returns:
        The first non-existing path, joined onto ``output_dir``.

    Note:
        Reads the module-level ``output_dir`` and ``filename_ext``.
    """
    candidate = os.path.join(output_dir, base_filename)
    suffix = 0
    while os.path.exists(candidate):
        suffix += 1
        candidate = os.path.join(output_dir, f"MG{filename_ext}_{suffix}.wav")
    return candidate

In [9]:
def get_waveform(audio_values, processor):
    """Decode generated audio and normalise it to a 1-D float32 NumPy array.

    Args:
        audio_values: Output of ``model.generate``; passed to
            ``processor.batch_decode``.
        processor: Object providing ``batch_decode``.

    Returns:
        The first decoded item as a ``numpy.ndarray`` of dtype ``float32``,
        flattened if it was two-dimensional.
    """
    # Only the first item of the decoded batch is used
    decoded = processor.batch_decode(audio_values, output_audio=True)
    waveform = decoded[0]

    # Ensure the waveform is in the correct format (NumPy on the host)
    if isinstance(waveform, torch.Tensor):
        waveform = waveform.cpu().numpy()

    # Collapse 2-D data into a single dimension.
    # NOTE(review): for a 2-D array with more than one channel this would
    # concatenate the rows end to end - confirm inputs are single-channel.
    if waveform.ndim == 2:
        waveform = waveform.flatten()

    # Convert to float32 only when the dtype is something else
    needs_cast = waveform.dtype != np.float32
    return waveform.astype(np.float32) if needs_cast else waveform

In [10]:
def generate_music(model, processor, prompts, device, audio_length, model_type):
    """Generate, save and log one audio clip per non-empty prompt.

    For each prompt the text is tokenised, passed to ``model.generate``,
    converted with ``get_waveform``, written via ``save_audio`` under a name
    from ``generate_unique_filename``, and recorded with ``write_to_csv``.

    Args:
        model: Object providing ``generate``.
        processor: Callable that tokenises text; also given to ``get_waveform``.
        prompts: Iterable of prompt strings; blank entries are skipped.
        device: Device the tokenised inputs are moved to.
        audio_length: Passed to ``model.generate`` as ``max_new_tokens``.
        model_type: Forwarded to ``save_audio``.

    Returns:
        None.

    Note:
        Reads the module-level ``filename_ext`` and ``csv_output_filename``.
    """
    for idx, prompt in enumerate(prompts):
        prompt = prompt.strip()  # Remove leading/trailing whitespace & newlines

        if not prompt:  # Skip any empty lines
            continue
        print(f"\nGenerating music for: '{prompt}'...")

        # Tokenise the input text prompt
        inputs = processor(text=[prompt], return_tensors="pt", padding=True).to(device)

        # Run generation without tracking gradients
        with torch.no_grad():
            audio_values = model.generate(**inputs, max_new_tokens=audio_length)

        waveform = get_waveform(audio_values=audio_values, processor=processor)

        # Generate a unique filename for the output.
        # The index counts every line, including skipped blank ones.
        output_filename = generate_unique_filename(base_filename=f"MG{filename_ext}_{idx + 1}.wav")

        # Pass a fresh empty list for every prompt. save_audio returns its
        # csv_data argument when saving fails, so reusing the previous row
        # here would append that row to the CSV a second time.
        rows = save_audio(
            output_filename=output_filename, waveform=waveform,
            csv_data=[], prompt=prompt, model_type=model_type
        )

        # Only log clips that were actually saved
        if rows:
            write_to_csv(filename=csv_output_filename, csv_data=rows)

In [11]:
def write_to_csv(filename, csv_data):
    """Append rows to the CSV in ``output_dir``, creating the file if needed.

    Args:
        filename: CSV file name (without directory).
        csv_data: List of rows whose fields follow ``csv_columns``.

    Note:
        Reads the module-level ``output_dir`` and ``csv_columns``.
    """
    csv_filename = os.path.join(output_dir, filename)
    frame = pd.DataFrame(csv_data, columns=csv_columns)

    # The header row is written only when the file is first created;
    # later calls append data rows without it.
    is_new_file = not os.path.exists(csv_filename)
    frame.to_csv(csv_filename, mode='a', header=is_new_file, index=False)

    print(f"CSV file: '{csv_filename}' updated succesfully.")

In [12]:
def main():
    """Move the model to the selected device and run generation for all prompts.

    Reads the module-level ``model``, ``processor``, ``prompts``,
    ``audio_length`` and ``model_type``.
    """
    # Use the GPU when CUDA is available, otherwise stay on the CPU
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model.to(device)

    generation_args = dict(
        model=model,
        processor=processor,
        prompts=prompts,
        device=device,
        audio_length=audio_length,
        model_type=model_type,
    )
    generate_music(**generation_args)

In [13]:
# Replaces the model_dir value derived in the settings cell with a fixed identifier.
# NOTE(review): this does not follow model_type - confirm that is intended.
model_dir = "facebook/musicgen-medium"
model, processor = load_model(model_dir)

Loading *MusicGen Medium* model...


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
  self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False)
Config of the text_encoder: <class 'transformers.models.t5.modeling_t5.T5EncoderModel'> is overwritten by shared text_encoder config: T5Config {
  "_name_or_path": "t5-base",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "classifier_dropout": 0.0,
  "d_ff": 3072,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dense_act_fn": "relu",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": false,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 

Model loaded succesfully.


In [None]:
# Use the prompts_filename setting rather than repeating the literal file name
prompts = get_prompts(prompts_filename)

145 prompts found.


In [None]:
if __name__ == "__main__":
    # Suppress logs and warnings
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # TensorFlow
    warnings.filterwarnings("ignore") # Python warnings (e.g. from PyTorch)
    # Raise the threshold of the library loggers so only errors are shown
    for noisy_logger in ("transformers", "tensorflow", "torch"):
        logging.getLogger(noisy_logger).setLevel(logging.ERROR)

    # Ensure the output directory exists; exist_ok avoids the separate
    # existence check and the gap between checking and creating.
    os.makedirs(output_dir, exist_ok=True)

    main()
