In [2]:
import random
from pathlib import Path

import pandas as pd
import soundfile as sf
import torch
from transformers import AutoTokenizer, AutoModelForTextToWaveform


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Read the processed lyrics dataset and keep a reproducible random subset of
# 10 rows (random_state pins which rows are drawn)
df = (
    pd.read_csv('../NLP Project/processed_lyrics_sentiment_keywords.csv')
    .sample(n=10, random_state=14)
)

# Collect the 'prompt' column of the sampled rows as a plain Python list
prompts = df['prompt'].to_list()

print(prompts)


["All Saints's song 'Under the Bridge' has a positive tone and includes themes like ever wanna feel like, ever wanna feel like, ever wanna feel like and belongs to genres such as dance pop, europop with a tempo of around 85.996 BPM. The lyrics often reflect topics such as love like know say wanna", "Bach Johann Sebastian's song 'Fugue in G' has a negative tone and includes themes like baroque wig two times every year zamp sends, bit slower piano pianissimo pianissimo staccato, staccato arpegiatto fumante calzone graziosso and belongs to genres such as baroque, classical with a tempo of around 127.469 BPM. The lyrics often reflect topics such as sex da la che de", "Battisti's song 'Il vento' has a neutral tone and belongs to genres such as canzone d'autore, classic italian pop with a tempo of around 122.53 BPM. The lyrics often reflect topics such as love like know say wanna", "Bjork's song 'Human Behaviour' has a positive tone and includes themes like logic human human human human43emb

In [4]:
# Display the sampled rows to check the columns that accompany each prompt
df

Unnamed: 0,midi_file_path,artist_name,song_title,sentiment,keywords,spotify_metadata,topics,prompt
419,archive-new\All_Saints\Under_the_Bridge.mid,All Saints,Under the Bridge,0.250178,"['ever wanna feel like', 'ever wanna feel like...","{'energy': 0.564, 'acousticness': 0.0452, 'dan...",love like know say wanna,All Saints's song 'Under the Bridge' has a pos...
757,archive-new\Bach_Johann_Sebastian\Fugue_in_G.mid,Bach Johann Sebastian,Fugue in G,-0.022662,['baroque wig two times every year zamp sends'...,"{'energy': 0.137, 'acousticness': 0.99, 'dance...",sex da la che de,Bach Johann Sebastian's song 'Fugue in G' has ...
975,archive-new\Battisti\Il_vento.mid,Battisti,Il vento,0.0,[],"{'energy': 0.384, 'acousticness': 0.741, 'danc...",love like know say wanna,Battisti's song 'Il vento' has a neutral tone ...
1317,archive-new\Bjork\Human_Behaviour.mid,Bjork,Human Behaviour,0.006667,"['logic human human human human43embed', 'huma...","{'energy': 0.65, 'acousticness': 0.0396, 'danc...",get got call face back,Bjork's song 'Human Behaviour' has a positive ...
1635,archive-new\Brassens_Georges\Je_rejoindrai_ma_...,Brassens Georges,Je rejoindrai ma belle,0.0,[],"{'energy': 0.295, 'acousticness': 0.854, 'danc...",love like know say wanna,Brassens Georges's song 'Je rejoindrai ma bell...
871,archive-new\Barry_John\Main_Title_Looks_Like_a...,Barry John,Main Title Looks Like a Suicide,0.0,[],"{'energy': 0.0709, 'acousticness': 0.839, 'dan...",love like know say wanna,Barry John's song 'Main Title Looks Like a Sui...
770,archive-new\Bach_Johann_Sebastian\Musette_BWV_...,Bach Johann Sebastian,Musette BWV Anh.126,0.0,[],"{'energy': 0.262, 'acousticness': 0.949, 'danc...",love like know say wanna,Bach Johann Sebastian's song 'Musette BWV Anh....
259,archive-new\Alabama\High_Cotton.mid,Alabama,High Cotton,0.109355,"['kept us fed taught us', 'sunday mornings rol...","{'energy': 0.581, 'acousticness': 0.423, 'danc...",little one day fire every,Alabama's song 'High Cotton' has a positive to...
857,archive-new\Barbra_Streisand\What_Kind_of_Fool...,Barbra Streisand,What Kind of Fool,0.0,[],"{'energy': 0.245, 'acousticness': 0.39, 'dance...",love like know say wanna,Barbra Streisand's song 'What Kind of Fool' ha...
13,archive-new\2_Brothers_on_the_4th_Floor\Come_T...,2 Brothers on the 4th Floor,Come Take My Hand,0.0,[],"{'energy': 0.928, 'acousticness': 0.0068, 'dan...",love like know say wanna,2 Brothers on the 4th Floor's song 'Come Take ...


In [3]:
# Pick the compute device: use the GPU when CUDA is usable, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Show which device was selected. torch.cuda.get_device_name() raises on a
# machine without CUDA, so it is only queried when a GPU was actually picked;
# otherwise the CPU fallback above would never be reached.
torch.cuda.get_device_name(0) if device == 'cuda' else 'cpu (CUDA not available)'

'NVIDIA GeForce RTX 4070'

In [4]:
# Release unused cached GPU memory held by PyTorch before the model is loaded below
torch.cuda.empty_cache()

In [5]:
# Load the tokenizer that belongs to the "facebook/musicgen-medium" checkpoint;
# it turns each text prompt into the tensors passed to the model
tokenizer = AutoTokenizer.from_pretrained(
    "facebook/musicgen-medium"
)

In [6]:
# Load the pretrained text-to-waveform model for the same checkpoint as the tokenizer
model = AutoModelForTextToWaveform.from_pretrained(
    "facebook/musicgen-medium"
)



: 

In [None]:
# Move the model to the selected device (GPU when available, otherwise CPU)
# NOTE(review): as the cell's last expression, the returned module is echoed
# as cell output; assign the result or end the line with `;` to keep output short.
model.to(device)

In [None]:
# Generate one audio clip per prompt and save each clip as a WAV file.
# `file_paths` ends up with one entry per prompt, in the same order as `prompts`.

# The target folder must exist before anything is written into it
output_dir = Path("./medium_model")
output_dir.mkdir(parents=True, exist_ok=True)

# Write the files at the rate the model's audio codec actually produces.
# MusicGen checkpoints generate 32 kHz audio, so hard-coding 44100 here would
# make every clip play back too fast and too high-pitched.
sampling_rate = model.config.audio_encoder.sampling_rate

file_paths = []
for i, prompt in enumerate(prompts):
    # Tokenize the prompt and place the tensors on the same device as the model
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Inference only: no gradients are needed
    with torch.no_grad():
        waveform = model.generate(**inputs)

    # squeeze() drops the size-1 dimensions (presumably batch and channel for a
    # single mono prompt -- confirm for stereo checkpoints), then the samples
    # are moved to the CPU as a NumPy array for soundfile
    waveform_np = waveform.squeeze().detach().cpu().numpy()

    # Save the waveform as a WAV file and remember where it went
    file_name = f"./medium_model/output_{i}.wav"
    sf.write(file_name, waveform_np, sampling_rate)
    file_paths.append(file_name)


In [None]:
# Persist the prompt -> generated WAV path mapping as a CSV next to the audio files
output_df = pd.DataFrame(
    data={'prompt': prompts, 'wav_file': file_paths},
)
output_df.to_csv(
    path_or_buf='./medium_model/output_with_wav.csv',
    index=False,
)