In [82]:
import os
import subprocess
import librosa
import tensorflow as tf
import soundfile as sf
import numpy as np
from spectrogramGenerator import generate_mel_spectrogram

# Function to translate note names from '0-a' to 'A0' etc.
def translate_note_name(note):
    """Convert a label such as ``'0-a'`` into a note name such as ``'A0'``.

    The label has the form ``<octave>-<pitch>``; the pitch is lower-case and
    a trailing ``s`` marks a sharp (``'cs'`` becomes ``'C#'``).

    Args:
        note: label string containing exactly one ``'-'``.

    Returns:
        The upper-case pitch (with ``#`` for sharps) followed by the octave
        text exactly as it appeared in the label.

    Raises:
        ValueError: if ``note`` has no ``'-'``, or splits into anything other
            than two parts.
        KeyError: if the pitch part is not one of the twelve names below.
    """
    if '-' in note:
        octave, pitch = note.split('-')
        pitch_names = {
            'a': 'A', 'as': 'A#',
            'b': 'B',
            'c': 'C', 'cs': 'C#',
            'd': 'D', 'ds': 'D#',
            'e': 'E',
            'f': 'F', 'fs': 'F#',
            'g': 'G', 'gs': 'G#',
        }
        return pitch_names[pitch] + octave
    raise ValueError(f"Invalid note format: {note}")

def translate_note_name_to_lilypond(note):
    """Translate a note name such as ``'C4'`` or ``'F#3'`` into a LilyPond pitch.

    The last character of ``note`` is the octave digit and everything before
    it is the pitch, so sharp names (``'C#5'``) are handled as well as
    naturals. Octave 4 is written without any octave mark; each octave above
    adds one ``'`` and each octave below adds one ``,``.

    NOTE(review): in LilyPond's absolute octave entry an unmarked ``c`` is the
    C below middle C, so whether octave 4 should be unmarked depends on the
    relative block the result is placed in - confirm against the caller.

    Args:
        note: pitch letter, optional ``#``, then a single octave digit.

    Returns:
        The LilyPond pitch name followed by its octave marks.

    Raises:
        IndexError: if ``note`` is empty.
        ValueError: if the last character is not a digit.
        KeyError: if the pitch part is not a known note name.
    """
    note_translation = {
        'C': 'c', 'C#': 'cis',
        'D': 'd', 'D#': 'dis',
        'E': 'e',
        'F': 'f', 'F#': 'fis',
        'G': 'g', 'G#': 'gis',
        'A': 'a', 'A#': 'ais',
        'B': 'b',
    }
    # Read the octave from the end so that a '#' in second position is not
    # mistaken for the octave digit.
    octave = int(note[-1])
    lilypond_note = note_translation[note[:-1]]
    if octave < 4:
        lilypond_note += "," * (4 - octave)
    elif octave > 4:
        lilypond_note += "'" * (octave - 4)
    return lilypond_note

def segment_audio(file_path, segments_folder):
    """Cut an audio file at its detected onsets and save each piece as a WAV.

    The file is loaded with ``sr=None``, onsets are detected in sample units,
    and one file ``segment_<i>.wav`` is written into ``segments_folder`` (which
    is created if missing) per onset. A segment runs from its onset to the
    next onset; the last one runs to the end of the signal. Audio before the
    first onset is not written.

    Args:
        file_path: path of the audio file to split.
        segments_folder: directory that receives the segment files.

    Returns:
        A list of ``(segment_file, start, end)`` tuples, where ``start`` and
        ``end`` are sample indices into the loaded signal.
    """
    samples, sample_rate = librosa.load(file_path, sr=None)
    onset_samples = librosa.onset.onset_detect(y=samples, sr=sample_rate, units='samples')
    os.makedirs(segments_folder, exist_ok=True)

    # Each segment ends where the next one starts; the signal length closes
    # the final segment.
    boundaries = list(onset_samples) + [len(samples)]
    segments = []
    for index, (start, end) in enumerate(zip(boundaries, boundaries[1:])):
        segment_file = os.path.join(segments_folder, f'segment_{index}.wav')
        sf.write(segment_file, samples[start:end], sample_rate)
        segments.append((segment_file, start, end))
    return segments

def extract_beats_and_timing(audio_file_path):
    """Estimate tempo, beat times and onset times for an audio file.

    The signal is loaded with ``sr=None`` and split with
    ``librosa.effects.hpss``; beat tracking and onset detection both run on
    the percussive component only.

    Args:
        audio_file_path: path of the audio file to analyse.

    Returns:
        ``(tempo, beat_times, onset_times)`` - the tempo as returned by
        ``librosa.beat.beat_track`` and the beat and onset frames converted
        with ``librosa.frames_to_time``.
    """
    signal, sample_rate = librosa.load(audio_file_path, sr=None)
    # The harmonic component is not needed for any of the steps below.
    _, percussive = librosa.effects.hpss(signal)

    tempo, beat_frames = librosa.beat.beat_track(y=percussive, sr=sample_rate, trim=False)
    onset_frames = librosa.onset.onset_detect(y=percussive, sr=sample_rate)

    beat_times = librosa.frames_to_time(beat_frames, sr=sample_rate)
    onset_times = librosa.frames_to_time(onset_frames, sr=sample_rate)
    return tempo, beat_times, onset_times

def find_silence_end(y, sr, onset_times):
    """Return the start time of the first split interval 20 s past the last onset.

    The signal is split with ``librosa.effects.split`` at ``top_db=60`` and the
    intervals are scanned in order. The first interval whose start time (in
    seconds) is at least ``onset_times[-1] + 20`` wins; if there is none, the
    total duration of the signal is returned instead.

    NOTE(review): librosa documents ``effects.split`` as returning the
    NON-silent intervals, so the value found here is where sound resumes
    rather than where silence begins - confirm this is what callers expect.

    Args:
        y: audio signal.
        sr: sample rate used to convert sample indices to seconds.
        onset_times: onset times in seconds; must not be empty.

    Returns:
        A time in seconds.
    """
    intervals = librosa.effects.split(y, top_db=60)
    # The recording is assumed to carry 20 seconds of silence after the last note.
    earliest_allowed = onset_times[-1] + 20
    for interval_start, _interval_end in intervals:
        candidate_time = interval_start / sr
        if candidate_time >= earliest_allowed:
            return candidate_time
    # No interval that late: fall back to the full length of the signal.
    return librosa.get_duration(y=y, sr=sr)

def calculate_note_durations(onset_times, audio_length, silence_start_time, trailing_silence=20):
    """Compute one duration per onset from the gaps between onsets.

    Every note except the last lasts until the next onset. The last note is
    estimated as ``silence_start_time - onset_times[-1] - trailing_silence``.

    NOTE(review): when ``silence_start_time`` is only a fallback (for example
    the full audio length) and the recording has less than
    ``trailing_silence`` seconds after the last onset, the last duration comes
    out negative - callers may want to validate it.

    Args:
        onset_times: onset times in seconds (array or sequence).
        audio_length: total audio length; accepted for interface
            compatibility but not used in the computation.
        silence_start_time: time in seconds taken as the end of the material.
        trailing_silence: seconds of silence assumed to follow the last note;
            defaults to the 20 seconds the recordings are expected to carry.

    Returns:
        A NumPy array with ``len(onset_times)`` durations in seconds (empty
        when there are no onsets).
    """
    # Without onsets there is no "last note" to estimate; return an empty
    # result instead of failing on onset_times[-1].
    if len(onset_times) == 0:
        return np.array([], dtype=float)

    note_durations = np.diff(onset_times)
    last_note_duration = silence_start_time - onset_times[-1] - trailing_silence
    return np.append(note_durations, last_note_duration)

def classify_note_types(note_durations, tempo):
    """Map each duration in seconds to a LilyPond duration number.

    A quarter note lasts ``60 / tempo`` seconds. Each duration is compared, in
    the order whole, half, quarter, eighth, against the matching reference
    length using ``np.isclose`` with ``atol=0.2``; the first match decides.
    Durations that match nothing are reported as ``'1'``.

    Args:
        note_durations: iterable of durations in seconds.
        tempo: beats per minute.

    Returns:
        A list of the strings ``'1'``, ``'2'``, ``'4'`` or ``'8'``, one per
        input duration.
    """
    beat = 60 / tempo
    # Order matters: the tolerance windows can overlap, and the first hit wins.
    references = (
        ('1', 4 * beat),
        ('2', 2 * beat),
        ('4', beat),
        ('8', beat / 2),
    )
    fallback = '1'  # used when no reference length is close enough

    return [
        next(
            (symbol for symbol, length in references if np.isclose(duration, length, atol=0.2)),
            fallback,
        )
        for duration in note_durations
    ]


def guess_time_signature(audio_file_path):
    """Guess a time signature from the tempo of an audio file.

    This is a deliberately naive heuristic and may well be inaccurate: a
    tempo below 100 is reported as ``'3/4'``, anything else as ``'4/4'``.

    Args:
        audio_file_path: path of the audio file to analyse.

    Returns:
        ``(tempo, time_signature)`` where ``tempo`` comes from
        ``librosa.beat.beat_track`` on the full signal and ``time_signature``
        is one of the two strings above.
    """
    signal, sample_rate = librosa.load(audio_file_path, sr=None)
    tempo, _ = librosa.beat.beat_track(y=signal, sr=sample_rate)
    return tempo, ('3/4' if tempo < 100 else '4/4')

def note_to_lilypond(note):
    """Turn a note name such as ``'C#5'`` into LilyPond pitch text.

    The final character is the octave digit; the rest is the pitch, which is
    looked up in a table of the twelve note names (sharps become ``...is``).
    Octave 4 gets no octave mark, each octave above adds one apostrophe and
    each octave below adds one comma.

    Raises:
        IndexError: if ``note`` is empty.
        ValueError: if the last character is not a digit.
        KeyError: if the pitch part is not in the table.
    """
    octave = int(note[-1])
    pitch_names = {
        'C': 'c', 'C#': 'cis',
        'D': 'd', 'D#': 'dis',
        'E': 'e',
        'F': 'f', 'F#': 'fis',
        'G': 'g', 'G#': 'gis',
        'A': 'a', 'A#': 'ais',
        'B': 'b',
    }
    base = pitch_names[note[:-1]]

    # Repeating a string a negative number of times yields '', so exactly one
    # of the two suffixes below is non-empty (or neither, for octave 4).
    shift = octave - 4
    return base + "'" * shift + "," * -shift

def is_new_bar(measure_length, time_signature):
    """Tell whether the accumulated length fills a bar of the time signature.

    Args:
        measure_length: accumulated length as a fraction of a whole note.
        time_signature: string of the form ``'<beats>/<beat type>'``.

    Returns:
        ``True`` when ``measure_length`` is at least ``beats / beat type``.
    """
    numerator, denominator = (int(part) for part in time_signature.split('/'))
    return measure_length >= numerator / denominator

def generate_lilypond_file(tempo, time_signature, notes_with_types, ly_file_name):
    """Write a two-staff LilyPond score with explicit bar lines.

    Notes are grouped into bars whose length follows ``time_signature``. Each
    bar becomes its own relative block and consecutive bars are separated by
    an explicit bar command. A note whose name ends in ``'4'`` goes on the
    treble staff with a rest on the bass staff; every other note goes on the
    bass staff with a rest on the treble staff.

    Compared with the earlier implementation this version
      * subtracts the real bar length (not a whole note) when a bar is full,
        so signatures other than 4/4 stay in step,
      * closes a bar after the note that completes it rather than before it,
      * writes no stray closing brace when there are no notes at all.

    Args:
        tempo: beats per minute; written as ``int(tempo)``.
        time_signature: string of the form ``'<beats>/<beat type>'``.
        notes_with_types: iterable of ``(note, note_type)`` pairs, e.g.
            ``('C#4', '8')``; ``note_type`` must be convertible with ``int``.
        ly_file_name: path of the LilyPond file to create or overwrite.

    Raises:
        ValueError: if ``time_signature`` or a ``note_type`` is malformed.
    """
    # Parse before opening the file so a bad signature does not leave a
    # truncated output file behind.
    beats, beat_type = map(int, time_signature.split('/'))
    bar_length = beats / beat_type

    upper_bars = []
    lower_bars = []
    measure_length = 0
    for note, note_type in notes_with_types:
        lilypond_note = note_to_lilypond(note)

        # Open a bar for the very first note, and again whenever the previous
        # bar has been filled and another note follows.
        if not upper_bars:
            upper_bars.append([])
            lower_bars.append([])
        elif measure_length >= bar_length:
            measure_length -= bar_length
            upper_bars.append([])
            lower_bars.append([])

        # NOTE(review): note_to_lilypond only accepts upper-case pitch names,
        # so the lower-case prefix test looks like it can never match; in
        # practice only the "ends in 4" test selects the treble staff - confirm.
        if note.endswith('4') or note.startswith(('d', 'e', 'f', 'g', 'a', 'b')):
            upper_bars[-1].append(f"{lilypond_note}{note_type}")
            lower_bars[-1].append(f"r{note_type}")
        else:
            upper_bars[-1].append(f"r{note_type}")
            lower_bars[-1].append(f"{lilypond_note}{note_type}")

        measure_length += 1 / int(note_type)

    bar_separator = " \\bar \"|\" "
    upper_staff = bar_separator.join(
        "\\relative c' { " + " ".join(bar) + " }" for bar in upper_bars
    )
    lower_staff = bar_separator.join(
        "\\relative c'' { " + " ".join(bar) + " }" for bar in lower_bars
    )

    with open(ly_file_name, 'w') as file:
        file.write("\\version \"2.18.2\"\n")
        file.write("\\score {\n  \\new StaffGroup <<\n    \\new Staff {\n      \\clef treble\n")
        file.write(f"      \\time {time_signature}\n")
        file.write(f"      \\tempo 4 = {int(tempo)}\n")
        file.write("      " + upper_staff + "\n    }\n")
        file.write("    \\new Staff {\n      \\clef bass\n      " + lower_staff + "\n    }\n")
        file.write("  >>\n  \\layout {}\n  \\midi {}\n}\n")

def generate_lilypond_file_acdll(tempo, time_signature, notes_with_types, ly_file_name):
    """Write a two-staff LilyPond score with one shared treble relative block.

    All treble tokens are placed inside a single relative block opened in the
    header, while every bass token is wrapped in its own relative block. A
    note whose name ends in ``'4'`` sounds on the treble staff (the bass gets
    a rest of the same length); every other note sounds on the bass staff.
    Each note and its LilyPond spelling is also printed to stdout.

    Args:
        tempo: beats per minute; written as ``int(tempo)``.
        time_signature: text written verbatim after the time command.
        notes_with_types: iterable of ``(note, note_type)`` pairs.
        ly_file_name: path of the LilyPond file to create or overwrite.
    """
    with open(ly_file_name, 'w') as file:
        emit = file.write
        emit("\\version \"2.18.2\"\n")
        emit("\\score {\n  \\new StaffGroup <<\n    \\new Staff {\n      \\clef treble\n")
        emit(f"      \\time {time_signature}\n")
        emit(f"      \\tempo 4 = {int(tempo)}\n")
        emit("      \\relative c' {\n")

        treble_tokens = []
        bass_tokens = ["\\new Staff {\n      \\clef bass\n"]

        for note, note_type in notes_with_types:
            lilypond_note = note_to_lilypond(note)
            print(note, lilypond_note)

            sounding = f"{lilypond_note}{note_type}"
            rest = f"r{note_type}"
            # NOTE(review): names accepted by note_to_lilypond start with an
            # upper-case letter, so the lower-case prefix test looks inert - confirm.
            on_treble = note.endswith('4') or note.startswith(('d', 'e', 'f', 'g', 'a', 'b'))
            treble_token, bass_token = (sounding, rest) if on_treble else (rest, sounding)

            treble_tokens.append(f" {treble_token}")
            bass_tokens.append(f" \\relative c'' {{ {bass_token} }}")

        emit("".join(treble_tokens))
        emit("\n      }\n    }\n")
        emit("".join(bass_tokens))
        emit("\n    }\n  >>\n  \\layout {}\n  \\midi {}\n}\n")

def generate_lilypond_file_1(tempo, time_signature, notes_with_types, ly_file_name):
    """Write a two-staff LilyPond score with per-note treble relative blocks.

    Every treble token is wrapped in its own relative block (nested inside the
    one opened in the header), while bass tokens are written bare. A note
    whose name ends in ``'4'`` sounds on the treble staff (the bass gets a
    rest of the same length); every other note sounds on the bass staff.
    Each note and its LilyPond spelling is also printed to stdout.

    Args:
        tempo: beats per minute; written as ``int(tempo)``.
        time_signature: text written verbatim after the time command.
        notes_with_types: iterable of ``(note, note_type)`` pairs.
        ly_file_name: path of the LilyPond file to create or overwrite.
    """
    with open(ly_file_name, 'w') as file:
        emit = file.write
        emit("\\version \"2.18.2\"\n")
        emit("\\score {\n  \\new StaffGroup <<\n    \\new Staff {\n      \\clef treble\n")
        emit(f"      \\time {time_signature}\n")
        emit(f"      \\tempo 4 = {int(tempo)}\n")
        emit("      \\relative c' {\n")

        treble_tokens = []
        bass_tokens = ["\\new Staff {\n      \\clef bass\n"]

        for note, note_type in notes_with_types:
            lilypond_note = note_to_lilypond(note)
            print(note, lilypond_note)

            sounding = f"{lilypond_note}{note_type}"
            rest = f"r{note_type}"
            # NOTE(review): names accepted by note_to_lilypond start with an
            # upper-case letter, so the lower-case prefix test looks inert - confirm.
            on_treble = note.endswith('4') or note.startswith(('d', 'e', 'f', 'g', 'a', 'b'))
            treble_token, bass_token = (sounding, rest) if on_treble else (rest, sounding)

            treble_tokens.append(f" \\relative c' {{ {treble_token} }}")
            bass_tokens.append(f" {bass_token}")

        emit("".join(treble_tokens))
        emit("\n      }\n    }\n")
        emit("".join(bass_tokens))
        emit("\n    }\n  >>\n  \\layout {}\n  \\midi {}\n}\n")

              
def generate_pdf(ly_file_name, pdf_file_name, lilypond_path=None):
    """Render a LilyPond source file to PDF by running the lilypond program.

    Args:
        ly_file_name: path of the LilyPond source file.
        pdf_file_name: desired name of the PDF. A trailing ``.pdf`` is removed
            before it is passed to ``-o`` because LilyPond appends the suffix
            itself (see the LilyPond command-line documentation).
        lilypond_path: path of the lilypond executable. When omitted, the
            default Windows install location is used if it exists, otherwise
            whatever ``lilypond`` is found on ``PATH``.

    Raises:
        FileNotFoundError: if the executable cannot be started.
        subprocess.CalledProcessError: if lilypond exits with a non-zero status.
    """
    import shutil  # local import: only needed here to locate the executable

    if lilypond_path is None:
        default_path = "C:\\Program Files (x86)\\LilyPond\\bin\\lilypond.exe"  # Adjust this path as necessary
        if os.path.isfile(default_path):
            lilypond_path = default_path
        else:
            lilypond_path = shutil.which("lilypond") or default_path

    if pdf_file_name.lower().endswith(".pdf"):
        output_base = pdf_file_name[:-len(".pdf")]
    else:
        output_base = pdf_file_name

    # Pass the argument list straight to the program. Combining a list with
    # shell=True hands only the first element to the shell on POSIX systems,
    # silently dropping the remaining arguments.
    command = [lilypond_path, "-o", output_base, ly_file_name]
    subprocess.run(command, check=True)
# Class index for every note label '<octave>-<pitch>'. Labels are numbered in
# this order: the three notes of octave 0, then twelve pitches for each of the
# octaves 1-7, then the single top note '8-c'.
_PITCH_ORDER = ('a', 'as', 'b', 'c', 'cs', 'd', 'ds', 'e', 'f', 'fs', 'g', 'gs')
_NOTE_LABELS = (
    [f'0-{pitch}' for pitch in _PITCH_ORDER[:3]]
    + [f'{octave}-{pitch}' for octave in range(1, 8) for pitch in _PITCH_ORDER]
    + ['8-c']
)
note_mapping = {label: index for index, label in enumerate(_NOTE_LABELS)}


def music_decoder(file_path, model_path):
    """Transcribe an audio recording into a LilyPond score and render it to PDF.

    Steps:
      1. Split the recording at its onsets (``segment_audio``).
      2. For each segment, generate a mel spectrogram image and let the Keras
         model loaded from ``model_path`` predict a note class.
      3. Translate the class indices to note names via ``note_mapping``.
      4. Estimate tempo, note durations and a time signature from the audio.
      5. Write ``<name>.ly`` and render ``<name>_music_score.pdf``, where
         ``<name>`` is the input file name without its extension.

    Args:
        file_path: path of the audio file to transcribe.
        model_path: path of the saved Keras model.
    """
    segments_folder = 'segments'
    spectrograms_folder = 'spectrograms'
    segments = segment_audio(file_path, segments_folder)
    model = tf.keras.models.load_model(model_path)

    predictions = []
    for segment_file, _start, _end in segments:
        segment_stem = os.path.splitext(os.path.basename(segment_file))[0]
        spectrogram_file = os.path.join(spectrograms_folder, segment_stem + '.png')
        print(f"Generating spectrogram and predicting note for segment: {segment_file}")
        # Presumably writes '<segment stem>.png' into spectrograms_folder;
        # the image is read back from that path below - confirm against
        # spectrogramGenerator.
        generate_mel_spectrogram(segment_file, spectrograms_folder)
        img = tf.keras.preprocessing.image.load_img(spectrogram_file, target_size=(308, 775))
        img_array = tf.keras.preprocessing.image.img_to_array(img)
        img_array = tf.expand_dims(img_array, 0)  # add a leading batch axis
        predictions.append(model.predict(img_array))

    # Translate predictions to note names. Invert the label table once rather
    # than scanning it for every prediction.
    index_to_note = {index: note for note, index in note_mapping.items()}
    note_predictions = [
        translate_note_name(index_to_note[int(np.argmax(prediction))])
        for prediction in predictions
    ]

    y, sr = librosa.load(file_path, sr=None)  # Loading the audio file again to get its length
    audio_length = librosa.get_duration(y=y, sr=sr)
    tempo, _beat_times, onset_times = extract_beats_and_timing(file_path)
    silence_start_time = find_silence_end(y, sr, onset_times)
    note_durations = calculate_note_durations(onset_times, audio_length, silence_start_time)
    note_types = classify_note_types(note_durations, tempo)

    # NOTE(review): the segments come from onsets detected on the full signal,
    # the durations from onsets detected on its percussive component; if the
    # two counts differ, zip() silently stops at the shorter list.
    predicted_notes_with_types = list(zip(note_predictions, note_types))
    print(predicted_notes_with_types)
    _, time_signature = guess_time_signature(file_path)

    file_name_without_extension = os.path.splitext(os.path.basename(file_path))[0]

    ly_file_name = f'{file_name_without_extension}.ly'
    pdf_file_name = f'{file_name_without_extension}_music_score.pdf'

    # The second comparison previously used an undefined name, `file`, which
    # raised NameError for every input other than 'acdll.wav'.
    if file_path in ('acdll.wav', 'rowRow.wav'):
        print("-----------")
        generate_lilypond_file_acdll(tempo, time_signature, predicted_notes_with_types, ly_file_name)
    else:
        generate_lilypond_file_1(tempo, time_signature, predicted_notes_with_types, ly_file_name)
    generate_pdf(ly_file_name, pdf_file_name)

# Script entry point: run the decoder on 'rowRow.wav' using the model file
# 'cnn_model.h5', both looked up relative to the current working directory.
if __name__ == '__main__':
    music_decoder('rowRow.wav', 'cnn_model.h5')


Generating spectrogram and predicting note for segment: segments\segment_0.wav
Generating spectrogram and predicting note for segment: segments\segment_1.wav
Generating spectrogram and predicting note for segment: segments\segment_2.wav
Generating spectrogram and predicting note for segment: segments\segment_3.wav
Generating spectrogram and predicting note for segment: segments\segment_4.wav
Generating spectrogram and predicting note for segment: segments\segment_5.wav
Generating spectrogram and predicting note for segment: segments\segment_6.wav
Generating spectrogram and predicting note for segment: segments\segment_7.wav
Generating spectrogram and predicting note for segment: segments\segment_8.wav
Generating spectrogram and predicting note for segment: segments\segment_9.wav
Generating spectrogram and predicting note for segment: segments\segment_10.wav
Generating spectrogram and predicting note for segment: segments\segment_11.wav
Generating spectrogram and predicting note for seg