In [5]:
import librosa

def extract_melody_info_from_wav(wav_file):
    # Load the WAV file and extract the melody
    y, sr = librosa.load(wav_file)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)

    # Get the most prominent pitch for each frame
    melody = []
    for frame in range(pitches.shape[1]):
        index = magnitudes[:, frame].argmax()
        frequency = pitches[index, frame]
        melody.append(frequency)

    # Calculate note onset and offset times
    onset_frames = librosa.onset.onset_detect(y=y, sr=sr)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr)
    duration = librosa.get_duration(y=y, sr=sr)

    # Calculate duration of each note
    note_durations = [onset_times[i + 1] - onset_times[i] for i in range(len(onset_times) - 1)]
    note_durations.append(duration - onset_times[-1])

    # Prepare list of dictionaries containing frequency, start time, and duration
    melody_info = []
    min_length = min(len(melody), len(onset_times))
    for i in range(min_length):
        freq = melody[i]
        if freq != 0:  # Only include non-silent notes
            note_info = {
                "frequency": freq,
                "start_time": onset_times[i],
                "duration": note_durations[i]
            }
            melody_info.append(note_info)

    return melody_info

# Example usage:
input_wav = "melody.wav"
melody_info = extract_melody_info_from_wav(input_wav)
print(melody_info)


[{'frequency': 207.20628, 'start_time': 0.06965986394557823, 'duration': 1.253877551020408}, {'frequency': 311.67514, 'start_time': 1.3235374149659864, 'duration': 0.20897959183673476}, {'frequency': 311.66437, 'start_time': 1.5325170068027212, 'duration': 0.20897959183673454}, {'frequency': 311.6623, 'start_time': 1.7414965986394557, 'duration': 1.6950566893424035}, {'frequency': 311.66287, 'start_time': 3.436553287981859, 'duration': 0.8823582766439908}, {'frequency': 311.66455, 'start_time': 4.31891156462585, 'duration': 0.6269387755102045}, {'frequency': 311.6623, 'start_time': 4.9458503401360545, 'duration': 0.20897959183673454}, {'frequency': 311.6648, 'start_time': 5.154829931972789, 'duration': 0.8823582766439912}, {'frequency': 311.66202, 'start_time': 6.03718820861678, 'duration': 0.8244444444444445}]
