**HMM Logic Sequence**

- Input: MIDI file; audio file
- Separate audio into tonal & transient responses
- Move through the score sequentially
- At each point in the score, take the corresponding block of audio samples
- Transition matrix: geometric distribution model of state durations
- Emission probability: tonal likelihood given by the Gaussian process (GP) log marginal likelihood (LML)
- Viterbi algorithm 'windowed' to compute most probable state sequence

In [None]:
pip install pretty_midi pydub

In [None]:
from google.colab import drive
drive.mount('/content/drive')
from pretty_midi import PrettyMIDI
import matplotlib.pyplot as plt


def load_and_filter_midi_pretty(file_path, note_range=(59, 73)):
    """
    Read a MIDI file with pretty_midi and drop every note outside a pitch range.

    Args:
        file_path (str): Path to the MIDI file.
        note_range (tuple): Inclusive (min_note, max_note) MIDI pitch bounds.

    Returns:
        PrettyMIDI: The freshly loaded object with each instrument's note list
        replaced by its in-range subset, or None if anything raised.
    """
    try:
        midi_data = PrettyMIDI(file_path)

        # Overwrite each instrument's notes with only those inside the bounds.
        for track in midi_data.instruments:
            track.notes = [
                candidate
                for candidate in track.notes
                if note_range[0] <= candidate.pitch <= note_range[1]
            ]

        return midi_data

    except Exception as e:
        # Best-effort loader: report the failure and signal it with None.
        print(f"Error loading MIDI file: {e}")
        return None


def display_midi_and_notes_pretty(midi_data):
    """
    Print every note of a PrettyMIDI object with its pitch spelled as a note name.

    Args:
        midi_data (PrettyMIDI): The PrettyMIDI object to analyze.

    Returns:
        list: (start, end, pitch) tuples ordered by start time, with pitch kept
        as the MIDI number. An empty list is returned if anything raises.
    """
    pitch_classes = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')

    def spell(midi_number):
        """Return pitch class plus octave for a MIDI note number (60 -> 'C4')."""
        octave_index, degree = divmod(midi_number, 12)
        return f"{pitch_classes[degree]}{octave_index - 1}"

    try:
        notes = [
            (event.start, event.end, event.pitch)
            for track in midi_data.instruments
            for event in track.notes
        ]
        # Stable sort on the start time only, so simultaneous notes keep their order.
        notes.sort(key=lambda item: item[0])

        # Build the whole printable list first so a bad pitch prints nothing.
        labelled = [(begin, finish, spell(pitch)) for begin, finish, pitch in notes]
        print("\nParsed Notes (start, end, pitch):")
        for row in labelled:
            print(row)

        return notes

    except Exception as e:
        print(f"Error displaying MIDI notes: {e}")
        return []


def plot_midi_piano_roll_pretty(midi_data):
    """
    Draw a scatter-style piano roll (note onset vs. MIDI pitch) for a PrettyMIDI object.

    Any exception raised while collecting notes or plotting is caught and
    reported with a printed message; nothing is returned.

    Args:
        midi_data (PrettyMIDI): The PrettyMIDI object to visualize.
    """
    try:
        # One (pitch, onset) pair per note, across all instruments.
        pitch_onset_pairs = [
            (event.pitch, event.start)
            for track in midi_data.instruments
            for event in track.notes
        ]
        pitches = [pitch for pitch, _ in pitch_onset_pairs]
        onsets = [onset for _, onset in pitch_onset_pairs]

        plt.figure(figsize=(10, 6))
        plt.scatter(onsets, pitches, marker='o', color='blue', alpha=0.7)
        plt.xlabel("Time (seconds)")
        plt.ylabel("MIDI Note Number")
        plt.title("MIDI Piano Roll Visualization (PrettyMIDI)")
        # Tick every second pitch between the lowest and highest note.
        plt.yticks(range(min(pitches), max(pitches) + 1, 2))
        plt.grid(True, linestyle='--', alpha=0.6)
        plt.show()

    except Exception as e:
        print(f"Error plotting piano roll: {e}")


# Example usage: load `cmajorscale.mid` from the mounted Drive, keeping only the
# pitches inside the loader's default note range, then plot the remaining notes.
# NOTE: the loader returns None on failure, in which case the plot call below
# only prints an error message.
midi = load_and_filter_midi_pretty('/content/drive/MyDrive/cmajorscale.mid')
# display_midi_and_notes_pretty(midi)
plot_midi_piano_roll_pretty(midi)


In [None]:
import numpy as np
from scipy.stats import geom
import librosa
import pretty_midi
from scipy.ndimage import median_filter
from pydub import AudioSegment
import librosa
import matplotlib.pyplot as plt
from google.colab import drive
drive.mount('/content/drive')
from IPython.display import Audio
from scipy.io import wavfile
from scipy.fft import fft, ifft
from scipy.signal.windows import hamming, hann
from scipy.linalg import cho_factor, cho_solve




# Load the recording once at cell level and read its native frame rate.
# NOTE(review): extract_features() re-reads the same file itself, and
# `sample_rate` is overwritten with a fixed value further down.
audio = AudioSegment.from_file('/content/drive/MyDrive/cmajorscale.mp3')
sample_rate = audio.frame_rate
audio_data = np.array(audio.get_array_of_samples())

# --- Tonal/transient separation settings (read by extract_features) ---
time_window_size = 2048  # Analysis frame length in samples
hop_size = int(time_window_size * 0.5)  # Frame hop: half a frame (50% overlap)
hmm_hop_size = 5000  # Hop between HMM observations in samples; NOTE(review): compute_emission_probabilities has its own default of 5000
window_size = 19  # Length of the median filter run along the magnitude spectrum
alpha = 0.4  # Scale applied to the median-filtered magnitude
sigma_p2 = 1e1 # Transient noise (not read by the code visible in this notebook)

# --- Gaussian-process covariance settings ---
sigma_f2 = 2e-5  # Decay parameter for covariance
sigma_n2 = 3  # Noise variance
wq = [1.0]  # Weights for fundamentals
M = 9  # Number of harmonics

# NOTE(review): this replaces the frame rate read from the file above, so a
# recording that is not 44.1 kHz would be processed with the wrong rate.
sample_rate = 44100

# Parameters of harmonic_weight(): harmonic m is weighted 1 / (1 + T * m**v).
T = 0.5
v = 2.37




def parse_midi(midi_data):    #✅
    """
    Collect the notes of every non-drum instrument, ordered by onset.

    The resulting list is also printed.

    Args:
        midi_data (PrettyMIDI): Parsed MIDI object.

    Returns:
        list: (start, end, pitch) tuples sorted by start time.
    """
    melodic_tracks = (track for track in midi_data.instruments if not track.is_drum)
    notes = [
        (event.start, event.end, event.pitch)
        for track in melodic_tracks
        for event in track.notes
    ]
    # Stable sort on onset only, so notes with equal onsets keep file order.
    notes.sort(key=lambda event: event[0])
    print(notes)
    return notes


def compute_transition_matrices(E_Z_list, block_size=2048):
    """
    Build a single left-to-right transition matrix for the whole score.

    Each state corresponds to one score event (note/chord). All states share
    one duration model derived from the average event length:

        d_avg = mean(E_Z_list) / block_size      (average length in steps)
        self-transition probability   p = 1 - 1/d_avg   (0 when d_avg <= 1)
        transition to the next state      1 - p

    The final state is absorbing (self-transition probability 1).

    Args:
        E_Z_list (sequence of float): Expected duration of each event, in samples.
        block_size (int): Number of samples one HMM step represents.

    Returns:
        np.ndarray: Matrix of shape (n_states, n_states); a (0, 0) matrix when
        E_Z_list is empty.
    """
    n_states = len(E_Z_list)
    if n_states == 0:
        # No events: avoid the mean of an empty list and indexing an empty matrix.
        return np.zeros((0, 0))

    d_avg = np.mean(E_Z_list) / block_size
    stay = 1 - 1 / d_avg if d_avg > 1 else 0.0

    transitions = np.zeros((n_states, n_states))
    idx = np.arange(n_states)
    transitions[idx, idx] = stay
    transitions[idx[:-1], idx[:-1] + 1] = 1 - stay

    # The last event has no successor, so it keeps all of its probability mass.
    transitions[-1, -1] = 1
    return transitions



def extract_features(audio_path='/content/drive/MyDrive/cmajorscale.mp3'):   #✅
    """
    Split a recording into a tonal and a transient signal.

    Each Hann-windowed frame (length `time_window_size`, hop `hop_size`) is
    transformed with an FFT. Its magnitude is clipped to `alpha` times a median
    filter (length `window_size`) of itself, which keeps the smooth part of the
    spectrum and removes narrow peaks. The clipped frames are overlap-added into
    the transient signal, and the tonal signal is the input minus that signal.

    Args:
        audio_path (str): File to load with pydub. Defaults to the notebook's
            C-major-scale recording.

    Returns:
        tuple: (tonal_audio, processed_audio), both int16 arrays peak-normalised
        to 32767 and as long as the mono input. `processed_audio` is the
        transient signal.
    """
    audio = AudioSegment.from_file(audio_path)
    samples = np.array(audio.get_array_of_samples())

    if audio.channels > 1:   # Interleaved multi-channel data: average to mono
        samples = samples.reshape((-1, audio.channels))
        audio_data = samples.mean(axis=1)
    else:
        audio_data = samples
    # Work in float64 throughout. A mono file presumably arrives as an integer
    # array (int16 for 16-bit audio), and integer subtraction below would wrap.
    audio_data = audio_data.astype(np.float64)

    window = hann(time_window_size)
    transient = np.zeros(len(audio_data))

    for start in range(0, len(audio_data) - time_window_size, hop_size):
        frame = audio_data[start:start + time_window_size] * window

        spectrum = fft(frame)
        magnitude = np.abs(spectrum)
        phase = np.angle(spectrum)

        # Keep at most alpha * (local median) of every bin, with the original phase.
        ceiling = alpha * median_filter(magnitude, size=(window_size,))
        clipped = np.minimum(ceiling, magnitude) * np.exp(1j * phase)

        # Window again on synthesis and overlap-add.
        transient[start:start + time_window_size] += ifft(clipped).real * window

    processed_audio = _peak_normalise_int16(transient)

    # NOTE(review): the transient signal has already been rescaled to full scale
    # here while audio_data keeps its original level; confirm this is intended.
    tonal_audio = _peak_normalise_int16(audio_data - processed_audio)

    return tonal_audio, processed_audio


def _peak_normalise_int16(signal):
    """Scale `signal` to a peak of 32767 as int16; silent or empty input gives zeros."""
    peak = np.max(np.abs(signal)) if signal.size else 0.0
    if peak == 0:
        # Nothing to scale; dividing by the peak would produce NaNs.
        return np.zeros(signal.shape, dtype=np.int16)
    return np.int16(signal / peak * 32767)


def harmonic_weight(m, T, v):   #✅
    """Return the weight 1 / (1 + T * m**v) given to harmonic number m."""
    rolloff = T * m ** v
    return 1 / (1 + rolloff)


def covariance_function(tau, wq, midi_frequencies, T, v):
    """
    Evaluate the harmonic covariance at time lag(s) `tau`.

    The result is a Gaussian term exp(-2 * pi^2 * sigma_f2 * tau^2) plus, for
    every fundamental in `midi_frequencies`, the sum over harmonics m = 1..M of
    harmonic_weight(m, T, v) * cos(2 * pi * m * f * tau), scaled by wq[0].

    Note that only the first entry of `wq` is used, for every fundamental.

    Args:
        tau: Time lag in seconds (scalar or NumPy array).
        wq (sequence of float): Fundamental weights; only wq[0] is read.
        midi_frequencies (iterable of float): Fundamental frequencies in Hz.
        T (float): Harmonic roll-off scale passed to harmonic_weight.
        v (float): Harmonic roll-off exponent passed to harmonic_weight.

    Returns:
        Covariance value(s) with the same shape as `tau`.
    """
    total = np.exp(-2 * np.pi**2 * sigma_f2 * tau**2)
    for fundamental in midi_frequencies:
        partials = sum(
            harmonic_weight(order, T, v) * np.cos(2 * np.pi * order * fundamental * tau)
            for order in range(1, M + 1)
        )
        total += wq[0] * partials
    return total


def compute_covariance_matrix(midi_frequencies, block_size, sample_rate, T, v):
    """
    Build the covariance matrix of one audio block for the given fundamentals.

    Entry (a, b) is the covariance at lag tau = (a - b) / sample_rate: a
    Gaussian term exp(-2 * pi^2 * sigma_f2 * tau^2) plus, for each fundamental
    f and harmonic m = 1..M, harmonic_weight(m, T, v) * cos(2 * pi * m * f * tau).

    Args:
        midi_frequencies (iterable of float): Fundamental frequencies in Hz.
        block_size (int): Number of samples in the block (matrix side length).
        sample_rate (float): Sampling rate in Hz.
        T (float): Harmonic roll-off scale passed to harmonic_weight.
        v (float): Harmonic roll-off exponent passed to harmonic_weight.

    Returns:
        np.ndarray: Matrix of shape (block_size, block_size).
    """
    # Sample instants n / sample_rate. np.linspace(0, block_size / sample_rate,
    # block_size) includes the endpoint, which stretches the spacing to
    # block_size / ((block_size - 1) * sample_rate) and detunes every harmonic.
    t = np.arange(block_size) / sample_rate
    tau = t[:, None] - t[None, :]

    cov = np.exp(-2 * np.pi**2 * sigma_f2 * tau**2)

    harmonic_weights = [harmonic_weight(m, T, v) for m in range(1, M + 1)]

    # Accumulate one harmonic at a time so only a single (N, N) temporary is
    # alive, instead of an (M, N, N) stack.
    for fq in midi_frequencies:
        for m, weight in enumerate(harmonic_weights, start=1):
            cov += weight * np.cos(2 * np.pi * m * fq * tau)

    return cov


def tonal_LML(y, K, sigma_n2):
    """
    Gaussian log marginal likelihood of `y` under covariance K + sigma_n2 * I.

    Computed through a Cholesky factorisation as
        -0.5 * y^T (K + sigma_n2 I)^-1 y - 0.5 * log|K + sigma_n2 I| - 0.5 * N * log(2 pi).

    Args:
        y (np.ndarray): Observed block of N samples.
        K (np.ndarray): (N, N) covariance matrix without observation noise.
        sigma_n2 (float): Noise variance added to the diagonal.

    Returns:
        float: The log marginal likelihood.
    """
    n_samples = len(y)
    noisy_cov = K + sigma_n2 * np.eye(n_samples)

    factor = cho_factor(noisy_cov, lower=True)
    weights = cho_solve(factor, y)

    data_fit = -0.5 * np.dot(y.T, weights)
    # log-determinant = 2 * sum(log(diagonal of the Cholesky factor))
    complexity = -0.5 * (2 * np.sum(np.log(np.diag(factor[0]))))
    normaliser = -0.5 * n_samples * np.log(2 * np.pi)
    return data_fit + complexity + normaliser


def compute_emission_probabilities(tonal_audio, sample_rate, midi_notes,
                                   block_size=2048, hmm_hop_size=5000,
                                   T_val=T, v_val=v, sigma_n2_val=sigma_n2):
    """
    Compute the log-likelihood of every audio block under every score event.

    Observation t is the block of `block_size` samples starting at
    t * hmm_hop_size. For each score event, the block is scored with tonal_LML
    using the covariance matrix of that event's fundamental frequency
    (440 * 2**((pitch - 69) / 12) Hz).

    The covariance matrix depends only on the pitch, so it is built once per
    distinct pitch and reused for all observations and repeated notes.

    Args:
        tonal_audio (np.ndarray): Tonal signal to score.
        sample_rate (float): Sampling rate in Hz.
        midi_notes (list): (start, end, pitch) tuples, one per score event.
        block_size (int): Samples per observation block.
        hmm_hop_size (int): Samples between consecutive block starts.
        T_val (float): Harmonic roll-off scale for the covariance.
        v_val (float): Harmonic roll-off exponent for the covariance.
        sigma_n2_val (float): Noise variance used by tonal_LML.

    Returns:
        np.ndarray: Log-likelihoods of shape (n_obs, n_states). The shape is
        (0, n_states) when the signal is shorter than one block.
    """
    n_states = len(midi_notes)

    # Start of every full block; the "+ 1" keeps a block that ends exactly at
    # the last sample, giving (len - block_size) // hop + 1 observations.
    starts = range(0, len(tonal_audio) - block_size + 1, hmm_hop_size)
    emissions = np.zeros((len(starts), n_states))

    column_by_pitch = {}
    for state, (_, _, pitch) in enumerate(midi_notes):
        if pitch not in column_by_pitch:
            base_frequency = 440.0 * 2**((pitch - 69) / 12.0)
            K = compute_covariance_matrix([base_frequency], block_size, sample_rate, T_val, v_val)
            column_by_pitch[pitch] = [
                tonal_LML(tonal_audio[start:start + block_size], K, sigma_n2_val)
                for start in starts
            ]
        emissions[:, state] = column_by_pitch[pitch]

    # Same per-observation debug output as before, printed once all are known.
    for start, emission_vector in zip(starts, emissions):
        print(start)
        print(emission_vector)

    return emissions




def viterbi(transition_matrix, emissions):
    """
    Find the most likely state sequence with the full Viterbi algorithm.

    Works in the log domain: `emissions` holds log-likelihoods and the
    transition probabilities are converted with log(), zero entries becoming
    -inf (forbidden moves). No initial-state prior is applied; the first
    observation's scores are used as they are.

    Args:
        transition_matrix (np.ndarray): Probabilities of shape (n_states, n_states).
        emissions (np.ndarray): Log-likelihoods of shape (n_obs, n_states).

    Returns:
        list: State index for each observation; empty when there are no
        observations.
    """
    emissions = np.asarray(emissions, dtype=float)
    n_obs, n_states = emissions.shape
    if n_obs == 0:
        # Nothing to decode; indexing the first observation would fail.
        return []

    probs = np.asarray(transition_matrix, dtype=float)[:n_states, :n_states]
    log_transitions = np.full(probs.shape, -np.inf)
    np.log(probs, out=log_transitions, where=probs > 0)

    dp = np.full((n_obs, n_states), -np.inf)
    backpointer = np.zeros((n_obs, n_states), dtype=int)
    dp[0] = emissions[0]

    columns = np.arange(n_states)
    for t in range(1, n_obs):
        # scores[i, j]: best log-score of being in i at t-1 and moving to j.
        scores = dp[t - 1][:, None] + log_transitions
        # argmax returns the first maximum, i.e. the lowest predecessor on ties.
        best_prev = np.argmax(scores, axis=0)
        backpointer[t] = best_prev
        dp[t] = scores[best_prev, columns] + emissions[t]

    # Trace the best path backwards from the best final state.
    states = np.zeros(n_obs, dtype=int)
    states[-1] = np.argmax(dp[-1])
    for t in range(n_obs - 1, 0, -1):
        states[t - 1] = backpointer[t, states[t]]

    return states.tolist()


    #USE FULL Viterbi
    #HMM Forward Algo?


def main(midi_file):
    """
    Align the recording to the score and print the decoded state sequence.

    Steps: parse the score events, separate the audio into tonal and transient
    parts, build the left-to-right transition matrix from the note durations,
    score every audio block against every event, and decode with Viterbi.

    Args:
        midi_file: Object with an `.instruments` list, as parse_midi expects.
            Despite the name, this notebook passes a loaded PrettyMIDI object,
            not a path.
    """
    block_size = 2048

    midi_notes = parse_midi(midi_file)
    tonal, _transient = extract_features()

    # Note durations in samples, using the same rate as the emission model.
    E_Z_list = [(note[1] - note[0]) * sample_rate for note in midi_notes]

    # One HMM step advances hmm_hop_size samples, so durations must be counted
    # in hops. Counting them in block_size samples would overstate how many
    # observations a note spans and inflate the self-transition probability.
    transition_matrix = compute_transition_matrices(E_Z_list, block_size=hmm_hop_size)

    E = compute_emission_probabilities(tonal, sample_rate, midi_notes,
                                       block_size=block_size, hmm_hop_size=hmm_hop_size,
                                       T_val=T, v_val=v, sigma_n2_val=sigma_n2)

    # Run the full Viterbi algorithm over the entire observation sequence.
    state_sequence = viterbi(transition_matrix, E)
    print("Most probable state sequence:", state_sequence)




# Run the HMM on the MIDI object loaded in the first cell.
# NOTE(review): main() parses the MIDI again internally, so the note list is
# printed twice; `midi_notes` is only kept here as a cell-level variable.
midi_notes = parse_midi(midi)
main(midi)




In [None]:
# @title
import numpy as np
import matplotlib.pyplot as plt

def plot_midi_piano_roll_pretty_intervals(midi_data, sample_rate, vertical_line_pattern=None):
    """
    Draw a piano roll with a colour-coded grid of vertical lines every 5000 samples.

    Note onsets are scattered against MIDI pitch. Dashed vertical lines are then
    drawn from time 0 up to the last onset, spaced 5000 / sample_rate seconds
    apart, and coloured by walking through `vertical_line_pattern`.

    Any exception raised while collecting notes or plotting is caught and
    reported with a printed message.

    Args:
        midi_data (PrettyMIDI): The PrettyMIDI object to visualize.
        sample_rate (int or float): Samples per second, used to convert the
            5000-sample spacing to seconds.
        vertical_line_pattern (list of tuples, optional): (number_of_lines, color)
            groups applied in order and repeated cyclically, e.g.
            [(4, 'red'), (2, 'blue')] colours four lines red, two blue, four
            red, and so on. Defaults to [(1, 'red')], i.e. every line red.
    """
    if vertical_line_pattern is None:
        vertical_line_pattern = [(1, 'red')]

    try:
        # One (pitch, onset) pair per note, across all instruments.
        pitch_onset_pairs = [
            (event.pitch, event.start)
            for track in midi_data.instruments
            for event in track.notes
        ]
        pitches = [pitch for pitch, _ in pitch_onset_pairs]
        onsets = [onset for _, onset in pitch_onset_pairs]

        plt.figure(figsize=(18, 8))
        plt.scatter(onsets, pitches, marker='o', color='blue', alpha=0.7)
        plt.xlabel("Time (seconds)")
        plt.ylabel("MIDI Note Number")
        plt.title("MIDI Piano Roll Visualization (PrettyMIDI)")
        # Tick every second pitch between the lowest and highest note.
        plt.yticks(range(min(pitches), max(pitches) + 1, 2))
        plt.grid(True, linestyle='--', alpha=0.6)

        # Grid spacing in seconds, and line positions covering all onsets.
        spacing = 5000 / sample_rate
        last_onset = max(onsets) if onsets else 0
        line_times = np.arange(0, last_onset + spacing, spacing)

        # Walk the pattern: stay on a group until it has drawn its quota of
        # lines, then step (cyclically) to the next group.
        group = 0
        lines_wanted, colour = vertical_line_pattern[group]
        lines_drawn = 0
        for line_time in line_times:
            plt.axvline(x=line_time, color=colour, linestyle='--', alpha=0.5)
            lines_drawn += 1
            if lines_drawn >= lines_wanted:
                group = (group + 1) % len(vertical_line_pattern)
                lines_wanted, colour = vertical_line_pattern[group]
                lines_drawn = 0

        plt.show()

    except Exception as e:
        print(f"Error plotting piano roll: {e}")


# Colour groups for the 5000-sample grid lines: each (count, colour) entry
# colours that many consecutive lines before moving on to the next entry.
# NOTE(review): presumably transcribed from the run lengths of a decoded HMM
# state sequence — confirm where these counts come from.
pattern = [(6, 'red'), (4, 'blue'), (5, 'red'), (4, 'blue'), (5, 'red'), (4, 'blue'), (4, 'red'), (22, 'blue'), (5, 'red'), (4, 'blue'), (5, 'red'), (4, 'blue'), (4, 'red'), (4, 'blue')]
plot_midi_piano_roll_pretty_intervals(midi, sample_rate=44100, vertical_line_pattern=pattern)