In [1]:
%load_ext autoreload
%autoreload 2

In [58]:
import sys
sys.path.append('../..')

from app.core.audio.AudioData import AudioData
from app.core.audio.AudioPlayer import AudioPlayer
from app.core.midi.MidiData import MidiData
from app.core.midi.MidiPlayer import MidiPlayer
from app.core.midi.MidiSynth import MidiSynth

from app.algorithms.pitch.PYin import PYin
from app.algorithms.align.OnsetDf import UserOnsetDf, MidiOnsetDf
from app.config import AppConfig
from app.core.recording.PitchDf import PitchDf, PitchConfig
from app.algorithms.align.DTW import DTW

In [56]:
# AUDIO_FILEPATH = '../../app/resources/audio/rhythm_sally.mp3'
AUDIO_FILEPATH = '../../app/resources/audio/ultra_sally.mp3'
MIN_VIOLIN_FREQ = 196.0
SAMPLE_RATE = 44100

audio_data = AudioData()
audio_data.load_data(AUDIO_FILEPATH)
pitches, most_likely_pitches = PYin.pyin(audio_data.data)

# Play the audio if you want
# audio_player = AudioPlayer()
# audio_player.load_audio_data(audio_data)
# audio_player.play()

Processing frame 8135/8135
Done!


In [66]:
pitch_config = PitchConfig( # Defines resolution of pitch bins
    bins_per_semitone=10, tuning=440.0, fmin=196, fmax=5000
)
pitch_df = PitchDf(audio_data, pitch_config, pitches)
user_onset_df = UserOnsetDf(audio_data, most_likely_pitches, pitch_df)

Detecting onsets... Done!
Detecting pitch changes with rolling median window_size=30 and threshold=0.6... Done!


In [60]:
# Create a synth with a soundfont
SOUNDFONT_FILEPATH = '../data/MuseScore_General.sf3'
midi_synth = MidiSynth(SOUNDFONT_FILEPATH)

# Load the midi file into a MidiData object
MIDI_FILEPATH = '../../app/resources/midi/fugue.mid'
midi_data = MidiData(MIDI_FILEPATH)

# Play midi if you want
# midi_player = MidiPlayer(midi_synth)
# midi_player.load_midi(midi_data)
# midi_player.play(start_time=0)

Loading MidiSynth...
Synth + soundfont loaded.


In [61]:
# Change midi tempo to be same length as the user audio snippet
midi_data.change_tempo(target_length=audio_data.get_length())
midi_data.save_to_file() # Saves it to fugue_slower.mid (for now)

midi_audio = AudioData()
# MIDI_FILEPATH = '../../app/resources/midi/fugue_slower.mid'
SOUNDFONT_FILEPATH = '../data/MuseScore_General.sf3'
midi_audio.load_midi_file('fugue_slower.mid', SOUNDFONT_FILEPATH)

Tempo change applied. New MIDI length is 23.658662131519268 seconds.


## Compute DTW
Maps each midi note to a slice of pitches.

In [62]:
from app.algorithms.align.CQT import CQT

user_cqt = CQT.extract_cqt(audio_data.data)
midi_cqt = CQT.extract_cqt(midi_audio.data)

alignment = DTW.align(user_cqt, midi_cqt)
DTW.parse_alignment(alignment, midi_data) # writes a parsed file to aligned.mid

DTW alignment computed.
Distance: 40.070355822172004
Mean alignment error: 63.45643939393939
Wrote alignment result to aligned.mid.


## Sanity check: Plot pitches

In [68]:
# View the pitch estimates in the app
import sys
sys.path.append('..')

from app.ui.plots.PitchPlot import RunPitchPlot
from app.core.midi.MidiData import MidiData
from PyQt6.QtWidgets import QApplication
from PyQt6.QtCore import QCoreApplication


if __name__ == '__main__':
    if not QCoreApplication.instance():
        app = QApplication(sys.argv)
    else:
        app = QCoreApplication.instance()

    aligned_midi_data = MidiData('aligned.mid')

    # pitchplot = RunPitchPlot(
    #     app, midi_data=midi_data, pitches=most_likely_pitches, onsets=user_onset2_df.onset_df, align_df=align_df2
    # )
    pitchplot = RunPitchPlot(
        app, midi_data=aligned_midi_data, pitches=most_likely_pitches, onsets=user_onset_df.onset_df
    )

Plotting pitches...
Done!


qt.pointer.dispatch: skipping QEventPoint(id=1 ts=0 pos=0,0 scn=567.81,655.108 gbl=567.81,655.108 Released ellipse=(1x1 ∡ 0) vel=0,0 press=-567.81,-655.108 last=-567.81,-655.108 Δ 567.81,655.108) : no target window
qt.pointer.dispatch: skipping QEventPoint(id=1 ts=0 pos=0,0 scn=995.935,542.35 gbl=995.935,542.35 Released ellipse=(1x1 ∡ 0) vel=0,0 press=-995.935,-542.35 last=-995.935,-542.35 Δ 995.935,542.35) : no target window
qt.pointer.dispatch: skipping QEventPoint(id=3 ts=0 pos=0,0 scn=844.372,307.608 gbl=844.372,307.608 Released ellipse=(1x1 ∡ 0) vel=0,0 press=-844.372,-307.608 last=-844.372,-307.608 Δ 844.372,307.608) : no target window
qt.pointer.dispatch: skipping QEventPoint(id=1 ts=0 pos=0,0 scn=689.958,503.852 gbl=689.958,503.852 Released ellipse=(1x1 ∡ 0) vel=0,0 press=-689.958,-503.852 last=-689.958,-503.852 Δ 689.958,503.852) : no target window
QGraphicsView::dragLeaveEvent: drag leave received before drag enter
qt.pointer.dispatch: skipping QEventPoint(id=3 ts=0 pos=0,0 s

## FEEDBACK
Goal: Create a dictionary of all notes which we detect the user played.

### Todo:
1. Create dictionary of all midi notes (keys) and associated user pitch list (values)

In [63]:
pitch_config = PitchConfig( # Defines resolution of pitch bins
    bins_per_semitone=10, tuning=440.0, fmin=196, fmax=5000
)
pitch_df = PitchDf(audio_data, pitch_config, pitches)
pitch_df.df2

Unnamed: 0,time,frequency,midi_num,probability,volume,audio_idx
0,0.000000,15977.474946,131.188705,0.01,0.000007,0
1,0.002902,16503.275149,131.749260,0.01,0.000008,128
2,0.005805,16094.118689,131.314635,0.01,0.000009,256
3,0.008707,15719.817021,130.907245,0.01,0.000010,384
4,0.011610,2751.812051,100.737678,0.01,0.000054,512
...,...,...,...,...,...,...
8130,23.597279,2737.614672,100.648128,0.01,0.002319,1040640
8131,23.600181,2720.391864,100.538869,0.01,0.002178,1040768
8132,23.603084,2732.415611,100.615218,0.01,0.002160,1040896
8133,23.605986,2710.469312,100.475607,0.01,0.001664,1041024


In [64]:
# Create dictionary of all midi notes and associated user pitches
aligned_midi_data = MidiData('aligned.mid')
aligned_midi_data.pitch_df

Unnamed: 0,note_idx,start,channel,pitch,velocity,duration,frequency
0,0,0.000000,0,62,100,1.625000,293.664768
1,1,1.625000,0,69,100,0.325000,440.000000
2,2,1.950000,0,73,100,0.418182,554.365262
3,3,2.368182,0,76,100,0.325000,659.255114
4,4,2.693182,0,77,100,0.325000,698.456463
...,...,...,...,...,...,...,...
56,56,21.338636,0,70,100,0.395455,466.163762
57,57,21.734091,0,67,100,0.347727,391.995436
58,58,22.081818,0,74,100,0.372727,587.329536
59,59,22.454545,0,65,100,0.393182,349.228231


In [65]:
import pandas as pd
import numpy as np

def assign_note_to_pitch(pitch_row, midi_data: MidiData):
    closest_idx = (np.abs(midi_data.pitch_df['start'] - pitch_row['time'])).idxmin()
    closest_note = midi_data.pitch_df.iloc[closest_idx]

    if pitch_row['time'] < closest_note['start']:   
        # Find the previous note, if it exists
        if closest_idx > 0:
            previous_note = midi_data.pitch_df.iloc[closest_idx - 1]
            return previous_note.to_dict()  # Assign to the previous note
    # Otherwise, assign to the closest note
    return closest_note.to_dict()

# iterate through user pitch df and associate it with a midi pitch
# aligned_midi_data.pitch_df = aligned_midi_data.pitch_df.reset_index(drop=True)
# aligned_midi_data.pitch_df['note_idx'] = aligned_midi_data.pitch_df.index
expanded_rows = []
for i, pitch_row in pitch_df.df2.iterrows():
    # print(pitch_row)
    note = assign_note_to_pitch(pitch_row, aligned_midi_data)
    if note:
        # combine midi note + pitch data
        expanded_row = {
            **note, 
            **pitch_row.to_dict()}
        expanded_rows.append(expanded_row)

combined_df = pd.DataFrame(expanded_rows)
combined_df

Unnamed: 0,note_idx,start,channel,pitch,velocity,duration,frequency,time,midi_num,probability,volume,audio_idx
0,0.0,0.000000,0.0,62.0,100.0,1.625000,15977.474946,0.000000,131.188705,0.01,0.000007,0.0
1,0.0,0.000000,0.0,62.0,100.0,1.625000,16503.275149,0.002902,131.749260,0.01,0.000008,128.0
2,0.0,0.000000,0.0,62.0,100.0,1.625000,16094.118689,0.005805,131.314635,0.01,0.000009,256.0
3,0.0,0.000000,0.0,62.0,100.0,1.625000,15719.817021,0.008707,130.907245,0.01,0.000010,384.0
4,0.0,0.000000,0.0,62.0,100.0,1.625000,2751.812051,0.011610,100.737678,0.01,0.000054,512.0
...,...,...,...,...,...,...,...,...,...,...,...,...
8130,60.0,22.847727,0.0,63.0,100.0,0.790909,2737.614672,23.597279,100.648128,0.01,0.002319,1040640.0
8131,60.0,22.847727,0.0,63.0,100.0,0.790909,2720.391864,23.600181,100.538869,0.01,0.002178,1040768.0
8132,60.0,22.847727,0.0,63.0,100.0,0.790909,2732.415611,23.603084,100.615218,0.01,0.002160,1040896.0
8133,60.0,22.847727,0.0,63.0,100.0,0.790909,2710.469312,23.605986,100.475607,0.01,0.001664,1041024.0


## On the combined dataframe...
We want to compute all unique notes within the dataframe
1. Compute rolling medians within each note_idx
2. Ignore "new notes" which are harmonic multiples

Unnamed: 0,index,start,channel,pitch,velocity,duration,frequency
0,0,0.000000,0,62,100,1.322727,293.664768
1,1,1.322727,0,69,100,0.302273,440.000000
2,2,1.625000,0,73,100,0.279545,554.365262
3,3,1.904545,0,76,100,0.302273,659.255114
4,4,2.206818,0,77,100,0.277273,698.456463
...,...,...,...,...,...,...,...
60,60,18.111364,0,63,100,0.325000,311.126984
61,61,18.436364,0,74,100,0.256818,587.329536
62,62,18.693182,0,79,100,0.254545,783.990872
63,63,18.947727,0,72,100,0.325000,523.251131
