In [2]:
import pandas as pd
import pretty_midi

In [2]:
# Load the query and song DataFrames from pickle files in the working directory.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
queries_df = pd.read_pickle("./queries_clean_overlap_notes.pkl")
songs_df = pd.read_pickle("./songs_clean_overlap_notes.pkl")

In [28]:
# Work on an independent copy of the song ID and MIDI columns so that columns
# added later do not modify songs_df.
song_sequence_encoding_df = songs_df[['Song ID','clean_overlap_vocals_notes_midi']].copy()

In [29]:
# Preview the first rows of the selected song columns.
song_sequence_encoding_df.head()

Unnamed: 0,Song ID,clean_overlap_vocals_notes_midi
0,1,<pretty_midi.pretty_midi.PrettyMIDI object at ...
1,4,<pretty_midi.pretty_midi.PrettyMIDI object at ...
2,7,<pretty_midi.pretty_midi.PrettyMIDI object at ...
3,14,<pretty_midi.pretty_midi.PrettyMIDI object at ...
4,15,<pretty_midi.pretty_midi.PrettyMIDI object at ...


In [30]:
def midi_to_notes_list(midi: "pretty_midi.PrettyMIDI"):
    """Collect the notes of every non-drum instrument, ordered by start time.

    Args:
        midi: Object exposing ``instruments``; each instrument provides
            ``is_drum`` and ``notes``, and each note provides ``start``.

    Returns:
        A new list holding the notes of all non-drum instruments, sorted by
        ascending ``start``. Notes with equal start times keep the order in
        which they were gathered, because the sort is stable. The
        instruments' own note lists are not modified.
    """
    notes = []
    for inst in midi.instruments:
        if not inst.is_drum:
            # extend() appends in place; `notes = notes + inst.notes` copied
            # everything gathered so far again for each instrument.
            notes.extend(inst.notes)
    return sorted(notes, key=lambda note: note.start)

In [31]:
# Apply midi_to_notes_list to each song's MIDI object and store the result in a new 'notes' column.
song_sequence_encoding_df['notes']= song_sequence_encoding_df['clean_overlap_vocals_notes_midi'].apply(midi_to_notes_list)

In [32]:
# Preview the song frame now that the 'notes' column has been added.
song_sequence_encoding_df.head()

Unnamed: 0,Song ID,clean_overlap_vocals_notes_midi,notes
0,1,<pretty_midi.pretty_midi.PrettyMIDI object at ...,"[Note(start=0.000000, end=0.150930, pitch=61, ..."
1,4,<pretty_midi.pretty_midi.PrettyMIDI object at ...,"[Note(start=0.000000, end=0.535343, pitch=31, ..."
2,7,<pretty_midi.pretty_midi.PrettyMIDI object at ...,"[Note(start=0.000000, end=0.197370, pitch=40, ..."
3,14,<pretty_midi.pretty_midi.PrettyMIDI object at ...,"[Note(start=0.000000, end=0.278639, pitch=60, ..."
4,15,<pretty_midi.pretty_midi.PrettyMIDI object at ...,"[Note(start=0.000000, end=1.301601, pitch=72, ..."


In [35]:
# Work on an independent copy of the query ID, song ID and MIDI columns so that
# columns added later do not modify queries_df.
queries_sequence_encoding_df = queries_df[['Query ID','Song ID','clean_overlap_notes_midi']].copy()

In [36]:
# Preview the first rows of the selected query columns.
queries_sequence_encoding_df.head()

Unnamed: 0,Query ID,Song ID,clean_overlap_notes_midi
0,q1,1118,<pretty_midi.pretty_midi.PrettyMIDI object at ...
1,q2,1438,<pretty_midi.pretty_midi.PrettyMIDI object at ...
2,q3,1546,<pretty_midi.pretty_midi.PrettyMIDI object at ...
3,q4,1579,<pretty_midi.pretty_midi.PrettyMIDI object at ...
4,q5,1808,<pretty_midi.pretty_midi.PrettyMIDI object at ...


In [37]:
# Apply midi_to_notes_list to each query's MIDI object and store the result in a new 'notes' column.
queries_sequence_encoding_df['notes'] = queries_sequence_encoding_df['clean_overlap_notes_midi'].apply(midi_to_notes_list)

In [38]:
# Preview the query frame now that the 'notes' column has been added.
queries_sequence_encoding_df.head()

Unnamed: 0,Query ID,Song ID,clean_overlap_notes_midi,notes
0,q1,1118,<pretty_midi.pretty_midi.PrettyMIDI object at ...,"[Note(start=0.000000, end=1.207438, pitch=60, ..."
1,q2,1438,<pretty_midi.pretty_midi.PrettyMIDI object at ...,"[Note(start=0.000000, end=1.172608, pitch=55, ..."
2,q3,1546,<pretty_midi.pretty_midi.PrettyMIDI object at ...,"[Note(start=0.000000, end=0.313469, pitch=60, ..."
3,q4,1579,<pretty_midi.pretty_midi.PrettyMIDI object at ...,"[Note(start=0.000000, end=0.290249, pitch=63, ..."
4,q5,1808,<pretty_midi.pretty_midi.PrettyMIDI object at ...,"[Note(start=0.000000, end=0.522449, pitch=56, ..."


In [48]:
def get_pitch_diff_from_notes(notes):
    """Return the pitch change between each pair of consecutive notes.

    Element ``i`` of the result is ``notes[i + 1].pitch - notes[i].pitch``,
    so the result has one entry fewer than ``notes`` and is empty when
    ``notes`` holds fewer than two notes.
    """
    return [
        current.pitch - previous.pitch
        for previous, current in zip(notes, notes[1:])
    ]

In [49]:
# Add a 'pitch_diff' column by applying get_pitch_diff_from_notes to each query's notes list.
queries_sequence_encoding_df['pitch_diff'] = queries_sequence_encoding_df["notes"].apply(get_pitch_diff_from_notes)

In [56]:
# Add a 'pitch_diff' column by applying get_pitch_diff_from_notes to each song's notes list.
song_sequence_encoding_df['pitch_diff'] = song_sequence_encoding_df['notes'].apply(get_pitch_diff_from_notes)

In [60]:
def get_up_same_down_from_diff_pitch(diff_pitches):
    """Encode each pitch difference as a contour symbol.

    A positive difference becomes "U", a negative one "D", and zero "S".
    A value that compares neither greater than, less than, nor equal to
    zero contributes no symbol.
    """
    contour = []
    for delta in diff_pitches:
        if delta == 0:
            symbol = "S"
        elif delta < 0:
            symbol = "D"
        elif delta > 0:
            symbol = "U"
        else:
            # Matches none of the comparisons (e.g. NaN): emit nothing.
            continue
        contour.append(symbol)
    return contour

In [63]:
# Add an 'UpSameDown' column to both frames by applying get_up_same_down_from_diff_pitch to each 'pitch_diff' list.
queries_sequence_encoding_df['UpSameDown'] = queries_sequence_encoding_df['pitch_diff'].apply(get_up_same_down_from_diff_pitch)
song_sequence_encoding_df['UpSameDown'] = song_sequence_encoding_df['pitch_diff'].apply(get_up_same_down_from_diff_pitch)

In [66]:
# Save the query IDs and derived encodings to a pickle file; the MIDI object column is not included.
queries_sequence_encoding_df[["Query ID","Song ID","notes","pitch_diff","UpSameDown"]].to_pickle("./queries_sequence_df.pkl")

In [68]:
# Save the song IDs and derived encodings to a pickle file; the MIDI object column is not included.
song_sequence_encoding_df[["Song ID","notes","pitch_diff","UpSameDown"]].to_pickle("./songs_sequence_df.pkl")

In [None]:
# TESTING: reload the saved song encodings and inspect the songs with the fewest notes.

In [3]:
# Read the song encodings back from ./songs_sequence_df.pkl.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
df = pd.read_pickle("./songs_sequence_df.pkl")

In [5]:
# Count the notes in each song; `len` is passed directly instead of being wrapped in a lambda.
df['num_notes'] = df["notes"].apply(len)

In [7]:
# Show the 20 songs with the fewest notes (ascending sort on 'num_notes').
df.sort_values(by='num_notes').head(20)

Unnamed: 0,Song ID,notes,pitch_diff,UpSameDown,num_notes
221,1081,"[Note(start=0.000000, end=0.452789, pitch=63, ...","[-34, 0, 12, -12, 0, 19, -19, 20, 26, -26, 14,...","[D, S, U, D, S, U, D, U, U, D, U, D, U, D, S, ...",22
26,126,"[Note(start=0.000000, end=0.476009, pitch=36, ...","[25, 5, 0, -10, -27, 46, -38, -4, 4, -4, 0, 42...","[U, U, S, D, D, U, D, D, U, D, S, U, D, D, S, ...",24
327,1566,"[Note(start=0.000000, end=0.603719, pitch=64, ...","[3, -3, -1, 1, 0, -1, 1, 0, 0, 0, 0, 3, 0, 0, ...","[U, D, D, U, S, D, U, S, S, S, S, U, S, S, S, ...",27
250,1212,"[Note(start=0.000000, end=0.174150, pitch=41, ...","[4, 3, -2, -5, 9, -2, -7, 15, -8, -6, 4, -4, 6...","[U, U, D, D, U, D, D, U, D, D, U, D, U, U, D, ...",32
43,240,"[Note(start=0.000000, end=0.162540, pitch=59, ...","[-19, 19, -7, 0, 7, -19, 12, -12, 12, 0, 7, -1...","[D, U, D, S, U, D, U, D, U, S, U, D, U, S, D, ...",35
322,1536,"[Note(start=0.000000, end=0.150930, pitch=58, ...","[4, 0, 0, 0, 0, 0, -4, 0, 8, 1, -7, -2, 0, 4, ...","[U, S, S, S, S, S, D, S, U, U, D, D, S, U, S, ...",52
273,1313,"[Note(start=0.000000, end=0.359909, pitch=50, ...","[2, -12, 22, -3, -2, 5, -3, 0, -2, -2, -1, -2,...","[U, D, U, D, D, U, D, S, D, D, D, D, U, U, D, ...",56
352,1655,"[Note(start=0.000000, end=0.139320, pitch=64, ...","[-2, 2, 8, -8, 0, 0, 0, -2, 7, -7, 2, 0, 0, -1...","[D, U, U, D, S, S, S, D, U, D, U, S, S, D, D, ...",60
355,1691,"[Note(start=0.000000, end=4.878758, pitch=29, ...","[63, 0, -45, 0, 15, -13, -6, 6, 0, 28, -15, -1...","[U, S, D, S, U, D, D, U, S, U, D, D, U, U, D, ...",63
231,1123,"[Note(start=0.000000, end=0.139320, pitch=44, ...","[2, -2, 0, 0, 2, -1, -1, 1, 1, -2, 2, -1, 1, -...","[U, D, S, S, U, D, D, U, U, D, U, D, U, D, U, ...",67
