In [2]:
import pandas as pd
import os
import glob
import numpy as np

In [7]:
# Define all mappings and major keys

# Mapping between notes and numerical representation
note_map = {
    'C0':1,
    'B#':1,
    'C#':2,
    'Db':2,
    'D0':3,
    'D#':4,
    'Eb':4,
    'E0':5,
    'Fb':5,
    'F0':6,
    'E#':6,
    'F#':7,
    'Gb':7,
    'G0':8,
    'G#':9,
    'Ab':9,
    'A0':10,
    'A#':11,
    'Bb':11,
    'B0':12,
    'Cb':12,
    '[]':'[]'
}

# Mapping between major key and notes
C_sharp = [2,4,6,7,9,11,1]
F_sharp = [7,9,11,12,2,4,6]
B = [12,2,4,5,7,9,11]
E = [5,7,9,10,12,2,4]
A = [10,12,2,3,5,7,9]
D = [3,5,7,8,10,12,2]
G = [8,10,12,1,3,5,7]
C = [1,3,5,6,8,10,12]
F = [6,8,10,11,1,3,5]
B_flat = [11,1,3,4,6,8,10]
E_flat = [4,6,8,9,11,1,3]
A_flat = [9,11,1,2,4,6,8]
D_flat = [2,4,6,7,9,11,1]
G_flat = [7,9,11,12,2,4,6]

# Key_fifths mapping to major key
kf_map = {
    -6:G_flat,
    -5:D_flat,
    -4:A_flat,
    -3:E_flat,
    -2:B_flat,
    -1:F,
    0:C,
    1:G,
    2:D,
    3:A,
    4:E,
    5:B,
    6:F_sharp,
    7:C_sharp
}



In [8]:
# Define mappings for chords

chord_map = {
    'major':'major',
    'dominant':'major',
    'minor':'minor',
    'minor-seventh':'minor',
    'major-seventh':'major',
    'maj':'major',
    'major-sixth':'major',
    'dominant-ninth':'major',
    'min':'minor',
    'minor-sixth':'minor',
    '7':'major',
    'suspended-fourth':'major',
    'diminished':'minor',
    'half-diminished':'minor',
    'minor-ninth':'minor',
    'diminished-seventh':'minor',
    'augmented-seventh':'major',
    'min7':'minor',
    'major-ninth':'major',
    'maj7':'major',
    'dominant-seventh':'major',
    'augmented': 'major',
    'dominant-13th':'major',
    'power':'major',
    'suspended-second':'major',
    'dominant-11th':'major',
    'dim':'minor',
    'minor-11th':'minor',
    'minor-major':'minor',
    'major-minor':'minor',
    'maj9':'major',
    '9':'major',
    'pedal':'major',
    'maj69':'major',
    'aug':'major',
    'min9':'minor',
    'augmented-ninth':'major',
    'minor-13th':'minor',
    '6':'major',
    'm7b5':'minor',
    'minMaj7':'minor',
    'sus47':'major',
    'dim7':'minor',
    ' dim7':'minor'
}




In [9]:
# Define mapping for time signature

time_map = {
    '4/4':1,
    '3/4':4/3,
    '2/2':1,
    '6/8':8/6,
    '2/4':2,
    '12/8':8/12,
    '6/4':4/6,
    '9/8':8/9,
    '3/8':8/3,
    '5/4':4/5,
    '1/2':2,
    '4/8':2
}

In [26]:
for filename in glob.glob("*.csv"):
    data = pd.read_csv(filename)
    
    data = data.loc[~data['note_root'].isin(['rest','F2','B-2','C2','D-2','A2'])]
    #data = data.loc[~data['chord_root'].isin(['[]'])]
    #data = data.loc[~data['chord_type'].isin(['[]'])]
    data['chord_root'] = data['chord_root'].fillna('[]')
    data['chord_type'] = data['chord_type'].fillna('[]')
    data = data.dropna()  # Remove NULL values
    data.drop('note_octave', inplace=True, axis=1)  # Remove octave information

    li2 = data.to_numpy()

    # Shift rootnote and rootchord to C major key, and convert chord type to either major or minor
    n = len(li2)
    shifted_li = li2.copy()

    for i in range(n):
        row = li2[i]
        rootnote = row[6]
        notenum = note_map[rootnote]
        kf = row[2]
        major = kf_map[kf]

        rootchord = row[4]
        chordnum = note_map[rootchord]
        chordtype = row[5] # Chord type, eg. major, diminished

        time = row[0]
        normalised_time = time_map[time]
        note_duration = row[7]

        # Find the index of the number in major closest to notenum, but this isnt necessary
        #index = min(range(7), key = lambda j:abs(major[j]-notenum))

        # Find the difference between notenum and the major key num, and add it to the C major key num to get the shifted note.
        # This works since the intervals between each note in a major key is the same across all major keys
        difference = notenum-major[0]
        shifted_note = C[0] + difference

        if shifted_note <=0:
            shifted_note = shifted_note + 12
        elif shifted_note>12:
            shifted_note = shifted_note - 12

        shifted_li[i,6] = shifted_note

        
        # Check if the chord exists, i.e. 'rest chord'
        if chordnum != '[]':
            difference = chordnum-major[0]
            shifted_chord = C[0] + difference

            if shifted_chord <=0:
                shifted_chord = shifted_chord + 12
            elif shifted_chord>12:
                shifted_chord = shifted_chord - 12
        else:
            shifted_chord = '[]'

        # Check if there is a chord type
        if chordtype != '[]':
            shifted_li[i,5] = chord_map[chordtype]
        else:
            shifted_li[i,5] = '[]'
            
        shifted_li[i,4] = shifted_chord
        shifted_li[i,7] = normalised_time*note_duration
        
        
    new_data = []
    start = 1
    measure = 'unknown'
    new_row = np.zeros((1,37))
    chord_saved = 0
    for i in range(n):
        row = shifted_li[i,:]
        rootnote = row[6]
        rootchord = row[4]
        chordtype = row[5]
        normalised_note_duration = row[7]

        if measure == row[1]:
            new_row[0,rootnote-1] +=normalised_note_duration

        else:  # New measure

            # First measure of the song
            if start == 1:
                #new_data = np.zeros((1,37))
                new_row[0,rootnote-1] += normalised_note_duration
                measure = row[1]
                chord_saved = chordtype

                if chordtype == 'major':
                    new_row[0,rootchord+11] = 1
                elif chordtype == '[]':
                    new_row[0,-1] = 1
                else:
                    new_row[0,rootchord+23] = 1
                start = 0

            # Transition from one measure to the next
            else:
                # Add the row for the previous measure to new_data
                if len(new_data) == 0:
                    new_data = new_row
                else:
                    new_data = np.concatenate((new_data, new_row), axis=0)

                # Update the measure to current measure, reset new_row
                measure = row[1]
                new_row = np.zeros((1,37))


                new_row[0,rootnote-1] += normalised_note_duration

                if chordtype == 'major':
                    new_row[0,rootchord+11] = 1
                elif chordtype == '[]':
                    new_row[0,-1] = 1
                else:
                    new_row[0,rootchord+23] = 1





    df = pd.DataFrame(new_data, index=None, columns=["C", "C#",'D','D#','E','F','F#','G','G#','A','A#','B',"C major", "C# major",'D minor','D# major','E major','F major','F# major','G major','G# major','A major','A# major','B major',"C minor", "C# minor",'D minor','D# minor','E minor','F minor','F# minor','G minor','G# minor','A minor','A# minor','B minor','No chords'])
    
    df.to_csv ("~/Documents/Uni stuff/Engineering Science/3YP/dataset/LSTM/Corrected data for LSTM/processed_test v2/"+filename, index = False, header=True)
    
    

In [10]:
data = pd.read_csv("~/Documents/Uni stuff/Engineering Science/3YP/dataset/csv_train/Útkarelek, elmegyek.csv")

In [11]:
data = data.loc[~data['note_root'].isin(['rest','F2','B-2','C2','D-2','A2'])]
#data = data.loc[~data['chord_root'].isin(['[]'])]
#data = data.loc[~data['chord_type'].isin(['[]'])]
data['chord_root'] = data['chord_root'].fillna('[]')
data['chord_type'] = data['chord_type'].fillna('[]')
data = data.dropna()  # Remove NULL values
data.drop('note_octave', inplace=True, axis=1)  # Remove octave information

li2 = data.to_numpy()

# Shift rootnote and rootchord to C major key, and convert chord type to either major or minor
n = len(li2)
shifted_li = li2.copy()

for i in range(n):
    row = li2[i]
    rootnote = row[6]
    notenum = note_map[rootnote]
    kf = row[2]
    major = kf_map[kf]

    rootchord = row[4]
    chordnum = note_map[rootchord]
    chordtype = row[5] # Chord type, eg. major, diminished

    time = row[0]
    normalised_time = time_map[time]
    note_duration = row[7]

    # Find the index of the number in major closest to notenum, but this isnt necessary
    #index = min(range(7), key = lambda j:abs(major[j]-notenum))

    # Find the difference between notenum and the major key num, and add it to the C major key num to get the shifted note.
    # This works since the intervals between each note in a major key is the same across all major keys
    difference = notenum-major[0]
    shifted_note = C[0] + difference

    if shifted_note <=0:
        shifted_note = shifted_note + 12
    elif shifted_note>12:
        shifted_note = shifted_note - 12

    shifted_li[i,6] = shifted_note


    # Check if the chord exists, i.e. 'rest chord'
    if chordnum != '[]':
        difference = chordnum-major[0]
        shifted_chord = C[0] + difference

        if shifted_chord <=0:
            shifted_chord = shifted_chord + 12
        elif shifted_chord>12:
            shifted_chord = shifted_chord - 12
    else:
        shifted_chord = '[]'

    # Check if there is a chord type
    if chordtype != '[]':
        shifted_li[i,5] = chord_map[chordtype]
    else:
        shifted_li[i,5] = '[]'

    shifted_li[i,4] = shifted_chord
    shifted_li[i,7] = normalised_time*note_duration



In [13]:
new_data = []
start = 1
measure = 'unknown'
new_row = np.zeros((1,37))
chord_saved = 0
for i in range(n):
    row = shifted_li[i,:]
    rootnote = row[6]
    rootchord = row[4]
    chordtype = row[5]
    normalised_note_duration = row[7]

    if measure == row[1]:
        new_row[0,rootnote-1] +=normalised_note_duration

    else:  # New measure
        
        # First measure of the song
        if start == 1:
            #new_data = np.zeros((1,37))
            new_row[0,rootnote-1] += normalised_note_duration
            measure = row[1]
            chord_saved = chordtype
            
            if chordtype == 'major':
                new_row[0,rootchord+11] = 1
            elif chordtype == '[]':
                new_row[0,-1] = 1
            else:
                new_row[0,rootchord+23] = 1
            start = 0
        
        # Transition from one measure to the next
        else:
            # Add the row for the previous measure to new_data
            if len(new_data) == 0:
                new_data = new_row
            else:
                new_data = np.concatenate((new_data, new_row), axis=0)
            
            # Update the measure to current measure, reset new_row
            measure = row[1]
            new_row = np.zeros((1,37))
            
            
            new_row[0,rootnote-1] += normalised_note_duration

            if chordtype == 'major':
                new_row[0,rootchord+11] = 1
            elif chordtype == '[]':
                new_row[0,-1] = 1
            else:
                new_row[0,rootchord+23] = 1



#df = pd.DataFrame(new_data[1:,:], index=None, columns=["C", "C#",'D','D#','E','F','F#','G','G#','A','A#','B',"C major", "C# major",'D minor','D# major','E major','F major','F# major','G major','G# major','A major','A# major','B major',"C minor", "C# minor",'D minor','D# minor','E minor','F minor','F# minor','G minor','G# minor','A minor','A# minor','B minor','No chords'])

#df.to_csv ("~/Documents/Uni stuff/Engineering Science/3YP/dataset/processed_test_v2/"+filename, index = False, header=True)

In [15]:
new_data.shape

(15, 37)

In [18]:
df = pd.DataFrame(new_data, index=None, columns=["C", "C#",'D','D#','E','F','F#','G','G#','A','A#','B',"C major", "C# major",'D minor','D# major','E major','F major','F# major','G major','G# major','A major','A# major','B major',"C minor", "C# minor",'D minor','D# minor','E minor','F minor','F# minor','G minor','G# minor','A minor','A# minor','B minor','No chords'])



In [22]:
df.to_csv ("~/Documents/Uni stuff/Engineering Science/3YP/dataset/LSTM/Corrected data for LSTM/"+'test.csv', index = False, header=True)

In [23]:
data

Unnamed: 0,time,measure,key_fifths,key_mode,chord_root,chord_type,note_root,note_duration
1,2/4,1,0,major,[],[],G0,2.0
2,2/4,1,0,major,[],[],C0,2.0
3,2/4,2,0,major,C0,major,E0,2.0
4,2/4,2,0,major,C0,major,E0,2.0
5,2/4,2,0,major,C0,major,E0,2.0
6,2/4,2,0,major,C0,major,F0,2.0
7,2/4,3,0,major,C0,major,E0,4.0
8,2/4,3,0,major,C0,major,D0,2.0
9,2/4,3,0,major,C0,major,C0,2.0
10,2/4,4,0,major,F0,major,A0,2.0
