In [1]:
import pandas as pd
import os
import glob
import numpy as np
import math

In [2]:
# Define all mappings and major keys

# Mapping between notes and numerical representation
note_map = {
    'C0':1,
    'B#':1,
    'C#':2,
    'Db':2,
    'D0':3,
    'D#':4,
    'Eb':4,
    'E0':5,
    'Fb':5,
    'F0':6,
    'E#':6,
    'F#':7,
    'Gb':7,
    'G0':8,
    'G#':9,
    'Ab':9,
    'A0':10,
    'A#':11,
    'Bb':11,
    'B0':12,
    'Cb':12,
    '[]':'[]'
}

# Mapping between major key and notes
C_sharp = [2,4,6,7,9,11,1]
F_sharp = [7,9,11,12,2,4,6]
B = [12,2,4,5,7,9,11]
E = [5,7,9,10,12,2,4]
A = [10,12,2,3,5,7,9]
D = [3,5,7,8,10,12,2]
G = [8,10,12,1,3,5,7]
C = [1,3,5,6,8,10,12]
F = [6,8,10,11,1,3,5]
B_flat = [11,1,3,4,6,8,10]
E_flat = [4,6,8,9,11,1,3]
A_flat = [9,11,1,2,4,6,8]
D_flat = [2,4,6,7,9,11,1]
G_flat = [7,9,11,12,2,4,6]

# Key_fifths mapping to major key
kf_map = {
    -6:G_flat,
    -5:D_flat,
    -4:A_flat,
    -3:E_flat,
    -2:B_flat,
    -1:F,
    0:C,
    1:G,
    2:D,
    3:A,
    4:E,
    5:B,
    6:F_sharp,
    7:C_sharp
}



In [3]:
# Define mappings for chords

chord_map = {
    'major':'major',
    'dominant':'major',
    'minor':'minor',
    'minor-seventh':'minor',
    'major-seventh':'major',
    'maj':'major',
    'major-sixth':'major',
    'dominant-ninth':'major',
    'min':'minor',
    'minor-sixth':'minor',
    '7':'major',
    'suspended-fourth':'major',
    'diminished':'minor',
    'half-diminished':'minor',
    'minor-ninth':'minor',
    'diminished-seventh':'minor',
    'augmented-seventh':'major',
    'min7':'minor',
    'major-ninth':'major',
    'maj7':'major',
    'dominant-seventh':'major',
    'augmented': 'major',
    'dominant-13th':'major',
    'power':'major',
    'suspended-second':'major',
    'dominant-11th':'major',
    'dim':'minor',
    'minor-11th':'minor',
    'minor-major':'minor',
    'major-minor':'minor',
    'maj9':'major',
    '9':'major',
    'pedal':'major',
    'maj69':'major',
    'aug':'major',
    'min9':'minor',
    'augmented-ninth':'major',
    'minor-13th':'minor',
    '6':'major',
    'm7b5':'minor',
    'minMaj7':'minor',
    'sus47':'major',
    'dim7':'minor',
    ' dim7':'minor'
}




In [4]:
# Define mapping for time signature

time_map = {
    '4/4':1,
    '3/4':4/3,
    '2/2':1,
    '6/8':8/6,
    '2/4':2,
    '12/8':8/12,
    '6/4':4/6,
    '9/8':8/9,
    '3/8':8/3,
    '5/4':4/5,
    '1/2':2,
    '4/8':2
}

In [5]:
# Mapping from a numerical representation to a unique note
num_to_note_map = {
    1:'C',
    2:'C#',
    3:'D',
    4:'D#',
    5:'E',
    6:'F',
    7:'F#',
    8:'G',
    9:'G#',
    10:'A',
    11:'A#',
    12:'B',
}

# Mapping from a numerical representation to a unique major/minor chord
num_to_chord_map = {
    0:'rest',
    1:'Cmaj',
    2:'C#maj',
    3:'Dmaj',
    4:'D#maj',
    5:'Emaj',
    6:'Fmaj',
    7:'F#maj',
    8:'Gmaj',
    9:'G#maj',
    10:'Amaj',
    11:'A#maj',
    12:'Bmaj',
    13:'Cmin',
    14:'C#min',
    15:'Dmin',
    16:'D#min',
    17:'Emin',
    18:'Fmin',
    19:'F#min',
    20:'Gmin',
    21:'G#min',
    22:'Amin',
    23:'A#min',
    24:'Bmin',
}


In [6]:
for filename in glob.glob("*.csv"):
    data = pd.read_csv(filename)
    
    # Cleaning up the data
    data = data.loc[~data['note_root'].isin(['rest','F2','B-2','C2','D-2','A2'])]
    #data = data.loc[~data['chord_root'].isin(['[]'])]
    #data = data.loc[~data['chord_type'].isin(['[]'])]
    data['chord_root'] = data['chord_root'].fillna('[]')
    data['chord_type'] = data['chord_type'].fillna('[]')
    data = data.dropna()  # Remove NULL values
    data.drop('note_octave', inplace=True, axis=1)  # Remove octave information
    data = data[data['note_duration'].astype(int) == data['note_duration']]   # Remove any non-integer note durations


    li2 = data.to_numpy()

# Shift rootnote and rootchord to C major key, and convert chord type to either major or minor
    n = len(li2)
    shifted_li = li2.copy()

    for i in range(n):
        row = li2[i]
        rootnote = row[6]
        notenum = note_map[rootnote]
        kf = row[2]
        major = kf_map[kf]

        rootchord = row[4]
        chordnum = note_map[rootchord]
        chordtype = row[5] # Chord type, eg. major, diminished

        time = row[0]

        # Find the difference between notenum and the major key num, and add it to the C major key num to get the shifted note.
        # This works since the intervals between each note in a major key is the same across all major keys
        difference = notenum-major[0]
        shifted_note = C[0] + difference

        if shifted_note <=0:
            shifted_note = shifted_note + 12
        elif shifted_note>12:
            shifted_note = shifted_note - 12

        shifted_li[i,6] = shifted_note


        # Check if the chord exists, i.e. 'rest chord'
        if chordnum != '[]':
            difference = chordnum-major[0]
            shifted_chord = C[0] + difference

            if shifted_chord <=0:
                shifted_chord = shifted_chord + 12
            elif shifted_chord>12:
                shifted_chord = shifted_chord - 12
        else:
            shifted_chord = '[]'

        # Check if there is a chord type
        if chordtype != '[]':
            shifted_li[i,5] = chord_map[chordtype]
        else:
            shifted_li[i,5] = '[]'

        shifted_li[i,4] = shifted_chord

        
#Convert to transformer data input format
    measure = 'unknown'
    note_duration_li = []   # initialise list of note durations for all single measures
    note_li = []            # initialise list of notes present in all measures
    chord_li = []
    chordtype_li = []
    row_chord = []          # initialise full chord row for all measures of a single song
    row_note = []           # initialise full note row for all measures of a single song
    row_chordtype=[]

    for i in range(n):
        row = shifted_li[i,:]
        rootnote = row[6]
        rootchord = row[4]
        chordtype = row[5]
        note_duration = row[7]

        #note_duration_li = []
        #note_li = []

        note_duration_li.append(note_duration)
        note_li.append(rootnote)
        chord_li.append(rootchord)
        chordtype_li.append(chordtype)


    # Find the largest common factor of all the note durations

    m = len(note_duration_li)
    lcf = int(note_duration_li[0])

    for j in range(1,m):
        lcf = math.gcd(lcf,int(note_duration_li[j]))

    normalised_note_duration = [int(item/lcf) for item in note_duration_li]


    # Put everything together into one row
    for k in range(m):
        new = np.ones((1,normalised_note_duration[k]))*note_li[k]
        if len(row_note) == 0:
            row_note = new
        else:
            row_note = np.concatenate((row_note,new),axis=1)


        # Construct row section for chord for a single measure
        new_chord = np.ones((1,normalised_note_duration[k]))            
        if chordtype_li[k] == 'major':
            new_chord *= chord_li[k]
        elif chordtype_li[k] == '[]':
            new_chord *= 0
        else:
            new_chord *= (chord_li[k] + 12)

        if len(row_chord) == 0:
            row_chord = new_chord
        else:
            row_chord = np.concatenate((row_chord,new_chord),axis=1)


    
    row_note1 = [num_to_note_map[int(item)] for item in row_note[0,:]]
    row_chord1 = [num_to_chord_map[int(item)] for item in row_chord[0,:]]

    df_note = pd.DataFrame(row_note1).T
    df_chord = pd.DataFrame(row_chord1).T
    
    filename1 = filename.replace('.csv','')
    
    df_note.to_csv ("~/Documents/Uni stuff/Engineering Science/3YP/dataset/Transformer/processed_train_transformer/"+filename1+'(note)'+'.csv', index = False, header=False)
    df_chord.to_csv ("~/Documents/Uni stuff/Engineering Science/3YP/dataset/Transformer/processed_train_transformer/"+filename1+'(chord)'+'.csv', index = False, header=False)
    
    

IndexError: list index out of range