In [None]:
pip install pyguitarpro

In [8]:
import guitarpro
import music21
import math
import os
import time
import numpy as np
import swifter
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [9]:
seperator = ','
columns = {
    #"artist"      : ""
    #"album"       : "",
    #"title"       : "",
    # effect
    "song"        : "",
    "type"        : "",
    "ghostNote"   : "",
    "hammer"      : "",
    "palmMute"    : "",
    "slides"      : "",
    # note
    "duration"    : "",
    "no_of_notes" : "",
    "chord"       : "",
    "root_note"   : "",
    "root_name"   : "",
    "root_octave" : "",
    "root_midi" : "",
    #"root_freq"   : "",
    "scnd_note"   : "",
    "scnd_freq"   : ""
}

class Instrument:
    tuning = []
    def __init__(self, name):
        self.name = name
        self.tuning = []
    
    def add_string(self, note):
        self.tuning.append(music21.note.Note(note))
    
    def get_tuning(self):
        return self.tuning
    
    def get_note(self, string, fret):
        return self.tuning[string - 1].transpose(fret)
        
class Block():
    def __init__(self, duration):
        self.duration = duration
        self.events = []
        
    def add_event(self, event):
        self.events.append(event)
        
    def get_chord_info(self):
        if len(self.events) > 1:
            c = music21.chord.Chord()
            for event in self.events:
                if type(event) == DeadEvent:
                    return ""
                c.add(event.name)
            return c.commonName
        
    def to_dict(self):
        # meta info froim root
        data = self.events[0].meta
        data["duration"]    = self.duration
        data["no_of_notes"] = len(self.events)
        data["root_name"]   = self.events[0].name
        data["root_octave"] = self.events[0].octave
        data["root_midi"]   = self.events[0].midi
        #data["root_freq"]   = self.events[0].frequency
        
        if len(self.events) > 1 :
            data["chord"]     = self.get_chord_info()
            data["scnd_note"] = self.events[1].name
            #data["scnd_freq"] = self.events[1].frequency
            
        return data
        
class RestEvent():
    def __init__(self):
        self.name = 'R'
        self.octave = ""
        self.frequency = ""
        self.midi = "" 
        self.meta = {}
        
class NoteEvent():
    def __init__(self, note, meta):
        self.name = note.name
        self.note = note.nameWithOctave
        self.octave = note.octave
        self.frequency = str(frequency(note))
        self.midi = str(midi(note))
        self.meta = meta
        
class DeadEvent():
    def __init__(self, meta):
        self.name = 'X'
        self.note = 'X'
        self.octave = ""
        self.frequency = ""
        self.midi = ""
        self.meta = meta
        
class EventFactory():
    def __init__(self, instrument):
        self.instrument = instrument
        
    def create(self, note):
        meta = {
            'type'      : note.type.name,
            'ghostNote' : int(note.effect.ghostNote),
            'hammer'    : int(note.effect.hammer),
            'palmMute'  : int(note.effect.palmMute),
            'slides'    : len(note.effect.slides)
        }
        
        if note.type.name == 'dead':
            return DeadEvent(meta)

        return NoteEvent(self.instrument.get_note(note.string, note.value), meta)
    
def frequency(note): 
    p1 = music21.pitch.Pitch(note.nameWithOctave)
    return p1.frequency

def midi(note): 
    p1 = music21.pitch.Pitch(note.nameWithOctave)
    return p1.midi   

def get_duration(beat):
    tuplet = beat.duration.tuplet
    tupletValue = tuplet.times / tuplet.enters
    duration = music21.duration.Duration(4 / beat.duration.value * tupletValue)
    if beat.duration.isDotted:
        duration.dots = 1
    return float(duration.quarterLength)

#https://www.midi.org/specifications/item/gm-level-1-sound-set - id for instrument
def is_bass_midi_instrument(instrument):
    if instrument in range(33, 40):
        return True
    return False

def get_bass_track(song):
    for track in song.tracks:
        if is_bass_midi_instrument(track.channel.instrument) or "bass" in track.name.lower():
            return track
    
def parse_song(file): 
    song = guitarpro.parse(file)
    events = []
    track = get_bass_track(song)
    if not track:
        print('bass track not found')
        return events
    
    bass = Instrument("Bass")
    eventFactory = EventFactory(bass)
    for string in track.strings:
        bass.add_string(str(string))

    
    for measure in track.measures:
        for voice in measure.voices:
            for beat in voice.beats:
                block = Block(get_duration(beat))
                if beat.status == guitarpro.BeatStatus.rest:
                    block.add_event(RestEvent())
                else:
                    for note in beat.notes[::-1]:
                        block.add_event(eventFactory.create(note))         
                
                if len(block.events):
                    result = block.to_dict()
                    for k,v in result.items():
                        result.update({k:str(v)})

                    allColumns = columns.copy()
                    allColumns.update(result)
                    
                    events.append(allColumns)
    return events
    

def process():
    output = open('output/raw_output.csv', 'w+')    
    #write header
    output.write(seperator.join(columns.keys()) + '\n')

    #parsing files
    datadir = 'data/'
    files = os.listdir(datadir)
    song = 0
    try:
        for gpfile in files:
            print(' -- ' + str(song) + ' processing: ' + gpfile)
            song += 1
            #if gpfile == "Test.gp4":
            result = parse_song(datadir + gpfile)
            if len(result) > 0:
                for row in result:
                    row['song'] = str(song)
                    output.write(seperator.join(row.values()) + '\n')
    except KeyError:
        print('parse error')
    #close file                
    output.close()

    
t1 = time.time()
process()
t2 = time.time()
print (t2 - t1)    

 -- 0 processing: Testament - The Burning Times.gp3
 -- 1 processing: Pantera - 5 Minutes Alone.gp3
 -- 2 processing: Opeth - In The Mist She Was Standing.gp4
 -- 3 processing: Kreator - Second Awakening.gp4
 -- 4 processing: KISS - Makin Love.gp3
 -- 5 processing: Tool - Forty Six And 2 (ver 2).gp3
 -- 6 processing: Testament - Burnt Offerings.gp3
 -- 7 processing: Pantera - Shattered.gp4
 -- 8 processing: KISS - God Gave Rock N Roll To You Ii.gp5
 -- 9 processing: Kreator - Against The Rest.gp5
 -- 10 processing: Pantera - Regular People.gp4
bass track not found
 -- 11 processing: Kreator - From Flood Into Fire (ver 2 by rafaelherrera).gp5
 -- 12 processing: Lamb of God - Omerta (ver 2 by lambofgod0127).gp5
bass track not found
 -- 13 processing: Death - Flesh And The Power It Holds (ver 4).gp5
 -- 14 processing: KISS - Parasite.gp3
 -- 15 processing: KISS - Sure Know Something (ver 2).gp3
 -- 16 processing: Pantera - Mouth Of War.gp3
 -- 17 processing: Lamb of God - 11Th Hour (ver 2

In [10]:
df

Unnamed: 0,song,type,ghostNote,hammer,palmMute,slides,duration,no_of_notes,chord,root_note,root_name,root_octave,root_midi,scnd_note,scnd_freq
0,1,normal,0.0,0.0,0.0,0.0,4.00,1,,,D,2.0,38.0,,
1,1,tie,0.0,0.0,0.0,0.0,4.00,1,,,D,2.0,38.0,,
2,1,normal,0.0,0.0,0.0,0.0,4.00,1,,,D,2.0,38.0,,
3,1,normal,0.0,0.0,0.0,0.0,2.00,1,,,E,3.0,52.0,,
4,1,normal,0.0,0.0,0.0,0.0,2.00,1,,,E-,3.0,51.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
582445,621,normal,0.0,0.0,0.0,0.0,0.25,1,,,E,2.0,40.0,,
582446,621,tie,0.0,0.0,0.0,0.0,0.50,1,,,E,2.0,40.0,,
582447,621,normal,0.0,0.0,0.0,0.0,0.50,1,,,F,2.0,41.0,,
582448,621,tie,0.0,0.0,0.0,0.0,1.00,1,,,E,2.0,40.0,,


In [13]:
df = pd.read_csv('output/raw_output.csv')

# strip rests on beginning and end
def strip_rests(df, column_name="root_name"):
    first_note = df[column_name].notna().idxmin()
    last_note  = df[column_name].notna()[::-1].idxmax()
    
    return df[first_note:last_note]

def calc_octave_range(song):
    min = song['root_octave'].min()
    max = song['root_octave'].max()
    
    return  min, max, max - min 

def calc_note_range(song):
    min_midi = song['root_midi'].min()
    max_midi = song['root_midi'].max()
    min_note = music21.note.Note(min_midi).nameWithOctave
    max_note = music21.note.Note(max_midi).nameWithOctave
    
    return  min_midi, max_midi, min_note, max_note

processed = pd.DataFrame()
songs_count = len(df['song'].unique())
for song_idx in range(1, songs_count):
    song = df[df['song'] == song_idx]
    if len(song):
        song.reset_index(inplace=True)
        song = strip_rests(song)
        min_octave, max_octave, octave_range = calc_octave_range(song)
        transpose_value = min_octave - 2
        if transpose_value > 0:
            #print ('note range: ', calc_note_range(song))
            song['octave'] = (song['root_octave'] - transpose_value)
            print('-- transposed song: ' + str(song_idx) + ' octaves: ' + str(min_octave) + '-' +str(max_octave))

    processed = processed.append(song, ignore_index=True,sort=True)
    
df = processed

# - merging tied notes
import numpy as np
duration_sum = 0
duration_adj = []
for index, row in df[::-1].iterrows():
    if row['type'] == 'tie':
        duration_sum = duration_sum + row['duration']
        duration_adj.append(row['duration'])
    elif row['type'] == 'normal':
        duration_adj.append(row['duration'] + duration_sum)
        duration_sum = 0
    else:
        duration_adj.append(row['duration'])
        
durations = duration_adj[::-1]
df['duration'] = np.array(durations)

# droping rows
df = df.drop(df[df['type'] == 'tie'].index)

# droping columns
df.drop(columns=['type', 'no_of_notes', 'chord', 'scnd_note', 'scnd_freq'], inplace=True)

#normalize octave & duration
df['duration'] = df['duration'].clip(upper=2.0) #max 2 x 1/4


#replacing nan with 0
df.fillna(0.0, inplace=True)

df.to_csv('output/preprocessed_output.csv')
df

-- transposed song: 62 octaves: 3.0-4.0
-- transposed song: 129 octaves: 3.0-6.0
-- transposed song: 177 octaves: 3.0-5.0
-- transposed song: 261 octaves: 3.0-4.0
-- transposed song: 277 octaves: 3.0-4.0
-- transposed song: 341 octaves: 3.0-4.0
-- transposed song: 464 octaves: 3.0-4.0
-- transposed song: 484 octaves: 3.0-3.0


Unnamed: 0,duration,ghostNote,hammer,index,octave,palmMute,root_midi,root_name,root_note,root_octave,slides,song
0,2.0,0.0,0.0,0.0,0.0,0.0,38.0,D,0.0,2.0,0.0,1
2,2.0,0.0,0.0,2.0,0.0,0.0,38.0,D,0.0,2.0,0.0,1
3,2.0,0.0,0.0,3.0,0.0,0.0,52.0,E,0.0,3.0,0.0,1
4,2.0,0.0,0.0,4.0,0.0,0.0,51.0,E-,0.0,3.0,0.0,1
5,2.0,0.0,0.0,5.0,0.0,0.0,38.0,D,0.0,2.0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
495319,0.5,0.0,0.0,495763.0,0.0,0.0,41.0,F,0.0,2.0,0.0,524
495320,0.5,0.0,0.0,495764.0,0.0,0.0,43.0,G,0.0,2.0,0.0,524
495321,0.5,0.0,0.0,495765.0,0.0,0.0,43.0,G,0.0,2.0,0.0,524
495322,0.5,0.0,0.0,495766.0,0.0,0.0,40.0,E,0.0,2.0,0.0,524


In [14]:
#normalize duration
def get_closest_value(value, available_values):
    pos = (np.abs(available_values-value)).argmin()
    return available_values[pos]

def normalize_duration(df, threshold=100):
    x = df['duration'].value_counts()
    values_to_process = x[x.lt(threshold)].keys().to_numpy()
    available_values =  x[x.ge(threshold)].keys().to_numpy()
    vals = {}
    for value in values_to_process:
        vals[value] = get_closest_value(value, available_values)
        
    return df.replace(vals)
        
##print(df.size())
df = normalize_duration(df)
df.to_csv('output/preprocessed_output.csv')



In [33]:
notes = pd.read_csv('output/preprocessed_output.csv')

#create note name
notes['root_octave'] = notes['root_octave'].astype(int).astype(str).replace('0', '')
notes['note_name'] = (notes['root_name'] + notes['root_octave'])

# create event_name
notes['event'] = (notes['note_name'] + '_'+ notes['duration'].astype(str))

# hot one encoding
def to_list(textdata):
    return "".join(textdata.lower().split()).split(',')

def dummies(series):
    data = pd.get_dummies(series.apply(to_list).apply(pd.Series).stack()).sum(level=0)
    return data

# get_dummies
labelled_notes = dummies(notes['event'])

# save labels to file
#print(labelled_notes.columns.values)
df = pd.DataFrame(labelled_notes.columns.values)
df.to_csv('output/labelled_notes.csv', index=False)

notes = pd.concat([notes, labelled_notes], axis=1)


In [37]:
# REMOVING NOT NEEDED COLUMNS

# all values
output_columns = labelled_notes.columns.values.tolist()
output_columns.append('song')

#columns to remove
columns_to_remove = list(set(notes.columns.values.tolist()) - set(output_columns))
notes.drop(columns=columns_to_remove, inplace=True)
notes

Unnamed: 0,song,a1_0.125,a1_0.25,a1_0.3333333333333333,a1_0.5,a1_0.75,a1_1.0,a1_1.5,a1_2.0,a2_0.125,...,x_0.125,x_0.16666666666666666,x_0.25,x_0.3333333333333333,x_0.5,x_0.6666666666666666,x_0.75,x_1.0,x_1.5,x_2.0
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
476657,524,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
476658,524,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
476659,524,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
476660,524,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [40]:
# normalize 
from sklearn.preprocessing import MinMaxScaler

#scaler = MinMaxScaler() 
#scaled_values = scaler.fit_transform(notes) 
#notes.loc[:,:] = scaled_values
notes.to_csv('output/processed_output.csv', index=False)
notes

Unnamed: 0,song,a1_0.125,a1_0.25,a1_0.3333333333333333,a1_0.5,a1_0.75,a1_1.0,a1_1.5,a1_2.0,a2_0.125,...,x_0.125,x_0.16666666666666666,x_0.25,x_0.3333333333333333,x_0.5,x_0.6666666666666666,x_0.75,x_1.0,x_1.5,x_2.0
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
476657,524,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
476658,524,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
476659,524,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
476660,524,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
