In [1]:
import guitarpro
import music21
import math
import os
import time
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline


seperator = ','
columns = {
    #"artist"      : "",
    #"album"       : "",
    #"title"       : "",
    # effect
    "type"        : "",
    "ghostNote"   : "",
    "hammer"      : "",
    "palmMute"    : "",
    "slides"      : "",
    # note
    "duration"    : "",
    "no_of_notes" : "",
    "chord"       : "",
    "root_note"   : "",
    "root_freq"   : "",
    "scnd_note"   : "",
    "scnd_freq"   : ""
}

class Instrument:
    tuning = []
    
    def __init__(self, name):
        self.name = name
        self.tuning = []
    
    def add_string(self, note):
        self.tuning.append(music21.note.Note(note))
    
    def get_tuning(self):
        return self.tuning
    
    def get_note(self, string, fret):
        return self.tuning[string - 1].transpose(fret)
        
class Block():
    def __init__(self, duration):
        self.duration = duration
        self.events = []
        
    def add_event(self, event):
        self.events.append(event)
        
    def get_chord_info(self):
        if len(self.events) > 1:
            c = music21.chord.Chord()
            for event in self.events:
                if type(event) == DeadEvent:
                    return ""
                c.add(event.name)
            return c.commonName
        
    def to_dict(self):
        # meta info froim root
        data = self.events[0].meta
        data["duration"]    = self.duration
        data["no_of_notes"] = len(self.events)
        data["root_note"]   = self.events[0].name
        data["root_freq"]   = self.events[0].frequency
        
        if len(self.events) > 1 :
            data["chord"]     = self.get_chord_info()
            data["scnd_note"] = self.events[1].name
            data["scnd_freq"] = self.events[1].frequency
            
        return data
        
class RestEvent():
    def __init__(self):
        self.name = ''
        self.frequency = ""
        self.meta = {}
        
class NoteEvent():
    def __init__(self, note, meta):
        self.name = note.nameWithOctave.replace('-', '')
        self.frequency = str(frequency(note))
        self.meta = meta
        
class DeadEvent():
    def __init__(self, meta):
        self.name = ''
        self.frequency = ""
        self.meta = meta
        
class EventFactory():
    def __init__(self, instrument):
        self.instrument = instrument
        
    def create(self, note):
        meta = {
            'type'      : note.type.name,
            'ghostNote' : int(note.effect.ghostNote),
            'hammer'    : int(note.effect.hammer),
            'palmMute'  : int(note.effect.palmMute),
            'slides'    : len(note.effect.slides)
        }
        
        if note.type.name == 'dead':
            return DeadEvent(meta)

        return NoteEvent(self.instrument.get_note(note.string, note.value), meta)
    
def frequency(note): 
    p1 = music21.pitch.Pitch(note.nameWithOctave)
    return p1.frequency   

def get_duration(beat):
    tuplet = beat.duration.tuplet
    tupletValue = tuplet.times / tuplet.enters
    duration = music21.duration.Duration(4 / beat.duration.value * tupletValue)
    if beat.duration.isDotted:
        duration.dots = 1
    return float(duration.quarterLength)

#https://www.midi.org/specifications/item/gm-level-1-sound-set - id for instrument
def is_bass_midi_instrument(instrument):
    if instrument in range(33, 40):
        return True
    return False

def get_bass_track(song):
    for track in song.tracks:
        if is_bass_midi_instrument(track.channel.instrument) or "bass" in track.name.lower():
            return track
    
def parse_song(file, output):
    song = guitarpro.parse(file)
    track = get_bass_track(song)
    if not track:
        return
    
    bass = Instrument("Bass")
    eventFactory = EventFactory(bass)
    for string in track.strings:
        bass.add_string(str(string))
      
    '''
    row["artist"] = song.artist
    row["album"]  = song.album
    row["title"]  = song.title
    '''

    for measure in track.measures:
        for voice in measure.voices:
            for beat in voice.beats:
                block = Block(get_duration(beat))
                if beat.status == guitarpro.BeatStatus.rest:
                    block.add_event(RestEvent())
                else:
                    for note in beat.notes[::-1]:
                        block.add_event(eventFactory.create(note))         
                
                if len(block.events):
                    result = block.to_dict()
                    for k,v in result.items():
                        result.update({k:str(v)})

                    allColumns = columns.copy()
                    allColumns.update(result)

                    output.write(seperator.join(allColumns.values()) + '\n')

def process():
    output = open('output/raw_output.csv', 'w+')    
    #write header
    output.write(seperator.join(columns.keys()) + '\n')

    #parsing files
    datadir = 'data/'
    files = os.listdir(datadir)
    for gpfile in files:  
        #if gpfile == "Test.gp4":
            parse_song(datadir + gpfile, output)
    #close file                
    output.close()

    
t1 = time.time()
process()
t2 = time.time()
print (t2 - t1)    


0.9612431526184082


In [64]:
# cleaning and pocessing
notes = pd.read_csv('output/raw_output.csv')

notes.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3706 entries, 0 to 3705
Data columns (total 12 columns):
type           3547 non-null object
ghostNote      3547 non-null float64
hammer         3547 non-null float64
palmMute       3547 non-null float64
slides         3547 non-null float64
duration       3706 non-null float64
no_of_notes    3706 non-null int64
chord          309 non-null object
root_note      3409 non-null object
root_freq      3409 non-null float64
scnd_note      309 non-null object
scnd_freq      309 non-null float64
dtypes: float64(7), int64(1), object(4)
memory usage: 347.6+ KB


In [107]:
notes

Unnamed: 0,type,ghostNote,hammer,palmMute,slides,duration,no_of_notes,chord,root_note,root_freq,scnd_note,scnd_freq,duration_adj
0,normal,0.0,0.0,0.0,0.0,0.50,1,,A3,220.000000,,,0.50
1,,,,,,0.50,1,,,,,,0.00
2,normal,0.0,0.0,0.0,0.0,0.25,1,,E4,329.627557,,,0.25
3,normal,0.0,0.0,0.0,0.0,0.25,1,,B4,493.883301,,,0.25
4,,,,,,0.25,1,,,,,,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3700,normal,0.0,0.0,0.0,0.0,0.50,1,,G3,195.997718,,,0.50
3701,normal,0.0,0.0,0.0,0.0,0.50,1,,D4,293.664768,,,0.50
3702,normal,0.0,0.0,0.0,0.0,0.50,1,,F#3,184.997211,,,0.50
3703,normal,1.0,0.0,0.0,1.0,1.50,1,,F#3,184.997211,,,1.50


In [76]:
# data processing

# - merging tied notes
import numpy as np
duration_sum = 0
duration_adj = []
for index, row in notes[::-1].iterrows():
    if row['type'] == 'tie':
        duration_sum = duration_sum + row['duration']
        duration_adj.append(row['duration'])
    elif row['type'] == 'normal':
        duration_adj.append(row['duration'] + duration_sum)
        duration_sum = 0
    else:
        duration_adj.append(row['duration'])
        
durations = duration_adj[::-1]
notes['duration'] = np.array(durations)


# droping rows
notes = notes.drop(notes[notes['type'] == 'tie'].index)

# droping columns
notes.drop(columns=['type', 'no_of_notes', 'chord', 'scnd_note', 'scnd_freq'], inplace=True)

#replacing nan with 0
values = {'root_note': 'R'}
notes.fillna(value=values, inplace=True)
notes.fillna(0.0, inplace=True)

notes.to_csv('output/preprocessed_output.csv')
notes

Unnamed: 0,ghostNote,hammer,palmMute,slides,duration,root_note,root_freq
0,0.0,0.0,0.0,0.0,0.50,A3,220.000000
1,0.0,0.0,0.0,0.0,0.50,R,0.000000
2,0.0,0.0,0.0,0.0,0.25,E4,329.627557
3,0.0,0.0,0.0,0.0,0.25,B4,493.883301
4,0.0,0.0,0.0,0.0,0.25,R,0.000000
...,...,...,...,...,...,...,...
3700,0.0,0.0,0.0,0.0,0.50,G3,195.997718
3701,0.0,0.0,0.0,0.0,0.50,D4,293.664768
3702,0.0,0.0,0.0,0.0,0.50,F#3,184.997211
3703,1.0,0.0,0.0,1.0,1.50,F#3,184.997211


In [77]:
# hot one encoding
def to_list(textdata):
    return "".join(textdata.lower().split()).split(',')

def multibinarize(series):
    mlb = MultiLabelBinarizer()
    data = pd.DataFrame(mlb.fit_transform(series.apply(to_list)), columns=mlb.classes_, index=df.index)
    return mlb.classes_, data

def dummies(series):
    data = pd.get_dummies(series.apply(to_list).apply(pd.Series).stack()).sum(level=0)
    return data

# # multibinarize
# kraj_classes, kraj_data = multibinarize(df['root_note'])
# df[kraj_classes] = kraj_data

# get_dummies
nuty = dummies(df['root_note'])

notes = pd.concat(
    [
        notes,
        nuty
    ], axis=1
)

notes.drop(columns=['root_note'], inplace=True)
notes


Unnamed: 0,ghostNote,hammer,palmMute,slides,duration,root_freq,a2,a3,a4,a5,...,f5,g#2,g#3,g#4,g#5,g2,g3,g4,g5,r
0,0.0,0.0,0.0,0.0,0.50,220.000000,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0.0,0.0,0.0,0.0,0.50,0.000000,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0.0,0.0,0.0,0.0,0.25,329.627557,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0.0,0.0,0.0,0.0,0.25,493.883301,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0.0,0.0,0.0,0.0,0.25,0.000000,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3700,0.0,0.0,0.0,0.0,0.50,195.997718,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3701,0.0,0.0,0.0,0.0,0.50,293.664768,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3702,0.0,0.0,0.0,0.0,0.50,184.997211,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3703,1.0,0.0,0.0,1.0,1.50,184.997211,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [78]:
# normalize 
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler() 
scaled_values = scaler.fit_transform(notes) 
notes.loc[:,:] = scaled_values
notes.to_csv('output/processed_output.csv')
notes

Unnamed: 0,ghostNote,hammer,palmMute,slides,duration,root_freq,a2,a3,a4,a5,...,f5,g#2,g#3,g#4,g#5,g2,g3,g4,g5,r
0,0.0,0.0,0.0,0.0,0.010408,0.210224,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.010408,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,0.004337,0.314980,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.004337,0.471937,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.004337,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3700,0.0,0.0,0.0,0.0,0.010408,0.187288,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3701,0.0,0.0,0.0,0.0,0.010408,0.280616,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3702,0.0,0.0,0.0,0.0,0.010408,0.176777,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3703,1.0,0.0,0.0,1.0,0.034692,0.176777,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
