In [1]:
import guitarpro
import music21
import math
import os
import time
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [14]:
seperator = ','
columns = {
    #"artist"      : ""
    #"album"       : "",
    #"title"       : "",
    # effect
    "song"        : "",
    "type"        : "",
    "ghostNote"   : "",
    "hammer"      : "",
    "palmMute"    : "",
    "slides"      : "",
    # note
    "duration"    : "",
    "no_of_notes" : "",
    "chord"       : "",
    "root_note"   : "",
    "root_freq"   : "",
    "scnd_note"   : "",
    "scnd_freq"   : ""
}

class Instrument:
    tuning = []
    
    def __init__(self, name):
        self.name = name
        self.tuning = []
    
    def add_string(self, note):
        self.tuning.append(music21.note.Note(note))
    
    def get_tuning(self):
        return self.tuning
    
    def get_note(self, string, fret):
        return self.tuning[string - 1].transpose(fret)
        
class Block():
    def __init__(self, duration):
        self.duration = duration
        self.events = []
        
    def add_event(self, event):
        self.events.append(event)
        
    def get_chord_info(self):
        if len(self.events) > 1:
            c = music21.chord.Chord()
            for event in self.events:
                if type(event) == DeadEvent:
                    return ""
                c.add(event.name)
            return c.commonName
        
    def to_dict(self):
        # meta info froim root
        data = self.events[0].meta
        data["duration"]    = self.duration
        data["no_of_notes"] = len(self.events)
        data["root_note"]   = self.events[0].name
        data["root_freq"]   = self.events[0].frequency
        
        if len(self.events) > 1 :
            data["chord"]     = self.get_chord_info()
            data["scnd_note"] = self.events[1].name
            data["scnd_freq"] = self.events[1].frequency
            
        return data
        
class RestEvent():
    def __init__(self):
        self.name = 'R'
        self.frequency = ""
        self.meta = {}
        
class NoteEvent():
    def __init__(self, note, meta):
        self.name = note.nameWithOctave.replace('-', '')
        self.frequency = str(frequency(note))
        self.meta = meta
        
class DeadEvent():
    def __init__(self, meta):
        self.name = 'X'
        self.frequency = ""
        self.meta = meta
        
class EventFactory():
    def __init__(self, instrument):
        self.instrument = instrument
        
    def create(self, note):
        meta = {
            'type'      : note.type.name,
            'ghostNote' : int(note.effect.ghostNote),
            'hammer'    : int(note.effect.hammer),
            'palmMute'  : int(note.effect.palmMute),
            'slides'    : len(note.effect.slides)
        }
        
        if note.type.name == 'dead':
            return DeadEvent(meta)

        return NoteEvent(self.instrument.get_note(note.string, note.value), meta)
    
def frequency(note): 
    p1 = music21.pitch.Pitch(note.nameWithOctave)
    return p1.frequency   

def get_duration(beat):
    tuplet = beat.duration.tuplet
    tupletValue = tuplet.times / tuplet.enters
    duration = music21.duration.Duration(4 / beat.duration.value * tupletValue)
    if beat.duration.isDotted:
        duration.dots = 1
    return float(duration.quarterLength)

#https://www.midi.org/specifications/item/gm-level-1-sound-set - id for instrument
def is_bass_midi_instrument(instrument):
    if instrument in range(33, 40):
        return True
    return False

def get_bass_track(song):
    for track in song.tracks:
        if is_bass_midi_instrument(track.channel.instrument) or "bass" in track.name.lower():
            return track
    
def parse_song(file): 
    song = guitarpro.parse(file)
    events = []
    track = get_bass_track(song)
    if not track:
        print('bass track not found')
        return events
    
    bass = Instrument("Bass")
    eventFactory = EventFactory(bass)
    for string in track.strings:
        bass.add_string(str(string))

    
    for measure in track.measures:
        for voice in measure.voices:
            for beat in voice.beats:
                block = Block(get_duration(beat))
                if beat.status == guitarpro.BeatStatus.rest:
                    block.add_event(RestEvent())
                else:
                    for note in beat.notes[::-1]:
                        block.add_event(eventFactory.create(note))         
                
                if len(block.events):
                    result = block.to_dict()
                    for k,v in result.items():
                        result.update({k:str(v)})

                    allColumns = columns.copy()
                    allColumns.update(result)
                    
                    events.append(allColumns)
    return events
    

def process():
    output = open('output/raw_output.csv', 'w+')    
    #write header
    output.write(seperator.join(columns.keys()) + '\n')

    #parsing files
    datadir = 'data/'
    files = os.listdir(datadir)
    song = 0
    try:
        for gpfile in files:
            print(' -- ' + str(song) + ' processing: ' + gpfile)
            song += 1
            #if gpfile == "Test.gp4":
            result = parse_song(datadir + gpfile)
            if len(result) > 0:
                for row in result:
                    row['song'] = str(song)
                    output.write(seperator.join(row.values()) + '\n')
    except KeyError:
        print('parse error')
    #close file                
    output.close()

    
t1 = time.time()
process()
t2 = time.time()
print (t2 - t1)    

 -- 0 processing: Testament - The Burning Times.gp3
 -- 1 processing: Kreator - Second Awakening.gp4
 -- 2 processing: KISS - Makin Love.gp3
 -- 3 processing: Tool - Forty Six And 2 (ver 2).gp3
 -- 4 processing: KISS - God Gave Rock N Roll To You Ii.gp5
 -- 5 processing: Kreator - Against The Rest.gp5
 -- 6 processing: Kreator - From Flood Into Fire (ver 2 by rafaelherrera).gp5
 -- 7 processing: KISS - Parasite.gp3
 -- 8 processing: KISS - Sure Know Something (ver 2).gp3
 -- 9 processing: Tool - Fear Inoculum (ver 2 by Grimin).gp5
bass track not found
 -- 10 processing: Tool - Schism (ver 7 by kGonzo).gp5
 -- 11 processing: KISS - Modern Day Delilah (ver 2 by JesperPantzar).gp5
 -- 12 processing: Lamb of God - Boot Scraper.gp4
bass track not found
 -- 13 processing: Lamb of God - O.D.H.G.A.B.F.E.gp4
 -- 14 processing: Kreator - Take Their Lives.gp5
 -- 15 processing: Tool - Sober (ver 6 by jtoodrumz).gp5
 -- 16 processing: Slipknot - All Out Life.gp5
 -- 17 processing: KISS - 100,000 Y

In [15]:
# cleaning and pocessing
notes = pd.read_csv('output/raw_output.csv')

# data processing

# - merging tied notes
import numpy as np
duration_sum = 0
duration_adj = []
for index, row in notes[::-1].iterrows():
    if row['type'] == 'tie':
        duration_sum = duration_sum + row['duration']
        duration_adj.append(row['duration'])
    elif row['type'] == 'normal':
        duration_adj.append(row['duration'] + duration_sum)
        duration_sum = 0
    else:
        duration_adj.append(row['duration'])
        
durations = duration_adj[::-1]
notes['duration'] = np.array(durations)


# droping rows
notes = notes.drop(notes[notes['type'] == 'tie'].index)

# droping columns
notes.drop(columns=['type', 'no_of_notes', 'chord', 'scnd_note', 'scnd_freq'], inplace=True)

#normalize octave & duration
notes['duration'] = notes['duration'].clip(upper=4.0) #max 4 x 1/4

#print(notes['root_note'])

#replacing nan with 0
#values = {'root_note': 'R'}
#notes.fillna(value=values, inplace=True)
notes.fillna(0.0, inplace=True)

notes.to_csv('output/preprocessed_output.csv')
notes

Unnamed: 0,song,ghostNote,hammer,palmMute,slides,duration,root_note,root_freq
0,1,0.0,0.0,0.0,0.0,4.00,D2,73.416192
2,1,0.0,0.0,0.0,0.0,4.00,D2,73.416192
3,1,0.0,0.0,0.0,0.0,2.00,E3,164.813778
4,1,0.0,0.0,0.0,0.0,2.00,E3,155.563492
5,1,0.0,0.0,0.0,0.0,4.00,D2,73.416192
...,...,...,...,...,...,...,...,...
295710,313,0.0,0.0,0.0,0.0,0.50,E2,82.406889
295711,313,0.0,0.0,0.0,0.0,1.00,R,0.000000
295712,313,0.0,0.0,0.0,0.0,0.75,E2,82.406889
295713,313,0.0,0.0,0.0,0.0,0.75,E2,82.406889


In [16]:
notes = pd.read_csv('output/preprocessed_output.csv')

# transpose
#notes['octave'] = notes.apply(lambda row: row['root_note'][-1:], axis=1)
#notes

In [17]:
# create note / duration
notes['note_duration'] = notes.apply(lambda row: row['root_note'] + '-' + str(row['duration']), axis=1)

# hot one encoding
def to_list(textdata):
    return "".join(textdata.lower().split()).split(',')

def multibinarize(series):
    mlb = MultiLabelBinarizer()
    data = pd.DataFrame(mlb.fit_transform(series.apply(to_list)), columns=mlb.classes_, index=df.index)
    return mlb.classes_, data

def dummies(series):
    data = pd.get_dummies(series.apply(to_list).apply(pd.Series).stack()).sum(level=0)
    return data

# get_dummies
labelled_notes = dummies(notes['note_duration'])

# save labels to file
print(labelled_notes.columns.values)
df = pd.DataFrame(labelled_notes.columns.values)
df.to_csv('output/labelled_notes.csv', index=False)

['a1-0.125' 'a1-0.25' 'a1-0.3333333333333333' 'a1-0.5' 'a1-0.75' 'a1-1.0'
 'a1-1.5' 'a1-2.0' 'a1-3.0' 'a1-4.0' 'a2-0.125' 'a2-0.16666666666666666'
 'a2-0.25' 'a2-0.3333333333333333' 'a2-0.5' 'a2-0.6666666666666666'
 'a2-0.75' 'a2-1.0' 'a2-1.25' 'a2-1.3333333333333333' 'a2-1.5' 'a2-1.75'
 'a2-2.0' 'a2-2.5' 'a2-3.0' 'a2-3.5' 'a2-4.0' 'a3-0.125'
 'a3-0.16666666666666666' 'a3-0.25' 'a3-0.3333333333333333' 'a3-0.4'
 'a3-0.5' 'a3-0.625' 'a3-0.6666666666666666' 'a3-0.75' 'a3-0.875' 'a3-1.0'
 'a3-1.25' 'a3-1.5' 'a3-1.75' 'a3-2.0' 'a3-2.25' 'a3-2.5' 'a3-3.0'
 'a3-3.5' 'a3-3.75' 'a3-4.0' 'a4-0.125' 'a4-0.25' 'a4-0.5' 'a4-0.75'
 'a4-1.0' 'a4-1.125' 'a4-3.25' 'a4-4.0' 'a5-1.0' 'a5-4.0' 'b1-0.125'
 'b1-0.25' 'b1-0.3333333333333333' 'b1-0.5' 'b1-0.6666666666666666'
 'b1-0.75' 'b1-1.0' 'b1-1.25' 'b1-1.5' 'b1-2.0' 'b1-2.5' 'b1-2.75'
 'b1-3.0' 'b1-4.0' 'b2-0.08333333333333333' 'b2-0.125'
 'b2-0.16666666666666666' 'b2-0.25' 'b2-0.3333333333333333' 'b2-0.4375'
 'b2-0.5' 'b2-0.6' 'b2-0.625' 'b2-0.66666666

In [18]:
notes = pd.concat(
    [
        notes,
        labelled_notes
    ], axis=1
)


#drop unneded columns
notes.drop(columns=['Unnamed: 0', 'ghostNote', 'hammer', 'palmMute', 'slides', 'duration', 'root_freq', 'root_note', 'note_duration'], inplace=True)

notes

Unnamed: 0,song,a1-0.125,a1-0.25,a1-0.3333333333333333,a1-0.5,a1-0.75,a1-1.0,a1-1.5,a1-2.0,a1-3.0,...,r-3.0,r-4.0,x-0.125,x-0.25,x-0.5,x-0.75,x-1.0,x-1.5,x-3.0,x-4.0
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281830,313,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
281831,313,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
281832,313,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
281833,313,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
# normalize 
from sklearn.preprocessing import MinMaxScaler

#scaler = MinMaxScaler() 
#scaled_values = scaler.fit_transform(notes) 
#notes.loc[:,:] = scaled_values
notes.to_csv('output/processed_output.csv')
notes

Unnamed: 0,song,a1-0.125,a1-0.25,a1-0.3333333333333333,a1-0.5,a1-0.75,a1-1.0,a1-1.5,a1-2.0,a1-3.0,...,r-3.0,r-4.0,x-0.125,x-0.25,x-0.5,x-0.75,x-1.0,x-1.5,x-3.0,x-4.0
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281830,313,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
281831,313,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
281832,313,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
281833,313,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
