In [None]:
pip install pyguitarpro

In [None]:
import guitarpro
import music21
import math
import os
import time
import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Field delimiter used when rows are joined into lines of the raw CSV file.
seperator = ','

# Ordered CSV schema. The keys are the columns of the raw output (in exactly
# this order); every value is the empty-string default that a row keeps when
# an event does not supply that field.
#
# Columns that are currently disabled: "artist", "album", "title", "root_freq".
columns = dict.fromkeys(
    [
        # effect
        "song",
        "type",
        "ghostNote",
        "hammer",
        "palmMute",
        "slides",
        # note
        "duration",
        "no_of_notes",
        "chord",
        "root_note",
        "scnd_note",
        "scnd_freq",
    ],
    "",
)

class Instrument:
    """A stringed instrument described by the open note of each of its strings.

    Strings are addressed with 1-based numbers, in the order in which they
    were registered through ``add_string``.
    """

    def __init__(self, name):
        self.name = name
        # Per-instance list of open-string notes. (There is deliberately no
        # class-level list: a mutable class attribute would be shared by all
        # instruments if it were ever used.)
        self.tuning = []

    def add_string(self, note):
        """Register one more string; ``note`` is handed to ``music21.note.Note``."""
        self.tuning.append(music21.note.Note(note))

    def get_tuning(self):
        """Return the list of open-string notes (the live list, not a copy)."""
        return self.tuning

    def get_note(self, string, fret):
        """Return the open note of ``string`` transposed by ``fret``.

        Args:
            string: 1-based string number.
            fret: value passed unchanged to the note's ``transpose`` method.

        Raises:
            IndexError: if ``string`` is not between 1 and the number of
                registered strings. Without this check, 0 or a negative number
                would silently wrap around to another string.
        """
        if not 1 <= string <= len(self.tuning):
            raise IndexError(
                'string %r is out of range for an instrument with %d strings'
                % (string, len(self.tuning))
            )
        return self.tuning[string - 1].transpose(fret)
        
class Block():
    """The events that belong to one beat, together with the beat's duration."""

    def __init__(self, duration):
        self.duration = duration
        self.events = []

    def add_event(self, event):
        """Append ``event``; the first event added is treated as the root."""
        self.events.append(event)

    def get_chord_info(self):
        """Return the ``commonName`` music21 reports for the notes of this block.

        Returns:
            ``None`` when the block holds fewer than two events, ``""`` when
            any of the events is a ``DeadEvent``, otherwise the ``commonName``
            of a ``music21.chord.Chord`` built from the event names.
        """
        if len(self.events) <= 1:
            return None

        c = music21.chord.Chord()
        for event in self.events:
            if isinstance(event, DeadEvent):
                return ""
            c.add(event.name)
        return c.commonName

    def to_dict(self):
        """Return a flat dict describing this block.

        The dict starts from the meta information of the root (first) event
        and adds ``duration``, ``no_of_notes`` and ``root_note``; blocks with
        more than one event also get ``chord`` and ``scnd_note``.

        Raises:
            IndexError: if the block has no events.
        """
        # Work on a copy: writing into the event's own meta dict would leak
        # the block fields (and any later edits by the caller) into the event.
        data = dict(self.events[0].meta)
        data["duration"]    = self.duration
        data["no_of_notes"] = len(self.events)
        data["root_note"]   = self.events[0].name
        #data["root_freq"]   = self.events[0].frequency

        if len(self.events) > 1:
            data["chord"]     = self.get_chord_info()
            data["scnd_note"] = self.events[1].name
            #data["scnd_freq"] = self.events[1].frequency

        return data
        
class RestEvent:
    """Event standing for a beat on which nothing is played (name ``'R'``)."""

    def __init__(self):
        # A rest has no effect information; every instance owns a fresh dict.
        self.meta = dict()
        self.frequency = ""
        self.name = 'R'
        
class NoteEvent():
    """A pitched note together with its effect meta information.

    ``name`` is the note name with octave and without any ``-`` character.
    music21 writes flats as ``-`` (``B-2`` is B-flat 2), so simply deleting
    the dash would turn a flat into the natural a semitone above it. Flat
    notes are therefore respelled as the equivalent sharp (``B-2`` becomes
    ``A#2``) before the dash is removed.
    """

    # Sharp spelling of every pitch class, indexed by MIDI number modulo 12.
    _SHARP_NAMES = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')

    def __init__(self, note, meta):
        name = note.nameWithOctave
        accidental = note.pitch.accidental
        if accidental is not None and accidental.alter < 0:
            # Rebuild the name from the MIDI number (60 is C4) so the stored
            # name still denotes the pitch that is actually sounding.
            midi = note.pitch.midi
            name = self._SHARP_NAMES[midi % 12] + str(midi // 12 - 1)
        # As before, no '-' may remain in the name (e.g. from a negative octave).
        self.name = name.replace('-', '')
        self.frequency = str(frequency(note))
        self.meta = meta
        
class DeadEvent:
    """Event for a note of type ``'dead'``: named ``'X'`` and without frequency."""

    def __init__(self, meta):
        # The meta dict is stored as given (no copy is made).
        self.meta = meta
        self.frequency = ""
        self.name = 'X'
        
class EventFactory:
    """Creates event objects from parsed notes for one instrument."""

    def __init__(self, instrument):
        self.instrument = instrument

    def create(self, note):
        """Build the event for ``note``.

        The effect flags ``ghostNote``, ``hammer`` and ``palmMute`` are stored
        as integers and ``slides`` as the number of slides. A note whose type
        name is ``'dead'`` becomes a ``DeadEvent``; every other note becomes a
        ``NoteEvent`` for the pitch the instrument reports for the note's
        string and value.
        """
        effect = note.effect
        kind = note.type.name
        meta = dict(
            type=kind,
            ghostNote=int(effect.ghostNote),
            hammer=int(effect.hammer),
            palmMute=int(effect.palmMute),
            slides=len(effect.slides),
        )

        if kind != 'dead':
            pitched = self.instrument.get_note(note.string, note.value)
            return NoteEvent(pitched, meta)

        return DeadEvent(meta)
    
def frequency(note):
    """Return the frequency music21 reports for the pitch named by ``note``.

    A new ``music21.pitch.Pitch`` is created from ``note.nameWithOctave`` and
    its ``frequency`` attribute is returned.
    """
    return music21.pitch.Pitch(note.nameWithOctave).frequency

def get_duration(beat):
    """Return the length of ``beat`` as a float ``quarterLength``.

    The base length is ``4 / beat.duration.value`` scaled by the tuplet ratio
    ``times / enters``; it is wrapped in a ``music21.duration.Duration`` and,
    for a dotted beat, that duration gets one dot before its
    ``quarterLength`` is read.
    """
    note_value = beat.duration
    ratio = note_value.tuplet.times / note_value.tuplet.enters
    length = music21.duration.Duration(4 / note_value.value * ratio)
    if note_value.isDotted:
        length.dots = 1
    return float(length.quarterLength)

#https://www.midi.org/specifications/item/gm-level-1-sound-set - id for instrument
def is_bass_midi_instrument(instrument):
    """Return True when ``instrument`` is one of the ids 33 to 39 (inclusive)."""
    # NOTE(review): the GM Level 1 list numbers the bass family 33-40 (1-based),
    # i.e. 32-39 when counted from 0. Either way this range leaves one bass
    # program out (40 or 32) - confirm which numbering the caller passes in.
    return instrument in range(33, 40)

def get_bass_track(song):
    """Return the first track of ``song`` that looks like a bass track.

    A track qualifies when its channel instrument passes
    ``is_bass_midi_instrument`` or when its lower-cased name contains
    ``"bass"``. ``None`` is returned when no track qualifies.
    """
    candidates = (
        track
        for track in song.tracks
        if is_bass_midi_instrument(track.channel.instrument) or "bass" in track.name.lower()
    )
    return next(candidates, None)
    
def parse_song(file):
    """Parse a Guitar Pro file and return one row (dict) per beat of its bass track.

    Every row starts as a copy of the module-level ``columns`` defaults and is
    overwritten with the stringified values of the beat's ``Block.to_dict()``.
    Beats that produce no event are skipped. When the song has no bass track,
    a message is printed and an empty list is returned.
    """
    song = guitarpro.parse(file)
    track = get_bass_track(song)
    if not track:
        print('bass track not found')
        return []

    # Describe the instrument by the open-string notes of the track.
    bass = Instrument("Bass")
    for string in track.strings:
        bass.add_string(str(string))
    eventFactory = EventFactory(bass)

    events = []
    beats = (
        beat
        for measure in track.measures
        for voice in measure.voices
        for beat in voice.beats
    )
    for beat in beats:
        block = Block(get_duration(beat))
        if beat.status == guitarpro.BeatStatus.rest:
            block.add_event(RestEvent())
        else:
            # Notes are added in reverse order, so the last note of the beat
            # becomes the block's root event.
            for note in reversed(beat.notes):
                block.add_event(eventFactory.create(note))

        if not block.events:
            continue

        row = columns.copy()
        row.update({key: str(value) for key, value in block.to_dict().items()})
        events.append(row)

    return events
    

def process():
    """Parse every file in ``data/`` and write all rows to ``output/raw_output.csv``.

    The first line of the CSV is the header built from the keys of
    ``columns``. Files are handled in sorted name order and numbered from 1;
    that number is written to the ``song`` column of each of the file's rows.
    A file whose parsing raises ``KeyError`` is reported with 'parse error'
    and skipped, so one bad file no longer stops the remaining files from
    being processed. The number is still consumed by a skipped file.
    """
    datadir = 'data/'
    # os.listdir gives no ordering guarantee; sorting keeps the song numbers
    # reproducible between runs.
    files = sorted(os.listdir(datadir))

    os.makedirs('output', exist_ok=True)
    # The with-block closes the file even when an unexpected error occurs.
    with open('output/raw_output.csv', 'w') as output:
        #write header
        output.write(seperator.join(columns.keys()) + '\n')

        #parsing files
        for song, gpfile in enumerate(files, start=1):
            # The progress line shows the 0-based position of the file.
            print(' -- ' + str(song - 1) + ' processing: ' + gpfile)
            try:
                result = parse_song(os.path.join(datadir, gpfile))
            except KeyError:
                print('parse error')
                continue

            for row in result:
                row['song'] = str(song)
                output.write(seperator.join(row.values()) + '\n')

    
# Run the whole extraction once and print how many seconds it took
# (difference of two time.time() readings).
t1 = time.time()
process()
t2 = time.time()
print (t2 - t1)    

In [None]:

# NOTE: a bare `df` used to be displayed here, but `df` is first created by the
# cell below, so this cell raised NameError on "Restart Kernel -> Run All".
# The frame is displayed at the end of the cell that builds it.

In [None]:
# Load the raw per-beat rows; process() writes this file (header line first).
df = pd.read_csv('output/raw_output.csv')

# strip rests on beginning and end
def strip_rests(df, column_name="root_note"):
    """Return ``df`` without its leading and trailing rest rows.

    A row counts as a rest when its ``column_name`` value is missing (NaN) or
    equals ``'R'``. Rests between two notes are kept. The result is the
    label-based slice from the first to the last non-rest row, both included.

    Args:
        df: frame of one song; its index labels must be unique (the caller
            resets the index before calling).
        column_name: column holding the note names.

    Returns:
        The trimmed slice of ``df``. When the column holds no note at all
        there is nothing to anchor the trim on and ``df`` is returned
        unchanged.
    """
    values = df[column_name]
    is_note = values.notna() & (values != 'R')
    if not is_note.any():
        return df

    # idxmax() on a boolean Series gives the label of the first True.
    first_note = is_note.idxmax()
    last_note = is_note[::-1].idxmax()

    # .loc slices by label and includes the end label, so the last note stays.
    return df.loc[first_note:last_note]

def calc_octave_range(song):
    """Add an ``octave`` column to ``song`` and return (lowest, highest, span).

    The octave of a row is the last character of its ``root_note`` converted
    to int; names of length 1 (such as the 'R' and 'X' markers) get NaN.
    ``song`` is modified in place by the new column.
    """
    def _octave_of(row):
        if len(row['root_note']) > 1:
            return int(row['root_note'][-1:])
        return np.nan

    song['octave'] = song.apply(_octave_of, axis=1)
    lowest = song['octave'].min()
    highest = song['octave'].max()
    return lowest, highest, highest - lowest

def transpose_octave_down(row, octaves=1):
    """Return ``row`` with its ``root_note`` lowered by ``octaves`` octaves.

    Rows whose ``root_note`` is the rest marker 'R' or the dead-note marker
    'X' are returned unchanged. For every other row a copy is returned in
    which ``root_note`` holds the string form of the transposed
    ``music21.pitch.Pitch``; the row that was passed in is not modified.
    """
    note_name = row['root_note']
    if note_name in ('R', 'X'):
        return row

    pitch = music21.pitch.Pitch(note_name)
    lowered = row.copy()
    # One octave is 12 semitones.
    lowered['root_note'] = str(pitch.transpose(music21.interval.Interval(-12 * octaves)))
    return lowered


def transpose_two_octave_down(row):
    """Return ``row`` with its ``root_note`` lowered by two octaves."""
    return transpose_octave_down(row, octaves=2)


# Normalize every song on its own: trim the surrounding rests and pull songs
# that reach above octave 4 down by one octave (highest octave 5) or by two
# octaves (anything higher).
song_frames = []
# Iterate over the song ids that actually occur: ids start at 1 and may have
# gaps (files that produced no rows), so counting up to the number of unique
# ids would skip songs.
for song_idx in sorted(df['song'].unique()):
    # reset_index() keeps the old labels in an 'index' column.
    song = df[df['song'] == song_idx].reset_index()
    song = strip_rests(song).copy()
    if not len(song):
        continue

    min_octave, max_octave, octave_range = calc_octave_range(song)
    if max_octave > 4:
        octaves = 1 if max_octave == 5 else 2
        # Only the note column changes, so just that column is written back.
        song['root_note'] = song.apply(
            lambda row: transpose_octave_down(row, octaves)['root_note'], axis=1)
        print('-- transposed song: ' + str(song_idx) + ' octaves: ' + str(min_octave) + '-' +str(max_octave))
        # Bring the 'octave' column in line with the transposed notes.
        calc_octave_range(song)

    song_frames.append(song)

# One concat at the end instead of growing a frame song by song.
if song_frames:
    processed = pd.concat(song_frames, ignore_index=True, sort=True)
else:
    processed = pd.DataFrame()

df = processed

# - merging tied notes
# Walk the rows from last to first. The durations of 'tie' rows are summed up
# and added to the next 'normal' row that is reached, i.e. to the note that
# precedes the ties in playing order. Rows of any other type keep their own
# duration and leave the running sum untouched.
import numpy as np
duration_sum = 0
duration_adj = []
for note_type, length in zip(df['type'][::-1], df['duration'][::-1]):
    if note_type == 'tie':
        duration_sum = duration_sum + length
        duration_adj.append(length)
    elif note_type == 'normal':
        duration_adj.append(length + duration_sum)
        duration_sum = 0
    else:
        duration_adj.append(length)

# Back to playing order.
durations = duration_adj[::-1]
df['duration'] = np.array(durations)

# dropping rows: the ties are merged into their notes by now
tie_rows = df.index[df['type'] == 'tie']
df = df.drop(tie_rows)

# dropping columns
df = df.drop(columns=['type', 'no_of_notes', 'chord', 'scnd_note', 'scnd_freq'])

#normalize octave & duration
df['duration'] = df['duration'].clip(upper=2.0) #max 2 x 1/4


#replacing nan with 0
df = df.fillna(0.0)

df.to_csv('output/preprocessed_output.csv')
df

In [None]:
notes = pd.read_csv('output/preprocessed_output.csv')


# create note / duration
def _note_duration_label(row):
    """Join a row's note name and duration into one label, e.g. 'E2-0.5'."""
    return row['root_note'] + '-' + str(row['duration'])

notes['note_duration'] = notes.apply(_note_duration_label, axis=1)

# one-hot encoding
def to_list(textdata):
    """Split a comma-separated label string into a list of labels.

    The text is lower-cased and all whitespace is removed before it is split
    at the commas.
    """
    lowered = textdata.lower()
    without_whitespace = "".join(lowered.split())
    return without_whitespace.split(',')

def multibinarize(series):
    """One-hot encode the comma-separated labels of ``series``.

    Each value is split with ``to_list`` and the resulting label lists are
    encoded with scikit-learn's ``MultiLabelBinarizer``.

    Returns:
        A tuple ``(classes, data)``: the binarizer's ``classes_`` and a
        DataFrame with one column per class, indexed like ``series``.
    """
    # Imported here because the notebook's import cell does not provide it.
    from sklearn.preprocessing import MultiLabelBinarizer

    mlb = MultiLabelBinarizer()
    encoded = mlb.fit_transform(series.apply(to_list))
    # Use the index of the encoded series itself, not that of the unrelated
    # global frame `df`.
    data = pd.DataFrame(encoded, columns=mlb.classes_, index=series.index)
    return mlb.classes_, data

def dummies(series):
    """One-hot encode the comma-separated labels of ``series`` with pandas.

    Each value is split with ``to_list``; the result has one row per entry of
    ``series`` (same index labels) and one column per distinct label, holding
    how often the label occurs in that entry.
    """
    # One row per single label; the original index label is repeated for
    # entries that carry several labels.
    labels = series.apply(to_list).explode()
    # Add the indicator rows of each entry back together. groupby(level=0)
    # replaces `.sum(level=0)`, which was removed in pandas 2.0; sort=False
    # keeps the entries in their order of appearance.
    return pd.get_dummies(labels).groupby(level=0, sort=False).sum()

# get_dummies
labelled_notes = dummies(notes['note_duration'])


# save labels to file
label_names = labelled_notes.columns.values
print(label_names)
df = pd.DataFrame(label_names)
df.to_csv('output/labelled_notes.csv', index=False)

# Put the indicator columns next to the original columns (aligned on the index).
notes = pd.concat([notes, labelled_notes], axis=1)


#drop unneeded columns
notes = notes.drop(
    columns=[
        'Unnamed: 0',
        'ghostNote',
        'hammer',
        'palmMute',
        'slides',
        'duration',
        'root_note',
        'note_duration',
        'octave',
        'index',
    ]
)


In [None]:
# normalize 
# The MinMaxScaler step below is commented out, so `notes` is written to disk
# unscaled.
from sklearn.preprocessing import MinMaxScaler

#scaler = MinMaxScaler() 
#scaled_values = scaler.fit_transform(notes) 
#notes.loc[:,:] = scaled_values
# Write the final frame to CSV (to_csv's default settings also write the index).
notes.to_csv('output/processed_output.csv')
# Display the final frame as the cell output.
notes