In [8]:
%matplotlib inline
import copy
import pandas as pd
import numpy as np
import librosa
import seaborn as sb
import matplotlib.pyplot as plt
import itertools
import re
import random
from os import listdir
from os.path import isfile, join
from __future__ import print_function
from numpy import median, diff

# SongFile:
### Fields
- beat_frames:  
- beat_times: 
- bpm: 
- bpm_string: 
- beat_length: 
- sample_times: 
- indices: 
- music_samples: 
- data: 

- pack
- name
- extension
- music_file
- stepfile
### Output
- data/{0}_beat_times.csv
- data/{0}_beat_frames.csv
- data/{0}_music_samples.csv

# TODO separate music and stepfile (dont assume scc, or always use sm)

In [26]:
sample_rate_down = 1
hop_length_down = 8
sr = 11025 * 16 / sample_rate_down
hop_length = 512 / (sample_rate_down * hop_length_down)
samples_per_second = 48 / 4
steps_per_bar = 12
class SongFile:
    def load_new(self):
        data, _ = librosa.load(self.music_file, sr=sr)
        _, self.beat_frames = librosa.beat.beat_track(y=data, sr=sr, hop_length=hop_length)
        self.beat_times = librosa.frames_to_time(self.beat_frames, sr=sr, hop_length=hop_length)

        seconds = len(data) / sr
        self.bpm = get_beats(self.beat_times, self.beat_frames)
        
        self.beat_length = 60. / self.bpm
        bps = self.bpm / 60
        # take 24 samples for each beat (need 3rds, 8ths)
        num_samples = int(bps * seconds * samples_per_second)
        beat_time = self.beat_length / samples_per_second
        
        sample_times = [self.beat_times[0] + (beat_time * i) for i in range(num_samples)]
        # only take samples where music still playing
        self.sample_times = [time for time in sample_times if round(time * sr) < len(data)]
        self.indices = [round(time * sr) for time in self.sample_times]
        self.music_samples = [data[index] for index in self.indices]
        self.data = data
    
    def __init__(self, pack, name, extension, force_new):
        self.pack = pack
        self.name = name
        self.extension = extension
        self.music_file = 'StepMania/Songs/{0}/{1}/{1}.{2}'.format(pack, name, extension)
        self.stepfile = 'StepMania/Songs/{0}/{1}/{1}.ssc'.format(pack, name)
        
        key = '{0}~{1}'.format(pack, name)
        if force_new or (not '{0}_beat_frames.csv'.format(key) in listdir('data')):
            print ('Calculating beats for {0}'.format(key))
            self.load_new()
                        
            print ('Saving calculated beats for {0}'.format(key))
            pd.DataFrame(self.beat_times).to_csv('data/{0}_beat_times.csv'.format(key), index=False)
            pd.DataFrame(self.beat_frames).to_csv('data/{0}_beat_frames.csv'.format(key), index=False)
            pd.DataFrame(self.music_samples).to_csv('data/{0}_music_samples.csv'.format(key), index=False)
        else:
            #print ('Loading beats from save for {0}'.format(key))
            self.beat_frames = pd.read_csv('data/{0}_beat_frames.csv'.format(key)).values.flatten()
            self.beat_times = pd.read_csv('data/{0}_beat_times.csv'.format(key)).values.flatten()
            self.music_samples = pd.read_csv('data/{0}_music_samples.csv'.format(key)).values.flatten()

# Some useful functions to load induvidual or lists of songs
- load_song(pack: String, pack: String, force_new: Bool)
- load_songs(songs: Array(Pair(String~pack, String~title)), force_new: Bool)
- load_all_songs(force_new: Bool)

In [10]:
def load_song(pack, title, force_new):
    folder_path = 'StepMania/Songs/{0}/{1}'.format(pack, title)
    song = next(file for file in listdir(folder_path) if file.endswith('.ogg') or file.endswith('.mp3'))
    extension = song.split('.')[1]
    key = '{0}~{1}'.format(pack, title)
    return SongFile(pack, title, extension, force_new)

def load_songs(songs, force_new):
    return {'{0}~{1}'.format(song[0], song[1]): load_song(song[0], song[1], force_new) for song in songs}

def load_all_songs(force_new):
    songs = [('In The Groove', song) for song in listdir('StepMania/Songs/In The Groove') if song != '.DS_Store']
    songs.extend([('a_test', song) for song in ['A', 'B', 'C']])
    return load_songs(songs, force_new)

# Functions to get bpm from song
- get_beats(beat_times: Array(Float), beat_frames: Array(Int))

In [11]:
def get_beats(beat_times, beat_frames):
    changes = []
    changes_time = []
    for i in range(len(beat_frames) - 1):
        changes.append(beat_frames[i + 1] - beat_frames[i])
        changes_time.append(beat_times[i + 1] - beat_times[i])

    sorted_changes = sorted(changes)
    median = sorted_changes[int(len(changes) / 2)]
    median = max(set(sorted_changes), key=sorted_changes.count)

    changes_counted = [False] * len(changes)
    time_changes_sum = 0
    time_changes_count = 0
    for i in range(len(changes)):
        # can use other factors (eg if song has a slow part take double beats into accout)
        # in [0.5, 1, 2]:
        for change_factor in [1]:
            if abs((changes[i] * change_factor) - median) <= hop_length_down:
                changes_counted[i] = True
                time_changes_sum += (changes_time[i] * change_factor)
                time_changes_count += change_factor
            
    average = time_changes_sum / time_changes_count
    
    time_differences = []
    earliest_proper_beat = 1
    for i in range(1, len(beat_times) - 1):
        if changes_counted[i] & changes_counted[i - 1]:
            earliest_proper_beat = i
            break
            
    last_proper_beat = len(beat_times) -2
    for i in range(1, len(beat_times) - 1):
        if changes_counted[len(beat_times) - i - 1] & changes_counted[len(beat_times) - i - 2]:
            last_proper_beat = len(beat_times) - i - 1
            break
    
    time_differences = []
    buffer = 5
    for i in range(20):
        start_beat = earliest_proper_beat + buffer * i
        if changes_counted[start_beat] & changes_counted[start_beat - 1]:
            for j in range(20):
                end_beat = last_proper_beat - buffer * j
                if changes_counted[end_beat] & changes_counted[end_beat - 1]:
                    time_differences.append(beat_times[end_beat] - beat_times[start_beat])
        
    # get num beats, round, and make new average
    new_averages = [time_difference / round(time_difference / average) for time_difference in time_differences]
    #print (new_averages)
    new_averages.sort()
    num_averages = len(new_averages)
    #new_average = sum(new_averages[5:num_averages - 5]) / (num_averages - 10)
    new_average = new_averages[int(num_averages/2)]
    return 60./new_average

# Helpers to test get_beats() above
- get_bpm(file: String)
- get_song_bpms()
- test_get_beats(song_data: Map(SongData))

In [12]:
def get_bpm(file):
    with open(file, "r") as ins:
        for line in ins:
            if line.startswith('#BPMS:'):
                result = re.search('#BPMS:(.*);', line)
                bpm_string = result.group(1)
                if len(bpm_string.split(',')) == 1:
                    return float(bpm_string.split('=')[1])
                return 0
    return 0

def get_song_bpms(song_data):
    song_bpms = {}
    for key, song in song_data.items():
        song_bpms[key] = get_bpm('StepMania/Songs/{0}/{1}/{1}.sm'.format(song.pack, song.name))
    return song_bpms

def test_get_beats(song_data):
    song_bpms = get_song_bpms(song_data)
    errors = []
    for key in song_data:
        song = song_data[key]
        real_beat = song_bpms[key]
        if real_beat != 0:
            prediced_beat = get_beats(song.beat_times, song.beat_frames)[0][1]
            for i in range (1,4):
                if abs((prediced_beat * (i + 1) / (i)) - real_beat) < abs(prediced_beat - real_beat):
                    prediced_beat *= (i + 1) / (i)
            print ('{0:.3f} - {1:.3f} = {2:.3f} ({3})'.format(prediced_beat, real_beat, prediced_beat - real_beat, song.name))

#songs = [('In The Groove', song) for song in listdir('StepMania/Songs/In The Groove') if song != '.DS_Store'][:15]
#song_data_temp = load_songs(songs, True)
#test_get_beats(song_data_temp)

# Helpers to read .sm and return notes and meta data
- get_notes_from_note_string(note_string)
- get_notes_and_metadata(file)
- get_song_steps()

In [13]:
regex_notes_with_metadata = '#NOTES:n     dance-single((?:(?!//-).)*);'
regex_metadata_split = ':n     (.*):n     (.*):n     (.*):n     (.*):n     (.*):(.*);'
def get_notes_from_note_string(note_string):
    measure_comments = re.findall('( )*//( )*measure( )*[0-9]*', note_string)
    note_strings_split = re.split(r'n', note_string)[1:-1]
    notes = []
    bar = []
    for row in note_strings_split:
        if len(row) == 4:
            bar.append(row)
        else:
            notes.append(bar)
            bar = []
    return note_strings_split

def get_notes_and_metadata(file):
    difficulty_map = {}
    with open(file) as txt:
        step_file = txt.read()
        step_file = step_file.replace('\n', 'n')
        notes_with_metadata_groups = re.finditer(regex_notes_with_metadata, step_file)
        for match in notes_with_metadata_groups:
            notes_with_metadata = match.group(0)
            split_data = re.search(regex_metadata_split, notes_with_metadata)
            difficulty = split_data.group(4)
            metadata = split_data.group(5)
            notes = get_notes_from_note_string(split_data.group(6))
            notes_with_metadata_map = {
                'DIFFICULTY': difficulty,
                'METADATA': metadata,
                'NOTES': notes,
            }
            difficulty_map[difficulty] = notes_with_metadata_map
    return difficulty_map

def get_song_steps():
    songs = [song for song in listdir('StepMania/Songs/In The Groove')]
    songs.remove('.DS_Store')
    song_steps = {}
    for song in songs:
        song_steps['In The Groove~{0}'.format(song)] = get_notes_and_metadata('StepMania/Songs/In The Groove/{0}/{0}.sm'.format(song))
    return song_steps

In [14]:
def write_song_header(output_stepfile, song):
    keys = ['VERSION', 'TITLE', 'MUSIC', 'OFFSET', 'SAMPLESTART', 'SAMPLELENGTH']
    
    header_info = {
        'VERSION': 0.82,
        'TITLE': song.name,
        'MUSIC': '{0}.{1}'.format(song.name, song.extension),
        'OFFSET': -song.beat_times[0],
        'SAMPLESTART': song.beat_times[0] + 32 * song.beat_length,
        'SAMPLELENGTH': 32 * song.beat_length
    }
    
    for key in keys:
        print ("#{0}:{1};".format(key, str(header_info[key])), file=output_stepfile)
        
def write_step_header(output_stepfile, song):
    print("//---------------dance-single - ----------------", file=output_stepfile)
    keys = ['NOTEDATA', 'CHARTNAME', 'STEPSTYPE', 'DIFFICULTY', 'METER', 'RADARVALUES', 'BPMS']
        
    step_info = {
        'NOTEDATA': '',
        'CHARTNAME': 'Kommisar',
        'STEPSTYPE': 'dance-single',
        'DIFFICULTY': 'Expert',
        'METER': 9,
        'RADARVALUES': '0.234,0.292,0.008,0,0,211,212,1,0,0,0,0,0,0,0.234,0.292,0.008,0,0,211,212,1,0,0,0,0,0,0',
        'BPMS': '0={0.5f}'.format(song.bpm_string)
    }
    for key in keys:
        print ("#{0}:{1};".format(key, str(step_info[key])), file=output_stepfile)
        
def write_notes_simple(output_stepfile, song):
    print ("#NOTES:", file=output_stepfile)
    
    for i in range(40):
        print ("0001\n0001\n0001\n0001\n,", file=output_stepfile)
    print ("0000;", file=output_stepfile)
    
def write_notes(output_stepfile, song):
    print ("#NOTES:", file=output_stepfile)
    
    samples = song.music_samples
    # take steps_per_bar / 4 samples per beat (steps_per_bar per bar)
    steps_per_beat = steps_per_bar / 4
    filter_ammount = samples_per_second / steps_per_beat
    
    absolute_samples = [samples[i] for i in range(len(samples)) if i % filter_ammount == 0]
    # show 1 / 3 of all notes
    cutoff_index = int(len(absolute_samples) / 3)
    cutoff = sorted(absolute_samples)[-cutoff_index]
    indices = [sample > cutoff for sample in absolute_samples]
    
    for i in range(len(indices)):
        if indices[i]:
            print ("0001", file=output_stepfile)
        else:
            print ("0000", file=output_stepfile)
        if i % steps_per_bar == 0 and i != 0:
            print (",", file=output_stepfile)

    print ("0000;", file=output_stepfile)
    
def step_song(song):
    output_stepfile=open(song.stepfile, 'w')
    write_song_header(output_stepfile, song)
    write_step_header(output_stepfile, song)
    write_notes(output_stepfile, song)
    output_stepfile.close()

In [None]:
#song_data = load_all_songs(False)

import copy
song = load_song('a_test', 'A', True)
song2 = copy.deepcopy(song)
song3 = copy.deepcopy(song)
song2.name = 'A2'
song3.name = 'A3'
song2.stepfile = 'StepMania/Songs/{0}/{1}/{1}.ssc'.format('a_test', 'A2')
song3.stepfile = 'StepMania/Songs/{0}/{1}/{1}.ssc'.format('a_test', 'A3')

In [None]:
step_song(song)
#step_song(song2)
#step_song(song3)

In [27]:
song = load_song('a_test', 'A', True)
backup1 = copy.deepcopy(song)
#song2 = load_song('a_test', 'Fire', True)
#song3 = load_song('a_test', 'WeBelongTogether', True)
#song4 = load_song('a_test', 'CallMeBaby', True)

Calculating beats for a_test~A


  return array(a, dtype, copy=False, order=order)


Saving calculated beats for a_test~A


In [16]:
onsets = librosa.onset.onset_detect(y=song.data, sr=sr, hop_length=hop_length)

  return array(a, dtype, copy=False, order=order)


In [32]:
print (len(song.indices))
print (song.indices[:20])
print (song.indices[9400:])

9426
[72512.0, 76075.0, 79639.0, 83202.0, 86765.0, 90328.0, 93892.0, 97455.0, 101018.0, 104581.0, 108145.0, 111708.0, 115271.0, 118834.0, 122398.0, 125961.0, 129524.0, 133087.0, 136651.0, 140214.0]
[33567117.0, 33570680.0, 33574243.0, 33577806.0, 33581370.0, 33584933.0, 33588496.0, 33592059.0, 33595623.0, 33599186.0, 33602749.0, 33606312.0, 33609876.0, 33613439.0, 33617002.0, 33620565.0, 33624129.0, 33627692.0, 33631255.0, 33634819.0, 33638382.0, 33641945.0, 33645508.0, 33649072.0, 33652635.0, 33656198.0]


In [33]:
print (len(onsets))
print (onsets[:20])
print (onsets[2800:])
32507904
33656198

2816
[1120 1318 1719 1818 2514 2624 2929 3134 3344 3787 3943 4038 4145 4275 4669
 4814 5021 5145 5242 5452]
[513872 514085 514306 514397 514501 514740 514865 514954 515100 515389
 515503 516472 516838 517106 517484 517894]


In [40]:
onsets = librosa.onset.onset_detect(y=song.data, sr=sr, hop_length=512)
onsets_scaled = [onset * 512 for onset in onsets]

In [41]:
print (len(onsets_scaled))
print (onsets_scaled[:20])
print (onsets_scaled[980:])

1002
[72192, 116736, 201216, 242688, 287232, 370688, 754688, 785408, 883200, 898048, 903680, 925696, 1053696, 1097216, 1267200, 1326592, 1346048, 1385984, 1395200, 1438208]
[31952384, 31973376, 31993856, 32037376, 32050176, 32079360, 32123392, 32165376, 32207872, 32229376, 32250368, 32272384, 32291328, 32314880, 32335872, 32365056, 32376832, 32400384, 32421376, 32464384, 32485888, 32507904]


In [47]:
i = 0
onset_happened_in_frame = [False] * len(song.indices)
for onset in onsets_scaled:
    while abs(onset - song.indices[i]) > abs(onset - song.indices[i + 1]):
        i += 1
    onset_happened_in_frame[i] = True

In [48]:
len(onset_happened_in_frame)

9426

In [None]:
y_harmonic, y_percussive = librosa.effects.hpss(song.data)

In [None]:
beat_frames = librosa.samples_to_frames(song.indices)

In [None]:
# Compute MFCC features from the raw signal
mfcc = librosa.feature.mfcc(y=song.data, sr=sr, hop_length=512, n_mfcc=13)

In [None]:
# And the first-order differences (delta features)
mfcc_delta = librosa.feature.delta(mfcc)

In [None]:
# Stack and synchronize between beat events
# This time, we'll use the mean value (default) instead of median
beat_mfcc_delta = librosa.feature.sync(np.vstack([mfcc, mfcc_delta]), beat_frames)

In [None]:
# Compute chroma features from the harmonic signal
chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)

In [None]:
# Aggregate chroma features between beat events
# We'll use the median value of each feature between beat frames
beat_chroma = librosa.feature.sync(chromagram, beat_frames, aggregate=np.median)

In [None]:
# Finally, stack all beat-synchronous features together
beat_features = np.vstack([beat_chroma, beat_mfcc_delta])

In [None]:
print (len(beat_features))
print (len(beat_features[0]))

In [None]:
len(beat_frames)

In [None]:
len(song.data)

In [None]:
def is_onset(index, onsets):
    
onset_samples_frames = [is_onset(index, onsets) for index in song.indices]