In [49]:
%matplotlib inline
import pandas as pd
import numpy as np
import librosa
import seaborn as sb
import matplotlib.pyplot as plt
import itertools
import re
import random
from os import listdir
from os.path import isfile, join
from __future__ import print_function
from numpy import median, diff

In [9]:
sample_rate = 22050*8
class SongFile:
    def load_new(self):
        data, _ = librosa.load(self.music_file, sr=sample_rate)
        _, self.beat_frames = librosa.beat.beat_track(y=data, sr=sample_rate)
        self.beat_times = librosa.frames_to_time(self.beat_frames, sr=sample_rate)

        seconds = len(data) / sample_rate
        bpm = get_beats(self.beat_times, self.beat_frames)[0][1]
        bps = bpm / 60
        # take 24 samples for each beat (need 3rds, 8ths)
        num_samples = int(bps * seconds * 24) - 4 # -1 to avoid out of bounds for now
        beat_time = (1. / bps) / 24
        sample_times = [beat_times[0] + (beat_time * i) for i in range(num_samples)]
        self.music_samples = [data[int(time * sample_rate)] for time in sample_times]
    
    def __init__(self, pack, name, extension, force_new):
        self.pack = pack
        self.name = name
        self.extension = extension
        self.music_file = 'StepMania/Songs/{0}/{1}/{1}.{2}'.format(pack, name, extension)
        self.stepfile = 'StepMania/Songs/{0}/{1}/{1}.ssc'.format(pack, name)
        
        key = '{0}~{1}'.format(pack, name)
        if force_new or (not '{0}_beat_frames.csv'.format(key) in listdir('data')):
            print ('Calculating beats for {0}'.format(key))
            self.load_new()
                        
            print ('Saving calculated beats for {0}'.format(key))
            pd.DataFrame(self.beat_times).to_csv('data/{0}_beat_times.csv'.format(key), index=False)
            pd.DataFrame(self.beat_frames).to_csv('data/{0}_beat_frames.csv'.format(key), index=False)
            pd.DataFrame(self.music_samples).to_csv('data/{0}_music_samples.csv'.format(key), index=False)
        else:
            print ('Loading beats from save for {0}'.format(key))
            self.beat_frames = pd.read_csv('data/{0}_beat_frames.csv'.format(key)).values.flatten()
            self.beat_times = pd.read_csv('data/{0}_beat_times.csv'.format(key)).values.flatten()
            self.music_samples = pd.read_csv('data/{0}_music_samples.csv'.format(key)).values.flatten()

In [11]:
def load_song(pack, title, force_new = False):
    folder_path = 'StepMania/Songs/{0}/{1}'.format(pack, title)
    song = next(file for file in listdir(folder_path) if file.endswith('.ogg') or file.endswith('.mp3'))
    extension = song.split('.')[1]
    key = '{0}~{1}'.format(pack, title)
    song_data[key] = SongFile(pack, title, extension, force_new)

In [16]:
def load_all_songs(force_new):
    songs = [song for song in listdir('StepMania/Songs/In The Groove')]
    songs.remove('.DS_Store')
    for song in songs:
        load_song('In The Groove', song, force_new)

    load_song('_test', 'A', force_new)
    load_song('_test', 'B', force_new)
    load_song('_test', 'C', force_new)

In [17]:
song_data = {}
load_all_songs(False)

Loading beats from save for In The Groove~Anubis
Loading beats from save for In The Groove~Bend Your Mind
Loading beats from save for In The Groove~Boogie Down
Loading beats from save for In The Groove~Bouff
Loading beats from save for In The Groove~Bubble Dancer
Loading beats from save for In The Groove~Changes
Loading beats from save for In The Groove~Charlene
Loading beats from save for In The Groove~Crazy
Loading beats from save for In The Groove~Da Roots
Loading beats from save for In The Groove~Dawn
Loading beats from save for In The Groove~Delirium
Loading beats from save for In The Groove~Disconnected
Loading beats from save for In The Groove~Disconnected -Hyper-
Loading beats from save for In The Groove~Disconnected -Mobius-
Loading beats from save for In The Groove~DJ Party
Loading beats from save for In The Groove~Do U Love Me
Loading beats from save for In The Groove~Don't Promise Me
Loading beats from save for In The Groove~Drifting Away
Loading beats from save for In The 

In [18]:
def get_beats(beat_times, beat_frames):
    changes = []
    changes_time = []
    for i in range(len(beat_frames) - 1):
        changes.append(beat_frames[i + 1] - beat_frames[i])
        changes_time.append(beat_times[i + 1] - beat_times[i])

    sorted_changes = sorted(changes)
    median = sorted_changes[int(len(changes) / 2)]

    changes_counted = [abs(change - median) < 2 for change in changes]
    time_changes_counted = list(itertools.compress(changes_time, changes_counted))
    average = sum(time_changes_counted) / len(time_changes_counted)
    
    time_differences = []
    earliest_proper_beat = 1
    for i in range(1, len(beat_times) - 1):
        if changes_counted[i] & changes_counted[i - 1]:
            earliest_proper_beat = i
            break
            
    last_proper_beat = len(beat_times) -2
    for i in range(1, len(beat_times) - 1):
        if changes_counted[len(beat_times) - i - 1] & changes_counted[len(beat_times) - i - 2]:
            last_proper_beat = len(beat_times) - i - 1
            break
    
    time_differences = []
    buffer = 5
    for i in range(20):
        start_beat = earliest_proper_beat + buffer * i
        if changes_counted[start_beat] & changes_counted[start_beat - 1]:
            for j in range(20):
                end_beat = last_proper_beat - buffer * j
                if changes_counted[end_beat] & changes_counted[end_beat - 1]:
                    time_differences.append(beat_times[end_beat] - beat_times[start_beat])
        
    # get num beats, round, and make new average
    new_averages = [time_difference / round(time_difference / average) for time_difference in time_differences]
    #print (new_averages)
    new_averages.sort()
    num_averages = len(new_averages)
    #new_average = sum(new_averages[5:num_averages - 5]) / (num_averages - 10)
    new_average = new_averages[int(num_averages/2)]
    return [(0, 60./new_average)]
    
def get_time_string(times):
    time_strings = []
    for time in times:
        if time[0] != 0:
            time_strings.append(',')
        time_strings.append('{:}={:.3f}'.format(time[0], time[1]))
    return ''.join(time_strings)

def test_get_beats(beat_times, beat_frames):
    a = get_beats(beat_times, beat_frames)
    a.append((500, 1))
    time = beat_times[0]
    beat_times_mock = [time]
    for i in range(len(a) - 1):
        for j in range(a[i+1][0] - a[i][0]):
            time += 60/a[i][1]
            beat_times_mock.append(time)

    beat_times_pd = pd.DataFrame(beat_times, columns=['VALUE'])
    beat_times_pd['REAL'] = True
    beat_times_pd
    beat_times_mock_pd = pd.DataFrame(beat_times, columns=['VALUE'])
    beat_times_mock_pd['REAL'] = False
    both = beat_times_pd.append(beat_times_mock_pd)
    both['BEAT']=both.index
    linestyles = ["--", "-"]*390
    fig, (ax) = plt.subplots(1,1, figsize=(20,7))
    graph = sb.pointplot(x='BEAT', y='VALUE', hue='REAL', linestyles=linestyles, data=both, ax=ax)
    
#test_get_beats(beat_times)

In [19]:
def get_bpm(file):
    with open(file, "r") as ins:
        for line in ins:
            if line.startswith('#BPMS:'):
                result = re.search('#BPMS:(.*);', line)
                bpm_string = result.group(1)
                if len(bpm_string.split(',')) == 1:
                    return float(bpm_string.split('=')[1])
                return 0
    return 0

In [21]:
songs = [song for song in listdir('StepMania/Songs/In The Groove')]
songs.remove('.DS_Store')
song_bpms = {}
for song in songs:
    song_bpms['In The Groove~{0}'.format(song)] = get_bpm('StepMania/Songs/In The Groove/{0}/{0}.sm'.format(song))
    
song_bpms['_test~A'] = 181.685
song_bpms['_test~B'] = 140
song_bpms['_test~C'] = 180

In [23]:
errors = []
for key in song_data:
    song = song_data[key]
    real_beat = song_bpms[key]
    if real_beat != 0:
        prediced_beat = get_beats(song.beat_times, song.beat_frames)[0][1]
        for i in range (1,4):
            if abs((prediced_beat * (i + 1) / (i)) - real_beat) < abs(prediced_beat - real_beat):
                prediced_beat *= (i + 1) / (i)
        errors.append(abs(prediced_beat - real_beat))
print (sorted(['{0:.3f}'.format(error) for error in errors]))

['0.000', '0.000', '0.001', '0.001', '0.001', '0.001', '0.001', '0.002', '0.002', '0.002', '0.002', '0.003', '0.004', '0.004', '0.004', '0.004', '0.004', '0.005', '0.005', '0.006', '0.006', '0.006', '0.006', '0.007', '0.007', '0.007', '0.008', '0.008', '0.010', '0.010', '0.011', '0.011', '0.011', '0.011', '0.011', '0.011', '0.013', '0.015', '0.016', '0.018', '0.022', '0.023', '0.023', '0.037', '0.061', '0.283', '0.287', '0.315', '0.324', '0.337', '0.347', '0.347', '0.486', '47.329']


In [34]:
regex_notes_with_metadata = '#NOTES:n     dance-single((?:(?!//-).)*);'
regex_metadata_split = ':n     (.*):n     (.*):n     (.*):n     (.*):n     (.*):(.*);'
def get_notes_from_note_string(note_string):
    measure_comments = re.findall('( )*//( )*measure( )*[0-9]*', note_string)
    note_strings_split = re.split(r'n', note_string)[1:-1]
    notes = []
    bar = []
    for row in note_strings_split:
        if len(row) == 4:
            bar.append(row)
        else:
            notes.append(bar)
            bar = []
    return note_strings_split

def get_notes_and_metadata(file):
    difficulty_map = {}
    with open(file) as txt:
        step_file = txt.read()
        step_file = step_file.replace('\n', 'n')
        notes_with_metadata_groups = re.finditer(regex_notes_with_metadata, step_file)
        for match in notes_with_metadata_groups:
            notes_with_metadata = match.group(0)
            split_data = re.search(regex_metadata_split, notes_with_metadata)
            difficulty = split_data.group(4)
            metadata = split_data.group(5)
            notes = get_notes_from_note_string(split_data.group(6))
            notes_with_metadata_map = {
                'DIFFICULTY': difficulty,
                'METADATA': metadata,
                'NOTES': notes,
            }
            difficulty_map[difficulty] = notes_with_metadata_map
    return difficulty_map

In [35]:
songs = [song for song in listdir('StepMania/Songs/In The Groove')]
songs.remove('.DS_Store')
song_steps = {}
for song in songs:
    song_steps['In The Groove~{0}'.format(song)] = get_notes_and_metadata('StepMania/Songs/In The Groove/{0}/{0}.sm'.format(song))

In [309]:
def write_song_header(output_stepfile, info_map):
    keys = ['VERSION', 'TITLE', 'MUSIC', 'OFFSET', 'SAMPLESTART', 'SAMPLELENGTH']
    header_info = {
        'VERSION': 0.82,
        'TITLE': info_map['song_name'],
        'MUSIC': '{0}.mp3'.format(info_map['song_name']),
        'OFFSET': -0.090,
        'SAMPLESTART': info_map['sample_start'],
        'SAMPLELENGTH': info_map['sample_length']
    }
    for key in keys:
        print ("#{0}:{1};".format(key, str(header_info[key])), file=output_stepfile)
        
def write_step_header(output_stepfile, info_map):
    print("//---------------dance-single - ----------------", file=output_stepfile)
    keys = ['NOTEDATA', 'CHARTNAME', 'STEPSTYPE', 'DIFFICULTY', 'METER', 'RADARVALUES', 'BPMS']
    step_info = {
        'NOTEDATA': '',
        'CHARTNAME': 'Kommisar',
        'STEPSTYPE': 'dance-single',
        'DIFFICULTY': 'Beginner',
        'METER': 1,
        'RADARVALUES': '0.234,0.292,0.008,0,0,211,212,1,0,0,0,0,0,0,0.234,0.292,0.008,0,0,211,212,1,0,0,0,0,0,0',
        'BPMS': info_map['bpm']
    }
    for key in keys:
        print ("#{0}:{1};".format(key, str(step_info[key])), file=output_stepfile)
        
def write_notes(output_stepfile, info_map):
    print ("#NOTES:", file=output_stepfile)
    
    for i in range(80):
        print ("0101\n0001\n0101\n0001\n,", file=output_stepfile)
    print ("0000;", file=output_stepfile)

In [329]:
def get_info_map(song):
    info_map = {}
    info_map['song_name'] = song.name
    info_map['song_format'] = song.extension
    times = get_beats(song.beat_times, song.beat_frames)
    bpm_string = get_time_string(times)
    info_map['bpm'] = bpm_string
    tempo = 60./times[0][1]
    info_map['sample_start'] = 16 * tempo
    info_map['sample_length'] = 32 * tempo
    return info_map

In [330]:
def step_song(song):
    output_stepfile=open(song.stepfile, 'w')
    info_map = get_info_map(song)
    write_song_header(output_stepfile, info_map)
    write_step_header(output_stepfile, info_map)
    write_notes(output_stepfile, info_map)
    output_stepfile.close()

In [333]:
step_song(A)
step_song(B)
step_song(C)

In [50]:
def partition(vector, left, right, pivotIndex):
    pivotValue = vector[pivotIndex]
    vector[pivotIndex], vector[right] = vector[right], vector[pivotIndex]  # Move pivot to end
    storeIndex = left
    for i in range(left, right):
        if vector[i] < pivotValue:
            vector[storeIndex], vector[i] = vector[i], vector[storeIndex]
            storeIndex += 1
    vector[right], vector[storeIndex] = vector[storeIndex], vector[right]  # Move pivot to its final place
    return storeIndex
 
def _select(vector, left, right, k):
    "Returns the k-th smallest, (k >= 0), element of vector within vector[left:right+1] inclusive."
    while True:
        pivotIndex = random.randint(left, right)     # select pivotIndex between left and right
        pivotNewIndex = partition(vector, left, right, pivotIndex)
        pivotDist = pivotNewIndex - left
        if pivotDist == k:
            return vector[pivotNewIndex]
        elif k < pivotDist:
            right = pivotNewIndex - 1
        else:
            k -= pivotDist + 1
            left = pivotNewIndex + 1
            
def select(vector, k, left=None, right=None):
    """\
    Returns the k-th smallest, (k >= 0), element of vector within vector[left:right+1].
    left, right default to (0, len(vector) - 1) if omitted
    """
    if left is None:
        left = 0
    lv1 = len(vector) - 1
    if right is None:
        right = lv1
    assert vector and k >= 0, "Either null vector or k < 0 "
    assert 0 <= left <= lv1, "left is out of range"
    assert left <= right <= lv1, "right is out of range"
    return _select(vector, left, right, k)

In [74]:
samples = song_data['In The Groove~Anubis'].music_samples
absolute_samples = [abs(sample) for sample in samples]
new_absolute_samples = absolute_samples
cutoff = select(new_absolute_samples, int(len(new_absolute_samples) / 4))
indices = [sample > cutoff for sample in absolute_samples] 

In [57]:
indices

[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,


In [60]:
cutoff

0.064550742506980896

In [75]:
absolute_samples

[0.00028943430515937507,
 0.018229655921459201,
 0.001901402254588902,
 0.020589953288435936,
 0.020916363224387169,
 0.01323325652629137,
 0.027405166998505592,
 0.014831164851784706,
 0.0090711675584316236,
 0.028041195124387741,
 0.0017623142339289188,
 0.0,
 0.020090410485863689,
 0.0014012963511049747,
 0.018394749611616131,
 0.023908169940114018,
 0.013132426887750627,
 0.012002126313745977,
 0.011218992993235588,
 0.00094433926278725277,
 0.012080567888915541,
 0.0010645473375916481,
 0.022169679403305054,
 0.012553143315017223,
 0.013792805373668671,
 0.0054854047484695903,
 0.007962801493704319,
 0.0065485658124089241,
 0.029239112511277199,
 0.016250854358077049,
 0.0044888085685670384,
 0.013052330352365971,
 0.028576994314789769,
 0.0058190193958580494,
 0.022308791056275368,
 0.029471550136804581,
 0.0022812718525528912,
 0.01859748363494873,
 0.022793153300881386,
 0.025187099352478981,
 0.028306381776928902,
 0.0002585369220469147,
 0.0066277044825255871,
 0.001625243574

In [63]:
samples[:30]

array([  3.60486090e-01,  -6.33608282e-01,  -3.63201231e-01,
         1.94671035e-01,   5.49491085e-02,   8.22863355e-02,
        -5.19268513e-02,   1.33794695e-01,   4.94023561e-02,
         1.02614887e-01,  -2.89434305e-04,  -4.94689569e-02,
         6.91425949e-02,  -9.79611352e-02,  -1.21515714e-01,
        -8.58131945e-02,   1.82296559e-02,   8.85947645e-02,
        -2.31444016e-01,  -1.49595678e-01,   1.90140225e-03,
         3.35818976e-02,   3.42114270e-02,   4.38803695e-02,
        -1.11865647e-01,   1.16503075e-01,   2.05899533e-02,
        -2.09163632e-02,   1.32332565e-02,  -7.69507959e-02])

In [64]:
[abs(sample) for sample in samples[:30]]

[0.36048609018325811,
 0.63360828161239624,
 0.36320123076438893,
 0.19467103481292725,
 0.054949108511209488,
 0.082286335527896881,
 0.051926851272583015,
 0.13379469513893127,
 0.049402356147766113,
 0.10261488705873489,
 0.00028943430515937507,
 0.049468956887722015,
 0.069142594933509827,
 0.09796113520860672,
 0.12151571363210678,
 0.085813194513320923,
 0.018229655921459201,
 0.088594764471054077,
 0.23144401609897611,
 0.14959567785263062,
 0.001901402254588902,
 0.033581897616386414,
 0.034211426973342896,
 0.043880369514226913,
 0.11186564713716507,
 0.11650307476520537,
 0.020589953288435936,
 0.020916363224387169,
 0.01323325652629137,
 0.076950795948505402]