# Dance Dance Convolution - Revisited

Revision of the original "Dance Dance Convolution" paper, that incorporates newer machine learning and AI techniques to "hopefully" improve the original model results.

## Imports

In [1]:
import os 
from os.path import isfile, join, splitext, basename, normpath
from pathlib import Path
from collections import defaultdict
import numpy as np
from sm_parsing import stepfile_parser

## Loading and Cleaning Dataset

In [2]:
# Path to dataset
base_path = "./dataset"

# Song packs inside dataset
# (Ignores files that are not directories)
song_packs = [f for f in os.listdir(base_path) if not isfile(join(base_path, f))]

print(f"Found a total of {len(song_packs)} song packs.")

# =======================================
# SONG PACK CLEANING AND DATA EXTRACTION
# =======================================

# Files that dont add anything to training are deleted (videos, images, txts, etc.)
# Wanted file extensions
wanted_ext = [".ssc", ".sm", ".ogg", ".ini"]

# Counter for the number of files deleted.
files_deleted = 0

# Counter for the number of songs encountered
songs_encountered = 0

# Dictionary that will get one entry for each pack
pack_data = {}

# For every song pack
for pack_name in song_packs:

    # Empty dict that stores all relevant filepaths for a song inside a pack
    # All unseen keys are assigned an empty list by default
    song_data = defaultdict(lambda: [])

    # Go through every file in the song pack
    # (including files and subfiles)
    for path, _, files in os.walk(join(base_path, pack_name)):

        # For every file inside the base path
        for file in files: 

            # Get the current file's parent folder (song folder)
            # 1. The absolute path for the parent directory is extracted
            # 2. 'normpath' strips off any trailing slashes
            # 3. 'basename' returns the last part of the path
            parent_name = basename(normpath(Path(path)))

            # If the file has a "pack_name" as a parent the file is outside 
            # a song folder, it is ignored as a result.
            if parent_name in song_packs:
                print(f"Found '{file}' outside of a song folder. Ignoring file.")
                continue

            # The file extension is extracted
            _, ext = splitext(file)

            # File is deleted if it has an unwanted extension
            if ext not in wanted_ext:
                try:
                    os.remove(join(path, file))
                    files_deleted += 1
                except Exception as e:
                    raise Exception(e)

            # All the paths that relate to a song are stored in a dict
            # according to their name and the songpack they belong to
            else:
                song_data[parent_name].append(join(path, file))
    
    # The "song_data" is stored inside the "pack_data"
    # (This is to prevent two packs having the same title for a
    # song and risking overwriting the data for one song.)
    pack_data[pack_name] = song_data

    # We add the number of songs in the pack to "songs_encountered"
    songs_encountered += len(list(song_data.keys()))

# Printout after cleaning
if files_deleted == 0:
    print(f"Dataset already clean. {songs_encountered} songs found. 0 files deleted.")
else:
    print(f"Dataset cleaned successfully. {songs_encountered} songs found. {files_deleted} files deleted.")


Found a total of 6 song packs.
Found 'group.ini' outside of a song folder. Ignoring file.
Dataset already clean. 230 songs found. 0 files deleted.


## Data Extraction from Stepfiles

In [3]:
# Extension of the required stepfile
stepfile_ext = ".sm"

# Counter for songs successfully processed
successfully_processed = 0

# Dict to store the tag data for each song in the pack 
dataset_tags = {}

# For every pack in the dataset
for pack_name in pack_data.keys():
    
    # For every song in the song pack
    for song_name in pack_data[pack_name]: 

        try:
            # For every path corresponding to the current song, we take
            # the one that contains the extension that we need. Due to it being
            # returned inside of a list, we get the first element.
            stepfile_path = [path for path in pack_data[pack_name][song_name] if stepfile_ext in path][0]

        # If an error occurs while getting the stepfile path,
        # the program skips the current song
        except Exception:
            print(f"No '{stepfile_ext}' found for song '{song_name}' in song pack '{pack_name}'. Skipping song.")
            continue

        # Step file content is extracted as text
        with open(stepfile_path, 'r', encoding="utf-8") as stepfile:
            stepfile_txt = stepfile.read()
            
        # The text of each song is parsed and turned into a dict of tags
        dataset_tags[(pack_name, song_name)] = stepfile_parser(stepfile_txt)

        # Increase the number of files succesfully processed
        successfully_processed += 1

# Successful files
print(f"Number of succesfully processed songs: {successfully_processed} / {songs_encountered}")

  if arrow_code in note_blocks[block] and replace_letter_arrow_types == True:


Number of succesfully processed songs: 230 / 230


BPMS = Lista de tuplas
STOPS = Lista de tuplas
NOTES = Lista con listas, donde cada lista corresponde a un bloque. Cada línea del bloque es un elemento de la lista

- Incluir "required attributes"

Pasos postprocesado:
- Calcula los SPB (Seconds Per Beat) al hacer 60/BPM para cada BPM. 
- Utiliza el SPB para calcular el largo en segundos para cada sección de la canción con BPM distinto. Siempre se debe tener un BPM impar, ya que se calcularía el tiempo en segundos entre cada pareja de BPMs.

REQUIRED_ATTRIBUTES = ['offset', 'bpms', 'notes']

In [27]:
song_tags = dataset_tags[("Fraxtil's Arrow Arrangements", "Bad Ketchup")]
song_tags = dataset_tags[("KDA - ALL OUT", "MORE")]

for chart in song_tags["notes"]:

    # =================
    # SEGMENT LENGTH
    # =================

    # BPMS for the song (applies for all difficulties)
    # They are converted into a numpy array
    BPMs = np.array(song_tags["bpms"])

    # If the song has no BPM
    if len(BPMs) <= 0:
        raise Exception("No value for the BPM attribute.")

    # If the song has more than 1 Beat,BPM pair
    if len(BPMs) > 1:

        # Seconds per beat.
        # 1 Beat/Min = 1 Beat / 60s -> 1 Second/Beat = 60s/1 Beat 
        SPBs = 60 / BPMs[:,1]

        # Difference between all subsequent "beat" timestamps
        # Length of a segment measured in beats.
        beat_diffs = np.diff(BPMs[:,0], axis=0)

        # Get the length of a "BPM segment" (segment in which the BPM
        # remains constant), measured in seconds. We dont take the last element
        # of the SPBs, as this is the last segment that ends until the end of the
        # song, it is not part of a segment with a measurable length.
        bpm_segment_len = beat_diffs * SPBs[0:-1]

        # Cumulative sum of bpm segment length
        cumulative_bpm_len = np.cumsum(bpm_segment_len)

    # =======================
    # BEATS TO ABSOLUTE TIME
    # =======================

    # For each measure in the note data
    # Remember: There are four beats in each measure.
    for measure_num, measure in enumerate(chart["notedata"]):

        # ===========================
        # BEAT INDEX
        # ===========================
        
        # Number of lines or rows in the current measure
        measure_len = measure.shape[0]

        # Each line is assigned a "line number":
        # A number from 1 to the length of the measure
        line_number = np.linspace(1, measure_len, measure_len)
        
        # If we were to assign an index to every row in all measures
        # we would have a "beat_index". This calculates the beat_index
        # of each line. 
        # 
        # Steps:
        # - Get the number of "beats passed" by getting the current measure
        #   number (because of the enumerate, the count goes from 0 to the number
        #   of the last measure) and multiplying it by the 4 beats in each measure.
        # - Add the "fractions of beat" that correspond to each of the lines of the
        #   current measure.
        beat_index = 4*measure_num + 4*(line_number / measure_len)

        # ===========================
        # BPM SEGMENT
        # ===========================
        
        # If there is more than 1 timestamp in "BPM"
        if len(BPMs) > 1:

            # Determine to which "bpm_segment" the current beats pertain. For this we
            # use the function "bisect" applied to the whole vector (searchsorted). This
            # function returns the index where a number should be placed in an array, to
            # keep the array sorted. 
            #
            # For example: 'searchsorted([1,2,3,4], 2.5)' will return 2 as using this index
            # to place the 2.5 inside the list, will place it after the 2.
            #
            # Given this, we can input our list of beat timestamps in "BPMs" as a first argument
            # and our list of beats as a second argument. This will give us the "next" beat segment
            # in which our beat should be placed to be greater than the previous beat timestamp.
            # By subtracting one from this result, we will get the beat segment each beat belongs to.
            # In short, we obtain the BPM segment to which every beat belongs to.
            #timestamps = np.append(BPMs[:,0], np.Inf)
            bpm_segment_beat_idx = np.searchsorted(BPMs[:,0], beat_index, side="left") - 1

            # Cumulative bpm segment length for all current beats
            # Subtract 1 to make the indices start on 0
            cumulative_bpm_beat = cumulative_bpm_len[bpm_segment_beat_idx - 1]
        
        # If BPM only has one beat timestamp
        # - We asume that all beats pertain to the only segment available: 0
        # - There is nothing to "cumulative sum", so the sum is equal to 0
        else:
            bpm_segment_beat_idx = 0
            cumulative_bpm_beat = 0

        # =================
        # STOPS
        # =================

        # Stops for the current song
        stops = song_tags["stops"]

        # If stops is not empty
        if stops is not None:

            # Cumulative sum for all the error lengths (in seconds)
            cumulative_stop_len = np.cumsum(stops[:,1])

            # If the first BPM listed is not 0, we append a cero
            # at the beggining of the cumulative sum.
            if stops[0,0] != 0:
                cumulative_stop_len = np.append(0, cumulative_stop_len)
            
            # To which "stop segment" each beat pertains to.
            # For reference return to the statement used to get "bpm_segment_per_beat"
            stop_segment_beat_idx = np.searchsorted(stops[:,0], beat_index, side="left")

            # Cumulative sum of stopped time for each beat
            cumulative_stop_beat = cumulative_stop_len[stop_segment_beat_idx]

        # If there are no stops, the cumulative per beat is 0
        else:
            cumulative_stop_beat = 0
        
        # =================
        # PARTIAL SEGMENT
        # =================

        # Steps:
        # 1. The BPM for each segment is converted to seconds per beat (SPB)
        # 2. We get the difference between the current beat and the las BPM segment limit (given in beats)
        # 3. The previous difference is converted into seconds by multiplying by the SPB (SPB x B = S)
        partial_segment_spb = (60 / BPMs[bpm_segment_beat_idx, 1])
        partial_segment = partial_segment_spb * (beat_index - BPMs[bpm_segment_beat_idx, 0])

        # =================
        # OFFSET
        # =================

        # Offset and Stops for the song
        offset = song_tags["offset"]

        # Get absolute time
        # Got this directly from DDC. No idea about the logic behind it.
        beat_abs_time = cumulative_bpm_beat + cumulative_stop_beat - offset + partial_segment

        print(measure_num, measure_len)
        print(line_number)
        print(beat_index)
        print(beat_abs_time)
        print(measure)
        print(" =========== Decomposition ================")
        print("Cumulative BPM:", cumulative_bpm_beat)
        print("Cumulative Stop:", cumulative_stop_beat)
        print("Offset:", offset)
        print("Partial Segment:", partial_segment)
        print("Abs Time:", beat_abs_time)
        print("=========================")

    break

0 4
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[ -0.21026761 113.66297183 114.08550704 114.50804225]
[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]
Cumulative BPM: [113.02816901 113.23943662 113.23943662 113.23943662]
Cumulative Stop: 0
Offset: -0.001
Partial Segment: [-113.23943662    0.42253521    0.84507042    1.26760563]
Abs Time: [ -0.21026761 113.66297183 114.08550704 114.50804225]
1 4
[0. 1. 2. 3.]
[4. 5. 6. 7.]
[114.93057746 115.35311268 115.77564789 116.1981831 ]
[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]
Cumulative BPM: [113.23943662 113.23943662 113.23943662 113.23943662]
Cumulative Stop: 0
Offset: -0.001
Partial Segment: [1.69014085 2.11267606 2.53521127 2.95774648]
Abs Time: [114.93057746 115.35311268 115.77564789 116.1981831 ]
2 4
[0. 1. 2. 3.]
[ 8.  9. 10. 11.]
[116.62071831 117.04325352 117.46578873 117.88832394]
[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]
Cumulative BPM: [113.23943662 113.23943662 113.23943662 113.23943662]
Cumulative Stop: 0
Offset: -0.001
Partial Segment: [3.38028

In [4]:
dataset_tags[("Fraxtil's Arrow Arrangements", "Bad Ketchup")]

{'title': 'Bad Ketchup',
 'subtitle': None,
 'artist': 'Ladyscraper',
 'titletranslit': None,
 'subtitletranslit': None,
 'artisttranslit': None,
 'genre': 'Breakcore',
 'credit': 'Fraxtil',
 'banner': 'bn.png',
 'background': 'bg.png',
 'lyricspath': None,
 'cdtitle': '../fraxtil.png',
 'music': 'Bad Ketchup.ogg',
 'offset': 0.0,
 'samplestart': 15.333,
 'samplelength': 15.0,
 'selectable': True,
 'bpms': array([[  0., 180.]]),
 'stops': None,
 'bgchanges': None,
 'keysounds': None,
 'attacks': None,
 'notes': [{'charttype': 'dance-single',
   'description/author': 'Fraxtil',
   'difficulty': 'Challenge',
   'numericalmeter': 13,
   'grooveradar': [1.0,
    1.0,
    0.197,
    0.274,
    1.0,
    958.0,
    28.0,
    39.0,
    22.0,
    0.0,
    12.0,
    1.0,
    1.0,
    0.197,
    0.274,
    1.0,
    958.0,
    28.0,
    39.0,
    22.0,
    0.0,
    12.0],
   'notedata': [array([[0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]]),
    array([[0

In [5]:
from scipy.signal import stft

