In [1]:
import pandas as pd, numpy as np
import re
import seaborn as sns
import sys  
sys.path.insert(1, '../src')
from step_serializer import StepSerializer
from step_pattern_searcher import StepPatternSearcher

# Stepchart Data

Begin by reading in a data frame as output by the ``sss_crawler.py`` script found in the ``src`` folder of the NLPump directory.

In [2]:
# Load the crawled stepchart table; the first CSV column is used as the row index.
chart_csv = '../data/PRIME-PHOENIX.csv'
df = pd.read_csv(chart_csv, index_col=0)
df.head()

Unnamed: 0,Song Title,Step Type,Difficulty,Steps
0,Nemesis,S,4,6.071:Z-6.821:Z-7.571:C-8.321:C-9.071:Q-9.821:...
1,Nemesis,S,7,6.071:z1-6.634:z0-6.821:c1-7.384:c0-7.571:S-8....
2,Nemesis,S,10,6.071:z1-6.821:e1z-7.196:z0e0-7.571:S-8.321:E-...
3,Nemesis,S,16,4.571:Q-4.758:C-4.946:Z-5.134:E-5.321:Q-5.508:...
4,Nemesis,S,18,6.071:z1-6.821:s1e1z-7.196:z0s0e0-7.571:c1-8.1...


Now separate out the single and double charts, as searching for double patterns within a single chart will result in dictionary key errors (this will be fixed eventually).

In [3]:
# Split the charts by step type. Each subset is an independent copy, so result
# columns added to it later do not touch df.
step_type = df['Step Type']
df_s = df[step_type == 'S'].copy()
df_d = df[step_type == 'D'].copy()

# Searching for Patterns

A pattern is defined as a sequence of steps such that the time between each pair of consecutive steps is constant. Let's discuss the syntax for pattern input first.

## Syntax

The search function of the StepPatternSearcher class takes a string input which represents the pattern of interest. This string should consist of hyphen-separated "notes". Each note consists of one or more characters representing panels which need to be hit simultaneously. We use the convention that the characters Z,Q,S,E,C correspond to the down-left, up-left, center, up-right, and down-right panels of the player 1 (left) pad, while the characters V,R,G,Y,N correspond to the respective panels of the player 2 (right) pad.

Capital letters indicate tap notes, while lowercase letters indicate hold notes. Examples:

- M-Run (Singles): 'Z-Q-S-E-C'
- M-Run (Doubles): 'Z-Q-S-E-C-V-R-G-Y-N'
- Top-left/center taps while holding top-right: 'Qe-Se-Qe-Se'

The pattern searcher makes no distinction between the input pattern and the mirrored pattern (i.e. if you search a singles chart for 'Q-S-E', you'll receive timestamps corresponding to 'Q-S-E' as well as 'E-S-Q'). The order in which you enter the panels within a given note also doesn't matter, as the search function will sort them for you.

## Examples

Let's get started. Create a StepPatternSearcher object and then apply its search function to the data frame rows as illustrated by the examples below.

The last two arguments of the search function should be float values corresponding to the minimum and maximum number of seconds between each step in the pattern. Recall that the notes of each pattern must be evenly spaced in time, so if you input 0 and 1 for these last two arguments, for instance, you'll get timestamps where the pattern occurs and the time difference between consecutive steps is some constant between 0 and 1 seconds.

In [4]:
# One searcher instance, reused by the pattern searches below.
searcher = StepPatternSearcher()

### Pattern Search Example (Single)

In [5]:
pattern = 'Z-Q-S-E-C' # M-run
col = f'timestamps ({pattern})'

# searcher.search needs each chart's step type and step string in addition to
# the pattern, so walk those two columns in parallel and search chart by chart.
# The trailing 0 and 1 are the minimum and maximum number of seconds allowed
# between consecutive notes of the pattern.
df_s[col] = pd.Series(
    [
        searcher.search(step_type, steps, pattern, 0, 1)
        for step_type, steps in zip(df_s['Step Type'], df_s['Steps'])
    ],
    index=df_s.index,
    dtype=object,
)

# Display the lowest-difficulty charts that contain at least one match.
has_match = df_s[col].map(len) > 0  # exclude stepcharts with 0 matches.
display_cols = ['Song Title', 'Step Type', 'Difficulty', col]
df_s.loc[has_match, display_cols].sort_values('Difficulty').head()

Unnamed: 0,Song Title,Step Type,Difficulty,timestamps (Z-Q-S-E-C)
1523,Le Grand Rouge,S,8,[104.342]
297,Sugar Free,S,8,"[73.872, 107.622]"
2892,STAGER,S,8,[112.658]
1829,Bon Bon Chocolat,S,8,[128.8]
1400,Switronic,S,8,[115.178]


In [9]:
pattern = 'Qe-Se-Qe-Se' # Taps during hold
col = f'timestamps ({pattern})'

# searcher.search needs each chart's step type and step string in addition to
# the pattern, so walk those two columns in parallel and search chart by chart.
# The trailing 0 and 1 are the minimum and maximum number of seconds allowed
# between consecutive notes of the pattern.
# NOTE(review): the returned lists can contain repeated and unordered
# timestamps for this pattern -- presumably the mirrored-pattern search
# reporting its own hits; confirm against StepPatternSearcher.
df_s[col] = pd.Series(
    [
        searcher.search(step_type, steps, pattern, 0, 1)
        for step_type, steps in zip(df_s['Step Type'], df_s['Steps'])
    ],
    index=df_s.index,
    dtype=object,
)

# Display the lowest-difficulty charts that contain at least one match.
has_match = df_s[col].map(len) > 0  # exclude stepcharts with 0 matches.
display_cols = ['Song Title', 'Step Type', 'Difficulty', col]
df_s.loc[has_match, display_cols].sort_values('Difficulty').head()

Unnamed: 0,Song Title,Step Type,Difficulty,timestamps (Qe-Se-Qe-Se)
2938,Big Daddy,S,14,[74.291]
2674,Queencard,S,15,"[31.916, 32.839]"
442,Enhanced Reality,S,19,"[39.58, 49.463, 50.875, 42.404, 52.286, 53.698]"
1594,DESTRUCIMATE,S,19,"[0.308, 0.775, 0.308, 0.775]"
2393,Versailles,S,20,"[67.121, 67.464, 67.807]"


In [10]:
pattern = 'QSC-ZSE-QSC' # alternating diagonal triples
col = f'timestamps ({pattern})'

# searcher.search needs each chart's step type and step string in addition to
# the pattern, so walk those two columns in parallel and search chart by chart.
# The trailing 0 and 1 are the minimum and maximum number of seconds allowed
# between consecutive notes of the pattern.
df_s[col] = pd.Series(
    [
        searcher.search(step_type, steps, pattern, 0, 1)
        for step_type, steps in zip(df_s['Step Type'], df_s['Steps'])
    ],
    index=df_s.index,
    dtype=object,
)

# Display the lowest-difficulty charts that contain at least one match.
has_match = df_s[col].map(len) > 0  # exclude stepcharts with 0 matches.
display_cols = ['Song Title', 'Step Type', 'Difficulty', col]
df_s.loc[has_match, display_cols].sort_values('Difficulty').head()

Unnamed: 0,Song Title,Step Type,Difficulty,timestamps (QSC-ZSE-QSC)
731,Rock the house - SHORT CUT -,S,15,"[26.451, 26.108]"
2152,The End of the World ft. Skizzo,S,16,"[35.525, 35.925]"
512,Smile Diary,S,16,[70.987]
2282,BBoom BBoom - FULL SONG -,S,17,"[189.212, 189.569]"
1409,Obliteration,S,18,"[102.64, 101.24]"


### Pattern Search Example (Double)

In [11]:
pattern = 'Z-Q-S-E-C-V-R-G-Y-N' # Double M-run.
col = f'timestamps ({pattern})'

# df_d already holds the double charts (it was built when the single and double
# charts were separated), so it is reused here rather than re-sliced from df.
# This mirrors the single-chart examples and keeps result columns from other
# double-pattern searches on the same frame.
#
# searcher.search needs each chart's step type and step string in addition to
# the pattern, so walk those two columns in parallel and search chart by chart.
# The trailing 0 and 1 are the minimum and maximum number of seconds allowed
# between consecutive notes of the pattern.
df_d[col] = pd.Series(
    [
        searcher.search(step_type, steps, pattern, 0, 1)
        for step_type, steps in zip(df_d['Step Type'], df_d['Steps'])
    ],
    index=df_d.index,
    dtype=object,
)

# Display the lowest-difficulty charts that contain at least one match.
has_match = df_d[col].map(len) > 0  # exclude stepcharts with 0 matches.
display_cols = ['Song Title', 'Step Type', 'Difficulty', col]
df_d.loc[has_match, display_cols].sort_values('Difficulty').head()

Unnamed: 0,Song Title,Step Type,Difficulty,timestamps (Z-Q-S-E-C-V-R-G-Y-N)
669,Break It Down,D,13,[67.064]
1432,JANUS,D,14,[77.154]
1575,Tales of Pumpnia,D,14,[97.12]
1496,Skeptic,D,14,[12.664]
1089,Energetic,D,16,[85.504]


In [13]:
pattern = 'CV-ER-CV-ER' # Bracket drill
col = f'timestamps ({pattern})'

# df_d already holds the double charts (it was built when the single and double
# charts were separated), so it is reused here rather than re-sliced from df.
# Re-slicing would silently drop the result columns added by other
# double-pattern searches, unlike the single-chart examples which accumulate them.
#
# searcher.search needs each chart's step type and step string in addition to
# the pattern, so walk those two columns in parallel and search chart by chart.
# The trailing 0 and 1 are the minimum and maximum number of seconds allowed
# between consecutive notes of the pattern.
# NOTE(review): every match for this pattern is reported twice -- presumably
# the mirrored-pattern search hitting the same occurrence; confirm against
# StepPatternSearcher before counting matches.
df_d[col] = pd.Series(
    [
        searcher.search(step_type, steps, pattern, 0, 1)
        for step_type, steps in zip(df_d['Step Type'], df_d['Steps'])
    ],
    index=df_d.index,
    dtype=object,
)

# Display the lowest-difficulty charts that contain at least one match.
has_match = df_d[col].map(len) > 0  # exclude stepcharts with 0 matches.
display_cols = ['Song Title', 'Step Type', 'Difficulty', col]
df_d.loc[has_match, display_cols].sort_values('Difficulty').head()

Unnamed: 0,Song Title,Step Type,Difficulty,timestamps (CV-ER-CV-ER)
977,Me Gustas Tu,D,6,"[61.218, 61.218]"
743,Heavy Rotation - SHORT CUT -,D,9,"[43.193, 43.193]"
590,Trashy Innocence,D,11,"[23.68, 32.027, 48.723, 57.071, 23.68, 32.027,..."
1670,Jogging,D,18,"[74.878, 74.878]"
2678,Queencard,D,18,"[63.07, 63.07]"


In [14]:
pattern = 'SCER' # Quad
col = f'timestamps ({pattern})'

# df_d already holds the double charts (it was built when the single and double
# charts were separated), so it is reused here rather than re-sliced from df.
# Re-slicing would silently drop the result columns added by other
# double-pattern searches, unlike the single-chart examples which accumulate them.
#
# searcher.search needs each chart's step type and step string in addition to
# the pattern, so walk those two columns in parallel and search chart by chart.
# The pattern is a single note, so the trailing 0 and 1 (minimum and maximum
# seconds between consecutive notes) are passed only to complete the call.
df_d[col] = pd.Series(
    [
        searcher.search(step_type, steps, pattern, 0, 1)
        for step_type, steps in zip(df_d['Step Type'], df_d['Steps'])
    ],
    index=df_d.index,
    dtype=object,
)

# Display the lowest-difficulty charts that contain at least one match.
has_match = df_d[col].map(len) > 0  # exclude stepcharts with 0 matches.
display_cols = ['Song Title', 'Step Type', 'Difficulty', col]
df_d.loc[has_match, display_cols].sort_values('Difficulty').head()

Unnamed: 0,Song Title,Step Type,Difficulty,timestamps (SCER)
1758,Gashina,D,19,[91.555]
384,Feel My Happiness,D,21,[67.243]
671,Break It Down,D,21,[74.82]
1365,Acquaintance - FULL SONG -,D,22,[153.281]
211,Scorpion King,D,23,[37.74]
