In [1]:
import pandas as pd, numpy as np
import re
import seaborn as sns
import sys  
sys.path.insert(1, '../src')
from step_serializer import StepSerializer
from step_pattern_searcher import StepPatternSearcher

# Stepchart Data

Begin by reading in the data frame produced by the ``sss_crawler.py`` script, which is found in the ``src`` folder of the NLPump directory.

In [2]:
# Load the crawled stepchart table; the first CSV column is used as the row index.
stepchart_csv = '../data/PRIME-PHOENIX.csv'
df = pd.read_csv(stepchart_csv, index_col=0)
df.head()  # preview the first few rows

Unnamed: 0,Song Title,Step Type,Difficulty,Steps
0,Nemesis,S,4,6.071:Z-6.821:Z-7.571:C-8.321:C-9.071:Q-9.821:...
1,Nemesis,S,7,6.071:z1-6.634:z0-6.821:c1-7.384:c0-7.571:S-8....
2,Nemesis,S,10,6.071:z1-6.821:e1z-7.196:z0e0-7.571:S-8.321:E-...
3,Nemesis,S,16,4.571:Q-4.758:C-4.946:Z-5.134:E-5.321:Q-5.508:...
4,Nemesis,S,18,6.071:z1-6.821:s1e1z-7.196:z0s0e0-7.571:c1-8.1...


Now separate the single charts (``df_s``) from the double charts (``df_d``). This split is needed because searching for a double pattern within a single chart results in dictionary key errors (this will be fixed eventually).

In [3]:
# Split the charts on 'Step Type': 'S' rows go to df_s, 'D' rows to df_d.
# Each subset is an independent copy, so columns added to it later do not
# touch `df`.
step_type = df['Step Type']
df_s = df.loc[step_type == 'S', :].copy()
df_d = df.loc[step_type == 'D', :].copy()

# Searching for Patterns

Now we can start searching for patterns. Create a ``StepPatternSearcher`` object, then apply its ``search`` function to each row of the data frame, as illustrated by the examples below.

In [4]:
# One StepPatternSearcher instance (imported from step_pattern_searcher),
# reused by the pattern-search cells that follow.
searcher = StepPatternSearcher()

### Pattern Search Example (Single)

In [5]:
pattern = 'QSC-ZSE-QSC' # alternating diagonal triples
col = f'timestamps ({pattern})'

# searcher.search needs the step type and the chart text in addition to the
# pattern, so it is called once per row of the single-chart frame.
# NOTE(review): the trailing 0 and 1 are forwarded positionally to
# searcher.search; their meaning is not visible in this notebook -- confirm
# against StepPatternSearcher.
match_times = df_s.apply(
    lambda chart: searcher.search(chart['Step Type'], chart['Steps'], pattern, 0, 1),
    axis=1,
)
df_s[col] = match_times  # new column holding the per-chart search results

# Display only the charts with at least one match, ordered by difficulty.
found = df_s[col].apply(len) > 0
columns_shown = ['Song Title', 'Step Type', 'Difficulty', col]
df_s.loc[found, columns_shown].sort_values('Difficulty')

Unnamed: 0,Song Title,Step Type,Difficulty,timestamps (QSC-ZSE-QSC)
731,Rock the house - SHORT CUT -,S,15,"[26.451, 26.108]"
2152,The End of the World ft. Skizzo,S,16,"[35.525, 35.925]"
512,Smile Diary,S,16,[70.987]
2282,BBoom BBoom - FULL SONG -,S,17,"[189.212, 189.569]"
1409,Obliteration,S,18,"[102.64, 101.24]"
2822,GOODTEK,S,18,"[60.692, 63.218]"
635,Idealized Romance,S,18,"[77.169, 80.597, 84.026, 75.455, 78.883, 82.31..."
73,You Got Me Crazy,S,18,"[41.293, 41.645]"
331,Ineffective Boss Without Power,S,18,"[94.962, 94.609]"
513,Smile Diary,S,19,"[26.073, 70.987, 23.33, 30.187]"


### Pattern Search Example (Double)

In [6]:
pattern = 'Z-Q-S-E-C-V-R-G-Y-N' # Double M-run.
col = f'timestamps ({pattern})'

# searcher.search needs the step type and the chart text in addition to the
# pattern, so it is called once per row of the double-chart frame.
# `df_d` is the copy of the 'D' rows built when the single and double charts
# were separated, so it is used directly rather than re-filtered from `df`
# (mirroring how the single example uses `df_s`).
# NOTE(review): the trailing 0 and 1 are forwarded positionally to
# searcher.search; their meaning is not visible in this notebook -- confirm
# against StepPatternSearcher.
df_d[col] = df_d.apply(lambda row: searcher.search(
    row['Step Type'],
    row['Steps'],
    pattern,
    0,
    1
), axis=1)

# Display only the charts with at least one match, ordered by difficulty.
has_match = df_d[col].apply(len) > 0
df_d.loc[
    has_match,
    ['Song Title', 'Step Type', 'Difficulty', col]
].sort_values('Difficulty')

Unnamed: 0,Song Title,Step Type,Difficulty,timestamps (Z-Q-S-E-C-V-R-G-Y-N)
669,Break It Down,D,13,[67.064]
1432,JANUS,D,14,[77.154]
1575,Tales of Pumpnia,D,14,[97.12]
1496,Skeptic,D,14,[12.664]
1089,Energetic,D,16,[85.504]
810,The Quick Brown Fox Jumps Over The Lazy Dog,D,16,[85.866]
721,Super Fantasy - SHORT CUT -,D,17,[38.744]
67,Requiem,D,18,[79.957]
1106,PICK ME,D,18,[75.963]
962,Waltz of Doge,D,18,[99.53]
