In [16]:
import numpy as np
import pandas as pd
import os
import re


def preprocess_file(file_path):
    """Write a copy of ``file_path`` with reference and todo lines removed.

    The input is read as UTF-8 and split on ``'\\n'``. Every line that starts
    with ``'ref:'`` or ``'todo'`` is dropped; all other lines are kept
    unchanged and in order. The result is written next to the input as
    ``<name>_processed<ext>``.

    Parameters
    ----------
    file_path : str
        Path of the UTF-8 encoded text file to filter.

    Returns
    -------
    None
        The function is used for its side effect of writing the processed file.
    """
    filename, file_ext = os.path.splitext(file_path)

    # Read raw bytes and decode explicitly so the result does not depend on
    # the platform's default text encoding.
    with open(file_path, 'rb') as ff:
        db = ff.read().decode('utf-8')

    # 'todo' is matched without the trailing colon on purpose: there is one
    # instance in the data where todo was added without a ':' after it.
    filtered_lines = [
        line for line in db.split('\n')
        if not line.startswith(('ref:', 'todo'))
    ]
    filtered_content = '\n'.join(filtered_lines)

    # Write with the same encoding that was used for reading, and disable
    # newline translation so the kept lines round-trip byte-for-byte on every
    # platform (the processed file is read back as UTF-8 bytes later on).
    with open(filename + '_processed' + file_ext, 'w', encoding='utf-8', newline='') as file:
        file.write(filtered_content)
# Filter the raw map file; the result is written to GrappleMap_processed.txt
preprocess_file("GrappleMap.txt")

In [17]:
def _empty_record():
    """Return a fresh, empty record for one parsed block."""
    return {
        'description': '',
        'properties': '',
        'tags': '',
        'code': []
    }


def process_block_iterative(lines, code_line_length=69):
    """Group the lines of a processed map file into one record per block.

    Lines are classified in this order:

    * ``properties:<value>`` and ``tags:<value>`` set the matching field of
      the current record (the value is stripped).
    * A line whose stripped text is exactly ``code_line_length`` characters
      long and contains no inner whitespace is an encoded code line and is
      appended (stripped) to the record's ``code`` list.
    * Any other non-blank line is a description. The first one seen becomes
      the record's description, with literal ``\\n`` sequences replaced by a
      space. The next one closes the current record and starts a new one.
    * Blank lines are ignored.

    Parameters
    ----------
    lines : iterable of str
        Lines of the file, without trailing newline characters.
    code_line_length : int, optional
        Exact stripped length that identifies a code line. Defaults to 69.

    Returns
    -------
    list of dict
        One dict per block with keys ``description``, ``properties``,
        ``tags`` (strings) and ``code`` (list of strings). Input that holds
        no meaningful line yields an empty list.
    """
    results = []
    result = _empty_record()

    # Single pass over the input: no re-slicing of the remaining lines at each
    # block boundary, so the cost stays linear in the number of lines.
    for line in lines:
        stripped = line.strip()

        if line.startswith('properties:'):
            result['properties'] = line.split(':', 1)[1].strip()
        elif line.startswith('tags:'):
            result['tags'] = line.split(':', 1)[1].strip()
        elif len(stripped) == code_line_length and len(stripped.split()) == 1:
            # Length alone is not enough: a description that happens to be
            # exactly this long contains spaces, an encoded line does not.
            result['code'].append(stripped)
        elif stripped:
            if result['description']:
                # A second description line marks the start of the next block.
                results.append(result)
                result = _empty_record()
            result['description'] = line.replace('\\n', ' ')

    # Flush the last block, unless nothing at all was collected (for example
    # when the input is empty or consists only of blank lines).
    if any(result.values()):
        results.append(result)

    return results

In [25]:
# Read the processed map as UTF-8 text; the file handle is only needed for the read itself
with open("GrappleMap_processed.txt", 'rb') as ff:
    db = ff.read().decode('utf-8')

# Split into lines and group them into one record per block
lines = db.split('\n')
parsed = process_block_iterative(lines)

In [26]:
# One row per parsed record; the record keys (description, properties, tags, code) become the columns
grapplemap = pd.DataFrame(parsed)
grapplemap.head()

Unnamed: 0,description,properties,tags,code
0,side ctrl w/ near open elbow and crossface,,side_control top_underhook bottom_supine cross...,[Q2aAwQJFazuRQGddznKqcxxSRkbTzoJpbsxEToa1FVGga...
1,twister side control,,twister_side top_on_side side_control bottom_s...,[DPaUwxULaRrgHtaEvNVYbjuKGFaEwZUHbgt6JaceDeO6d...
2,north south,,north_south bottom_supine top_kneeling,[5oazN44BazJj26drNp17dnJh2VcaN819b3INWBa3QRVQa...
3,seated back w/ hook on underhook side,,back seatbelt top_seated,[LKaAKZNKfQIVOBaENkN7dlLCNcaUNvOWexLiIAerReVhf...
4,full guard kimura,,full_guard kimura bottom_supine top_kneeling t...,[ANaAxzxXaBCzBTaGA5zYbZFiCDaHzPzUaEEKI5bABQEjb...


In [27]:
# Drop rows whose 'code' list is empty. The explicit .copy() makes the result an
# independent frame, so the column assignments below do not write into a slice
# of the original (which triggers pandas' SettingWithCopyWarning).
grapplemap = grapplemap[grapplemap['code'].apply(len) != 0].copy()

# One-hot flags derived from the number of code lines in the row:
# exactly 4 lines -> position, more than 4 lines -> transition.
# Rows with 1-3 code lines get 0 in both columns.
grapplemap['is_position'] = grapplemap['code'].apply(lambda x: 1 if len(x) == 4 else 0)
grapplemap['is_transition'] = grapplemap['code'].apply(lambda x: 1 if len(x) > 4 else 0)

grapplemap.head()

Unnamed: 0,description,properties,tags,code,is_position,is_transition
0,side ctrl w/ near open elbow and crossface,,side_control top_underhook bottom_supine cross...,[Q2aAwQJFazuRQGddznKqcxxSRkbTzoJpbsxEToa1FVGga...,1,0
1,twister side control,,twister_side top_on_side side_control bottom_s...,[DPaUwxULaRrgHtaEvNVYbjuKGFaEwZUHbgt6JaceDeO6d...,1,0
2,north south,,north_south bottom_supine top_kneeling,[5oazN44BazJj26drNp17dnJh2VcaN819b3INWBa3QRVQa...,1,0
3,seated back w/ hook on underhook side,,back seatbelt top_seated,[LKaAKZNKfQIVOBaENkN7dlLCNcaUNvOWexLiIAerReVhf...,1,0
4,full guard kimura,,full_guard kimura bottom_supine top_kneeling t...,[ANaAxzxXaBCzBTaGA5zYbZFiCDaHzPzUaEEKI5bABQEjb...,1,0


In [28]:
def combine_every_four(input_list):
    """Join consecutive groups of four strings into single strings.

    Parameters
    ----------
    input_list : sequence of str
        Items to join; its length must be a multiple of 4.

    Returns
    -------
    str or list of str
        The joined string itself when there is exactly one group of four,
        otherwise a list with one joined string per group (an empty list for
        empty input).

    Raises
    ------
    ValueError
        If the length of ``input_list`` is not a multiple of 4.
    """
    total = len(input_list)
    if total % 4:
        raise ValueError("Input list length must be a multiple of 4")

    groups = []
    for start in range(0, total, 4):
        groups.append(''.join(input_list[start:start + 4]))

    # A lone group is unwrapped so the caller gets a plain string
    return groups[0] if len(groups) == 1 else groups

def extract_transition_positions(code_col, is_transition):
    """Return the first and last entry of a transition's code as a Series.

    Parameters
    ----------
    code_col : sequence
        The row's code value; only indexed when ``is_transition`` is truthy.
    is_transition : int or bool
        Truthy when the row describes a transition.

    Returns
    -------
    pandas.Series
        Index ``start_position`` / ``end_position``. For a transition these
        hold ``code_col[0]`` and ``code_col[-1]``; otherwise both are ``None``.
    """
    first, last = (code_col[0], code_col[-1]) if is_transition else (None, None)
    return pd.Series({'start_position': first, 'end_position': last})

# Replace each row's list of code lines by its groups of four joined lines:
# a row with a single group ends up holding a plain string, a row with several
# groups holds a list of strings.
# NOTE: not idempotent - re-running this cell feeds the already-combined values
# back through combine_every_four.
grapplemap['code'] = grapplemap['code'].apply(combine_every_four)

In [29]:
# Sanity check: distribution of len() over the combined 'code' column.
# len() of a list counts its groups, while len() of a plain string counts its
# characters, so single-group rows are reported under their string length.
# Equivalent pandas spelling, kept for reference:
# np.unique(grapplemap['code'].apply(lambda x: len(x)) ,return_counts=True)
np.unique([len(i) for i in grapplemap['code']] ,return_counts=True)

(array([  2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
         15,  16,  17,  18,  20,  21,  22, 276]),
 array([ 89, 339, 349, 210, 147, 116,  73,  48,  44,  23,  10,  14,   7,
          6,   1,   1,   4,   2,   1,   1, 601]))

In [30]:
# Row-wise: build a two-column frame (start_position, end_position) holding the
# first and last code entry of each transition; non-transition rows get None.
transition_positions = grapplemap.apply(lambda row: extract_transition_positions(row['code'],row['is_transition']), axis=1)
# Append the new columns side by side (aligned on the index).
# NOTE: re-running this cell appends the two columns again.
grapplemap = pd.concat([grapplemap,transition_positions], axis=1)
grapplemap

Unnamed: 0,description,properties,tags,code,is_position,is_transition,start_position,end_position
0,side ctrl w/ near open elbow and crossface,,side_control top_underhook bottom_supine cross...,Q2aAwQJFazuRQGddznKqcxxSRkbTzoJpbsxEToa1FVGgaY...,1,0,,
1,twister side control,,twister_side top_on_side side_control bottom_s...,DPaUwxULaRrgHtaEvNVYbjuKGFaEwZUHbgt6JaceDeO6d0...,1,0,,
2,north south,,north_south bottom_supine top_kneeling,5oazN44BazJj26drNp17dnJh2VcaN819b3INWBa3QRVQa5...,1,0,,
3,seated back w/ hook on underhook side,,back seatbelt top_seated,LKaAKZNKfQIVOBaENkN7dlLCNcaUNvOWexLiIAerReVhfL...,1,0,,
4,full guard kimura,,full_guard kimura bottom_supine top_kneeling t...,ANaAxzxXaBCzBTaGA5zYbZFiCDaHzPzUaEEKI5bABQEjbs...,1,0,,
...,...,...,...,...,...,...,...,...
2127,attack leg,top detailed,,[RFazHzw3azFLPYaEEiAnaED4QebHFizwbzEKOjiaGHBQh...,0,1,RFazHzw3azFLPYaEEiAnaED4QebHFizwbzEKOjiaGHBQhZ...,ESd2zAC4azyYIbcEywGxaUxzHKdozDFsbLxRMyfwEeJDhf...
2128,...,,,[MSaCMsVmaQzJPUaEKlYZaEAgO0bJKLX3bNArNLiqKoTlg...,0,1,MSaCMsVmaQzJPUaEKlYZaEAgO0bJKLX3bNArNLiqKoTlgy...,HJbaQJAWbnIzIJdgNQEwbOHNHPdvOYDvcJIgFAj3OvBFjj...
2129,...,,,[HJbaQJAWbnIzIJdgNQEwbOHNHPdvOYDvcJIgFAj3OvBFj...,0,1,HJbaQJAWbnIzIJdgNQEwbOHNHPdvOYDvcJIgFAj3OvBFjj...,DLazTEBrazH7FkcyQYCRdYIKD8cIRMBAdmILxuebTevkdV...
2130,step over,top detailed,,[IvazThT4azPrLAaERcTQaGLJKha8Q6SXa1MOHKfoMjL1a...,0,1,IvazThT4azPrLAaERcTQaGLJKha8Q6SXa1MOHKfoMjL1a0...,xiaCExHwaKSEABaOF8KsaIQszKbTFzI0aQP7CYhZFuD6aY...


In [31]:
# Split the frame by its one-hot flags
positions = grapplemap.loc[grapplemap['is_position'] == 1]
transitions = grapplemap.loc[grapplemap['is_transition'] == 1]

# All distinct codes that belong to a standalone position row
position_codes = set(positions['code'])

# A transition matches when its first or its last entry is one of those codes
starts_at_known_position = transitions['start_position'].isin(position_codes)
ends_at_known_position = transitions['end_position'].isin(position_codes)
matches = transitions[starts_at_known_position | ends_at_known_position]

# For a per-transition report, iterate over matches.iterrows() and test
# row['start_position'] / row['end_position'] against position_codes.

# Summary count of matching transitions
print(f"\nTotal matches found: {len(matches)} out of {len(transitions)}")



Total matches found: 858 out of 1485


In [32]:
# Add two empty-string placeholder columns (presumably filled in by a later step - not shown here)
grapplemap = grapplemap.assign(trans_start_node = '',trans_end_node = '')

# Persist the frame to CSV without the index column
grapplemap.to_csv('grapplemap_df.csv',index=False)