In [11]:
import json
import os
import numpy as np
import pandas as pd
import pickle
from preprocessing_helper import *

%load_ext autoreload
%autoreload 2
%autosave 180


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Autosaving every 180 seconds


### 1. Get directories related information and load in MoonBoard 2016 data

In [12]:
cwd = os.getcwd()
parent_wd = cwd.replace('/preprocessing', '')
raw_data_path = parent_wd + '/raw_data/moonGen_scrape_2016_final'
json_data_path = parent_wd + '/raw_data/problems_2023_01_30/problems MoonBoard 2016 .json'
hold_feature_path = parent_wd + '/raw_data/HoldFeature2016.xlsx'

In [13]:
with open(raw_data_path, 'rb') as f:
    MoonBoard_2016_raw = pickle.load(f)
with open(json_data_path, 'rb') as f:
    MoonBoard_2016_raw_json = json.load(f)

In [14]:
MoonBoard_2016_raw_json['data'][0]

{'name': 'Far from the Madding Crowd',
 'grade': '6B+',
 'userGrade': '6B+',
 'setbyId': '5FC09F63-05F3-4DAE-A1A5-3AC22C37139A',
 'setby': 'Ben Moon',
 'method': 'Feet follow hands',
 'userRating': 4,
 'repeats': 24993,
 'holdsetup': {'description': 'MoonBoard 2016', 'holdsets': None, 'apiId': 1},
 'isBenchmark': True,
 'isMaster': False,
 'upgraded': False,
 'downgraded': False,
 'moves': [{'problemId': 19215,
   'description': 'E6',
   'isStart': True,
   'isEnd': False},
  {'problemId': 19215, 'description': 'C5', 'isStart': True, 'isEnd': False},
  {'problemId': 19215, 'description': 'E8', 'isStart': False, 'isEnd': False},
  {'problemId': 19215, 'description': 'F11', 'isStart': False, 'isEnd': False},
  {'problemId': 19215, 'description': 'C13', 'isStart': False, 'isEnd': False},
  {'problemId': 19215, 'description': 'D15', 'isStart': False, 'isEnd': False},
  {'problemId': 19215, 'description': 'D18', 'isStart': False, 'isEnd': True}],
 'holdsets': [{'description': 'Hold Set B', 

In [15]:
def getHoldCol(holdName):
    return ord(holdName[0:1].upper()) - 65

def getHoldRow(holdName):
    return int(holdName[1:]) - 1

def uniqueHolds(holds):
    seenHolds = dict()
    
    for hold in holds:
        seenHolds[hold['holdName'].upper()] = hold
    
    return list(seenHolds.values())
        
    

def reformatClimbInfo(climb_info):
    climb = {'name':climb_info['name'], 'id':climb_info['apiId'], 
            'holds': uniqueHolds(list(map(lambda hold : {'holdName': hold['description'].upper(), 
                                        'col': getHoldCol(hold['description'].upper()), 
                                        'row': getHoldRow(hold['description'].upper()),
                                        'isStart': hold['isStart'], 
                                        'isEnd': hold['isEnd']},
                        climb_info['moves']))),
            'repeats': climb_info['repeats'],
            'grade': climb_info['grade'],
            'isBenchmark': climb_info['isBenchmark']}
    return climb

# print(reformatClimbInfo(climb_example))
MoonBoard_2016_all = list(map(reformatClimbInfo, MoonBoard_2016_raw_json['data']))
print(MoonBoard_2016_all[0])

{'name': 'Far from the Madding Crowd', 'id': 19215, 'holds': [{'holdName': 'E6', 'col': 4, 'row': 5, 'isStart': True, 'isEnd': False}, {'holdName': 'C5', 'col': 2, 'row': 4, 'isStart': True, 'isEnd': False}, {'holdName': 'E8', 'col': 4, 'row': 7, 'isStart': False, 'isEnd': False}, {'holdName': 'F11', 'col': 5, 'row': 10, 'isStart': False, 'isEnd': False}, {'holdName': 'C13', 'col': 2, 'row': 12, 'isStart': False, 'isEnd': False}, {'holdName': 'D15', 'col': 3, 'row': 14, 'isStart': False, 'isEnd': False}, {'holdName': 'D18', 'col': 3, 'row': 17, 'isStart': False, 'isEnd': True}], 'repeats': 24993, 'grade': '6B+', 'isBenchmark': True}


In [16]:
features = pd.read_excel(hold_feature_path, dtype=str)

# convert features from pd dataframe to dictionary
feature_dict = {}
for index in features.index:
    feature_item = features.loc[index]
    feature_dict[(int(feature_item['X_coord']), int(feature_item['Y_coord']))] = np.array(
        list(feature_item['Difficulties'])).astype(int)
    
# print(len(feature_dict))
print(list(feature_dict.values())[0])

[0 3 4 7 3 0]


### 2. Separate data into different categories:
- no user ratings
- benchmarked with user ratings
- non-benchmarked with user ratings

In [17]:
# grade_map = get_grade_map()

grade_map = dict([(climb['id'], climb['grade']) for climb in MoonBoard_2016_all])

# print(np.unique(list(grade_map.values()), return_counts=True))
grade_counts = np.unique(list(map(lambda climb: climb['grade'], MoonBoard_2016_all)), return_counts=True)
print(grade_counts)


(array(['6B', '6B+', '6C', '6C+', '7A', '7A+', '7B', '7B+', '7C', '7C+',
       '8A', '8A+', '8B', '8B+'], dtype='<U3'), array([    9, 19164,  7426,  9107,  7970,  6036,  2834,  3282,  2064,
         975,   404,   109,    60,    66]))


In [18]:
from itertools import compress


holdLists = [list(map(lambda hold: hold['holdName'], climb['holds'])) for climb in MoonBoard_2016_all]

print(holdLists[0])
dups = [len(holds) != len(set(holds)) for holds in holdLists]

list(compress(holdLists, dups))


['E6', 'C5', 'E8', 'F11', 'C13', 'D15', 'D18']


[]

In [30]:
# select climbs to train on
MoonBoard_2016_train = []
def selectByGrade(grade, climbs_list):
    return list(filter(lambda climb: climb['grade'] == grade, climbs_list))

print(len(MoonBoard_2016_all))
MB2016_reapeated = list(filter(lambda climb: climb['repeats'] != 0, MoonBoard_2016_all))
print(len(MB2016_reapeated))
MB2016_benchmark = list(filter(lambda climb: climb['isBenchmark'], MoonBoard_2016_all))
print(len(MB2016_benchmark))

reapeated_grade_counts = np.unique(list(map(lambda climb: climb['grade'], MB2016_reapeated)), return_counts=True)
print(reapeated_grade_counts)
benchmark_grade_counts = np.unique(list(map(lambda climb: climb['grade'], MB2016_benchmark)), return_counts=True)
print(benchmark_grade_counts)

MB2016_by_grade_all = dict((grade, selectByGrade(grade, MoonBoard_2016_all)) for grade in grade_counts[0])
MB2016_by_grade_repeated = dict((grade, selectByGrade(grade, MB2016_reapeated)) for grade in grade_counts[0])
MB2016_by_grade_benchmark = dict((grade, selectByGrade(grade, MB2016_benchmark)) for grade in grade_counts[0])


MB2016_curated = np.concatenate([MB2016_by_grade_repeated[grade] if (grade == '8B+' or grade == '8B' or grade == '8A+' or grade == '8A') 
                                 else MB2016_by_grade_benchmark[grade] for grade in grade_counts[0]])

curated_grade_counts = np.unique(list(map(lambda climb: climb['grade'], MB2016_curated)), return_counts=True)
print(curated_grade_counts)

save_pickle(MoonBoard_2016_all, 'MB2016_all')
save_pickle(MB2016_reapeated, 'MB2016_reapeated')
save_pickle(MB2016_benchmark, 'MB2016_benchmark')
save_pickle(MB2016_curated, 'MB2016_curated')
save_pickle(MB2016_by_grade_all, 'MB2016_by_grade_all')
save_pickle(MB2016_by_grade_repeated, 'MB2016_by_grade_repeated')
save_pickle(MB2016_by_grade_benchmark, 'MB2016_by_grade_benchmark')

59506
48605
490
(array(['6B', '6B+', '6C', '6C+', '7A', '7A+', '7B', '7B+', '7C', '7C+',
       '8A', '8A+', '8B', '8B+'], dtype='<U3'), array([    9, 17351,  6721,  7266,  6190,  4214,  2210,  2217,  1407,
         629,   248,    53,    41,    49]))
(array(['6B+', '6C', '6C+', '7A', '7A+', '7B', '7B+', '7C', '7C+', '8A',
       '8A+', '8B'], dtype='<U3'), array([71, 64, 56, 63, 73, 56, 28, 39, 26, 11,  2,  1]))
(array(['6B+', '6C', '6C+', '7A', '7A+', '7B', '7B+', '7C', '7C+', '8A',
       '8A+', '8B', '8B+'], dtype='<U3'), array([ 71,  64,  56,  63,  73,  56,  28,  39,  26, 248,  53,  41,  49]))


In [21]:
print(list(MB2016_by_grade_all.keys()))
print(list(map(lambda list: len(list), MB2016_by_grade_all.values())))

['6B', '6B+', '6C', '6C+', '7A', '7A+', '7B', '7B+', '7C', '7C+', '8A', '8A+', '8B', '8B+']
[9, 19164, 7426, 9107, 7970, 6036, 2834, 3282, 2064, 975, 404, 109, 60, 66]


In [24]:
import traceback


def encodeHold(hold):
    """ {'holdName': hold['description'], 
        'row':ord(hold['description'][0:1]) - 65, 
        'col':int(hold['description'][1:]), 
        'pos':{'x':int(hold['description'][1:]), 'y':ord(hold['description'][0:1]) - 65},
        'isStart': hold['isStart'], 
        'isEnd': hold['isEnd']}
        
        (10, n_holds) (6 hold features, x, y, is_start, is_end)
    """
    return np.concatenate([feature_dict[(hold['col'], hold['row'])], 
                        [hold['col']], 
                        [hold['row']],
                        [1 if hold['isStart'] else 0],
                        [1 if hold['isEnd'] else 0]])

# encodeHold(MoonBoard_2016_all[0]['holds'][0])
# climb = MoonBoard_2016_all[0]
# list(filter(lambda climb: climb['id']==23360, MoonBoard_2016_all))[0]

# climb['holds']
# list(map(encodeHold, climb['holds']))
# np.vstack(list(map(encodeHold, climb['holds']))).shape

# climbs_beta = dict([(climb['id'], produce_sequence(np.vstack(list(map(encodeHold, climb['holds']))), climb['id'])[0]) for climb in MoonBoard_2016_all[9500:]])
climbs_beta = {}
for climb in MB2016_reapeated:
    try:
        climbs_beta[climb['id']] = produce_sequence(np.vstack(list(map(encodeHold, climb['holds']))), climb['id'])[0]
    except Exception:
        # Some climbs may fail. We chose to ignore them since they are vey uncommon.
        print(climb['id'])
        print(list(filter(lambda climb_: climb_['id']==climb['id'], MoonBoard_2016_all))[0])
        traceback.print_exc()
        print(Exception)

304387
<class 'Exception'>


Traceback (most recent call last):
  File "/tmp/ipykernel_3040233/1336260040.py", line 32, in <module>
    climbs_beta[climb['id']] = produce_sequence(np.vstack(list(map(encodeHold, climb['holds']))), climb['id'])[0]
  File "/tmp/ipykernel_3040233/1336260040.py", line 14, in encodeHold
    return np.concatenate([feature_dict[(hold['col'], hold['row'])],
KeyError: (0, 5)


In [25]:
with open('climb_sequence_dict_v1', 'wb') as f:
    pickle.dump(climbs_beta, f)

In [None]:
# save_path = cwd + '/processed_data_xy_mode'
# output_xy_mode = classify_and_reorganize_data(MoonBoard_2016_raw_json, save_path, delta_xy_mode = False)

Raw data with key total contains error
Raw data with key data contains error
result saved.


### 3. Data Preprocessing with BetaMove

In [26]:
# raw_data_forseq_path = parent_wd + '/preprocessing/climb_sequence_dict_v1'
# with open(raw_data_forseq_path, 'rb') as f:
#     MoonBoard_2016_raw_forseq = pickle.load(f)

In [None]:
# X_dict_merge = {**MoonBoard_2016_raw_forseq['X_dict_benchmark_withgrade'], 
#                 **MoonBoard_2016_raw_forseq['X_dict_benchmark_nograde'], 
#                 **MoonBoard_2016_raw_forseq['X_dict_withgrade'], 
#                 **MoonBoard_2016_raw_forseq['X_dict_nograde']}
# Y_dict_merge = {**MoonBoard_2016_raw_forseq['Y_dict_benchmark_withgrade'], 
#                 **MoonBoard_2016_raw_forseq['Y_dict_benchmark_nograde'], 
#                 **MoonBoard_2016_raw_forseq['Y_dict_withgrade'], 
#                 **MoonBoard_2016_raw_forseq['Y_dict_nograde']}

In [28]:
# save_path_seq = cwd + '/processed_data_seq'
# output_seq = generate_organized_sequence_data(raw_data = MoonBoard_2016_raw_forseq, save_path = save_path_seq)
# save_pickle(output_seq, save_path_seq)

data with key 19215 contains error
data with key 19216 contains error
data with key 19217 contains error
data with key 19218 contains error
data with key 19219 contains error
data with key 19220 contains error
data with key 19221 contains error
data with key 19222 contains error
data with key 19223 contains error
data with key 19224 contains error
data with key 19225 contains error
data with key 19226 contains error
data with key 19227 contains error
data with key 19228 contains error
data with key 19229 contains error
data with key 19230 contains error
data with key 19231 contains error
data with key 19232 contains error
data with key 19233 contains error
data with key 19234 contains error
data with key 19235 contains error
data with key 19236 contains error
data with key 19237 contains error
data with key 19280 contains error
data with key 19295 contains error
data with key 19296 contains error
data with key 19297 contains error
data with key 19298 contains error
data with key 19299 