In [20]:
import numpy as np
import pandas as pd
import pickle
import preprocessing_helper as helper

from pathlib import Path
import constants
import json 
from genson import SchemaBuilder

### 1. Get directories related information and load in MoonBoard 2016 data

Load the scraped data

In [4]:
# Deserialize the scraped MoonBoard 2016 problems from the pickle file.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
with Path(constants.SCRAPE_DATA_PATH).open('rb') as scrape_file:
    MoonBoard_2016_raw = pickle.load(scrape_file)

Dump the scraped data to JSON

In [5]:
def dump_to_json():
    """Write the scraped problems to ``MoonBoard_2016_raw.json`` in the data directory.

    Reads the notebook-level ``MoonBoard_2016_raw`` object and serializes it
    with ``json.dump``. Returns nothing.
    """
    json_path = constants.DATA_DIR / 'MoonBoard_2016_raw.json'
    with open(json_path, 'w') as json_file:
        json.dump(MoonBoard_2016_raw, json_file)

# dump_to_json()

In [6]:
# Preview the first 5 values of MoonBoard_2016_raw; the bare expression is
# what the cell displays.
list(MoonBoard_2016_raw.values())[:5]

[{'problem_name': 'TALL POPPY',
  'info': ['halladay',
   'Be the first to repeat this problem',
   '6C+',
   'Feet follow hands',
   ''],
  'url': 'https://moonboard.com/Problems/View/367894/tall-poppy',
  'num_empty': 3,
  'num_stars': 0,
  'moves': [{'Id': 2119224,
    'Description': 'F5',
    'IsStart': True,
    'IsEnd': False},
   {'Id': 2119225, 'Description': 'G2', 'IsStart': True, 'IsEnd': False},
   {'Id': 2119226, 'Description': 'H10', 'IsStart': False, 'IsEnd': False},
   {'Id': 2119227, 'Description': 'B11', 'IsStart': False, 'IsEnd': False},
   {'Id': 2119228, 'Description': 'E15', 'IsStart': False, 'IsEnd': False},
   {'Id': 2119229, 'Description': 'D18', 'IsStart': False, 'IsEnd': True},
   {'Id': 2119230, 'Description': 'E8', 'IsStart': False, 'IsEnd': False}],
  'grade': '6C+',
  'UserGrade': None,
  'isBenchmark': False,
  'repeats': 0,
  'ProblemType': None,
  'IsMaster': False,
  'setter': {'Id': '93A38FE2-1B2B-4B0F-9B70-9A77BAE976B8',
   'Nickname': 'halladay',
  

Generate the schema for the scraped data

In [7]:
def generate_schema():
    """Infer a JSON Schema for the scraped problems and save it as JSON.

    Reads the notebook-level ``MoonBoard_2016_raw`` object, feeds all of its
    values to a GenSON ``SchemaBuilder`` and writes the resulting schema to
    ``MoonBoard_2016_raw_schema.json`` in ``constants.DATA_DIR``.
    Returns nothing.
    """
    builder = SchemaBuilder()
    # All problem records are added as one list object.
    builder.add_object(list(MoonBoard_2016_raw.values()))

    # Build the schema once and reuse it (the original built it twice and
    # threw the first result away).
    schema = builder.to_schema()

    with open(constants.DATA_DIR / 'MoonBoard_2016_raw_schema.json', 'w') as f:
        json.dump(schema, f)
  
# generate_schema()

Load the hold difficulties for the MoonBoard from an Excel sheet.

Each hold's difficulty appears to be defined by a vector of 6 values (one per digit of the `Difficulties` column). **TODO:** determine what each of the 6 elements represents.

In [8]:
# Read every column as text, so the 'Difficulties' entries stay digit strings.
features = pd.read_excel(constants.HOLD_FEATURES_PATH, dtype=str)

# Map each hold's (x, y) position to an integer vector holding one entry per
# character of its 'Difficulties' string.
feature_dict = {
    (int(x_coord), int(y_coord)): np.array(list(digits)).astype(int)
    for x_coord, y_coord, digits in zip(
        features['X_coord'], features['Y_coord'], features['Difficulties']
    )
}

In [9]:
# TODO: what do the individual difficulties mean? Consider copilot suggestions or see where they are used.
features

Unnamed: 0,X_coord,Y_coord,Difficulties
0,0,17,034730
1,0,15,142210
2,0,14,013100
3,0,13,262000
4,0,12,012101
...,...,...,...
136,10,8,015100
137,10,7,013100
138,10,6,021000
139,10,5,012100


### 2. Separate data into different categories:
- no user ratings
- benchmarked with user ratings
- non-benchmarked with user ratings

In [10]:
# Fetch the helper's mapping from grade strings to integer labels and display it.
grade_map = helper.get_grade_map()
grade_map

{'6B': 0,
 '6B+': 0,
 '6C': 1,
 '6C+': 1,
 '7A': 2,
 '7A+': 3,
 '7B': 4,
 '7B+': 4,
 '7C': 5,
 '7C+': 6,
 '8A': 7,
 '8A+': 8,
 '8B': 9}

In [11]:
# Output path for the classified/reorganized data; the same path is read back
# in a later cell as raw_data_forseq_path.
save_path = constants.PREPROCESSING_DIR / 'processed_data_xy_mode'
# TODO: THIS IS CURRENTLY BROKEN (all the data errors)
# The call is left commented out, so this cell only defines save_path.
# output_xy_mode = helper.classify_and_reorganize_data(MoonBoard_2016_raw, save_path, delta_xy_mode = False)

### 3. Data Preprocessing with BetaMove

Load the data split into dicts:
- `X_dict_benchmark_withgrade`
- `Y_dict_benchmark_withgrade`
- `X_dict_benchmark_nograde`
- `Y_dict_benchmark_nograde`
- `X_dict_withgrade`
- `Y_dict_withgrade`
- `X_dict_nograde`
- `Y_dict_nograde`

Each X item is a (problem_id, 10xn array) pair, where n is the number of holds in the problem. Each **column** vector contains:
- 0: `feature_dict[x,y][0]`
- 1: `feature_dict[x,y][1]`
- 2: `feature_dict[x,y][2]`
- 3: `feature_dict[x,y][3]`
- 4: `feature_dict[x,y][4]`
- 5: `feature_dict[x,y][5]`
- 6: `x`
- 7: `y`
- 8: `is_start`
- 9: `is_end`

Each Y item is a (problem_id, 3x1 array) pair. Each **column** vector contains:
- 0: `grade`
- 1: `is_benchmarked`
- 2: `user_grade` (only if dict is `withgrade`)

In [12]:
# Load the pickled dictionary of X/Y problem splits.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
raw_data_forseq_path = constants.PREPROCESSING_DIR / 'processed_data_xy_mode'
with Path(raw_data_forseq_path).open('rb') as forseq_file:
    MoonBoard_2016_raw_forseq = pickle.load(forseq_file)

In [23]:
# Display the array stored under the first key of X_dict_benchmark_withgrade.
MoonBoard_2016_raw_forseq['X_dict_benchmark_withgrade'][next(iter(MoonBoard_2016_raw_forseq['X_dict_benchmark_withgrade']))]

array([[ 0.,  2.,  0.,  1.,  2.,  3.],
       [ 2.,  5.,  1.,  5.,  3.,  7.],
       [ 5.,  2.,  4.,  3.,  4.,  5.],
       [ 2.,  0.,  1.,  1.,  2.,  2.],
       [ 0.,  0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.],
       [10.,  5.,  8.,  5.,  3.,  1.],
       [ 4.,  6., 10., 13., 16., 17.],
       [ 1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  1.]])

In [24]:
# Display the array stored under the first key of Y_dict_benchmark_withgrade.
MoonBoard_2016_raw_forseq['Y_dict_benchmark_withgrade'][next(iter(MoonBoard_2016_raw_forseq['Y_dict_benchmark_withgrade']))]

array([[5],
       [1],
       [5]], dtype=int64)

Merge problem dicts

In [25]:
# Combine the four X splits into one dict. Splits are visited in the order
# listed, so on a duplicate problem id the later split's entry wins.
X_dict_merge = {
    problem_id: hold_matrix
    for split in ('benchmark_withgrade', 'benchmark_nograde', 'withgrade', 'nograde')
    for problem_id, hold_matrix in MoonBoard_2016_raw_forseq[f'X_dict_{split}'].items()
}
# Combine the four Y splits the same way, in the same order.
Y_dict_merge = {
    problem_id: label_vector
    for split in ('benchmark_withgrade', 'benchmark_nograde', 'withgrade', 'nograde')
    for problem_id, label_vector in MoonBoard_2016_raw_forseq[f'Y_dict_{split}'].items()
}

Use beam search to calculate the theoretically best route (sequence of LH/RH moves) for each problem

In [21]:
# Reload preprocessing_helper so edits made to the module on disk take effect
# without restarting the kernel; the returned module object is the cell output.
import importlib
importlib.reload(helper)

<module 'preprocessing_helper' from 'c:\\Users\\valsp\\source\\repos\\MoonBoardRNN\\preprocessing\\preprocessing_helper.py'>

In [22]:
# Path handed to the helper as save_path; presumably where the sequence data
# is written — TODO confirm against preprocessing_helper.
save_path_seq = constants.PREPROCESSING_DIR / 'processed_data_seq'
# TODO: THIS IS CURRENTLY BROKEN (all the data errors)
# The call below currently fails with
# AttributeError: 'beta' object has no attribute 'successScoreSequence'
# (see this cell's output).
output_seq = helper.generate_organized_sequence_data(raw_data = X_dict_merge, save_path = save_path_seq)
# helper.save_pickle(output_seq, save_path_seq)

AttributeError: 'beta' object has no attribute 'successScoreSequence'