In [13]:
def parse_grade(yds):
    '''
    Takes in a YDS rating with no risk rating or extra text and returns that rating as a decimal
    Valid ratings take the form "5.(0-15)(a-d | a/b, b/c, c/d)(+-)"
    This function does not take into account + and -, opting to hold those as a separate predictive feature
    Surrounding whitespace is ignored.

    The number after the "." is the base grade. Letter suffixes add a=.2, b=.4, c=.6, d=.8
    and split letters add a/b=.25, b/c=.5, c/d=.75, e.g. "5.10a" -> 10.2, "5.11b/c" -> 11.5.
    A grade without a letter maps to the bare integer ("5.9" -> 9), so "5.10" sorts below "5.10a".

    Args:
        yds: the rating as a string, e.g. "5.10a", "5.11b/c" or "5.9+"

    Returns:
        int when the grade has no letter suffix, float otherwise

    Raises:
        TypeError: if yds is not a string (e.g. a NaN coming out of a DataFrame column)
        ValueError: if yds cannot be parsed (empty, no ".", unknown suffix, ...);
            the message contains the offending value so bad rows are easy to find
    '''
    if not isinstance(yds, str):
        raise TypeError("YDS grade must be a string, got {}".format(type(yds).__name__))

    slash_fractions = {
        "a/b" : .25,
        "b/c" : .5,
        "c/d" : .75
    }
    letter_fractions = {
        "a" : .2,
        "b" : .4,
        "c" : .6,
        "d" : .8
    }

    #int() already tolerates whitespace around a bare number; stripping up front
    #extends that tolerance to grades that end in a letter or a sign
    grade = yds.strip()

    try:
        #remove +/-
        if grade[-1] in "+-":
            grade = grade[:-1]

        #reduce to difficulty grade, the 5. is not informational
        grade = grade.split(".")[1]

        #take care of split letter grades
        if "/" in grade:
            return int(grade[:-3]) + slash_fractions[grade[-3:]]

        #take care of further letter grades
        if grade[-1] in 'abcd':
            return int(grade[:-1]) + letter_fractions[grade[-1]]

        #no letter grades, return base grade
        return int(grade)
    except (IndexError, KeyError, ValueError) as err:
        #collapse the assorted low-level failures into one error that names the input
        raise ValueError("Unparseable YDS grade: {!r}".format(yds)) from err

In [16]:
# Add a numeric difficulty column by running parse_grade over every YDS grade string
# (parse_grade drops a trailing +/-, so this column is sign-agnostic).
df_final.loc[:, 'grade_numeric'] = df_final['grade.YDS'].apply(parse_grade)

In [18]:
def parse_grade_plus_minus(yds):
    '''
    Takes in a YDS rating with no risk rating or extra text and returns that rating as a decimal
    Valid ratings take the form "5.(0-15)(a-d | a/b, b/c, c/d)(+-)"
    This function takes into account + and -, treating them like a/b and c/d
    Surrounding whitespace is ignored.

    The number after the "." is the base grade. Letter suffixes add a=.2, b=.4, c=.6, d=.8,
    split letters add a/b=.25, b/c=.5, c/d=.75, and a trailing "-" / "+" is scored as
    a/b / c/d, e.g. "5.9-" -> 9.25, "5.9+" -> 9.75, "5.10a" -> 10.2.
    NOTE: a grade with neither letter nor sign maps to the bare integer ("5.9" -> 9),
    which therefore sorts below the same grade with a "-" (9.25).

    Args:
        yds: the rating as a string, e.g. "5.10a", "5.11b/c" or "5.9+"

    Returns:
        int when the grade has neither letter nor sign, float otherwise

    Raises:
        TypeError: if yds is not a string (e.g. a NaN coming out of a DataFrame column)
        ValueError: if yds cannot be parsed (empty, no ".", unknown suffix, a sign
            directly after a letter such as "5.10a+", ...); the message contains the
            offending value so bad rows are easy to find
    '''
    if not isinstance(yds, str):
        raise TypeError("YDS grade must be a string, got {}".format(type(yds).__name__))

    sign_to_slash = {'+': 'c/d',
                     '-': 'a/b'}
    slash_fractions = {
        "a/b" : .25,
        "b/c" : .5,
        "c/d" : .75
    }
    letter_fractions = {
        "a" : .2,
        "b" : .4,
        "c" : .6,
        "d" : .8
    }

    #int() already tolerates whitespace around a bare number; stripping up front
    #extends that tolerance to grades that end in a letter or a sign
    grade = yds.strip()

    try:
        #map +/- to the equivalent split letter grade
        if grade[-1] in "+-":
            grade = grade[:-1] + sign_to_slash[grade[-1]]

        #reduce to difficulty grade, the 5. is not informational
        grade = grade.split(".")[1]

        #take care of split letter grades (including the ones produced from +/-)
        if "/" in grade:
            return int(grade[:-3]) + slash_fractions[grade[-3:]]

        #take care of further letter grades
        if grade[-1] in 'abcd':
            return int(grade[:-1]) + letter_fractions[grade[-1]]

        #no letter grades, return base grade
        return int(grade)
    except (IndexError, KeyError, ValueError) as err:
        #collapse the assorted low-level failures into one error that names the input
        raise ValueError("Unparseable YDS grade: {!r}".format(yds)) from err

In [312]:
# Number of routes per numeric grade, ordered by grade value rather than by frequency.
# NOTE(review): this reads `routes`, while 'grade_numeric' is assigned on `df_final` in an
# earlier cell — presumably `routes` is derived from it in a cell not shown here; confirm.
routes['grade_numeric'].value_counts().sort_index()

0.00       170
1.00       118
2.00       400
3.00       736
4.00      1730
5.00      2555
6.00      5458
7.00      9994
8.00     13769
9.00     16618
10.00     8431
10.20     7730
10.25     1125
10.40     5687
10.50     1125
10.60     4948
10.75      713
10.80     3950
11.00     5923
11.20     5543
11.25      737
11.40     4168
11.50      864
11.60     3547
11.75      535
11.80     2888
12.00     2410
12.20     4343
12.25      584
12.40     2683
12.50      477
12.60     1885
12.75      260
12.80     1235
13.00      581
13.20     1379
13.25      135
13.40      772
13.50      113
13.60      412
13.75       53
13.80      270
14.00       64
14.20      223
14.25       21
14.40       77
14.50        9
14.60       35
14.75        3
14.80       19
15.00        3
15.20        2
15.75        1
15.80        2
Name: grade_numeric, dtype: int64

In [313]:
# Number of routes per numeric grade when +/- is folded into the value, ordered by grade value.
# NOTE(review): no visible cell creates 'grade_numeric_plus_minus'; presumably it comes from
# applying parse_grade_plus_minus to the 'grade.YDS' column — confirm and add that cell.
routes['grade_numeric_plus_minus'].value_counts().sort_index()

0.00       170
1.00       118
2.00       400
3.00       736
4.00      1730
5.00      2555
6.00      5458
7.00      8494
7.75      1500
8.00     10434
8.25       772
8.75      2563
9.00     11425
9.25      1228
9.75      3965
10.00     4033
10.20     7730
10.25     2857
10.40     5687
10.50     1125
10.60     4948
10.75     3379
10.80     3950
11.00     2660
11.20     5543
11.25     2283
11.40     4168
11.50      864
11.60     3547
11.75     2252
11.80     2888
12.00     1020
12.20     4343
12.25     1456
12.40     2683
12.50      477
12.60     1885
12.75      778
12.80     1235
13.00      226
13.20     1379
13.25      381
13.40      772
13.50      113
13.60      412
13.75      162
13.80      270
14.00       20
14.20      223
14.25       59
14.40       77
14.50        9
14.60       35
14.75        9
14.80       19
15.00        1
15.20        2
15.25        1
15.75        2
15.80        2
Name: grade_numeric_plus_minus, dtype: int64

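Including the +/- modifiers matters if we want more variability in the lower grades: below 5.10 there are no letter grades, so +/- is the only sub-grade signal. For example, 5.9 is a single bucket (9.00) without the modifiers but splits into 9.00, 9.25 and 9.75 with them.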

In [1]:
# The code below is taken from the fastText models.

In [125]:
def remove_extras(grade):
    """
    Collapses the letter suffix of a YDS grade into the coarser -/plain/+ notation.

    Suffixes "a" and "a/b" become "-", "d" and "c/d" become "+", while "b", "c" and
    "b/c" are simply dropped (e.g. "5.10a" -> "5.10-", "5.10b/c" -> "5.10").
    Grades that do not end in a letter a-d (e.g. "5.9", "5.10+") are returned unchanged.
    """
    suffix_to_sign = {
        'a': '-',
        'a/b': '-',
        'b': '',
        'b/c': '',
        'c': '',
        'c/d': '+',
        'd': '+',
    }

    # nothing to collapse unless the grade ends in a letter
    if grade[-1] not in 'abcd':
        return grade

    # a split grade carries a three-character suffix ("a/b"), a plain letter just one
    suffix_len = 3 if "/" in grade else 1
    return grade[:-suffix_len] + suffix_to_sign[grade[-suffix_len:]]

# Store the coarser -/plain/+ version of every YDS grade (letters collapsed by remove_extras).
routes['reduced_grade'] = routes['grade.YDS'].map(remove_extras)