In [1]:
import pandas as pd
import re

In [2]:
# Lookup table from a route's rating string to a numeric grade; consumed by
# rating_to_grade() to build the 'grade' column.
#
# Pattern followed by most entries:
#   'V<n>'            -> n
#   'V<n>-'           -> n - 0.25
#   'V<n>+'           -> n + 0.25
#   'V<n>-<n+1>'      -> n + 0.5
#   '... PG13'        -> same value as the rating without the suffix
#   '... R' / '... X' -> usually 0.25 above the rating without the suffix
#   'V-easy'          -> -1
# A leading '5.x' prefix (e.g. '5.9 V0', '5.11b V2') does not affect the value.
#
# NOTE(review): several entries do not follow the R/X rule above, e.g.
# 'V0- R' (-0.25, same as 'V0-'), '5.12c V5 X' (5, same as 'V5') and
# 'V12-13 R' (12.5) -- confirm whether these are intentional.
# Keys are matched exactly, so a rating string that is not listed here has no
# numeric value in this table.
conversion_dict = {'V-easy': -1, 'V-easy PG13': -1, '5.9 V-easy': -1, 'V-easy R': -0.75,
                   'V0-': -0.25, '5.9 V0-': -0.25, 'V0- PG13': -0.25, 'V0- R': -0.25,
                   '5.8+ V0': 0, '5.9 V0': 0, 'V0': 0, 'V0 PG13': 0,
                   '5.10a V0 R': 0.25, 'V0 R': 0.25, 'V0 X': 0.25, '5.8+ V0 X': 0.25, 'V0+': 0.25, 'V0+ PG13': 0.25,
                   'V0+ R': 0.5, 'V0-1': 0.5, '5.10- V0-1': 0.5,
                   'V1-': 0.75, 'V1': 1, 'V1 PG13': 1, '5.9 V1': 1,
                   'V1 R': 1.25, 'V1 X':1.25, 'V1+': 1.25, 'V1+ PG13': 1.25, 'V1-2': 1.5, 
                   'V2-': 1.75, '5.10+ V2': 2, 'V2': 2, 'V2 PG13': 2, '5.11b V2': 2,
                   'V2 R': 2.25, 'V2+': 2.25, 'V2-3': 2.5, '5.10- V2-3': 2.5, 'V2+ X': 2.5, 
                   'V3-': 2.75, 'V3- R': 3, 'V3': 3, 'V3 PG13': 3,
                   'V3 R': 3.25, '5.11c V3 R': 3.25, 'V3+': 3.25, 'V3+ R':3.5, 'V3-4': 3.5,
                   'V4-': 3.75, 'V4': 4, 'V4 PG13': 4,
                   'V4 R': 4.25, 'V4 X': 4.25, 'V4+': 4.25, 'V4+ PG13': 4.25, 'V4-5': 4.5,
                   'V5-': 4.75, 'V5': 5, 'V5 PG13': 5, '5.12c V5 X': 5,
                   'V5 R': 5.25, 'V5+': 5.25, 'V5+ X': 5.5, 'V5-6': 5.5,
                   'V6-': 5.75, 'V6- PG13': 5.75, 'V6- R': 6, 'V6': 6, 'V6 PG13': 6,
                   'V6 R': 6.25, 'V6+': 6.25,  'V6-7': 6.5, 'V6-7 PG13': 6.5, 
                   'V7-': 6.75, 'V7': 7, 'V7 PG13': 7,
                   'V7 R': 7.25, 'V7+':7.25, 'V7-8': 7.5,   
                   'V7-8 R': 7.75, 'V8-': 7.75, 'V8': 8, 'V8 PG13': 8,
                   'V8 R':8.25, 'V8 X': 8.25, 'V8+': 8.25, 'V8-9': 8.5, 
                   'V9-': 8.75, 'V9': 9, 'V9 PG13': 9,
                   'V9 R': 9.25, 'V9+': 9.25, 'V9+ PG13': 9.25, 'V9 X': 9.25, 'V9-10':9.5, 'V9-10 PG13': 9.5, 
                   'V10-': 9.75, 'V10': 10, 'V10 PG13': 10,
                   'V10 R': 10.25, 'V10 X': 10.25, 'V10+': 10.25,  'V10-11': 10.5, 'V10-11 PG13': 10.5,
                   'V11-': 10.75, 'V11': 11,
                   'V11 R': 11.25, 'V11 X': 11.25, 'V11-12': 11.5,
                   'V12-': 11.75, 'V12': 12, 'V12 PG13': 12,
                   'V12+': 12.25, 'V12-13 R': 12.5,
                   'V13': 13,  'V13 PG13': 13, 'V13 R': 13.25, 'V13-14 PG13': 13.5,
                   'V14': 14}

In [3]:
def clean_column_names(df):
    """Return a copy of ``df`` with normalised column names.

    Every column name is stripped of surrounding whitespace, lower-cased and
    has its spaces replaced by underscores. A column that ends up being called
    ``length`` is then renamed to ``length_``.

    The input frame is left untouched: the renaming happens on a copy, so
    re-running a cell that calls this function on the same raw frame always
    starts from the same column names.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column names are strings.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same data and the normalised column names.
    """
    renamed = df.copy()
    # regex=False: the space is a literal character, not a pattern.
    renamed.columns = (
        renamed.columns.str.strip().str.lower().str.replace(' ', '_', regex=False)
    )
    return renamed.rename(columns={"length": "length_"})

def create_col_url_id(df):
    """Add an integer ``url_id`` column parsed from the ``url`` column.

    The id is the run of digits that directly follows ``/route/`` in each URL,
    e.g. ``https://www.mountainproject.com/route/105798994/some-name`` gives
    ``105798994``. The scheme and host are not inspected, so ``http://`` or
    host variants without ``www.`` are accepted as well. The id can be used
    to check the data for duplicate routes.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``url`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same frame, with an ``int64`` ``url_id`` column added.

    Raises
    ------
    ValueError
        If any ``url`` value (including a missing one) has no ``/route/<digits>``
        part.
    """
    # astype(str) turns missing values into text that cannot match, so they are
    # reported by the check below instead of failing inside the .str accessor.
    route_ids = df['url'].astype(str).str.extract(r"/route/(\d+)", expand=False)
    unparsed = route_ids.isna()
    if unparsed.any():
        examples = df.loc[unparsed, 'url'].head(5).tolist()
        raise ValueError(
            "could not extract a route id from {} url value(s), e.g. {!r}".format(
                int(unparsed.sum()), examples
            )
        )
    df['url_id'] = route_ids.astype('int64')
    return df

def clean_col_avg_stars(df):
    """Overwrite every -1.0 in the ``avg_stars`` column with 0.

    The column is reassigned on the frame that was passed in, and that same
    frame is returned.
    """
    stars = df["avg_stars"]
    df["avg_stars"] = stars.replace(-1.0, 0)  # -1.0 marks an erroneous 'Avg Stars' rating
    return df

def rating_to_grade(df, mapping=None):
    """Add a numeric ``grade`` column derived from the ``rating`` column.

    Each rating string is looked up (exact match) in ``mapping``. The result
    is always a ``float64`` column: a rating that has no entry in ``mapping``
    becomes ``NaN`` and is reported in a warning, rather than being carried
    over as a string into a column that is meant to be numeric.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``rating`` column. It is modified in place.
    mapping : dict, optional
        Rating string -> numeric grade. Defaults to the module-level
        ``conversion_dict``.

    Returns
    -------
    pandas.DataFrame
        The same frame, with the ``grade`` column added.
    """
    import warnings

    if mapping is None:
        mapping = conversion_dict

    ratings = df['rating']
    # map() (unlike replace()) yields NaN for keys that are not in the mapping,
    # so the dtype does not depend on pandas inferring a downcast.
    grades = ratings.map(mapping).astype('float64')

    unmapped = ratings[grades.isna() & ratings.notna()]
    if not unmapped.empty:
        warnings.warn(
            "no numeric grade for rating(s): {}".format(sorted(set(unmapped.astype(str)))),
            stacklevel=2,
        )

    df['grade'] = grades
    return df

def clean(df):
    """Run the full cleaning pipeline on a raw frame and return the result.

    The steps are applied in order, each one receiving the frame returned by
    the previous one: normalise column names, add ``url_id``, fix
    ``avg_stars``, add ``grade``.
    """
    pipeline = (
        clean_column_names,
        create_col_url_id,
        clean_col_avg_stars,
        rating_to_grade,
    )
    for step in pipeline:
        df = step(df)
    return df

In [4]:
def _clean_and_export(source_csv, target_csv):
    """Import one downloaded CSV, clean it and export the cleaned copy.

    Parameters
    ----------
    source_csv : str
        Path of the CSV to read.
    target_csv : str
        Path the cleaned CSV is written to (without the index). Its parent
        directory is created if it does not exist yet.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(raw_df, clean_df)``: the frame as read and the frame returned by
        ``clean``.
    """
    from pathlib import Path

    raw_df = pd.read_csv(source_csv)  # Import
    clean_df = clean(raw_df)  # Clean
    # A fresh checkout may not contain the output folder yet.
    Path(target_csv).parent.mkdir(parents=True, exist_ok=True)
    clean_df.to_csv(target_csv, index=False)  # Export
    return raw_df, clean_df


# One call per climbing area; the raw and cleaned frames stay available under
# the same names as before.
buttermilks_df, buttermilks_clean_df = _clean_and_export(
    r"data/downloads/buttermilks.csv", r"data/clean-data/buttermilks-clean.csv")

druid_stones_df, druid_stones_clean_df = _clean_and_export(
    r"data/downloads/druid_stones.csv", r"data/clean-data/druid_stones-clean.csv")

happy_boulders_df, happy_boulders_clean_df = _clean_and_export(
    r"data/downloads/happy_boulders.csv", r"data/clean-data/happy_boulders-clean.csv")

sad_boulders_df, sad_boulders_clean_df = _clean_and_export(
    r"data/downloads/sad_boulders.csv", r"data/clean-data/sad_boulders-clean.csv")

joshua_tree_df, joshua_tree_clean_df = _clean_and_export(
    r"data/downloads/joshua_tree.csv", r"data/clean-data/joshua_tree-clean.csv")