In [1]:
import urllib.request
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from random import randint
from time import sleep
import json

In [2]:
def get_page_html(url):
    """Fetch ``url`` and return its content parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to download.

    Returns:
        A ``BeautifulSoup`` object built with the ``'html.parser'`` backend.

    Raises:
        Whatever ``urllib.request.urlopen`` raises when the request fails;
        no errors are caught here.
    """
    # The response is used as a context manager so the underlying connection
    # is closed even if parsing fails. BeautifulSoup is constructed inside the
    # block, while the response is still open for reading.
    with urllib.request.urlopen(url) as page:
        return BeautifulSoup(page, 'html.parser')


def get_rankings_table(soup):
    """Return the first element of ``soup`` whose ``id`` is ``pnlMainRankings``.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        Whatever ``soup.find`` returns for that attribute filter.
    """
    rankings_panel_filter = {'id': 'pnlMainRankings'}
    return soup.find(attrs=rankings_panel_filter)


def get_ranking_list_headings(rankings_table):
    """Return the text of every ``td`` cell in the rankings heading row.

    Args:
        rankings_table: Element that contains a child with class
            ``rankinglistheadings``.

    Returns:
        A list with the ``.text`` of each ``td`` found in that heading
        element, in the order ``findAll`` yields them.
    """
    heading_row = rankings_table.find(attrs={'class': 'rankinglistheadings'})
    heading_texts = []
    for cell in heading_row.findAll('td'):
        heading_texts.append(cell.text)
    return heading_texts


def get_rows_of_results(soup):
    """Collect the cell texts of every result row in ``soup``.

    Rows are the elements whose class is ``rlr`` or ``rlra``
    (presumably the two alternating row styles -- confirm against the page
    markup). All ``rlr`` rows come first, followed by all ``rlra`` rows, so
    the returned order is not the on-page order.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``findAll`` method.

    Returns:
        A list of rows, each row being a list with the ``.text`` of every
        child obtained by iterating over the row element.
    """
    return [
        [child.text for child in result_row]
        for row_class in ('rlr', 'rlra')
        for result_row in soup.findAll(attrs={'class': row_class})
    ]


def get_number_of_seconds(chip_time):
    """Convert a colon-separated time string into a number of seconds.

    Accepted layouts are ``"ss"``, ``"mm:ss"`` and ``"h:mm:ss"``; every field
    may carry a fractional part (e.g. ``"59.3"`` or ``"2:30:15.5"``).

    Args:
        chip_time: The time as text.

    Returns:
        The total number of seconds as a float.

    Raises:
        ValueError: If a field is not a valid number, or if the string has
            more than three colon-separated fields.
    """
    fields = chip_time.split(':')
    if len(fields) > 3:
        raise ValueError(
            f"expected at most 'h:mm:ss', got {chip_time!r}"
        )
    # Align the unit weights with the right-most field so a value with no
    # colon is read as seconds (not minutes), while two and three field
    # values keep their existing minutes/hours interpretation.
    seconds_per_unit = (60 * 60, 60, 1)[-len(fields):]
    return sum([float(value) * unit for value, unit in zip(fields, seconds_per_unit)])

In [3]:
def construct_url(year, event="Mar", sex="W"):
    """Build the rankings-list URL for one event, sex and year.

    The age group is always ``ALL``. Arguments are interpolated as-is; no
    URL escaping is applied.

    Args:
        year: Value for the ``year`` query parameter.
        event: Value for the ``event`` query parameter.
        sex: Value for the ``sex`` query parameter.

    Returns:
        The full URL as a string.
    """
    query_string = f"event={event}&agegroup=ALL&sex={sex}&year={year}"
    return "https://thepowerof10.info/rankings/rankinglist.aspx?" + query_string

In [4]:
def get_results_for_event_year(event, year, gender='W'):
    """Download one rankings page and return it as a DataFrame.

    Args:
        event: Event code passed through to ``construct_url``.
        year: Year passed through to ``construct_url``.
        gender: Value used for the URL's ``sex`` parameter.

    Returns:
        A DataFrame indexed by ``Rank`` and sorted ascending by ``time``.
        The scraped ``Perf`` column is renamed to ``Chip``, and ``time``
        holds ``Chip`` converted with ``get_number_of_seconds``. Rows whose
        ``Rank`` cell is an empty string are dropped.
    """
    soup = get_page_html(construct_url(year, event, gender))

    # Headings come from the main rankings panel; rows are searched for in
    # the whole page.
    headings = get_ranking_list_headings(get_rankings_table(soup))
    scraped = pd.DataFrame(get_rows_of_results(soup), columns=headings)
    scraped = scraped.rename(columns={'Perf': 'Chip'})

    # Keep only the rows that actually carry a rank.
    has_rank = scraped.reset_index()['Rank'] != ''
    ranked = scraped.loc[has_rank]

    timed = ranked.assign(time=ranked['Chip'].apply(get_number_of_seconds))
    return timed.sort_values('time').set_index('Rank')

In [10]:
def get_year_results_array(df):
    """Turn a results DataFrame into a list of per-row dictionaries.

    Args:
        df: DataFrame that, once its index is reset, contains the columns
            ``Rank``, ``Name``, ``Club``, ``Date``, ``Chip`` and ``time``.

    Returns:
        One dict per row, in row order, holding only those six keys.
    """
    wanted = ['Rank', 'Name', 'Club', 'Date', 'Chip', 'time']
    # reset_index() turns the index (e.g. 'Rank') back into a regular column
    # so it can be selected alongside the others.
    return [
        record[wanted].to_dict()
        for _, record in df.reset_index().iterrows()
    ]

In [11]:
def get_rankings_for_event(event, gender, years=[2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010]):
    """Scrape the rankings of one event for several years.

    Args:
        event: Event code forwarded to ``get_results_for_event_year``.
        gender: Gender code forwarded to ``get_results_for_event_year``.
        years: Years to fetch, processed in the given order. The default
            list is only read, never modified.

    Returns:
        A dict mapping each year to the list of row dictionaries produced by
        ``get_year_results_array``.
    """
    rankings_by_year = {}
    for season in years:
        season_df = get_results_for_event_year(event, season, gender)
        # 'Perf' is already renamed by get_results_for_event_year, so this
        # rename leaves such a frame unchanged.
        season_df = season_df.rename(columns={'Perf': 'Chip'})
        rankings_by_year[season] = get_year_results_array(season_df)
        # Wait a random 1-10 seconds after every request.
        sleep(randint(1, 10))
    return rankings_by_year

def save_results_json(event, gender='W',
                      output_dir='/Users/eileentoomer/Code/runningData/src/runningData/data'):
    """Scrape the rankings for ``event`` and write them to a JSON file.

    The file is named ``{event}Rankings.json`` when ``gender`` is ``'W'`` and
    ``{event}RankingsMen.json`` for any other value.

    Args:
        event: Event code forwarded to ``get_rankings_for_event``.
        gender: Gender code forwarded to ``get_rankings_for_event``; also
            selects the file name suffix.
        output_dir: Directory the file is written to. Defaults to the
            location that was previously hard-coded, so existing calls write
            to the same path as before.

    Returns:
        None.
    """
    # Fetch first, so a failed scrape never truncates an existing file.
    results_dict = get_rankings_for_event(event, gender)
    suffix = '' if gender == 'W' else 'Men'
    output_path = f'{output_dir}/{event}Rankings{suffix}.json'
    with open(output_path, 'w') as fp:
        json.dump(results_dict, fp)
        

In [20]:
# Scrape the 'Mar' event with the default gender ('W') and write it to JSON.
save_results_json(event='Mar')