In [45]:
from urllib.request import Request, urlopen, urlretrieve
import json
import pandas as pd
from bs4 import BeautifulSoup
import time
from utils import db, osm

In [52]:
url = "https://images.parkrun.com/events.json"
filepath = "data/events.json"

# Download the events feed. The target path is taken from `filepath` so the
# download location and the file opened below cannot drift apart.
urlretrieve(url, filepath)

# Decode explicitly as UTF-8: the feed contains non-ASCII event names, and
# without `encoding` open() falls back to the platform's locale encoding.
with open(filepath, encoding="utf-8") as file:
    events_json = json.load(file)
    

In [53]:
# Flatten the list of features into a table with one row per event; nested
# keys end up as dotted column names such as "properties.eventname".
events = pd.json_normalize(data=events_json["events"]["features"])
events.head()

Unnamed: 0,id,type,geometry.type,geometry.coordinates,properties.eventname,properties.EventLongName,properties.EventShortName,properties.LocalisedEventLongName,properties.countrycode,properties.seriesid,properties.EventLocation
0,193,Feature,Point,"[153.051636, -27.471134]",newfarm,New Farm parkrun,New Farm,,3,1,"New Farm, Brisbane"
1,205,Feature,Point,"[153.428879, -27.972706]",mainbeach,Main Beach parkrun,Main Beach,,3,1,"Main Beach, Goldcoast"
2,349,Feature,Point,"[153.522781, -28.166758]",kirra,Kirra parkrun,Kirra,,3,1,"Kirra Beach, Gold Coast QLD, Australia"
3,365,Feature,Point,"[153.281709, -27.885247]",coomera,Coomera parkrun,Coomera,,3,1,Tallowwood Park
4,417,Feature,Point,"[153.177898, -27.442933]",wynnum,Wynnum parkrun,Wynnum,,3,1,"Wynnum Manly Foreshore, QLD"


In [54]:
def copy_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return an independent copy of ``df``.

    Intended as a `.pipe` step so that later steps work on their own frame
    rather than on the caller's.
    """
    duplicate = df.copy(deep=True)
    return duplicate


def filter_countrycode(df: pd.DataFrame, country_code: int) -> pd.DataFrame:
    """Keep only the rows whose ``properties.countrycode`` equals ``country_code``.

    The original row labels are preserved; the input frame is not modified.
    """
    in_country = df["properties.countrycode"].eq(country_code)
    return df[in_country]


def select_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Reduce ``df`` to the event name, short name and coordinates columns.

    The columns are returned in that fixed order. A ``KeyError`` is raised if
    any of them is missing from ``df``.
    """
    wanted = ["properties.eventname", "properties.EventShortName"]
    wanted.append("geometry.coordinates")
    return df[wanted]


def rename_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Replace the dotted source column names with short ones.

    ``properties.eventname`` becomes ``name``, ``properties.EventShortName``
    becomes ``short_name`` and ``geometry.coordinates`` becomes
    ``coordinates``. Any other column is left untouched, and a new frame is
    returned.
    """
    short_names = {
        "properties.eventname": "name",
        "properties.EventShortName": "short_name",
        "geometry.coordinates": "coordinates",
    }
    return df.rename(columns=short_names)


def extract_coordinates(df: pd.DataFrame) -> pd.DataFrame:
    """Run ``osm.split_coordinates`` on every row, then drop ``coordinates``.

    ``osm.split_coordinates`` is defined in ``utils``. It presumably adds the
    ``latitude`` / ``longitude`` fields seen in the pipeline preview (TODO
    confirm against the helper).
    """
    expanded = df.apply(osm.split_coordinates, axis="columns")
    return expanded.drop(columns="coordinates")


def fetch_location_info(df: pd.DataFrame) -> pd.DataFrame:
    """Run ``osm.assign_state_city`` on every row and return the result.

    ``osm.assign_state_city`` is defined in ``utils``. It presumably fills in
    the ``state`` / ``location`` fields seen in the pipeline preview (TODO
    confirm against the helper).
    """
    return df.apply(osm.assign_state_city, axis="columns")


In [55]:
# Value of `properties.countrycode` used to select the Polish events
# (the preview of `events_pl` shows only Polish locations).
COUNTRY_CODE_PL = 74

# Filter -> trim -> rename -> enrich. Each step returns a new frame, so
# `events` itself is left as loaded.
events_pl = (
    events
    .pipe(filter_countrycode, COUNTRY_CODE_PL)
    .pipe(select_columns)
    .pipe(rename_columns)
    .pipe(extract_coordinates)
    .pipe(fetch_location_info)
    .reset_index(drop=True)  # renumber rows from 0 after filtering
)

events_pl.head()

Unnamed: 0,name,short_name,latitude,longitude,state,location
0,gdynia,Gdynia,54.502632,18.558699,pomorskie,Gdynia
1,gdansk,Gdańsk,54.41428,18.6017,pomorskie,Gdańsk
2,lodz,Łódź,51.752244,19.441498,łódzkie,Łódź
3,poznan,Poznań,52.418883,16.933289,wielkopolskie,Poznań
4,warszawapraga,Warszawa-Praga,52.241493,21.050963,mazowieckie,Warszawa


In [56]:
# Write the enriched events table to disk, leaving out the row index.
events_pl.to_csv(path_or_buf="data/locations.csv", index=False)