# Importing Libraries

In [1]:
from datetime import datetime, timedelta
import time, csv
import ETL_pipeline_functions 

# Data Collection

In [2]:
def ETL_pipeline():
    """Run one incremental extract-transform-load pass over the Strava data.

    Steps, in order:
      1. Load the Strava access token and the geocoding key from the
         ``.secret`` credential files.
      2. Read the most recent timestamp stored in ``data/activities.csv`` and
         convert it to unix time for the activities request.
      3. Request activities and keep only those whose ``activity_type`` is
         ``'Run'``, passing each through ``activity_cleaner``.
      4. If any runs remain, request their splits and zones, then append the
         activities, splits and zones to their respective CSV files.
      5. Append a ``[date, number_of_activities]`` row to
         ``data/request_log.csv``.

    All API requests are made *before* any file is appended. The next run
    resumes from the last timestamp in ``data/activities.csv``, so appending
    activities and then failing on the splits/zones requests would leave those
    activities permanently without splits or zones.

    Returns:
        None. Progress is reported with ``print``. When there are no new runs
        the function prints a message and returns without touching any file,
        including the request log.

    Raises:
        Whatever the ``ETL_pipeline_functions`` helpers or the file operations
        raise; nothing is caught here.
    """
    # credentials for the Strava and Google Geocoding APIs
    strava_access_token = ETL_pipeline_functions.strava_token_exchange('.secret/strava_api_credentials.json')
    geocode_key = ETL_pipeline_functions.geocode_key_getter('.secret/geocode_api_credentials.json')

    # most recent stored timestamp, converted to unix time for the request
    timestamp = ETL_pipeline_functions.last_timestamp('data/activities.csv')
    unix_time = ETL_pipeline_functions.timestamp_to_unix(timestamp)

    # request activities from the Strava API
    activities = ETL_pipeline_functions.request_activities(strava_access_token, geocode_key, unix_time)

    # the filter is applied to the raw activity, before cleaning
    cleaned_activities = [
        ETL_pipeline_functions.activity_cleaner(activity)
        for activity in activities
        if activity['activity_type'] == 'Run'
    ]
    n = len(cleaned_activities)

    # nothing new: report and stop without writing to any file
    if not n:
        print("no activities to append")
        return

    activity_ids = [activity['id'] for activity in cleaned_activities]

    # Fetch everything first so a failed request cannot leave activities.csv
    # ahead of the splits/zones files.
    # NOTE(review): assumes request_splits/request_zones depend only on their
    # arguments, not on the contents of data/activities.csv - confirm.
    splits = ETL_pipeline_functions.request_splits(strava_access_token, activity_ids)
    zones = ETL_pipeline_functions.request_zones(strava_access_token, activity_ids)

    # all requests succeeded: append in the same order as before
    ETL_pipeline_functions.append_requests(cleaned_activities, 'data/activities.csv')
    ETL_pipeline_functions.append_requests(splits, 'data/activity_splits.csv')
    ETL_pipeline_functions.append_requests(zones, 'data/activity_zones.csv')

    # log today's date and the number of activities appended
    date = datetime.now().strftime('%d/%m/%Y')
    # the with-block closes the file; no explicit close() is needed
    with open('data/request_log.csv', 'a', newline='') as log_file:
        csv.writer(log_file).writerow([date, n])

    print("ETL pipeline complete")

In [3]:
# run the pipeline once; it appends to the CSV files under data/ when new runs are found
ETL_pipeline()

data/activities.csv appended
data/activity_splits.csv appended
data/activity_zones.csv appended
ETL pipeline complete


In [4]:
# NOTE(review): second invocation of the same pipeline; every run that finds
# activities appends to the CSV files again - confirm this repeat is intended
ETL_pipeline()

data/activities.csv appended
data/activity_splits.csv appended
data/activity_zones.csv appended
ETL pipeline complete
