# List activities summary

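This notebook extracts information from the GoldenCheetah database (the athlete's `cache/rideDB.json` file) and generates a CSV file. The goal is to create a list of activities to make it easier to select a route based on duration, distance, TSS, etc. Only bike activities recorded with a Polar device are exported.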

For every activity, the following data are collected:

- Date and time
- Route
- Duration
- Distance
- Elevation gain
- CP, NP, TSS, IF
- Average heart rate and average speed
- Sport, device, athlete's weight and keywords

In [1]:
import json
import datetime as dt
import pytz
import csv

In [2]:
# Athlete whose GoldenCheetah data is read.
athlete = 'david'
# Root folder of the GoldenCheetah data; the trailing slash is required
# because the paths below are built by plain concatenation.
gc_path = '/home/david/Documentos/goldencheetah/'
athlete_path = '{}{}/'.format(gc_path, athlete)
rideDb_path = '{}cache/rideDB.json'.format(athlete_path)

# Destination of the generated CSV file.
csv_file = './activities.csv'

In [3]:
# UTC zone object; get_datetime uses it to mark parsed ride dates as UTC.
utc = pytz.utc
# Zone that get_datetime converts ride dates to.
tz = pytz.timezone('Europe/Madrid')

## Load the data

In [4]:
# 'utf-8-sig' decodes UTF-8 and drops a leading byte-order mark if present.
with open(rideDb_path, mode='r', encoding='utf-8-sig') as ride_db_file:
    rideDB = json.load(ride_db_file)

In [5]:
# Activity records stored under the 'RIDES' key of the loaded ride cache.
rides = rideDB['RIDES']
len(rides)

516

In [6]:
# Keep the last record as the sample used by the exploratory cells below.
ride = rides[-1]
list(ride.keys())

['date',
 'filename',
 'fingerprint',
 'crc',
 'metacrc',
 'timestamp',
 'dbversion',
 'udbversion',
 'color',
 'present',
 'sport',
 'weight',
 'zonerange',
 'hrzonerange',
 'samples',
 'METRICS',
 'TAGS',
 'INTERVALS']

## Data extraction

### Dates

In [7]:
def get_date_str(ride):
    """Return the raw value stored under the ride's 'date' key."""
    date_str = ride['date']
    return date_str

get_date_str(ride)

'2020/12/22 13:30:04 UTC'

In [8]:
def get_datetime(ride):
    """Return the ride's date as a timezone-aware datetime expressed in `tz`.

    The date string is parsed with the '%Y/%m/%d %H:%M:%S %Z' layout, marked
    as UTC with `utc.localize` (whatever zone name the string carries) and
    then converted to the module-level `tz` zone.
    """
    parsed = dt.datetime.strptime(get_date_str(ride), '%Y/%m/%d %H:%M:%S %Z')
    return utc.localize(parsed).astimezone(tz)

get_datetime(ride)

datetime.datetime(2020, 12, 22, 14, 30, 4, tzinfo=<DstTzInfo 'Europe/Madrid' CET+1:00:00 STD>)

In [9]:
def get_datetime_str(ride):
    """Return the ride's converted date formatted as 'YYYY/MM/DD HH:MM:SS +hhmm'."""
    return get_datetime(ride).strftime('%Y/%m/%d %H:%M:%S %z')

get_datetime_str(ride)

'2020/12/22 14:30:04 +0100'

### Sport

In [10]:
def get_sport(ride):
    """Return the value stored under the ride's 'sport' key."""
    return ride['sport']

get_sport(ride)

'Bike'

### Athlete's weight

In [11]:
def get_weight_float(ride):
    """Return the ride's 'weight' entry converted to float."""
    return float(ride['weight'])

get_weight_float(ride)

71.3

### Ride has power data

def has_power_data(ride):
    """Return True when the ride record has a 'zonerange' entry, else False.

    NOTE(review): a later cell in this notebook defines `has_power_data`
    again, based on the 'Data' flags tag; whichever definition runs last is
    the one in effect.
    """
    try:
        ride['zonerange']
    except (KeyError, IndexError, TypeError):
        # Missing key, or `ride` cannot be looked up by this key at all.
        return False
    return True
    
has_power_data(ride)

### Tags

In [12]:
def get_tags_dict(ride):
    """Return the object stored under the ride's 'TAGS' key."""
    return ride['TAGS']
    
tags = get_tags_dict(ride)
tags.keys()

dict_keys(['Aerobic TISS', 'Anaerobic TISS', 'Athlete', 'Average Cadence', 'Average Heart Rate', 'Average Power', 'Average Speed', 'BikeScore™', 'BikeStress', 'CP', 'Calendar Text', 'Change History', 'Daniels EqP', 'Daniels Points', 'Data', 'Device', 'Device Info', 'Distance', 'Duration', 'Elevation Gain', 'File Format', 'Filename', 'GOVSS', 'Keywords', 'Month', 'Notes', 'Objective', 'Pool Length', 'RPE', 'Route', 'Source Filename', 'Sport', 'SubSport', 'SwimScore', 'TSS', 'Time Moving', 'Weekday', 'Weight', 'Work', 'Workout Code', 'Year', 'xPower'])

In [13]:
tags

{'Aerobic TISS': '0 ',
 'Anaerobic TISS': '0 ',
 'Athlete': 'david ',
 'Average Cadence': '0 ',
 'Average Heart Rate': '0 ',
 'Average Power': '0 ',
 'Average Speed': '0 ',
 'BikeScore™': '0 ',
 'BikeStress': '0 ',
 'CP': '0 ',
 'Calendar Text': 'Gijon-Infanzon-Venta las Ranas-El Pedroso-La Cruz de Peon-Curbiellu-Infanzon-Gijon\n ',
 'Change History': 'Cambios en mar. dic. 22 19:10:54 2020:\nCambios en mar. dic. 22 19:11:13 2020:\nCambios en mar. dic. 22 19:14:42 2020:\nCambios en mar. dic. 22 19:21:29 2020:\nCambios en mar. dic. 22 19:22:39 2020:\nCambios en mar. dic. 22 19:23:00 2020:\nCambios en mar. dic. 22 19:23:21 2020:\nCambios en mar. dic. 22 19:25:37 2020:\nCambios en mar. dic. 22 19:27:08 2020:\nCambios en mar. dic. 22 19:28:24 2020:\nCambios en mar. dic. 22 19:29:10 2020:\nCambios en mar. dic. 22 19:29:21 2020:\nCambios en mar. dic. 22 19:34:39 2020:\n ',
 'Daniels EqP': '0 ',
 'Daniels Points': '0 ',
 'Data': 'TDSPHC-AGL----- ',
 'Device': 'Polar V650 ',
 'Device Info': ' '

#### Device

In [14]:
def get_device(ride):
    """Return the ride's 'Device' tag with surrounding spaces and newlines removed."""
    raw_device = get_tags_dict(ride)['Device']
    return raw_device.strip(' \n')

get_device(ride)

'Polar V650'

#### Route

In [15]:
def get_route(ride):
    """Return the ride's 'Route' tag cleaned up, or '' when it is unavailable.

    Surrounding spaces and newlines are stripped and every ' - ' separator is
    collapsed to '-'.

    Raises:
        KeyError: if the ride has no 'TAGS' entry.
    """
    tags = ride['TAGS']
    try:
        route = tags['Route'].strip(' \n')
    except (KeyError, TypeError, AttributeError):
        # Tag missing, or its value is not a string: fall back to ''.
        return ''
    return route.replace(' - ', '-')

get_route(ride)

'Gijon-Infanzon-Venta las Ranas-El Pedroso-La Cruz de Peon-Curbiellu-Infanzon-Gijon'

#### Flags

['Extra' tab](https://github.com/GoldenCheetah/GoldenCheetah/wiki/UG_ChartTypes_Activities#extra-tab)

This data can help detect rides with missing data (cadence, power, etc.).

| Pos | Char | Description |
|:---:|:----:|:------------|
| 0   | T    | Time |
| 1   | D    | Distance |
| 2   | S    | Speed |
| 3   | P    | Power |
| 4   | H    | Heart rate |
| 5   | C    | Cadence |
| 6   | N    | Torque |
| 7   | A    | Altitude |
| 8   | G    | GPS |
| 9   | L    | Slope |
| 10  | W    | Wind speed |
| 11  | E    | Temperature |
| 12  | V    | Left/Right balance |


In [16]:
def get_flags(ride):
    """Return the ride's 'Data' tag unchanged (no stripping is applied)."""
    return get_tags_dict(ride)['Data']

get_flags(ride)

'TDSPHC-AGL----- '

In [17]:
# Maps a readable flag name to its position ('pos') and character ('val') in
# the 'Data' tag. The pairs are listed in position order, so the enumerate
# index is the position.
flags_mapper = {
    name: {'pos': pos, 'val': char}
    for pos, (name, char) in enumerate([
        ('time', 'T'),
        ('distance', 'D'),
        ('speed', 'S'),
        ('power', 'P'),
        ('heart_rate', 'H'),
        ('cadence', 'C'),
        ('torque', 'N'),
        ('altitude', 'A'),
        ('gps', 'G'),
        ('slope', 'L'),
        ('wind', 'W'),
        ('temperature', 'E'),
        ('balance', 'V'),
    ])
}

In [18]:
def get_flag_bool(ride, flag):
    """Return True when the character mapped to `flag` occurs in the ride's flags.

    `flag` is a key of `flags_mapper`. The check is a membership test over the
    whole flags string; the character's position is not verified.
    """
    present_flags = get_flags(ride)
    return flags_mapper[flag]['val'] in present_flags

get_flag_bool(ride, 'time')

True

#### Ride has power data

In [19]:
def has_power_data(ride):
    """Return get_flag_bool's answer for the 'power' flag of the ride."""
    return get_flag_bool(ride, 'power')

has_power_data(ride)

True

#### Ride has distance

In [20]:
def has_distance_data(ride):
    """Return get_flag_bool's answer for the 'distance' flag of the ride."""
    return get_flag_bool(ride, 'distance')

has_distance_data(ride)

True

#### Ride has cadence data

In [21]:
def has_cadence_data(ride):
    """Return get_flag_bool's answer for the 'cadence' flag of the ride."""
    return get_flag_bool(ride, 'cadence')

has_cadence_data(ride)

True

#### Ride has heart rate data

In [22]:
def has_heart_rate_data(ride):
    """Return get_flag_bool's answer for the 'heart_rate' flag of the ride."""
    return get_flag_bool(ride, 'heart_rate')

has_heart_rate_data(ride)

True

#### Ride has speed data

In [23]:
def has_speed_data(ride):
    """Return get_flag_bool's answer for the 'speed' flag of the ride."""
    return get_flag_bool(ride, 'speed')

has_speed_data(ride)

True

#### Keywords

In [24]:
def get_keywords(ride):
    """Return the ride's 'Keywords' tag stripped, or '' when it is unavailable.

    Surrounding spaces and newlines are removed; the text is otherwise
    returned as stored.

    Raises:
        KeyError: if the ride has no 'TAGS' entry.
    """
    tags = ride['TAGS']
    try:
        return tags['Keywords'].strip(' \n')
    except (KeyError, TypeError, AttributeError):
        # Tag missing, or its value is not a string: fall back to ''.
        return ''

get_keywords(ride)

'SS,SweetSpot,L4,Threshold'

### Metrics

In [25]:
def get_metrics_dict(ride):
    """Return the object stored under the ride's 'METRICS' key."""
    return ride['METRICS']

metrics = get_metrics_dict(ride)
metrics.keys()

dict_keys(['a_skiba_xpower', 'a_skiba_relative_intensity', 'a_skiba_bike_score', 'a_skiba_variability_index', 'a_skiba_response_index', 'a_coggan_np', 'a_coggan_if', 'a_coggan_tss', 'a_coggam_variability_index', 'a_friel_efficiency_factor', 'a_coggan_tssperhour', 'aerobic_decoupling', 'power_index', 'peak_power_index', 'activity_date', 'ride_count', 'workout_time', 'time_recording', 'time_riding', 'time_carrying', 'total_distance', 'climb_rating', 'athlete_weight', 'elevation_gain', 'elevation_loss', 'total_work', 'average_speed', 'average_power', 'average_apower', 'nonzero_power', 'average_hr', 'average_ct', 'heartbeats', 'hrpw', 'wb', 'wattsRPE', 'ap_percent_max', 'hrnp', 'average_cad', 'average_temp', 'max_power', 'max_heartrate', 'min_heartrate', 'max_ct', 'max_speed', 'max_cadence', 'max_temp', 'min_temp', 'ninety_five_percent_hr', 'vam', 'eoa', 'gradient', 'meanpowervariance', 'maxpowervariance', 'activity_crc', 'atiss_score', 'antiss_score', 'cp_setting', 'skiba_xpower', 'skiba_

#### Duration

In [26]:
def get_duration_secs(ride):
    """Return the ride's 'time_riding' metric as a float, or -1 when unavailable.

    The metric may be stored as a plain value or as a list whose first item
    is the value (the form other getters in this notebook index with [0]);
    both are accepted.

    Raises:
        KeyError: if the ride has no 'METRICS' entry.
    """
    metrics = ride['METRICS']
    try:
        raw = metrics['time_riding']
        if isinstance(raw, (list, tuple)):
            raw = raw[0]
        return float(raw)
    except (KeyError, IndexError, TypeError, ValueError):
        # Metric missing or not convertible to a number.
        return -1

get_duration_secs(ride)

7772.0

#### Distance

In [27]:
def get_distance_km(ride):
    """Return the ride's 'total_distance' metric rounded to 1 decimal, or -1.

    The metric may be stored as a plain value or as a list whose first item
    is the value; both are accepted. -1 is returned when the metric is
    missing or not numeric.

    Raises:
        KeyError: if the ride has no 'METRICS' entry.
    """
    metrics = ride['METRICS']
    try:
        raw = metrics['total_distance']
        if isinstance(raw, (list, tuple)):
            raw = raw[0]
        return round(float(raw), 1)
    except (KeyError, IndexError, TypeError, ValueError):
        return -1

get_distance_km(ride)

51.8

#### Elevation gain

In [28]:
def get_elevation_gain(ride):
    """Return the ride's 'elevation_gain' metric as a float, or 0 when unavailable.

    The metric may be stored as a plain value or as a list whose first item
    is the value; both are accepted. Note that the fallback here is 0, not
    the -1 used by the other metric getters in this notebook.

    Raises:
        KeyError: if the ride has no 'METRICS' entry.
    """
    metrics = ride['METRICS']
    try:
        raw = metrics['elevation_gain']
        if isinstance(raw, (list, tuple)):
            raw = raw[0]
        return float(raw)
    except (KeyError, IndexError, TypeError, ValueError):
        return 0

get_elevation_gain(ride)

821.0

#### Normalized Power

In [29]:
def get_norm_power(ride):
    """Return the ride's 'coggan_np' metric rounded to 1 decimal, or -1.

    The metric is normally a list whose first item is the value. A plain
    (non-list) value is also accepted; indexing it with [0] would pick the
    first character of a string instead of the number. -1 is returned when
    the metric is missing or not numeric.

    Raises:
        KeyError: if the ride has no 'METRICS' entry.
    """
    metrics = ride['METRICS']
    try:
        raw = metrics['coggan_np']
        if isinstance(raw, (list, tuple)):
            raw = raw[0]
        return round(float(raw), 1)
    except (KeyError, IndexError, TypeError, ValueError):
        return -1

get_norm_power(ride)

204.7

#### TSS

In [30]:
def get_tss(ride):
    """Return the ride's 'coggan_tss' metric rounded to a whole number, or -1.

    The result is a float (e.g. 167.0). The metric may be stored as a plain
    value or as a list whose first item is the value; both are accepted.
    -1 is returned when the metric is missing or not numeric.

    Raises:
        KeyError: if the ride has no 'METRICS' entry.
    """
    metrics = ride['METRICS']
    try:
        raw = metrics['coggan_tss']
        if isinstance(raw, (list, tuple)):
            raw = raw[0]
        return round(float(raw), 0)
    except (KeyError, IndexError, TypeError, ValueError):
        return -1

get_tss(ride)

167.0

#### Intensity Factor

In [31]:
def get_intensity_factor(ride):
    """Return the ride's 'coggan_if' metric rounded to 2 decimals, or -1.

    The metric is normally a list whose first item is the value. A plain
    (non-list) value is also accepted; indexing it with [0] would pick the
    first character of a string instead of the number. -1 is returned when
    the metric is missing or not numeric.

    Raises:
        KeyError: if the ride has no 'METRICS' entry.
    """
    metrics = ride['METRICS']
    try:
        raw = metrics['coggan_if']
        if isinstance(raw, (list, tuple)):
            raw = raw[0]
        return round(float(raw), 2)
    except (KeyError, IndexError, TypeError, ValueError):
        return -1

get_intensity_factor(ride)

0.87

#### Functional Threshold Power (FTP)

In [32]:
def get_ftp(ride):
    """Return the ride's 'cp_setting' metric as a float, or -1 when unavailable.

    The metric may be stored as a plain value or as a list whose first item
    is the value; both are accepted.

    Raises:
        KeyError: if the ride has no 'METRICS' entry.
    """
    metrics = ride['METRICS']
    try:
        raw = metrics['cp_setting']
        if isinstance(raw, (list, tuple)):
            raw = raw[0]
        return float(raw)
    except (KeyError, IndexError, TypeError, ValueError):
        return -1
    
get_ftp(ride)

235.0

#### Average heart rate

In [33]:
def get_average_hr(ride):
    """Return the ride's 'average_hr' metric rounded to a whole number, or -1.

    The result is a float (e.g. 165.0). The metric is normally a list whose
    first item is the value. A plain (non-list) value is also accepted;
    indexing it with [0] would pick the first character of a string instead
    of the number. -1 is returned when the metric is missing or not numeric.

    Raises:
        KeyError: if the ride has no 'METRICS' entry.
    """
    metrics = ride['METRICS']
    try:
        raw = metrics['average_hr']
        if isinstance(raw, (list, tuple)):
            raw = raw[0]
        return round(float(raw), 0)
    except (KeyError, IndexError, TypeError, ValueError):
        return -1

get_average_hr(ride)

165.0

#### Average speed

In [34]:
def get_average_speed(ride):
    """Return the ride's 'average_speed' metric rounded to 1 decimal, or -1.

    The metric may be stored as a plain value or as a list whose first item
    is the value; both are accepted. -1 is returned when the metric is
    missing or not numeric.

    Raises:
        KeyError: if the ride has no 'METRICS' entry.
    """
    metrics = ride['METRICS']
    try:
        raw = metrics['average_speed']
        if isinstance(raw, (list, tuple)):
            raw = raw[0]
        return round(float(raw), 1)
    except (KeyError, IndexError, TypeError, ValueError):
        return -1

get_average_speed(ride)

24.1

## Generate the CSV file

In [35]:
# newline='' is what the csv module expects for files it writes, so it can
# control line endings itself. The encoding is fixed so the output does not
# depend on the platform default.
with open(csv_file, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)

    header = ['date', 'route', 'duration_secs', 'distance_km', 'elevation', 'ftp', 'np', 'tss', 'if', 'hr', 'speed', 'sport', 'device', 'weight', 'keywords']
    writer.writerow(header)

    for ride in rides:
        device = get_device(ride)
        sport = get_sport(ride)

        # Only bike activities recorded with a Polar device are exported.
        if ('Polar' in device) and ('Bike' in sport):
            try:
                date = get_datetime_str(ride)
                route = get_route(ride)
                duration = get_duration_secs(ride)
                distance = get_distance_km(ride)
                elevation = get_elevation_gain(ride)
                cp = get_ftp(ride)
                np = get_norm_power(ride)
                tss = get_tss(ride)
                if_ = get_intensity_factor(ride)
                hr = get_average_hr(ride)
                speed = get_average_speed(ride)
                weight = get_weight_float(ride)
                keywords = get_keywords(ride)

                writer.writerow([date, route, duration, distance, elevation, cp, np, tss, if_, hr, speed, sport, device, weight, keywords])
            except Exception:
                # Identify the failing ride by its raw date. The formatted
                # `date` variable may be unbound or left over from the
                # previous ride if the date conversion is what failed.
                print(ride.get('date', '<no date>'))
                raise