We have data, we can do more statistics. In this document we will explore some more analyses and plots that could become part of the Geo Activity Playground interface. We just do it in a Jupyter Notebook as it is easier to iterate.

In [1]:
import altair as alt
import pandas as pd
import datetime

In [2]:
meta = pd.read_parquet(
    "/home/mu/Dokumente/Karten/Playground/Cache/Activity/activities.parquet"
)

meta = meta.loc[meta["consider_for_achievements"]].copy()

meta["year"] = [start.year for start in meta["start"]]
meta["month"] = [start.month for start in meta["start"]]
meta["isoyear"] = [start.isocalendar().year for start in meta["start"]]
meta["isoweek"] = [start.isocalendar().week for start in meta["start"]]
meta["date"] = [start.date() for start in meta["start"]]
meta["elapsed_time_h"] = meta["elapsed_time"].dt.total_seconds() / 3600

meta.dtypes

calories                             float64
commute                                 bool
consider_for_achievements               bool
equipment                             object
kind                                  object
steps                                float64
id                                     int64
name                                  object
path                                  object
start                         datetime64[ns]
elapsed_time                 timedelta64[ns]
distance_km                          float64
moving_time                  timedelta64[ns]
start_latitude                       float64
end_latitude                         float64
start_longitude                      float64
end_longitude                        float64
average_speed_moving_kmh             float64
average_speed_elapsed_kmh            float64
year                                   int64
month                                  int64
isoyear                                int64
isoweek   

# Yearly Eddington number

The classic Eddington Number is a lifelong thing. But we can of course compute it per year.

In [16]:
def get_eddington_number(distances: pd.Series) -> int:
    if len(distances) == 1:
        if distances.iloc[0] >= 1:
            return 1
        else:
            0

    sorted_distances = sorted(distances, reverse=True)
    for en, distance in enumerate(sorted_distances, 1):
        if distance < en:
            return en - 1


yearly_eddington = meta.groupby("year").apply(
    lambda group: get_eddington_number(
        group.groupby("date").apply(
            lambda group2: int(group2["distance_km"].sum()), include_groups=False
        )
    ),
    include_groups=False,
)
yearly_eddington.to_dict()

{2013: 13,
 2014: 7,
 2015: 8,
 2016: 1,
 2017: 9,
 2018: 20,
 2019: 14,
 2020: 27,
 2021: 30,
 2022: 42,
 2023: 29,
 2024: 26,
 2025: 5}

# Eddington number history

In [4]:
daily_distances = meta.groupby("date").apply(
    lambda group2: int(group2["distance_km"].sum()), include_groups=False
)

eddington_number_history = {"date": [], "eddington_number": []}
en = 0
top_days = []
for date, distance in daily_distances.items():
    if len(top_days) == 0:
        top_days.append(distance)
    else:
        if distance >= top_days[0]:
            top_days.append(distance)
            top_days.sort()
    while top_days[0] < len(top_days):
        top_days.pop(0)
    eddington_number_history["date"].append(
        datetime.datetime.combine(date, datetime.datetime.min.time())
    )
    eddington_number_history["eddington_number"].append(len(top_days))

In [5]:
eddington_number_history_df = pd.DataFrame(eddington_number_history)
eddington_number_history_df

Unnamed: 0,date,eddington_number
0,2013-03-25,1
1,2013-03-26,2
2,2013-04-01,2
3,2013-05-05,2
4,2013-05-21,3
...,...,...
1373,2025-01-16,66
1374,2025-01-18,66
1375,2025-01-19,66
1376,2025-01-21,66


In [6]:
(
    alt.Chart(eddington_number_history_df)
    .mark_line(interpolate="step-after")
    .encode(alt.X("date"), alt.Y("eddington_number"))
)

# Eddington number by week

Instead of aggregating by day, we instead aggregate by week.

In [7]:
distance_per_week = meta.groupby(["isoyear", "isoweek"]).apply(
    lambda group2: int(group2["distance_km"].sum()), include_groups=False
)

eddington_by_week = get_eddington_number(distance_per_week)

eddington_by_week

87

In [8]:
distance_per_week.name = "distance"

(
    alt.Chart(distance_per_week.reset_index(), width=1000)
    .mark_bar()
    .encode(alt.X("distance"), alt.Y("count()"))
)

# Lifetime distance

In [9]:
lifetime_distance = pd.DataFrame(
    {"start": meta["start"], "distance_km_cum": meta["distance_km"].cumsum()}
)

(
    alt.Chart(lifetime_distance)
    .mark_line()
    .encode(alt.X("start"), alt.Y("distance_km_cum"))
)

# Comparisons

In [10]:
references = {
    "Equator": 40_075,
    "Tour de France": 3_492,
    "Tadej Pogacars": 29_805,
}

total_distance = meta["distance_km"].sum()

for ref_name, ref_distance in references.items():
    print(f"You did {total_distance/ref_distance:.2} times the {ref_name}.")

You did 0.55 times the Equator.
You did 6.3 times the Tour de France.
You did 0.74 times the Tadej Pogacars.


In [11]:
yearly_distance = meta[["year", "distance_km"]].groupby("year").sum().reset_index()
for ref_name, ref_distance in references.items():
    yearly_distance[ref_name] = yearly_distance["distance_km"] / ref_distance
yearly_distance

Unnamed: 0,year,distance_km,Equator,Tour de France,Tadej Pogacars
0,2013,395.649116,0.009873,0.113302,0.013275
1,2014,165.490456,0.00413,0.047391,0.005552
2,2015,194.460584,0.004852,0.055687,0.006524
3,2016,41.782983,0.001043,0.011965,0.001402
4,2017,277.269273,0.006919,0.079401,0.009303
5,2018,1050.08542,0.026203,0.300712,0.035232
6,2019,1015.860637,0.025349,0.290911,0.034084
7,2020,2024.488083,0.050517,0.57975,0.067924
8,2021,3423.323477,0.085423,0.980333,0.114857
9,2022,6082.391271,0.151775,1.741807,0.204073


# Ratio of activities

In [12]:
sums_per_kind = meta.groupby("kind").sum(numeric_only=True).reset_index()
sums_per_kind

Unnamed: 0,kind,calories,commute,consider_for_achievements,steps,id,distance_km,start_latitude,end_latitude,start_longitude,end_longitude,average_speed_moving_kmh,average_speed_elapsed_kmh,year,month,isoyear,isoweek,elapsed_time_h
0,Lauf,1971.0,0,58,26566.0,8620553670810399461,337.555062,2930.021347,2930.028536,523.109719,523.116175,473.361976,428.490678,117215,330,117215,1326,47.686389
1,Radfahrt,269545.0,0,1453,0.0,8570910505892076560,18673.109968,73795.418824,73795.026272,10265.345667,10263.792623,24782.606802,21148.195902,2937661,10188,2937660,41567,1411.754606
2,Spaziergang,116860.0,0,1406,1841206.0,-2166740179642513781,2755.836824,70859.958794,70859.868857,10711.596939,10711.788093,7411.887625,5920.111057,2844025,8942,2844026,36248,750.918647
3,Wanderung,13882.0,0,36,234690.0,-5465079547078385012,323.766924,1785.410277,1785.399221,204.960683,204.955761,160.635187,130.705131,72808,194,72808,776,93.197529


In [13]:
(alt.Chart(sums_per_kind).mark_bar().encode(alt.X("distance_km"), alt.Color("kind")))

In [14]:
(alt.Chart(sums_per_kind).mark_bar().encode(alt.X("elapsed_time_h"), alt.Color("kind")))

In [15]:
(alt.Chart(sums_per_kind).mark_bar().encode(alt.X("calories"), alt.Color("kind")))