# Organised Teams Statistics

Analysis of OpenStreetMap contributions from organised teams (including corporations).

In [1]:
import duckdb
import util

# Run the shared notebook helper's initialisation before any query or figure
# cell executes. NOTE(review): what init() configures lives in the project's
# util module and is not visible here — confirm there before relying on it.
util.init()

## Total and Percent of Edits and Contributors from Organised Teams Per Month

In [2]:
# Per-month totals for all rows versus rows that carry an organised_team value.
# The team-only aggregate is LEFT JOINed onto the overall aggregate, so a month
# without organised-team rows still shows up, with COALESCE turning its team
# figures into 0.
monthly_share_sql = """
WITH monthly_total AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as total_contributors,
        CAST(SUM(edit_count) as BIGINT) as total_edits
    FROM '../changeset_data/year=*/month=*/*.parquet'
    GROUP BY year, month
),
monthly_organised_team AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as team_contributors,
        CAST(SUM(edit_count) as BIGINT) as team_edits
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE organised_team IS NOT NULL
    GROUP BY year, month
)
SELECT 
    mt.months,
    mt.total_contributors as "Total Contributors",
    COALESCE(mot.team_contributors, 0) as "Organised Team Contributors",
    mt.total_edits as "Total Edits",
    COALESCE(mot.team_edits, 0) as "Organised Team Edits",
    ROUND((COALESCE(mot.team_contributors, 0) * 100.0) / mt.total_contributors, 2) as "Percent Contributors from Organised Teams",
    ROUND((COALESCE(mot.team_edits, 0) * 100.0) / mt.total_edits, 2) as "Percent Edits from Organised Teams"
FROM monthly_total mt
LEFT JOIN monthly_organised_team mot ON mt.year = mot.year AND mt.month = mot.month
ORDER BY mt.year, mt.month
"""
df = duckdb.sql(monthly_share_sql).df()

# One entry per chart, in display order. Only the two percentage charts pass a
# hover unit; the absolute-count charts leave that argument at its default.
figure_specs = [
    {
        "title": "Percentage of Edits from Organised Teams",
        "label": "Edits Percentage",
        "y_col": "Percent Edits from Organised Teams",
        "y_unit_hover_template": "%",
    },
    {
        "title": "Monthly Edits from Organised Teams",
        "label": "Edits",
        "y_col": "Organised Team Edits",
    },
    {
        "title": "Percentage of Contributors from Organised Teams",
        "label": "Contributors Percentage",
        "y_col": "Percent Contributors from Organised Teams",
        "y_unit_hover_template": "%",
    },
    {
        "title": "Monthly Contributors from Organised Teams",
        "label": "Contributors",
        "y_col": "Organised Team Contributors",
    },
]

# Every chart plots against the same "months" column of the same data frame.
util.show_figure(
    [
        util.FigureConfig(x_col="months", query_or_df=df, **spec)
        for spec in figure_specs
    ]
)

## Monthly Contributors, New Contributors, and Edits Per Organised Team (Top 10 Teams by Total Edits)

In [3]:
# Monthly edits, contributors and new contributors for the ten organised teams
# with the most edits overall (top_organised_teams ranks by SUM(edit_count),
# not by contributor count).
#
# A contributor counts as "new" for a team in the first (year, month) in which
# a row for that user is attributed to that team. The first-appearance window
# is therefore partitioned by (user_name, organised_team): partitioning by
# user_name alone would credit a user who has rows under several top-10 teams
# to only one of them, and ROW_NUMBER would choose arbitrarily between teams
# that tie on the same month.
df_top10 = duckdb.sql("""
WITH top_organised_teams AS (
    SELECT organised_team
    FROM (
        SELECT
            organised_team,
            SUM(edit_count) as total_edits
        FROM '../changeset_data/year=*/month=*/*.parquet'
        WHERE organised_team IS NOT NULL
        GROUP BY organised_team
        ORDER BY total_edits DESC
        LIMIT 10
    )
),
user_first_appearance AS (
    SELECT
        user_name,
        year,
        month,
        organised_team,
        ROW_NUMBER() OVER (PARTITION BY user_name, organised_team ORDER BY year, month) as rn
    FROM (
        SELECT DISTINCT user_name, year, month, organised_team
        FROM '../changeset_data/year=*/month=*/*.parquet'
        WHERE organised_team IN (SELECT organised_team FROM top_organised_teams)
    )
),
first_appearances AS (
    SELECT user_name, year, month, organised_team
    FROM user_first_appearance
    WHERE rn = 1
),
monthly_contributors AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        organised_team,
        COUNT(DISTINCT user_name) as "Contributors",
        CAST(SUM(edit_count) as BIGINT) as "Edits"
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE organised_team IN (SELECT organised_team FROM top_organised_teams)
    GROUP BY year, month, organised_team
),
monthly_new_contributors AS (
    SELECT
        year,
        month,
        organised_team,
        COUNT(DISTINCT user_name) as "New Contributors"
    FROM first_appearances
    GROUP BY year, month, organised_team
)
SELECT 
    mc.months,
    mc.organised_team,
    mc."Contributors",
    COALESCE(mnc."New Contributors", 0) as "New Contributors",
    mc."Edits"
FROM monthly_contributors mc
LEFT JOIN monthly_new_contributors mnc ON mc.year = mnc.year AND mc.month = mnc.month AND mc.organised_team = mnc.organised_team
ORDER BY mc.year, mc.month, mc.organised_team
""").df()

# Three charts over the same frame, one line per team (group_col).
util.show_figure(
    [
        util.FigureConfig(
            title="Monthly Edits by Top 10 Organised Teams",
            label="Edits",
            x_col="months",
            y_col="Edits",
            group_col="organised_team",
            query_or_df=df_top10,
        ),
        util.FigureConfig(
            title="Monthly Contributors by Top 10 Organised Teams",
            label="Contributors",
            x_col="months",
            y_col="Contributors",
            group_col="organised_team",
            query_or_df=df_top10,
        ),
        util.FigureConfig(
            title="Monthly New Contributors by Top 10 Organised Teams",
            label="New Contributors",
            x_col="months",
            y_col="New Contributors",
            group_col="organised_team",
            query_or_df=df_top10,
        ),
    ]
)

## All Organised Teams Table

In [4]:
# Per-year edits and contributors for every organised team, joined with each
# team's all-time totals. The all-time columns are handed to the tables below
# as their sum_col.
query = """
WITH organised_team_totals AS (
    SELECT
        organised_team as "Organised Team",
        CAST(SUM(edit_count) as BIGINT) as total_edits_all_time,
        CAST(COUNT(DISTINCT user_name) as BIGINT) as total_contributors_all_time
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE organised_team IS NOT NULL
    GROUP BY organised_team
),
yearly_metrics AS (
    SELECT
        d.year,
        d.organised_team as "Organised Team",
        CAST(SUM(d.edit_count) as BIGINT) as "Edits",
        CAST(COUNT(DISTINCT d.user_name) as BIGINT) as "Contributors"
    FROM '../changeset_data/year=*/month=*/*.parquet' d
    WHERE d.organised_team IS NOT NULL
    GROUP BY d.year, d.organised_team
)
SELECT 
    ym.year,
    ym."Organised Team",
    ym."Edits", 
    ym."Contributors",
    ott.total_edits_all_time as "Total Edits",
    ott.total_contributors_all_time as "Total Contributors"
FROM yearly_metrics ym
JOIN organised_team_totals ott ON ym."Organised Team" = ott."Organised Team"
ORDER BY year DESC, "Edits" DESC
"""
df_all = duckdb.sql(query).df()

# (title, value column, all-time total column) for each table. The value
# column name doubles as the table's label.
table_specs = [
    ("Edits Per Organised Team by Year", "Edits", "Total Edits"),
    ("Contributors Per Organised Team by Year", "Contributors", "Total Contributors"),
]

# Both tables share the same frame and axis columns (year against team).
table_configs = [
    util.TableConfig(
        title=title,
        label=value_col,
        query_or_df=df_all,
        x_axis_col="year",
        y_axis_col="Organised Team",
        value_col=value_col,
        center_columns=["Rank", "Organised Team"],
        sum_col=sum_col,
    )
    for title, value_col, sum_col in table_specs
]

util.show_tables(table_configs)

Rank,Organised Team,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,Total Edits
1,Meta,0,0,0,0,0,0,0,0,0,78815,4296035,20247234,28957510,50673799,83060907,51973945,25494310,1074076,1173709,267030340
2,Kaart,0,22922,7559,364177,976391,730863,780037,623276,1509967,6907406,7390957,8620818,21691873,28594458,19554990,17457242,14491270,10149722,7230335,147104263
3,Apple,0,0,0,0,0,0,0,0,0,0,601578,3974945,13770839,17215025,8878281,13050921,10308898,5385095,1691394,74876976
4,Amazon,0,0,0,0,0,0,0,0,0,0,68,1171512,15006820,29407768,19940677,1456200,1034876,76744,98718,68193383
5,Mapbox,0,0,45339,130377,336422,1812554,2975135,5001072,10364605,7176557,6020903,5478908,1058384,1549755,1151670,1848191,1427806,1787103,3412546,51577327
6,Microsoft,0,0,846,7795,0,184,0,0,0,987,44465,3308373,9463749,6588507,7790014,2157361,2245106,933667,2523502,35064556
7,DigitalEgypt,0,0,0,0,0,0,0,0,0,0,0,7815,4835241,4385173,3475331,4657479,4054066,3732908,1728077,26876090
8,TomTom,0,0,0,0,0,0,0,0,0,0,13356,904,14,256,5163115,5383053,5994057,5209686,3698322,25462763
9,Grab,0,0,0,0,0,0,61,10,360575,158382,1122025,847285,3077576,2958667,8916251,3993110,538887,377096,579566,22929491
10,GeoCompas,0,0,0,0,23957,1421771,2478856,2860814,3100322,1768042,582325,1756308,580203,456227,99606,844325,967220,730673,848040,18518689

Rank,Organised Team,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,Total Contributors
1,Amazon,0,0,0,0,0,0,0,0,0,0,3,67,308,371,674,640,610,158,272,1153
2,Meta,0,0,0,0,0,0,0,0,0,1,24,85,124,177,169,217,153,70,79,372
3,Apple,0,0,0,0,0,0,0,0,0,0,11,40,85,120,127,303,318,212,131,353
4,Grab,0,0,0,0,0,0,1,1,2,2,7,24,67,75,181,222,116,62,50,294
5,TomTom,0,0,0,0,0,0,0,0,0,0,2,1,1,3,104,202,89,76,70,291
6,Kaart,0,1,1,2,3,4,8,8,18,29,40,93,167,122,109,107,78,88,62,263
7,Mapbox,0,0,2,2,3,7,7,14,24,30,41,44,63,70,102,110,111,91,80,220
8,Lyft,0,0,0,0,0,0,0,0,0,0,0,21,34,29,33,16,18,25,15,92
9,Uber,0,0,0,0,0,0,0,0,0,0,2,87,78,21,7,7,3,2,4,88
10,Bolt,0,0,0,0,0,0,0,0,0,0,0,0,0,5,25,38,40,13,11,56
