# Editing Software Statistics

Analysis of OpenStreetMap contributors and edits by editing software.

In [1]:
import json

import duckdb
import util

# One-time notebook setup provided by the project-local `util` module.
# NOTE(review): what exactly it configures is not visible in this file — confirm in util.init.
util.init()

## Top 10 Editing Software

In [2]:
# Monthly statistics for the ten editing tools with the most distinct contributors.
#
# A user counts as a "new" contributor of a tool in the first month they used
# *that tool*, so "Accumulated Contributors" is the running number of distinct
# users per tool. This matches the per-(user, software) definition of new
# contributors used by the yearly tables further down.
#
# All sums are cast to BIGINT, as the other queries in this notebook do, so the
# resulting DataFrame columns are integers.
df = duckdb.sql("""
WITH top_software AS (
    SELECT created_by
    FROM (
        SELECT
            created_by,
            COUNT(DISTINCT user_name) as total_contributors
        FROM '../changeset_data/year=*/month=*/*.parquet'
        WHERE created_by IS NOT NULL
        GROUP BY created_by
        ORDER BY total_contributors DESC
        LIMIT 10
    )
),
user_first_appearance AS (
    SELECT
        user_name,
        year,
        month,
        created_by,
        -- Rank per (user, software): the inner DISTINCT guarantees one row per
        -- month inside each partition, so rn = 1 is unambiguous. Partitioning by
        -- user_name alone would pick an arbitrary software whenever a user used
        -- several top-10 tools in their first month.
        ROW_NUMBER() OVER (PARTITION BY user_name, created_by ORDER BY year, month) as rn
    FROM (
        SELECT DISTINCT user_name, year, month, created_by
        FROM '../changeset_data/year=*/month=*/*.parquet'
        WHERE created_by IN (SELECT created_by FROM top_software)
    )
),
first_appearances AS (
    SELECT user_name, year, month, created_by
    FROM user_first_appearance
    WHERE rn = 1
),
monthly_contributors AS (
    SELECT
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        created_by,
        COUNT(DISTINCT user_name) as "Contributors",
        CAST(SUM(edit_count) as BIGINT) as "Edit Count"
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE created_by IN (SELECT created_by FROM top_software)
    GROUP BY year, month, created_by
),
monthly_new_contributors AS (
    SELECT
        year,
        month,
        created_by,
        COUNT(DISTINCT user_name) as "New Contributors"
    FROM first_appearances
    GROUP BY year, month, created_by
),
base_data AS (
    SELECT
        m.year,
        m.month,
        m.months,
        m.created_by,
        m."Contributors",
        -- Months without any first-time user have no row in n.
        COALESCE(n."New Contributors", 0) as "New Contributors",
        m."Edit Count"
    FROM monthly_contributors m
    LEFT JOIN monthly_new_contributors n ON m.year = n.year AND m.month = n.month AND m.created_by = n.created_by
)
SELECT
    months,
    created_by,
    "Contributors",
    "New Contributors",
    "Edit Count",
    CAST(SUM("New Contributors") OVER (
        PARTITION BY created_by
        ORDER BY year, month
        ROWS UNBOUNDED PRECEDING
    ) as BIGINT) as "Accumulated Contributors",
    CAST(SUM("Edit Count") OVER (
        PARTITION BY created_by
        ORDER BY year, month
        ROWS UNBOUNDED PRECEDING
    ) as BIGINT) as "Accumulated Edits"
FROM base_data
ORDER BY year, month, created_by
""").df()

# One figure per metric; every figure shares the same axes/grouping and only
# differs in the plotted column and its title.
util.show_figure(
    [
        util.FigureConfig(
            title=f"{title_prefix} by Top 10 Editing Software",
            label=metric,
            x_col="months",
            y_col=metric,
            group_col="created_by",
            query_or_df=df,
        )
        for title_prefix, metric in (
            ("Monthly Contributors", "Contributors"),
            ("Monthly New Contributors", "New Contributors"),
            ("Monthly Edit Count", "Edit Count"),
            ("Accumulated Contributors", "Accumulated Contributors"),
            ("Accumulated Edits", "Accumulated Edits"),
        )
    ],
)

## Top 100 Editing Software Yearly

In [3]:
# Yearly per-software metrics joined with per-software totals.
#   user_first_year  - first year in which each user appears with each software
#   software_totals  - per software: summed edits and distinct contributors,
#                      both over all years and restricted to year >= 2021
#   yearly_metrics   - per (year, software): summed edits, distinct contributors,
#                      and "New Contributors" = users whose first year with that
#                      software is the row's year
# The final SELECT yields one row per (year, software), repeating the software's
# totals on every row, ordered by year (newest first) and then by edits.
query = """
WITH user_first_year AS (
	SELECT 
		user_name,
		created_by,
		MIN(year) as first_year
	FROM '../changeset_data/year=*/month=*/*.parquet'
	WHERE created_by IS NOT NULL
	GROUP BY user_name, created_by
),
software_totals AS (
	SELECT
		created_by as "Editing Software",
		CAST(SUM(edit_count) as BIGINT) as total_edits_all_time,
		CAST(SUM(CASE WHEN year >= 2021 THEN edit_count ELSE 0 END) as BIGINT) as total_edits_2021_now,
		CAST(COUNT(DISTINCT user_name) as BIGINT) as total_contributors_all_time,
		CAST(COUNT(DISTINCT CASE WHEN year >= 2021 THEN user_name END) as BIGINT) as total_contributors_2021_now
	FROM '../changeset_data/year=*/month=*/*.parquet'
	WHERE created_by IS NOT NULL
	GROUP BY created_by
),
yearly_metrics AS (
	SELECT
		d.year,
		d.created_by as "Editing Software",
		CAST(SUM(d.edit_count) as BIGINT) as "Edits",
		CAST(COUNT(DISTINCT d.user_name) as BIGINT) as "Contributors",
		CAST(COUNT(DISTINCT CASE WHEN ufy.first_year = d.year THEN d.user_name END) as BIGINT) as "New Contributors"
	FROM '../changeset_data/year=*/month=*/*.parquet' d
	LEFT JOIN user_first_year ufy 
		ON d.user_name = ufy.user_name AND d.created_by = ufy.created_by
	WHERE d.created_by IS NOT NULL
	GROUP BY d.year, d.created_by
)
SELECT 
	ym.year,
	ym."Editing Software",
	ym."Edits",
	ym."New Contributors",
	ym."Contributors",
	st.total_edits_all_time as "Total Edits",
	st.total_edits_2021_now as "Total Edits (2021 - Now)",
	st.total_contributors_all_time as "Total Contributors",
	st.total_contributors_2021_now as "Total Contributors (2021 - Now)"
FROM yearly_metrics ym
JOIN software_totals st
	ON ym."Editing Software" = st."Editing Software"
ORDER BY year DESC, "Edits" DESC
"""
df = duckdb.sql(query).df()

# Build a lookup from software name to an HTML anchor for every entry of the
# replace-rules config that carries a "link" key.
# The file is opened explicitly as UTF-8: without `encoding`, open() falls back
# to the platform's locale encoding, which can mis-decode non-ASCII names.
with open("../config/replace_rules_created_by.json", encoding="utf-8") as f:
    editing_software_name_to_html_link = {
        name: f'<a href="{item["link"]}">{name}</a>' for name, item in json.load(f).items() if "link" in item
    }

# Replace names that have a link with their anchor; names without one stay as they are.
df["Editing Software"] = df["Editing Software"].apply(
    lambda name: editing_software_name_to_html_link.get(name, name)
)

# Each total is repeated on every yearly row of a software, so taking the first
# value per software and keeping the 100 largest ranks the software by that total.
(
    top_100_edits,
    top_100_contributors,
    top_100_edits_2021_now,
    top_100_contributors_2021_now,
) = (
    df.groupby("Editing Software")[total_col].first().nlargest(100)
    for total_col in (
        "Total Edits",
        "Total Contributors",
        "Total Edits (2021 - Now)",
        "Total Contributors (2021 - Now)",
    )
)

# Four pivot tables (software x year): contributors and edits, each once over
# all years and once restricted to the years from 2021 onwards.
table_configs = [
    util.TableConfig(
        title="Top 100 Contributors",
        query_or_df=df.loc[df["Editing Software"].isin(top_100_contributors.index)],
        x_axis_col="year",
        y_axis_col="Editing Software",
        value_col="Contributors",
        center_columns=["Rank", "Editing Software"],
        sum_col="Total Contributors",
    ),
    util.TableConfig(
        title="Top 100 Contributors 2021 - Now",
        query_or_df=df.loc[
            df["Editing Software"].isin(top_100_contributors_2021_now.index) & df["year"].ge(2021)
        ],
        x_axis_col="year",
        y_axis_col="Editing Software",
        value_col="Contributors",
        center_columns=["Rank", "Editing Software"],
        sum_col="Total Contributors (2021 - Now)",
    ),
    util.TableConfig(
        title="Top 100 Edits All Time",
        query_or_df=df.loc[df["Editing Software"].isin(top_100_edits.index)],
        x_axis_col="year",
        y_axis_col="Editing Software",
        value_col="Edits",
        center_columns=["Rank", "Editing Software"],
        sum_col="Total Edits",
    ),
    util.TableConfig(
        title="Top 100 Edits 2021 - Now",
        query_or_df=df.loc[
            df["Editing Software"].isin(top_100_edits_2021_now.index) & df["year"].ge(2021)
        ],
        x_axis_col="year",
        y_axis_col="Editing Software",
        value_col="Edits",
        center_columns=["Rank", "Editing Software"],
        sum_col="Total Edits (2021 - Now)",
    ),
]

util.show_tables(table_configs)

Rank,Editing Software,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,Total Contributors
1,iD,0,0,0,0,56176,125149,133826,148447,194537,214057,203438,241197,239537,204579,210136,205427,169656,1623771
2,Potlatch,59540,69047,81896,107156,72954,29604,24283,14901,10184,6598,5043,3531,608,350,234,147,87,358304
3,MAPS.ME,0,0,0,0,0,0,0,96584,102375,71199,55719,35483,19974,15515,16231,7729,2385,320987
4,JOSM,13755,18797,20232,23441,23140,21862,22796,22315,23191,22762,22801,22109,21512,20537,19741,18127,14964,152173
5,StreetComplete,0,0,0,0,0,0,0,7,9572,9764,8967,11695,21935,23525,26809,27111,25814,86191
6,OsmAnd,0,192,645,1129,1653,1927,2354,3534,4862,5982,7063,6977,7959,8623,8544,7875,6569,39111
7,Organic Maps,0,0,0,0,0,0,0,0,0,0,0,0,3017,6258,10557,12765,15675,36294
8,Vespucci,58,237,460,957,1622,1801,2075,2379,2920,3331,3632,4314,4539,5025,5371,5322,4725,25048
9,Rapid,0,0,0,0,0,0,0,0,0,0,1105,3709,2124,2183,9121,3662,3598,20348
10,Go Map!!,0,0,0,0,2205,2046,1335,1508,1834,2685,3074,3152,3748,3720,3830,4162,3714,19615

Rank,Editing Software,2021,2022,2023,2024,2025,Total Contributors (2021 - Now)
1,iD,239537,204579,210136,205427,169656,771920
2,StreetComplete,21935,23525,26809,27111,25814,70565
3,MAPS.ME,19974,15515,16231,7729,2385,51403
4,JOSM,21512,20537,19741,18127,14964,48862
5,Organic Maps,3017,6258,10557,12765,15675,36294
6,OsmAnd,7959,8623,8544,7875,6569,23226
7,Rapid,2124,2183,9121,3662,3598,16883
8,Vespucci,4539,5025,5371,5322,4725,14422
9,Go Map!!,3748,3720,3830,4162,3714,10808
10,Every Door,0,1992,3675,4324,4289,9569

Rank,Editing Software,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,Total Edits
1,JOSM,125617731,494460089,539159637,543550289,502509468,669575514,646158075,627338221,665002671,796269776,791305333,996626125,918242849,759930262,758361441,768457774,566129688,11168694943
2,iD,0,0,0,0,34431679,109275880,142214185,189397165,279168301,321344085,359674889,459868249,480387177,416752352,427965098,428165855,335748059,3984392974
3,Potlatch,44030413,64726838,100956850,147706402,133843837,78092007,60204602,42988829,33377795,25182374,20637960,17371354,5083644,2678828,2574667,2766754,2045014,784268168
4,Rapid,0,0,0,0,0,0,0,0,0,0,11678161,51870526,63919464,63564994,63685407,52467053,32476548,339662153
5,osmtools,150412240,8994328,981921,1229689,2734257,1499370,6826400,15035558,2655505,3271989,7558536,6454470,8152531,12399003,29128241,2154313,1527465,261015816
6,bulk_upload.py,64669041,24649625,1442184,759090,185033,8089,1061001,1113638,2398,1036,26933301,263564,464798,0,0,4341,0,121557139
7,StreetComplete,0,0,0,0,0,0,0,85,1256558,1701133,1983723,3934118,12439388,12904369,13630403,12782820,9029567,69662164
8,Merkaartor,5378663,10887187,10250026,9702870,4380406,3791724,3696590,2522894,1735350,2138115,2316287,1745000,334114,491775,123828,96656,252903,59844388
9,upload.py,31167015,10166002,2402891,421238,776590,422731,488194,4827,38997,3021,4195,1853644,4378760,822536,968637,2536270,716624,57172172
10,Vespucci,2087,22298,49455,165722,277566,804178,885787,1055056,1559094,2073139,2296702,3560018,3517790,3864897,4998545,9855980,10249925,45238239

Rank,Editing Software,2021,2022,2023,2024,2025,Total Edits (2021 - Now)
1,JOSM,918242849,759930262,758361441,768457774,566129688,3771122014
2,iD,480387177,416752352,427965098,428165855,335748059,2089018541
3,Rapid,63919464,63564994,63685407,52467053,32476548,276113466
4,StreetComplete,12439388,12904369,13630403,12782820,9029567,60786547
5,osmtools,8152531,12399003,29128241,2154313,1527465,53361553
6,Vespucci,3517790,3864897,4998545,9855980,10249925,32487137
7,osmapi,23940911,1860602,1030376,1094794,298185,28224868
8,Go Map!!,4733594,4394577,4263021,4439158,4049339,21879689
9,osm-revert,0,0,14476276,2209872,1674924,18361072
10,Potlatch,5083644,2678828,2574667,2766754,2045014,15148907


## Monthly Percentage of Contributors by Top 10 Editing Software

In [4]:
# Share of each month's contributors that used each of the ten editing tools
# with the most distinct contributors overall.
# The denominator is the number of distinct users per month across all software
# (rows with a NULL created_by excluded), not just the top ten. A user who uses
# several tools in a month is counted once per tool, so the shares of a month
# can add up to more than 100 %.
df = duckdb.sql("""
WITH top_software AS (
	SELECT created_by
	FROM (
		SELECT
			created_by,
			COUNT(DISTINCT user_name) as total_contributors
		FROM '../changeset_data/year=*/month=*/*.parquet'
		WHERE created_by IS NOT NULL
		GROUP BY created_by
		ORDER BY total_contributors DESC
		LIMIT 10
	)
),
monthly_software_contributors AS (
	SELECT
		CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
		created_by,
		COUNT(DISTINCT user_name) as contributors
	FROM '../changeset_data/year=*/month=*/*.parquet'
	WHERE created_by IN (SELECT created_by FROM top_software)
	GROUP BY year, month, created_by
),
monthly_total_contributors AS (
	SELECT
		CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
		COUNT(DISTINCT user_name) as total_contributors
	FROM '../changeset_data/year=*/month=*/*.parquet'
	WHERE created_by IS NOT NULL
	GROUP BY year, month
)
SELECT
	msc.months,
	msc.created_by,
	-- Double-quoted identifier (standard SQL) instead of a string literal as
	-- the alias, matching the other queries in this notebook.
	ROUND((msc.contributors * 100.0) / mtc.total_contributors, 2) as "Percentage of Contributors"
FROM monthly_software_contributors msc
JOIN monthly_total_contributors mtc ON msc.months = mtc.months
ORDER BY msc.months, msc.created_by""").df()

util.show_figure(
    [
        util.FigureConfig(
            title="Monthly Percentage of Contributors by Top 10 Editing Software",
            x_col="months",
            y_col="Percentage of Contributors",
            y_unit_hover_template="%",
            group_col="created_by",
            query_or_df=df,
        ),
    ],
)

## Monthly Contributors by Device Type

In [5]:
# Distinct contributors and summed edits per month and device type; rows with
# no device type are left out.
df_device_metrics = duckdb.sql(
    """
SELECT
    CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
    device_type,
    COUNT(DISTINCT user_name) as Contributors,
    CAST(SUM(edit_count) as BIGINT) as "Edit Count"
FROM '../changeset_data/year=*/month=*/*.parquet'
WHERE device_type IS NOT NULL
GROUP BY year, month, device_type
ORDER BY year, month, device_type
"""
).df()

# Same chart layout for both metrics; only the plotted column and title change.
util.show_figure(
    [
        util.FigureConfig(
            title=f"Monthly {metric} by Device Type",
            label=metric,
            x_col="months",
            y_col=metric,
            group_col="device_type",
            query_or_df=df_device_metrics,
        )
        for metric in ("Contributors", "Edit Count")
    ],
)

In [6]:
# For every user, determine the editing software of their earliest month in the
# data, then count per month how many users started with each of the ten tools
# that have the most distinct contributors overall.
# The ranking runs over all software first and is filtered to the top ten
# afterwards, so users whose first software is outside the top ten are not
# attributed to a top-ten tool they picked up later.
df = duckdb.sql("""
WITH top_software AS (
	SELECT created_by
	FROM (
		SELECT
			created_by,
			COUNT(DISTINCT user_name) as total_contributors
		FROM '../changeset_data/year=*/month=*/*.parquet'
		WHERE created_by IS NOT NULL
		GROUP BY created_by
		ORDER BY total_contributors DESC
		LIMIT 10
	)
),
user_first_software AS (
	SELECT
		user_name,
		created_by,
		year,
		month,
		-- The data is only ordered to month precision here, so a user who used
		-- several tools in their first month produces ties. created_by is added
		-- as a tie-breaker (alphabetical) so the chosen row, and therefore the
		-- chart, is reproducible between runs.
		ROW_NUMBER() OVER (PARTITION BY user_name ORDER BY year, month, created_by) as rn
	FROM '../changeset_data/year=*/month=*/*.parquet'
	WHERE created_by IS NOT NULL
),
first_software_only AS (
	SELECT
		user_name,
		created_by,
		year,
		month,
		CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months
	FROM user_first_software
	WHERE rn = 1 AND created_by IN (SELECT created_by FROM top_software)
),
monthly_first_software_counts AS (
	SELECT
		months,
		created_by,
		COUNT(DISTINCT user_name) as first_time_users
	FROM first_software_only
	GROUP BY months, created_by
)
SELECT
	months,
	created_by,
	first_time_users as "First Time Contributors"
FROM monthly_first_software_counts
ORDER BY months, created_by""").df()
util.show_figure(
    [
        util.FigureConfig(
            title="Top 10 First Editing Software Per Month",
            x_col="months",
            y_col="First Time Contributors",
            group_col="created_by",
            query_or_df=df,
        ),
    ],
)