# Editing Software

In [22]:
import json

import duckdb
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
import util
from IPython.display import HTML

# --- Notebook-wide configuration -------------------------------------------
# Emit figures via the Plotly MIME type, silence DuckDB's progress bar and
# allow pandas to display up to 100 rows.
pio.renderers.default = "plotly_mimetype"
duckdb.execute("SET enable_progress_bar = false")
pd.set_option("display.max_rows", 100)

# Styling common to the x and y axes of every figure
_axis_style = dict(
    tickcolor="black",
    linecolor="black",
    showgrid=True,
    gridcolor="darkgray",
    zerolinecolor="darkgray",
)

# Base layout that figures in this notebook spread into `fig.update_layout(...)`
default_layout = dict(
    margin=dict(l=55, r=55, b=55, t=55),
    font=dict(family="Times", size=15),
    title_x=0.5,
    paper_bgcolor="#f5f2f0",
    plot_bgcolor="#f5f2f0",
    xaxis=dict(_axis_style),
    # the y axis additionally sets rangemode="tozero"
    yaxis=dict(_axis_style, rangemode="tozero"),
)

# Inject the shared notebook stylesheet and script into the rendered page
notebook_assets = HTML(
    '<link rel="stylesheet" type="text/css" href="../notebooks/notebook.css"><script src="../notebooks/notebook.js"></script>'
)
display(notebook_assets)

## Top 10 Editing Software

In [23]:
# Get top 10 editing software by total contributors.
# ORDER BY / LIMIT are applied in the outer query and ties are broken by name,
# so the selected editors (and their order in the plot) are the same on every run.
top_10_software = (
    duckdb.sql("""
WITH software_total_contributors AS (
    SELECT
        created_by,
        COUNT(DISTINCT user_name) as total_contributors
    FROM '../data_enriched/year=*/month=*/*.parquet'
    WHERE created_by IS NOT NULL
    GROUP BY created_by
)
SELECT created_by
FROM software_total_contributors
ORDER BY total_contributors DESC, created_by
LIMIT 10
""")
    .df()["created_by"]
    .tolist()
)

# SQL list literal for the IN (...) filters below. Single quotes inside a name
# are doubled so an editor name containing "'" cannot break the query.
top_10_software_sql = ", ".join("'" + name.replace("'", "''") + "'" for name in top_10_software)

# Monthly contributors for top 10 editing software
monthly_contributors_raw = duckdb.sql(f"""
SELECT
    CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
    created_by,
    COUNT(DISTINCT user_name) as contributors
FROM '../data_enriched/year=*/month=*/*.parquet'
WHERE created_by IN ({top_10_software_sql})
GROUP BY year, month, created_by
ORDER BY year, month, created_by
""").df()

# The most recent month holds incomplete data and is dropped. Filtering on the
# month label (instead of slicing off the last 10 rows) stays correct when an
# editor has no row in that month: such an editor is simply absent from the
# grouped result, so a fixed-size slice would also cut into earlier months.
# The month part of the label is zero-padded, so the string maximum is the
# latest month (assuming four-digit years).
incomplete_month = monthly_contributors_raw["months"].max()
df_monthly_contributors_by_software = monthly_contributors_raw[
    monthly_contributors_raw["months"] != incomplete_month
]

# Monthly new contributors for top 10 editing software.
# A user counts as "new" in the first month they appear with any of the top 10
# editors; if they used several editors in that month, the alphabetically first
# one is credited so the attribution is deterministic.
monthly_new_contributors_raw = duckdb.sql(f"""
WITH user_first_appearance AS (
    SELECT
        user_name,
        year,
        month,
        created_by,
        ROW_NUMBER() OVER (PARTITION BY user_name ORDER BY year, month, created_by) as rn
    FROM (
        SELECT DISTINCT user_name, year, month, created_by
        FROM '../data_enriched/year=*/month=*/*.parquet'
        WHERE created_by IN ({top_10_software_sql})
    )
),
first_appearances AS (
    SELECT user_name, year, month, created_by
    FROM user_first_appearance
    WHERE rn = 1
)
SELECT
    CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
    created_by,
    COUNT(DISTINCT user_name) as new_contributors
FROM first_appearances
GROUP BY year, month, created_by
ORDER BY year, month, created_by
""").df()

# Drop the same incomplete month as above so both series end at the same point
df_monthly_new_contributors_by_software = monthly_new_contributors_raw[
    monthly_new_contributors_raw["months"] != incomplete_month
]

# Build the figure: one line per editor and per metric, switchable via buttons
fig = go.Figure()
software_count = len(top_10_software)

# (source frame, value column, hover suffix, initially visible).
# The order matters: the button visibility masks below assume that all
# "contributors" traces come first, followed by all "new contributors" traces.
trace_groups = [
    (df_monthly_contributors_by_software, "contributors", "contributors", True),
    (df_monthly_new_contributors_by_software, "new_contributors", "new contributors", False),
]

for frame, value_column, hover_suffix, initially_visible in trace_groups:
    for software in top_10_software:
        software_data = frame[frame["created_by"] == software]
        trace = go.Scatter(
            x=software_data["months"],
            y=software_data[value_column],
            name=software,
            visible=initially_visible,
            hovertemplate=f"{software}" + "<br>%{x}<br>%{y:,} " + hover_suffix + "<extra></extra>",
        )
        fig.add_trace(trace)

# Visibility masks: first half of the traces = all contributors, second half = new
show_all_mask = [True] * software_count + [False] * software_count
show_new_mask = [False] * software_count + [True] * software_count

# Buttons for switching between all and new contributors (also swap the title)
buttons = [
    {
        "label": "All Contributors",
        "args": [
            {"visible": show_all_mask},
            {"title.text": "Monthly Contributors by Top 10 Editing Software"},
        ],
        "method": "update",
    },
    {
        "label": "New Contributors",
        "args": [
            {"visible": show_new_mask},
            {"title.text": "Monthly New Contributors by Top 10 Editing Software"},
        ],
        "method": "update",
    },
]

fig.update_layout(
    title="Monthly Contributors by Top 10 Editing Software",
    xaxis_title="Month",
    yaxis_title="Number of Contributors",
    updatemenus=[{"type": "buttons", "buttons": buttons}],
    **default_layout,
)

fig.show()

## Top 100 Editing Software by Yearly Edit Count and Contributor Count

In [53]:
# Edit Count: yearly edits for the 100 editors with the most edits overall,
# reshaped to one row per editor and one column per year.
edit_count_long = duckdb.sql("""
WITH software_yearly_edits AS (
	SELECT
		year,
		created_by as "Editing Software",
		CAST(SUM(edit_count) as BIGINT) as edits
	FROM '../data_enriched/year=*/month=*/*.parquet'
	WHERE created_by IS NOT NULL
	GROUP BY year, created_by
),
software_total_edits AS (
	SELECT
		"Editing Software",
		CAST(SUM(edits) as BIGINT) as total_edits
	FROM software_yearly_edits
	GROUP BY "Editing Software"
	ORDER BY total_edits DESC
	LIMIT 100
)
SELECT
	year,
	"Editing Software",
	edits
FROM software_yearly_edits
WHERE "Editing Software" IN (SELECT "Editing Software" FROM software_total_edits)
ORDER BY year, "Editing Software"
""").df()

df_edit_count = (
    edit_count_long.pivot_table(index="Editing Software", columns="year", values="edits")
    .fillna(0)  # years without any edits for an editor become 0
    .assign(Total=lambda wide: wide.sum(axis=1))  # row sum over all year columns
    .sort_values("Total", ascending=False)
    .reset_index()
)
# 1-based rank following the descending "Total" order
df_edit_count.insert(0, "Rank", range(1, len(df_edit_count) + 1))
# Remove the "year" label that the pivot leaves on the column index
df_edit_count.columns.name = None

# Contributors Count: yearly distinct contributors for the 100 editors with the
# most distinct contributors overall, plus each editor's all-time distinct total.
#
# The top-100 set and the totals are computed once and joined to the yearly
# counts. Computing the set separately for the yearly table and for the totals
# (with no tie-breaker on LIMIT 100) could pick different editors at the cut-off,
# leaving NaN totals after the merge, and it also scanned the dataset more often
# than necessary. Ties are now broken by editor name.
df_contributors_long = duckdb.sql("""
WITH software_total_unique_contributors AS (
	SELECT
		created_by as "Editing Software",
		CAST(COUNT(DISTINCT user_name) as BIGINT) as total_contributors
	FROM '../data_enriched/year=*/month=*/*.parquet'
	WHERE created_by IS NOT NULL
	GROUP BY created_by
	ORDER BY total_contributors DESC, "Editing Software"
	LIMIT 100
),
software_yearly_contributors AS (
	SELECT
		year,
		created_by as "Editing Software",
		CAST(COUNT(DISTINCT user_name) as BIGINT) as contributors
	FROM '../data_enriched/year=*/month=*/*.parquet'
	WHERE created_by IS NOT NULL
	GROUP BY year, created_by
)
SELECT
	yearly.year,
	yearly."Editing Software",
	yearly.contributors,
	totals.total_contributors
FROM software_yearly_contributors AS yearly
JOIN software_total_unique_contributors AS totals
	ON yearly."Editing Software" = totals."Editing Software"
ORDER BY yearly.year, yearly."Editing Software"
""").df()

# One row per editor with its all-time distinct contributor count. This is not
# the sum of the yearly columns, because a user active in several years is
# counted once here.
df_contributors_total = (
    df_contributors_long[["Editing Software", "total_contributors"]]
    .drop_duplicates()
    .sort_values("total_contributors", ascending=False, kind="stable")
    .reset_index(drop=True)
)

# Wide table: one row per editor, one column per year, 0 where an editor has no
# contributors in a year.
df_contributors_count = (
    df_contributors_long.pivot_table(index="Editing Software", columns="year", values="contributors")
    .fillna(0)
    .reset_index()
)
df_contributors_count = df_contributors_count.merge(
    df_contributors_total.set_index("Editing Software")["total_contributors"].rename("Total"),
    left_on="Editing Software",
    right_index=True,
    how="left",
)
df_contributors_count = df_contributors_count.sort_values("Total", ascending=False, kind="stable").reset_index(
    drop=True
)
# 1-based rank following the descending "Total" order
df_contributors_count.insert(0, "Rank", range(1, len(df_contributors_count) + 1))
# Remove the "year" label that the pivot leaves on the column index
df_contributors_count.columns.name = None


# Map editor names to HTML links for every rule in the config that has a "link".
# The file is read explicitly as UTF-8 so that non-ASCII editor names do not
# depend on the platform's default encoding.
with open("../config/replace_rules_created_by.json", encoding="utf-8") as f:
    name_to_html_link = {
        name: f'<a href="{item["link"]}">{name}</a>' for name, item in json.load(f).items() if "link" in item
    }

# Replace known editor names by their link; names without a link stay unchanged
for df in [df_edit_count, df_contributors_count]:
    df["Editing Software"] = df["Editing Software"].map(lambda name: name_to_html_link.get(name, name))

# Compact view: rank, name and the last five columns of the edit-count table.
# The same column labels are used to slice the contributors table, so both
# tables are expected to carry the same year columns.
recent_columns = ["Rank", "Editing Software", *df_edit_count.columns[-5:]]
tables = {
    "Edit Count": df_edit_count[recent_columns],
    "Edit Count All Years": df_edit_count,
    "Contributors Count": df_contributors_count[recent_columns],
    "Contributors Count All Years": df_contributors_count,
}

display(HTML(util.get_tables_html(tables, center_columns=["Rank", "Editing Software"])))

Rank,Editing Software,2022,2023,2024,2025,Total
1,JOSM,758566474,756759812,767135437,325128093,10906315113
2,iD,416745824,427963559,428165596,191281367,3839865430
3,Potlatch,2678828,2574667,2766754,977964,783200809
4,Rapid,63564994,63683830,52467053,20156724,327340538
5,osmtools,12399003,29128241,2154313,578476,260066827
6,bulk_upload.py,0,0,4341,0,121557139
7,StreetComplete,12904369,13630403,12782820,4729050,65361647
8,Merkaartor,491775,123828,96656,150822,59742307
9,upload.py,822536,968637,2536270,699623,57155171
10,Vespucci,3864897,4998545,9855980,5241579,40229893

Rank,Editing Software,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,Total
1,JOSM,125617731,494460089,539135966,542128543,500031429,666714750,642725919,625443152,664171922,794010429,790721973,996077692,917485702,758566474,756759812,767135437,325128093,10906315113
2,iD,0,0,0,0,34430862,109275830,142202485,189396418,279162048,321342148,359664267,459865024,480370002,416745824,427963559,428165596,191281367,3839865430
3,Potlatch,44030413,64726838,100956564,147706379,133843837,78092007,60204602,42988829,33377795,25182374,20637960,17371354,5083644,2678828,2574667,2766754,977964,783200809
4,Rapid,0,0,0,0,0,0,0,0,0,0,11678103,51870526,63919308,63564994,63683830,52467053,20156724,327340538
5,osmtools,150412240,8994328,981921,1229689,2734257,1499370,6826400,15035558,2655505,3271989,7558536,6454470,8152531,12399003,29128241,2154313,578476,260066827
6,bulk_upload.py,64669041,24649625,1442184,759090,185033,8089,1061001,1113638,2398,1036,26933301,263564,464798,0,0,4341,0,121557139
7,StreetComplete,0,0,0,0,0,0,0,85,1256558,1701133,1983723,3934118,12439388,12904369,13630403,12782820,4729050,65361647
8,Merkaartor,5378663,10887187,10250026,9702870,4380406,3791724,3696590,2522894,1735350,2138115,2316287,1745000,334114,491775,123828,96656,150822,59742307
9,upload.py,31167015,10166002,2402891,421238,776590,422731,488194,4827,38997,3021,4195,1853644,4378760,822536,968637,2536270,699623,57155171
10,Vespucci,2087,22298,49455,165722,277566,804178,885787,1055056,1559094,2073139,2296702,3560018,3517790,3864897,4998545,9855980,5241579,40229893

Rank,Editing Software,2022,2023,2024,2025,Total
1,iD,204579,210136,205427,108469,1582010
2,Potlatch,350,234,147,71,358299
3,MAPS.ME,15515,16231,7729,1971,320699
4,JOSM,20537,19741,18127,11346,150483
5,StreetComplete,23525,26809,27111,17443,81672
6,OsmAnd,8623,8544,7875,4082,37854
7,Organic Maps,6258,10557,12765,9308,31215
8,Vespucci,5025,5371,5322,3080,24225
9,Rapid,2183,9121,3662,2801,19754
10,Go Map!!,3720,3830,4162,2568,19035

Rank,Editing Software,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,Total
1,iD,0,0,0,0,56176,125149,133826,148447,194537,214057,203438,241197,239537,204579,210136,205427,108469,1582010
2,Potlatch,59540,69047,81896,107156,72954,29604,24283,14901,10184,6598,5043,3531,608,350,234,147,71,358299
3,MAPS.ME,0,0,0,0,0,0,0,96584,102375,71199,55719,35483,19974,15515,16231,7729,1971,320699
4,JOSM,13755,18797,20232,23441,23140,21862,22796,22315,23191,22762,22801,22109,21512,20537,19741,18127,11346,150483
5,StreetComplete,0,0,0,0,0,0,0,7,9572,9764,8967,11695,21935,23525,26809,27111,17443,81672
6,OsmAnd,0,192,645,1129,1653,1927,2354,3534,4862,5982,7063,6977,7959,8623,8544,7875,4082,37854
7,Organic Maps,0,0,0,0,0,0,0,0,0,0,0,0,3017,6258,10557,12765,9308,31215
8,Vespucci,58,237,460,957,1622,1801,2075,2379,2920,3331,3632,4314,4539,5025,5371,5322,3080,24225
9,Rapid,0,0,0,0,0,0,0,0,0,0,1105,3709,2124,2183,9121,3662,2801,19754
10,Go Map!!,0,0,0,0,2205,2046,1335,1508,1834,2685,3074,3152,3748,3720,3830,4162,2568,19035

Rank,Editing Software,2022,2023,2024,2025,Total
1,iD,6272457,6313210,6578483,2807097,69931098
2,JOSM,4485893,4068311,3663326,1445593,58772564
3,StreetComplete,2635760,2656229,2846512,1054031,13167505
4,Potlatch,20301,15029,9669,4693,8849121
5,Rapid,400491,353681,279572,110898,2208864
6,MAPS.ME,65282,77202,31327,8887,1914295
7,Go Map!!,165729,180799,199288,82210,1586338
8,Vespucci,162756,184752,178514,77992,1426662
9,OsmAnd,169553,139673,116624,46161,1116339
10,StreetComplete_ee,17650,200267,476284,250391,944592

Rank,Editing Software,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,Total
1,iD,0,0,0,0,1226853,3809897,4498088,4237327,5157967,5442371,6485929,8756318,8345101,6272457,6313210,6578483,2807097,69931098
2,JOSM,1201169,1759961,1977005,2398402,2542657,2709952,3084451,3207924,3691294,4107664,5058818,6701849,6668295,4485893,4068311,3663326,1445593,58772564
3,StreetComplete,0,0,0,0,0,0,0,86,205010,283824,381815,680376,2423862,2635760,2656229,2846512,1054031,13167505
4,Potlatch,1073205,1126552,1155119,1393596,1156234,1237552,499998,361097,273535,205891,160872,132105,23673,20301,15029,9669,4693,8849121
5,Rapid,0,0,0,0,0,0,0,0,0,0,98897,506030,459295,400491,353681,279572,110898,2208864
6,MAPS.ME,0,0,0,0,0,0,0,373309,462551,324183,284206,180246,107102,65282,77202,31327,8887,1914295
7,Go Map!!,0,0,0,0,45404,61110,110515,71153,112001,118527,123017,140351,176234,165729,180799,199288,82210,1586338
8,Vespucci,673,4141,9036,17380,38902,56550,54829,67760,107835,102336,117745,118168,127293,162756,184752,178514,77992,1426662
9,OsmAnd,0,4784,11445,7284,9771,13418,18671,28559,46542,100395,132194,138102,133163,169553,139673,116624,46161,1116339
10,StreetComplete_ee,0,0,0,0,0,0,0,0,0,0,0,0,0,17650,200267,476284,250391,944592
