# Landuse without buildings – mapping progress

In [1]:
from collections import defaultdict
from datetime import timedelta
import dateutil.parser
from pathlib import Path
import xml.etree.ElementTree as ET

import geopandas as gpd
import h3
import libpysal
import pandas as pd
from pandas import Timedelta
import plotly.express as px
import requests
import requests_cache
import seaborn as sns
from shapely.geometry import Polygon
from tqdm import tqdm

Configuration options

In [2]:
# Show every column and up to 1000 rows when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 1_000)
# Cache HTTP responses in a local SQLite file for 30 days so re-running the notebook
# does not hit the remote APIs again.
requests_cache.install_cache(
    cache_name='api_cache.sqlite',
    backend='sqlite',
    expire_after=timedelta(days=30),
)

Fetch the status of all tasks in all challenges using the script `13_challenge_status.py`. This results in a bunch of CSV files in `data/`. Read all the CSV files into a Pandas DataFrame:

In [3]:
# One DataFrame per matching CSV file, indexed by TaskID, concatenated into a single frame.
challenge_stats = pd.concat([
    pd.read_csv(csv_file, index_col='TaskID', parse_dates=['MappedOn'])
    for csv_file in Path('data').glob('challenge_28030_tasks.csv')
])

# Number of tasks completed per mapper

In [4]:
# Count tasks per (mapper, status): one row per mapper, one column per task status.
mapper_status = (
    challenge_stats[['Mapper', 'TaskStatus']]
    .groupby(['Mapper', 'TaskStatus'])
    .size()
    .unstack()
    .fillna(value=0)
    .astype(int)
    .rename(columns={'Already_Fixed': 'Already Fixed', 'Not_An_Issue': 'Not An Issue'})
)
# The total is taken over every status column before the table is narrowed down below.
mapper_status = mapper_status.assign(**{'Tasks total': mapper_status.sum(axis=1)})
mapper_status = mapper_status.sort_values('Tasks total', ascending=False)[
    ['Tasks total', 'Fixed', 'Already Fixed', 'Not An Issue']
]

# Summed CompletionTime per mapper, interpreted as milliseconds and rounded to whole seconds.
completion_time = challenge_stats[['Mapper', 'CompletionTime']].groupby('Mapper').sum()
completion_time = completion_time.assign(
    CompletionTime=pd.to_timedelta(completion_time['CompletionTime'], unit="ms").round('s')
)
mapper_status = mapper_status.join(completion_time)

# Only display mappers with at least 10 tasks.
mapper_status[mapper_status['Tasks total'] >= 10]

Unnamed: 0_level_0,Tasks total,Fixed,Already Fixed,Not An Issue,CompletionTime
Mapper,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
söm4324,4765,1736,2549,480,8 days 03:14:54
theophrastos,3152,891,298,1963,2 days 22:05:46
Geofreund1,2319,1305,58,956,7 days 13:40:54
fx99,2229,987,40,1202,1 days 19:30:34
hfs,919,491,125,303,2 days 20:13:28
Rainero,907,598,51,258,6 days 07:02:21
puma515,830,167,1,662,0 days 09:23:52
martinst,612,179,72,361,0 days 23:04:22
EvaUnterwegs,547,172,5,370,0 days 15:38:34
googlenaut,546,430,4,112,4 days 15:52:09


In [5]:
# Non-null entries per column, i.e. the number of mappers in the table.
mapper_status.count()

Tasks total       186
Fixed             186
Already Fixed     186
Not An Issue      186
CompletionTime    186
dtype: int64

In [6]:
# How many mappers handled fewer than 10 tasks.
mapper_status[mapper_status['Tasks total'] < 10].count()

Tasks total       117
Fixed             117
Already Fixed     117
Not An Issue      117
CompletionTime    117
dtype: int64

In [7]:
# Combined contribution (task counts and completion time) of mappers with fewer than 10 tasks.
mapper_status[mapper_status['Tasks total'] < 10].sum()

Tasks total                   374
Fixed                         183
Already Fixed                  62
Not An Issue                  129
CompletionTime    2 days 17:54:13
dtype: object

The top 25 mappers by number of tasks. This list is needed later for the map of dominant mappers.

In [8]:
# mapper_status is sorted by 'Tasks total' (descending), so the first 25 rows are the top 25 mappers.
mapper_status.head(25)

Unnamed: 0_level_0,Tasks total,Fixed,Already Fixed,Not An Issue,CompletionTime
Mapper,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
söm4324,4765,1736,2549,480,8 days 03:14:54
theophrastos,3152,891,298,1963,2 days 22:05:46
Geofreund1,2319,1305,58,956,7 days 13:40:54
fx99,2229,987,40,1202,1 days 19:30:34
hfs,919,491,125,303,2 days 20:13:28
Rainero,907,598,51,258,6 days 07:02:21
puma515,830,167,1,662,0 days 09:23:52
martinst,612,179,72,361,0 days 23:04:22
EvaUnterwegs,547,172,5,370,0 days 15:38:34
googlenaut,546,430,4,112,4 days 15:52:09


In [9]:
# Export the per-mapper table as Markdown. Mapper names contain non-ASCII characters
# (e.g. "söm4324"), so pin the encoding instead of relying on the platform default.
with open('mapper_stats.md', 'w', encoding='utf-8') as outfile:
    outfile.write(mapper_status.to_markdown())

## Progress per week

In [10]:
# Weekly bins (weeks ending on Monday) of task counts, one column per task status.
weekly_bins = pd.Grouper(key='MappedOn', freq='W-MON')
tasks_per_week = (
    challenge_stats[['MappedOn', 'TaskStatus']]
    .groupby([weekly_bins, 'TaskStatus'])
    .size()
    .unstack()
    .fillna(0)
    .astype(int)
    # Keep only the status columns of interest and give them display names.
    .filter(regex=r'Fixed|Not_An_Issue|Already_Fixed|Too_Hard|Skipped')
    .rename(columns={
        'Not_An_Issue': 'Not An Issue',
        'Already_Fixed': 'Already Fixed',
        'Too_Hard': 'Too Hard',
    })
)

axis_labels = {
    'MappedOn': 'Kalenderwoche',
    'value': 'Anzahl erledigter Aufgaben',
    'TaskStatus': 'Status',
}
fig = px.bar(tasks_per_week, labels=axis_labels)
# One tick per week (dtick is given in milliseconds); label ticks with week number and year.
fig.update_xaxes(dtick=7*24*3600*1000, tickformat='%V\n%Y')
fig.show()

# Detailed changes per mapper

Find the date range where mappers have been working on the challenges.

In [11]:
# First and last MappedOn timestamp per mapper.
mapping_time = challenge_stats.groupby('Mapper')['MappedOn'].agg(
    timestamp_min='min',
    timestamp_max='max',
)
# Widen the window (6 hours before the first task, 1 day 6 hours after the last one)
# and truncate both ends to calendar dates.
mapping_time = mapping_time.assign(
    timestamp_min=(mapping_time['timestamp_min'] - Timedelta('6h')).dt.date,
    timestamp_max=(mapping_time['timestamp_max'] + Timedelta('1d 6h')).dt.date,
)

For all mappers, fetch all changeset summaries in the date range where they might have worked on the challenges.

In [12]:
# Collect the changeset summaries of every mapper within that mapper's date window.
# Result: changeset id -> dict of the changeset's XML attributes merged with its tags.
all_changesets = {}
for mapper, timestamp_min, timestamp_max in tqdm(mapping_time.itertuples(), total=len(mapping_time)):
    done = False
    # Earliest `created_at` seen so far for this mapper; drives the paging below.
    timestamp = None
    while not done:
        response = requests.get('https://www.openstreetmap.org/api/0.6/changesets',
                               params={'display_name': mapper, 'time': f'{timestamp_min},{timestamp_max}'})
        if response.ok:
            root = ET.fromstring(response.text)
            if len(root) == 0:
                # The response has no child elements: nothing (more) to fetch for this mapper.
                done = True
            else:
                for element in root:
                    # The element's attribute dict is reused as the record; tags are merged into it.
                    changeset = element.attrib
                    # String comparison; assumes all `created_at` values share one ISO 8601 format — TODO confirm
                    if timestamp is None or changeset['created_at'] < timestamp:
                        timestamp = changeset['created_at']
                    for tag in element:
                        changeset[tag.attrib['k']] = tag.attrib['v']
                    all_changesets[changeset['id']] = changeset
                # Paging: the next request ends one second before the earliest changeset seen so far.
                # Need to subtract one second, because it's an inclusive range
                timestamp_max = (dateutil.parser.isoparse(timestamp) - timedelta(seconds=1)).isoformat()
        else:
            # Give up on this mapper after a failed request, but keep processing the others.
            done = True
            print(f"WARNING: {response.status_code} {response.reason} for URL: {response.request.url}")

100%|██████████| 186/186 [00:02<00:00, 73.27it/s] 


In [13]:
# One row per changeset, indexed by changeset id.
changesets = pd.DataFrame.from_dict(all_changesets, orient='index')
# Changesets without a comment tag get an empty string so the `.str` filters below work.
# Assign the result back instead of calling fillna(inplace=True) on the column: the in-place
# form operates on a temporary object and is a no-op under pandas copy-on-write.
if 'comment' not in changesets.columns:
    # No changeset carried a comment at all (or no changesets were fetched).
    changesets['comment'] = ''
changesets['comment'] = changesets['comment'].fillna('')

In [14]:
# Keep only changesets whose comment carries the MapRoulette hashtag and the URL of challenge 28030.
# regex=False: both needles are literal text; as a regex the '.' in the URL would match any character.
# NOTE(review): a plain substring match would also match a longer challenge id starting with 28030.
is_maproulette = changesets['comment'].str.contains('#maproulette', regex=False)
mentions_challenge = changesets['comment'].str.contains(
    "https://maproulette.org/browse/challenges/28030", regex=False)
challenge_cs = changesets[is_maproulette & mentions_challenge]

In [15]:
def _classify_osm_change(root):
    """Count the objects of a parsed osmChange document by action and category.

    ``root`` is iterated as: action elements (``create`` / ``modify`` / ``delete``), each
    containing node / way / relation elements. Returns ``(stats, building_timestamps)`` as
    described in :func:`changeset_stats`.
    """
    created_nodes = set()
    modified_nodes = set()
    stats = defaultdict(int)
    building_timestamps = defaultdict(int)

    for action in root:
        for element in action:
            if element.tag == 'node':
                if action.tag == 'create':
                    created_nodes.add(element.attrib['id'])
                elif action.tag == 'modify':
                    modified_nodes.add(element.attrib['id'])
                elif action.tag == 'delete':
                    stats[(action.tag, element.tag)] += 1
                continue

            # Way or relation: collect its tags. Created nodes referenced by a way are dropped
            # from the created-node set, so only nodes not (yet) used by a way remain counted.
            tags = {}
            for child in element:
                if child.tag == 'nd':
                    created_nodes.discard(child.attrib['ref'])
                elif child.tag == 'tag':
                    tags[child.attrib['k']] = child.attrib['v']

            # The first matching category wins: building, then landuse, then highway,
            # otherwise the plain element type (way / relation).
            if 'building' in tags:
                category = 'building=yes' if tags.get('building') == 'yes' else 'building=*'
                # NOTE(review): buildings are recorded here for every action, including 'modify',
                # although a later cell treats these counts as *added* buildings — confirm intent.
                building_timestamps[element.attrib['timestamp']] += 1
            elif 'landuse' in tags:
                category = 'landuse'
            elif 'highway' in tags:
                category = 'highway'
            else:
                category = element.tag
            stats[(action.tag, category)] += 1

    # Always emit the node counters, even when they are zero.
    stats[('create', 'node')] += len(created_nodes)
    stats[('modify', 'node')] += len(modified_nodes)
    return stats, building_timestamps


def changeset_stats(changeset_id):
    """Download one OSM changeset and summarize what it changed.

    Parameters
    ----------
    changeset_id
        Id of the changeset; interpolated into the ``/changeset/{id}/download`` URL.

    Returns
    -------
    (stats, building_timestamps)
        ``stats`` maps ``(action, category)`` to a count, where ``action`` is the osmChange
        action tag and ``category`` is one of ``'building=yes'``, ``'building=*'``,
        ``'landuse'``, ``'highway'``, ``'node'``, ``'way'``, ``'relation'``.
        ``building_timestamps`` maps an element's ``timestamp`` attribute to the number of
        building ways/relations carrying that timestamp.

    Raises
    ------
    requests.HTTPError
        If the download request fails (via ``raise_for_status``).
    """
    response = requests.get(f'https://www.openstreetmap.org/api/0.6/changeset/{changeset_id}/download')
    response.raise_for_status()
    return _classify_osm_change(ET.fromstring(response.text))

In [16]:
# Loop over changeset IDs, collect stats per user
user_stats = defaultdict(lambda: defaultdict(int))
building_timestamps = []
for row in tqdm(challenge_cs.itertuples(), total=len(challenge_cs)):
    cs_id = row.id
    mapper = row.user
    cs_stats, cs_building_timestamps = changeset_stats(cs_id)
    for category, count in cs_stats.items():
        user_stats[mapper][category] += count
    building_timestamps.extend(cs_building_timestamps.items())

100%|██████████| 9358/9358 [00:20<00:00, 448.37it/s]


In [17]:
# Per-mapper table of created / modified / deleted objects.
# Columns are a two-level index: (action, category).
stats_table = (
    pd.DataFrame.from_dict(user_stats, orient='index')
    .reindex(['create', 'modify', 'delete'], axis=1, level=0)
    .reindex(['building=yes', 'building=*', 'landuse', 'highway', 'node', 'way', 'relation'], axis=1, level=1)
    .reindex(index=mapper_status.index)   # same row order as the mapper ranking
    .dropna(how='all')                    # drop mappers without any collected stats
    .fillna(value=0)
    .astype(int)
)

# Join the completion time under the 'create' group. Work on a relabelled copy so that
# `completion_time` itself keeps its flat column index for other cells.
completion_time_create = completion_time.copy()
completion_time_create.columns = pd.MultiIndex.from_tuples([('create', 'CompletionTime')])
stats_table = stats_table.join(completion_time_create)

# Append a 'total' row. DataFrame.append is deprecated (removed in pandas 2.0), so build a
# one-row frame and concatenate it; infer_objects() restores the integer / timedelta dtypes
# that are lost when the mixed-type sum is transposed into a row.
total_row = stats_table.sum(axis=0).rename('total').to_frame().T.infer_objects()
total_row.index.name = stats_table.index.name
stats_table = pd.concat([stats_table, total_row])

# Created buildings per hour of completion time.
# NOTE(review): a completion time of zero would yield inf here, which astype(int) rejects.
created_buildings = stats_table[('create', 'building=yes')] + stats_table[('create', 'building=*')]
completion_hours = stats_table[('create', 'CompletionTime')] / pd.Timedelta(hours=1)
stats_table[('create', 'buildings per hour')] = (
    (created_buildings / completion_hours).fillna(0).round().astype(int)
)
stats_table = stats_table.drop(columns=[('create', 'CompletionTime')])

# Move the new last column ('buildings per hour') right behind the two building columns.
cols = stats_table.columns.tolist()
cols = cols[0:2] + [cols[-1]] + cols[2:-1]
stats_table = stats_table[cols]
stats_table


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0_level_0,create,create,create,create,create,create,create,create,modify,modify,modify,modify,modify,modify,modify,delete,delete,delete
Unnamed: 0_level_1,building=yes,building=*,buildings per hour,landuse,highway,node,way,relation,building=yes,building=*,landuse,highway,node,way,relation,node,way,relation
Mapper,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
söm4324,41547,176,214,5,21,15,140,0,1425,319,98,300,33747,306,7,3068,356,1
theophrastos,8456,257,124,166,21,17,132,0,410,166,567,233,11534,137,8,701,61,1
Geofreund1,21718,1485,128,43,138,2077,336,7,4558,1480,268,738,17122,664,46,1783,136,0
fx99,10543,311,249,150,18,14,76,0,83,126,686,84,3754,222,0,637,43,3
hfs,8153,9,120,113,50,14,134,0,1303,158,202,95,5260,108,0,892,54,2
Rainero,11886,3465,102,110,65,20,174,0,1250,446,432,559,20192,189,12,1473,113,7
puma515,726,283,107,3,2,9,49,2,68,86,31,23,1379,18,0,73,4,0
martinst,2237,3,97,0,17,2,364,0,80,19,28,24,927,10,0,42,6,0
EvaUnterwegs,4519,8,289,3,4,0,2,0,223,47,44,8,2259,12,0,112,10,0
googlenaut,14799,35,133,82,18,0,410,0,929,254,92,59,14061,75,0,1186,135,2


In [18]:
# Export the detailed per-mapper table as Markdown. The row labels are mapper names, which
# contain non-ASCII characters, so pin the encoding instead of relying on the platform default.
with open('stats_detail_table.md', 'w', encoding='utf-8') as outfile:
    outfile.write(stats_table.to_markdown())

# “Not an issue” tasks depending on the size

In [19]:
# Per-task landuse area, building coverage and status, read from the challenge GeoJSON.
challenge_details = (
    gpd.read_file('data/challenge_28030_tasks.geojson')[['area', 'building_fraction', 'mr_taskStatus']]
    .assign(
        area=lambda df: df['area'].astype(float),
        # Scaled by 100 so the value reads as a percentage.
        building_fraction=lambda df: df['building_fraction'].astype(float) * 100.0,
        # 100 for "Fixed" tasks, 0 otherwise, so that an average reads as a percentage.
        is_fixed=lambda df: (df['mr_taskStatus'] == "Fixed").astype(int) * 100.0,
    )
)
challenge_details

Unnamed: 0,area,building_fraction,mr_taskStatus,is_fixed
0,15774.0,3.67,Not_An_Issue,0.0
1,20407.0,1.30,Fixed,100.0
2,28354.0,1.15,Not_An_Issue,0.0
3,948.0,0.00,Not_An_Issue,0.0
4,7015.0,3.48,Not_An_Issue,0.0
...,...,...,...,...
21050,10708.0,0.00,Not_An_Issue,0.0
21051,12736.0,2.96,Fixed,100.0
21052,54294.0,2.44,Not_An_Issue,0.0
21053,31697.0,1.37,Already_Fixed,0.0


In [20]:
# Heatmap of the average `is_fixed` value (0 or 100) per (area, building_fraction) bin,
# with marginal histograms on both axes.
heatmap_labels = {
    'building_fraction': 'Anteil der landuse-Fläche durch Gebäude bedeckt [%]',
    'area': 'landuse-Fläche [m²]',
}
fig = px.density_heatmap(
    data_frame=challenge_details,
    x='area',
    y='building_fraction',
    z='is_fixed',
    histfunc='avg',
    text_auto='.0f',
    height=800,
    marginal_x='histogram',
    marginal_y='histogram',
    color_continuous_scale=px.colors.sequential.Viridis,
    labels=heatmap_labels,
    title="Anteil der Tasks, bei denen tatsächlich etwas zu tun war",
)
fig.update_layout(coloraxis_colorbar=dict(title="Anteil “Fixed” [%]"))
fig.show()

# Map challenge tasks on H3

In [21]:
# H3 resolution passed to h3.geo_to_h3 when task centroids are mapped to hexagon ids.
H3_RESOLUTION = 6

In [22]:
# Map every task (by id) to the H3 cell that contains the centroid of its geometry.
# Centroids computed directly on geographic (lat/lng) coordinates are inaccurate and make
# GeoPandas emit a warning, so compute them in a projected CRS and convert the resulting
# points back to WGS84, which is what h3.geo_to_h3 expects.
# EPSG:3035 = ETRS89-extended / LAEA Europe; the tasks are located in Germany.
CENTROID_CRS = 3035

task_frames = []
for geojson_path in Path('data').glob('challenge_28030_tasks.geojson'):
    tasks = gpd.read_file(geojson_path).set_index('id')
    centroids = tasks.geometry.to_crs(CENTROID_CRS).centroid.to_crs(4326)
    # geo_to_h3 takes (lat, lng): y is the latitude, x the longitude.
    tasks['h3'] = [h3.geo_to_h3(point.y, point.x, H3_RESOLUTION) for point in centroids]
    task_frames.append(tasks[['h3']])
all_tasks = pd.concat(task_frames)
all_tasks


Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




Unnamed: 0_level_0,h3
id,Unnamed: 1_level_1
https://osm.org/way/840036029//48,861f0ed57ffffff
https://osm.org/way/89503156//9,861f0ed17ffffff
https://osm.org/way/126610122//1,861f03a6fffffff
https://osm.org/way/156646762//5,861f03a6fffffff
https://osm.org/relation/3097787//10,861fa13a7ffffff
...,...
https://osm.org/relation/7620926//5,861f1aa67ffffff
https://osm.org/way/146636370//1,861f15ab7ffffff
https://osm.org/way/154877186//3,861f068a7ffffff
https://osm.org/way/166508965//2,861f1eaefffffff


In [23]:
# Attach the H3 cell to every task: re-index the task status table by TaskName and join
# (default left join) with all_tasks, which is indexed by the GeoJSON `id`.
challenge_tasks = challenge_stats.set_index('TaskName').join(all_tasks)

In [24]:
# Display the joined table to check that the `h3` column was populated.
challenge_tasks

Unnamed: 0,TaskLink,ChallengeID,ChallengeLink,TaskStatus,TaskPriority,MappedOn,CompletionTime,Mapper,ReviewStatus,Reviewer,ReviewedAt,ReviewTimeSeconds,AdditionalReviewers,Comments,BundleId,IsBundlePrimary,Tags,h3
https://osm.org/relation/10019133//32,[[hyperlink URL link=https://maproulette.org/c...,28030,[[hyperlink URL link=https://maproulette.org/b...,Not_An_Issue,Medium,2022-11-18 17:51:34.325000+00:00,12811,EvaUnterwegs,,,,,,,,,,861f1388fffffff
https://osm.org/relation/10091308//44,[[hyperlink URL link=https://maproulette.org/c...,28030,[[hyperlink URL link=https://maproulette.org/b...,Not_An_Issue,Medium,2022-09-28 05:01:35.377000+00:00,3074,fx99,,,,,,,,,,861faa077ffffff
https://osm.org/relation/10092110//12,[[hyperlink URL link=https://maproulette.org/c...,28030,[[hyperlink URL link=https://maproulette.org/b...,Fixed,Low,2022-07-23 04:14:48.593000+00:00,229700,Fischkopp0815,,,,,,,,,,861f33b57ffffff
https://osm.org/relation/10092110//3,[[hyperlink URL link=https://maproulette.org/c...,28030,[[hyperlink URL link=https://maproulette.org/b...,Fixed,Low,2022-07-23 04:09:22.542000+00:00,330514,Fischkopp0815,,,,,,,,,,861f33b5fffffff
https://osm.org/relation/10094786//1,[[hyperlink URL link=https://maproulette.org/c...,28030,[[hyperlink URL link=https://maproulette.org/b...,Fixed,Low,2023-02-22 20:40:05.386000+00:00,143764,Rainero,,,,,,,,,,861f1a16fffffff
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
https://osm.org/way/99912338//2,[[hyperlink URL link=https://maproulette.org/c...,28030,[[hyperlink URL link=https://maproulette.org/b...,Not_An_Issue,High,2022-12-19 09:04:46.201000+00:00,96159,Geofreund1,,,,,,,,,,861f12877ffffff
https://osm.org/way/999334343//15,[[hyperlink URL link=https://maproulette.org/c...,28030,[[hyperlink URL link=https://maproulette.org/b...,Already_Fixed,Low,2023-02-26 20:09:44.914000+00:00,49217,söm4324,,,,,,,,,,861f1a647ffffff
https://osm.org/way/999334343//16,[[hyperlink URL link=https://maproulette.org/c...,28030,[[hyperlink URL link=https://maproulette.org/b...,Already_Fixed,Low,2023-02-26 20:10:02.659000+00:00,17336,söm4324,,,,,,,,,,861f1a647ffffff
https://osm.org/way/999334343//7,[[hyperlink URL link=https://maproulette.org/c...,28030,[[hyperlink URL link=https://maproulette.org/b...,Already_Fixed,Low,2023-02-26 20:20:34.190000+00:00,631103,söm4324,,,,,,söm4324: -,,,,861f1a647ffffff


## Percentage of "Not An Issue"

In [25]:
# Number of tasks per hexagon and status: one row per H3 cell, one column per status.
status_counts = challenge_tasks.groupby(['h3', 'TaskStatus'])[['TaskStatus']].count()
task_completion = status_counts.unstack(fill_value=0).droplevel(0, axis=1)
# `total` covers every status column; `closed` only the three resolved statuses.
task_completion = task_completion.assign(total=task_completion.sum(axis=1))
task_completion = task_completion.assign(
    closed=task_completion[['Already_Fixed', 'Fixed', 'Not_An_Issue']].sum(axis=1)
)
task_completion

TaskStatus,Already_Fixed,Fixed,Not_An_Issue,total,closed
h3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
861e265b7ffffff,0,3,1,4,4
861e26c87ffffff,0,1,0,1,1
861e26c97ffffff,2,3,2,7,7
861e26ca7ffffff,0,4,0,4,4
861e26cb7ffffff,0,1,0,1,1
...,...,...,...,...,...
861faed97ffffff,0,1,0,1,1
861faed9fffffff,0,3,0,3,3
861faeda7ffffff,0,0,1,1,1
861faedafffffff,0,5,3,8,8


In [26]:
# Share of closed tasks per hexagon that needed no edit (already fixed or not an issue).
no_edit_needed = task_completion['Already_Fixed'] + task_completion['Not_An_Issue']
task_completion['NAI_Share'] = no_edit_needed / task_completion['closed']

In [27]:
def h3_to_geo(h):
    """Return the boundary of the H3 cell ``h`` as a shapely ``Polygon``.

    The boundary is requested with ``geo_json=True``; presumably that yields (lng, lat)
    coordinate pairs, i.e. the x/y order shapely expects — TODO confirm against the h3 docs.
    """
    return Polygon(h3.h3_to_geo_boundary(h, geo_json=True))

In [28]:
# Hexagon polygon for every H3 cell, indexed by the cell id so it aligns with task_completion.
h3_geometry = task_completion.index.to_series().apply(h3_to_geo)
task_completion = gpd.GeoDataFrame(task_completion, geometry=h3_geometry, crs=4326)


In [29]:
# Name the driver explicitly, like the other exports in this notebook, instead of relying
# on the GeoPandas default.
task_completion.to_file('data/task_completion_stats.geojson', driver='GeoJSON')

## Regional dominance of mappers

Join the tasks by ID with the input GeoJSON to find the centroid coordinates. Map each coordinate pair to a [H3](https://h3geo.org/) hexagon. Then find which mapper has solved the most tasks per hexagon.

For each hexagon count how many tasks each mapper completed.

In [30]:
# Tasks per (hexagon, mapper), ignoring tasks without a mapper. The result is indexed by
# hexagon only, with `Mapper` as a column, sorted so the largest counts come first.
tasks_on_h3 = (
    challenge_tasks[challenge_tasks.Mapper.notna()]
    .groupby(['h3', 'Mapper'])[['TaskStatus']]
    .count()
    .rename(columns={'TaskStatus': 'count'})
    .reset_index(level=1, drop=False)
    .sort_values('count', ascending=False)
)

For each hexagon only keep the mapper with the most tasks.

In [31]:
# tasks_on_h3 is sorted by count (descending), so the first row per hexagon is its top mapper.
# Take an explicit copy: the frame gets new columns in the next cell, and assigning to a
# filtered slice triggers pandas' SettingWithCopyWarning.
dominant_mapper = tasks_on_h3[~tasks_on_h3.index.duplicated(keep='first')].copy()

In [32]:
# Attach the hexagon polygon of every H3 cell (the index) and export as GeoJSON.
# assign() returns a new frame, which avoids setting columns on the filtered slice
# (the cause of the SettingWithCopyWarning) and makes the temporary `h3` column unnecessary.
hexagons = [h3_to_geo(cell) for cell in dominant_mapper.index]
dominant_mapper = gpd.GeoDataFrame(dominant_mapper.assign(geometry=hexagons), geometry='geometry', crs=4326)
dominant_mapper.to_file('data/dominant_mapper.geojson', driver='GeoJSON')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Merge touching hexagons into contiguous polygons, for nicer map visualization.

In [33]:
# create spatial weights matrix
spatial_weights = libpysal.weights.Queen.from_dataframe(dominant_mapper)

# get component labels
components = spatial_weights.component_labels
dominant_mapper['component'] = components

dominant_combined = dominant_mapper.dissolve(by=['component', 'Mapper'], as_index=False, aggfunc='sum')


The weights matrix is not fully connected: 
 There are 204 disconnected components.
 There are 100 islands with ids: 478, 678, 877, 957, 1218, 1241, 1243, 1269, 1510, 2047, 2203, 2255, 2470, 2511, 2547, 2715, 2742, 3002, 3043, 3133, 3173, 3203, 3250, 3324, 3328, 3493, 3527, 3531, 3579, 3581, 3587, 3603, 3668, 3674, 3675, 3679, 3767, 3770, 3794, 3795, 3821, 3842, 3857, 3905, 3949, 4095, 4243, 4306, 4454, 4474, 4503, 4504, 4527, 4606, 4613, 4614, 4615, 4620, 4622, 4648, 4655, 4763, 4866, 4874, 4886, 4942, 4949, 4965, 4968, 4999, 5072, 5075, 5093, 5105, 5109, 5122, 5182, 5295, 5390, 5574, 5605, 5621, 5636, 5655, 5835, 5879, 6020, 6025, 6026, 6027, 6031, 6036, 6053, 6064, 6080, 6137, 6138, 6299, 6301, 6302.



In [34]:
# Add each mapper's overall task count ('Tasks total') to the dissolved polygons;
# the left join keeps every polygon.
dominant_combined = dominant_combined.merge(mapper_status[['Tasks total']], left_on='Mapper', right_index=True, how='left')

In [35]:
# Export the dissolved polygons without the helper column 'component'.
dominant_combined.drop(columns=['component']).to_file('data/dominant_mapper_dissolved.geojson', driver='GeoJSON')

## Last resolved timestamp per hexagon

In [36]:
# Earliest MappedOn timestamp across all tasks.
challenge_tasks['MappedOn'].min()

Timestamp('2022-07-17 13:44:18.306000+0000', tz='UTC')

In [37]:
# Latest MappedOn timestamp per hexagon, ignoring tasks that have no MappedOn value.
mapped_tasks = challenge_tasks.dropna(subset=['MappedOn'])
task_finish_date = mapped_tasks.groupby('h3')[['MappedOn']].max()
task_finish_date

Unnamed: 0_level_0,MappedOn
h3,Unnamed: 1_level_1
861e265b7ffffff,2023-02-20 11:39:34.059000+00:00
861e26c87ffffff,2023-02-21 13:39:32.177000+00:00
861e26c97ffffff,2023-02-26 06:52:12.217000+00:00
861e26ca7ffffff,2023-02-21 14:24:19.402000+00:00
861e26cb7ffffff,2022-08-15 12:56:57.157000+00:00
...,...
861faed97ffffff,2022-11-08 06:30:03.660000+00:00
861faed9fffffff,2023-01-04 05:41:32.644000+00:00
861faeda7ffffff,2022-12-21 06:09:39.991000+00:00
861faedafffffff,2023-01-03 06:32:56.388000+00:00


In [38]:
# Attach the hexagon polygon of every H3 cell (the index) and export as GeoJSON.
finish_hexagons = [h3_to_geo(cell) for cell in task_finish_date.index]
task_finish_date = gpd.GeoDataFrame(
    task_finish_date.assign(geometry=finish_hexagons),
    geometry='geometry',
    crs=4326,
)
task_finish_date.to_file('data/task_finish_date.geojson', driver='GeoJSON')

## Impact on buildings in Germany

Did we make a dent in the buildings mapped?

In [39]:
# Raw GeoJSON text of the Germany boundary; it is sent as-is in the ohsome request below.
germany = Path('germany.geojson').read_text()

In [None]:
# Count building elements inside the Germany polygon in weekly steps (P1W) over the given period.
request_form = dict(
    showMetadata='true',
    time='2022-07-11T00:00:00Z/2023-03-05T00:00:00Z/P1W',
    bpolys=germany,
    filter='(type:node or type:way or type:relation) and "building"=*',
)
ohsome_response = requests.post(
    'https://api.ohsome.org/v1/elements/count',
    data=request_form,
)
ohsome_response.raise_for_status()

In [None]:
# (timestamp, count) pairs collected from the challenge changesets, with parsed timestamps.
building_timestamps_df = pd.DataFrame(
    building_timestamps, columns=['timestamp', 'count']
).assign(timestamp=lambda df: pd.to_datetime(df['timestamp']))
building_timestamps_df

Unnamed: 0,timestamp,count
0,2022-08-19 19:55:29+00:00,1
1,2022-08-19 17:15:54+00:00,1
2,2022-08-19 17:12:26+00:00,1
3,2022-08-17 12:03:00+00:00,1
4,2022-08-17 11:58:05+00:00,3
...,...,...
7817,2022-07-22 22:15:22+00:00,12
7818,2023-02-28 22:03:43+00:00,25
7819,2023-02-28 21:59:50+00:00,19
7820,2023-02-28 21:57:59+00:00,46


Count added buildings per week

In [None]:
# Weekly sums (weeks ending on Monday), accumulated into a running total per week.
weekly_building_counts = building_timestamps_df.groupby(pd.Grouper(key='timestamp', freq='W-MON')).sum()
buildings_per_week = weekly_building_counts.cumsum().rename(columns={'count': 'challenge'})
buildings_per_week

Unnamed: 0_level_0,challenge
timestamp,Unnamed: 1_level_1
2022-07-18 00:00:00+00:00,262
2022-07-25 00:00:00+00:00,8216
2022-08-01 00:00:00+00:00,12750
2022-08-08 00:00:00+00:00,16960
2022-08-15 00:00:00+00:00,24279
2022-08-22 00:00:00+00:00,27819
2022-08-29 00:00:00+00:00,31348
2022-09-05 00:00:00+00:00,33843
2022-09-12 00:00:00+00:00,35301
2022-09-19 00:00:00+00:00,40009


In [None]:
# Building counts from the ohsome response, indexed by timestamp.
ohsome_result = ohsome_response.json()['result']
buildings_germany = (
    pd.DataFrame.from_records(ohsome_result)
    .assign(timestamp=lambda df: pd.to_datetime(df['timestamp']))
    .set_index('timestamp')
    .rename(columns={'value': 'buildings'})
)
buildings_germany

Unnamed: 0_level_0,buildings
timestamp,Unnamed: 1_level_1
2022-07-11 00:00:00+00:00,35319917.0
2022-07-18 00:00:00+00:00,35345856.0
2022-07-25 00:00:00+00:00,35373092.0
2022-08-01 00:00:00+00:00,35405706.0
2022-08-08 00:00:00+00:00,35437380.0
2022-08-15 00:00:00+00:00,35469994.0
2022-08-22 00:00:00+00:00,35495421.0
2022-08-29 00:00:00+00:00,35518103.0
2022-09-05 00:00:00+00:00,35541602.0
2022-09-12 00:00:00+00:00,35561266.0


In [None]:
# Keep only the timestamps present in both series, then subtract the challenge's running
# total from the overall building count.
building_stats = (
    buildings_germany
    .join(buildings_per_week, how='inner')
    .assign(without_challenge=lambda df: df['buildings'] - df['challenge'])
)
building_stats

Unnamed: 0_level_0,buildings,challenge,without_challenge
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-07-18 00:00:00+00:00,35345856.0,262,35345594.0
2022-07-25 00:00:00+00:00,35373092.0,8216,35364876.0
2022-08-01 00:00:00+00:00,35405706.0,12750,35392956.0
2022-08-08 00:00:00+00:00,35437380.0,16960,35420420.0
2022-08-15 00:00:00+00:00,35469994.0,24279,35445715.0
2022-08-22 00:00:00+00:00,35495421.0,27819,35467602.0
2022-08-29 00:00:00+00:00,35518103.0,31348,35486755.0
2022-09-05 00:00:00+00:00,35541602.0,33843,35507759.0
2022-09-12 00:00:00+00:00,35561266.0,35301,35525965.0
2022-09-19 00:00:00+00:00,35582808.0,40009,35542799.0


In [None]:
# Stacked area chart: buildings without the challenge plus the challenge's share on top.
# The y-range is clamped to the data so the comparatively small challenge share stays visible.
y_minmax = (building_stats['without_challenge'].min(), building_stats['buildings'].max())
area_columns = {
    "without_challenge": "Added outside challenge",
    "challenge": "Added by challenge",
}
area_labels = {
    "x": "Date",
    "value": "Number of buildings"
}
fig = px.area(
    building_stats.rename(columns=area_columns),
    building_stats.index,
    y=["Added outside challenge", "Added by challenge"],
    range_y=y_minmax,
    title="Number of buildings in Germany",
    labels=area_labels,
)
fig.show()