In [1]:
from collections import defaultdict
from datetime import timedelta
import dateutil.parser
import pandas as pd
from pandas import Timedelta
import requests
import xml.etree.ElementTree as ET

# Show every column when a DataFrame is rendered (no truncation of wide tables).
pd.set_option('display.max_columns', None)

In [2]:
# Load the per-task export of the challenge and parse the 'MappedOn' column
# into pandas datetimes so it can be aggregated with min/max later on.
challenge_stats = (
    pd.read_csv('data/challenge_14893_tasks.csv')
    .assign(MappedOn=lambda tasks: pd.to_datetime(tasks['MappedOn']))
)

In [3]:
# Number of tasks per mapper and task status, one row per mapper and one
# column per status. Combinations that never occur are counted as 0.
mapper_status = (
    challenge_stats
    .groupby(['Mapper', 'TaskStatus'])
    .size()
    .unstack()
    .fillna(0)
    .astype(int)
    .rename(columns={'Already_Fixed': 'Already Fixed', 'Not_An_Issue': 'Not An Issue'})
    # Row total over all status columns present at this point.
    .assign(**{'Tasks total': lambda counts: counts.sum(axis=1)})
    .sort_values('Tasks total', ascending=False)
    [['Tasks total', 'Fixed', 'Already Fixed', 'Not An Issue']]
)
mapper_status

TaskStatus,Tasks total,Fixed,Already Fixed,Not An Issue
Mapper,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
kjon,985,830,145,10
Fischkopp0815,603,371,215,17
fx99,284,256,15,13
BeKri,274,230,22,22
pyram,130,110,11,9
hfs,125,93,2,30
Hiddenhausener,71,63,1,7
DD1GJ,65,0,0,65
_klaas_,48,45,1,2
cepesko,38,30,0,8


In [4]:
# First and last 'MappedOn' timestamp per mapper, widened by 6 hours before
# the first and 1 day 6 hours after the last one, then truncated to calendar
# dates. NOTE(review): the padding presumably makes sure changesets made
# around the tasks fall inside the queried date range -- confirm.
mapping_time = (
    challenge_stats
    .groupby('Mapper')['MappedOn']
    .agg(['min', 'max'])
    .rename(columns={'min': 'timestamp_min', 'max': 'timestamp_max'})
    .assign(
        timestamp_min=lambda span: (span['timestamp_min'] - Timedelta('6h')).dt.date,
        timestamp_max=lambda span: (span['timestamp_max'] + Timedelta('1d 6h')).dt.date,
    )
)

In [5]:
# Download the metadata of every changeset of every mapper within the mapper's
# time window. The result maps changeset id -> dict of changeset attributes
# and changeset tags.
all_changesets = {}
for mapper, timestamp_min, timestamp_max in mapping_time.itertuples():
    # The query is repeated with a shrinking upper time bound until a page
    # comes back empty.
    while True:
        response = requests.get('https://www.openstreetmap.org/api/0.6/changesets',
                                params={'display_name': mapper, 'time': f'{timestamp_min},{timestamp_max}'})
        response.raise_for_status()
        root = ET.fromstring(response.text)
        if len(root) == 0:
            # Nothing (more) to fetch. Stop here: on a mapper's very first
            # page there is no 'created_at' to derive the next bound from.
            break
        for element in root:
            # Build a fresh dict instead of mutating element.attrib. Tags go
            # in first so that a tag can never overwrite a core attribute
            # such as 'id', 'user' or 'created_at'.
            changeset = {tag.attrib['k']: tag.attrib['v'] for tag in element.findall('tag')}
            changeset.update(element.attrib)
            all_changesets[changeset['id']] = changeset
        # Paging. Need to subtract one second, because it's an inclusive range
        oldest_created_at = min(element.attrib['created_at'] for element in root)
        timestamp_max = (dateutil.parser.isoparse(oldest_created_at) - timedelta(seconds=1)).isoformat()

In [6]:
# One row per changeset, one column per attribute/tag key.
changesets = pd.DataFrame.from_dict(all_changesets, orient='index')
# Changesets without a comment get an empty string so the string filters on
# this column work. Assign the result back: a chained in-place
# `changesets.comment.fillna(..., inplace=True)` is not guaranteed to modify
# the frame (it has no effect under pandas copy-on-write).
changesets['comment'] = changesets['comment'].fillna('')

In [7]:
# Keep only changesets whose comment mentions MapRoulette and one of the
# challenge keywords, and drop those whose comment contains '#osmcha'.
cs_comments = changesets['comment']
is_maproulette = cs_comments.str.contains('#maproulette')
mentions_topic = cs_comments.str.contains('residential|landuse|building')
is_osmcha = cs_comments.str.contains('#osmcha')
challenge_cs = changesets[is_maproulette & mentions_topic & ~is_osmcha]

In [8]:
def _classify_tags(tags, element_type):
    """Return the statistics category for a way/relation with the given tags."""
    landuse = tags.get('landuse')
    if landuse in ('residential', 'farmyard', 'farmland', 'industrial'):
        return landuse
    if 'landuse' in tags:
        return 'other landuse'
    if 'highway' in tags:
        return 'highway'
    if tags.get('building') == 'yes':
        return 'building=yes'
    if 'building' in tags:
        return 'building=*'
    # Nothing recognised: fall back to the element type ('way' / 'relation').
    return element_type


def changeset_stats(changeset_id):
    """Download one changeset and count what was created, modified and deleted.

    Parameters
    ----------
    changeset_id
        Id of the changeset; interpolated into the OSM API download URL.

    Returns
    -------
    collections.defaultdict
        Maps ``(action, category)`` to a count. ``action`` is the tag of the
        osmChange action element ('create', 'modify', 'delete'). ``category``
        is 'node' for nodes and, for all other elements, a landuse / highway /
        building class derived from the tags, or the element type when no
        class matches. The keys ``('create', 'node')`` and
        ``('modify', 'node')`` are always present.

        Created nodes that are referenced by any way in the same changeset
        are not counted, so ``('create', 'node')`` covers standalone nodes
        only. Modified nodes are counted once per distinct id.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    response = requests.get(f'https://www.openstreetmap.org/api/0.6/changeset/{changeset_id}/download')
    response.raise_for_status()
    root = ET.fromstring(response.text)

    created_nodes = set()
    modified_nodes = set()
    referenced_nodes = set()
    stats = defaultdict(int)
    for action in root:
        for nwr in action:
            if nwr.tag == 'node':
                if action.tag == 'create':
                    created_nodes.add(nwr.attrib['id'])
                elif action.tag == 'modify':
                    modified_nodes.add(nwr.attrib['id'])
                elif action.tag == 'delete':
                    stats[(action.tag, nwr.tag)] += 1
            else:
                tags = {}
                for child in nwr:
                    if child.tag == 'nd':
                        referenced_nodes.add(child.attrib['ref'])
                    elif child.tag == 'tag':
                        tags[child.attrib['k']] = child.attrib['v']
                stats[(action.tag, _classify_tags(tags, nwr.tag))] += 1
    # Subtract the way members only after the whole document has been read, so
    # the result does not depend on whether a node's <create> appears before
    # or after the way that references it.
    stats[('create', 'node')] += len(created_nodes - referenced_nodes)
    stats[('modify', 'node')] += len(modified_nodes)
    return stats

In [9]:
# Download the statistics of every challenge changeset and add them up per
# user: user_stats[user][(action, category)] -> total count.
user_stats = defaultdict(lambda: defaultdict(int))
for cs_id, mapper in zip(challenge_cs['id'], challenge_cs['user']):
    for category, count in changeset_stats(cs_id).items():
        user_stats[mapper][category] += count

In [10]:
# One row per mapper, columns keyed by (action, category).
stats_table = (
    pd.DataFrame.from_dict(user_stats, orient='index')
    # Fixed column order: first by action, then by category within an action.
    .reindex(['create', 'modify', 'delete'], axis=1, level=0)
    .reindex(['residential', 'farmyard', 'farmland', 'industrial', 'other landuse', 'building=yes', 'building=*', 'highway', 'node', 'way', 'relation'], axis=1, level=1)
    # Same row order as the task table; mappers without any counted changeset
    # end up as all-NaN rows and are dropped again.
    .reindex(index=mapper_status.index)
    .dropna(how='all')
    .fillna(0)
    .astype(int)
)
# Append a 'total' row. DataFrame.append was removed in pandas 2.0, so build
# the row as a one-line frame and concatenate; rename_axis keeps the index
# name of the table on the result.
total_row = (
    stats_table.sum(axis=0)
    .rename('total')
    .to_frame()
    .T
    .rename_axis(stats_table.index.name)
)
stats_table = pd.concat([stats_table, total_row])
stats_table

Unnamed: 0_level_0,create,create,create,create,create,create,create,create,create,create,create,modify,modify,modify,modify,modify,modify,modify,modify,modify,modify,modify,delete,delete,delete
Unnamed: 0_level_1,residential,farmyard,farmland,industrial,other landuse,building=yes,building=*,highway,node,way,relation,residential,farmyard,farmland,industrial,other landuse,building=yes,building=*,highway,node,way,relation,node,way,relation
Mapper,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2
kjon,3831,2773,42,17,433,20,5,315,13,266,1,254,82,321,27,564,49,41,1515,6556,444,21,5769,228,1
Fischkopp0815,1456,1266,0,3,73,4017,236,247,0,51,0,125,89,80,4,139,52,14,485,4022,85,2,307,25,0
fx99,13,6,0,1,8,3422,1270,21,16,4,0,18,2,5,6,33,189,90,53,778,27,0,37,5,0
BeKri,836,497,27,59,244,3901,94,1118,183,1324,2,215,93,263,18,458,308,159,1931,22623,603,40,1653,110,7
pyram,734,218,14,17,120,3128,16,132,8,128,9,114,35,26,14,93,77,42,425,3057,129,14,374,45,1
hfs,220,107,0,1,22,249,1,58,0,14,0,21,2,21,4,54,2,4,76,430,33,1,212,9,0
Hiddenhausener,332,201,3,15,37,1,0,6,0,56,0,23,9,54,9,68,6,4,93,1393,24,0,761,33,0
_klaas_,305,42,0,1,5,449,56,3,1,20,0,6,2,0,0,2,3,0,3,82,0,0,0,0,0
cepesko,33,12,0,5,0,3,0,5,2,0,0,10,0,3,1,10,0,0,8,53,7,0,41,1,0
MKnight,24,0,0,0,0,0,0,0,0,1,0,13,0,0,1,1,0,0,0,12,4,0,10,2,0
