# Testing PyOsmium Bindings

In [10]:
import osmium as o
import sys

In [104]:
class RoadLengthHandler(o.SimpleHandler):
    """Gather simple statistics about the ways passed to this handler.

    Despite the name, no lengths are computed: the handler only counts
    ways and records their ids, versions, tag keys and visibility.
    Only ways are inspected; no node or relation callback is defined.
    """

    def __init__(self):
        o.SimpleHandler.__init__(self)
        # Running totals.
        self.cnt = 0       # number of way() calls
        self.visible = 0   # number of ways whose 'visible' flag was truthy
        # Raw records, appended to on every way() call.
        self.ids = []       # one way id per call
        self.versions = []  # one version number per call
        self.tags = []      # one entry per tag key of each way

    def way(self, w):
        """Record the id, version, tag keys and visibility of way ``w``."""
        self.cnt += 1
        self.ids.append(w.id)

        # Materialise the keys first, then add them in one step.
        tag_keys = [tag.k for tag in w.tags]
        self.tags.extend(tag_keys)

        self.versions.append(w.version)
        self.visible += 1 if w.visible else 0
            

#         if self.cnt > 10:
#             sys.exit(0)

#     def node(self, n):
#         self.cnt += 1
#         self.tags += [t.k for t in n.tags]

#         if self.cnt > 10:
#             sys.exit(0)

In [105]:
h = RoadLengthHandler()
# The handler only records ids, versions, tag keys and visibility of ways;
# it never reads way geometry, so 'locations' is left at False here.
h.apply_file('/data/osm/crimea-2015.osh.pbf', locations=False)

In [106]:
# h.tags holds one entry per tag key of every way seen, so this is the
# number of key occurrences (repeats included), not of distinct keys.
print('Total tags {0}'.format(len(h.tags)))

Total tags 955429


In [107]:
# h.visible was incremented once for each way whose 'visible' flag was truthy.
print('Total visible ways: {0}'.format(h.visible))

Total visible ways: 485258


In [108]:
from collections import Counter

In [109]:
# Tally how often each tag key occurs across all recorded ways.
x = Counter(h.tags)

In [110]:
# Show every (tag key, count) pair, most frequent first.
x.most_common()

[('building', 185051),
 ('highway', 179500),
 ('source', 78928),
 ('name', 76308),
 ('landuse', 36935),
 ('name:ru', 36556),
 ('name:uk', 30980),
 ('addr:street', 29978),
 ('addr:housenumber', 29734),
 ('natural', 24040),
 ('name:en', 22109),
 ('surface', 15095),
 ('building:levels', 13844),
 ('oneway', 13601),
 ('maxspeed', 8697),
 ('ref', 8685),
 ('waterway', 7537),
 ('addr:city', 7063),
 ('boundary', 6980),
 ('barrier', 6946),
 ('admin_level', 6691),
 ('amenity', 6634),
 ('created_by', 4787),
 ('railway', 4719),
 ('service', 4667),
 ('place', 4615),
 ('addr:postcode', 4441),
 ('lanes', 4401),
 ('power', 4314),
 ('layer', 3742),
 ('addr:country', 3502),
 ('abutters', 3230),
 ('leisure', 3016),
 ('int_ref', 2986),
 ('electrified', 2942),
 ('gauge', 2849),
 ('tracktype', 2827),
 ('man_made', 2621),
 ('voltage', 2510),
 ('living_street', 2464),
 ('bridge', 2435),
 ('width', 2325),
 ('area', 2243),
 ('addr:interpolation', 2242),
 ('wood', 2221),
 ('bicycle', 2070),
 ('shop', 1764),
 ('re

In [111]:
# Number of way() calls the handler received.
h.cnt

485258

In [112]:
# Tally how many recorded ways carry each version number.
y = Counter(h.versions)

In [113]:
# Show every (version, count) pair, most frequent first.
y.most_common()

[(1, 304727),
 (2, 70894),
 (3, 35577),
 (4, 21204),
 (5, 14148),
 (6, 9490),
 (7, 6517),
 (8, 4583),
 (9, 3408),
 (10, 2599),
 (11, 2038),
 (12, 1618),
 (13, 1314),
 (14, 1089),
 (15, 894),
 (16, 735),
 (17, 630),
 (18, 528),
 (19, 454),
 (20, 380),
 (21, 327),
 (22, 286),
 (23, 246),
 (24, 205),
 (25, 174),
 (26, 146),
 (27, 129),
 (28, 110),
 (29, 101),
 (30, 85),
 (31, 77),
 (32, 72),
 (33, 66),
 (34, 55),
 (35, 46),
 (36, 44),
 (37, 38),
 (38, 30),
 (39, 25),
 (40, 23),
 (41, 19),
 (42, 15),
 (43, 14),
 (44, 13),
 (45, 11),
 (46, 11),
 (47, 11),
 (48, 10),
 (49, 8),
 (50, 7),
 (51, 6),
 (53, 6),
 (54, 6),
 (52, 5),
 (55, 3),
 (56, 1)]

In [114]:
# Tally how many times each way id was passed to way().
z = Counter(h.ids)

In [115]:
# Show every (way id, count) pair, most frequent first.
z.most_common()

[(245100349, 56),
 (34929956, 55),
 (112560732, 54),
 (4528352, 54),
 (123685049, 52),
 (30702943, 51),
 (31451001, 50),
 (24508472, 49),
 (24204078, 48),
 (180568642, 48),
 (24732276, 47),
 (121484560, 44),
 (4433994, 44),
 (104184680, 43),
 (24443319, 42),
 (35469256, 42),
 (24785436, 41),
 (23503812, 41),
 (23695202, 40),
 (24933417, 40),
 (24444114, 40),
 (4434117, 40),
 (83296595, 39),
 (28471501, 39),
 (221483701, 38),
 (24444099, 38),
 (106809449, 38),
 (130989029, 38),
 (93294944, 38),
 (24779791, 37),
 (40028575, 37),
 (26934525, 37),
 (24933574, 37),
 (35956513, 37),
 (30716105, 37),
 (24444268, 37),
 (25538191, 37),
 (161167713, 36),
 (39579097, 36),
 (24933419, 36),
 (27738445, 36),
 (130072455, 35),
 (30555413, 35),
 (37911401, 35),
 (130761308, 35),
 (23916495, 35),
 (45111893, 34),
 (24204057, 34),
 (37910605, 34),
 (36354798, 34),
 (32805943, 34),
 (24446727, 34),
 (25050196, 34),
 (27712737, 34),
 (112687556, 34),
 (27323215, 33),
 (131193987, 33),
 (24785455, 33),
 (3

In [116]:
import numpy as np

In [117]:
# Number of distinct way ids among the recorded ones.
len(np.unique(h.ids))

304774

In [118]:
# Total number of recorded way ids (one is appended per way() call).
len(h.ids)

485258

In [119]:
# Number of way() calls the handler received.
h.cnt

485258