# Create SQLite DB
Create the SQLite database using the example `schema.sql`:

```
sqlite3 osm_project.db < schema.sql
```
Open the database:
```
sqlite3 osm_project.db
```

Import the previously created .csv files:

```
sqlite> .mode csv
sqlite> .import nodes.csv nodes
sqlite> .import nodes_tags.csv nodes_tags
sqlite> .import ways.csv ways
sqlite> .import ways_nodes.csv ways_nodes
sqlite> .import ways_tags.csv ways_tags
```

The import failed with the error `INSERT failed: datatype mismatch`.

Fix: changed ```codecs.open(NODES_PATH, 'w')``` to ```codecs.open(NODES_PATH, 'wb')``` in the function `process_map` to force binary mode and suppress the extra carriage return in the .csv file.

In [1]:
import sqlite3
import pandas as pd

# Open the project database and create the cursor that the query cells below use.
DB_PATH = "osm_project.db"
db = sqlite3.connect(DB_PATH)
c = db.cursor()

Check all tables

In [3]:
# Sanity check: load the `nodes` table and preview its first rows.
QUERY = "SELECT * FROM nodes;"
c.execute(QUERY)
rows = c.fetchall()

# Column labels are assigned by hand; they are assumed to follow the column
# order of the `nodes` table in schema.sql.
df = pd.DataFrame(rows, columns=['id','lat','lon','user','uid','version','changeset','timestamp'])
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(df.head())

       id        lat       lon          user     uid  version  changeset  \
0  496065  54.772389  9.343453        simlox  795290        7   57804251   
1  496066  54.772097  9.344723        simlox  795290        6   57804251   
2  496067  54.771689  9.345011  amd64-online  579833        4   57255314   
3  496068  54.771262  9.345849   Peilscheibe   35560        3    3486195   
4  496069  54.771235  9.346165  amd64-online  579833        4   57255314   

              timestamp  
0  2018-04-04T13:44:12Z  
1  2018-04-04T13:44:12Z  
2  2018-03-16T22:44:50Z  
3  2009-12-29T23:08:24Z  
4  2018-03-16T22:44:50Z  


In [4]:
# Sanity check: load the `nodes_tags` table and preview its first rows.
QUERY = "SELECT * FROM nodes_tags;"
c.execute(QUERY)
rows = c.fetchall()

# Column labels are assigned by hand; they are assumed to follow the column
# order of the `nodes_tags` table in schema.sql.
df = pd.DataFrame(rows, columns=['id','key','value','type'])
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(df.head())

       id                              key     value type
0  496722             cid_58:tabcd_1:Class     Point  TMC
1  496722         cid_58:tabcd_1:Direction  positive  TMC
2  496722        cid_58:tabcd_1:LCLversion      9.00  TMC
3  496722      cid_58:tabcd_1:LocationCode     44623  TMC
4  496722  cid_58:tabcd_1:NextLocationCode     44624  TMC


In [5]:
# Sanity check: load the `ways` table and preview its first rows.
QUERY = "SELECT * FROM ways;"
c.execute(QUERY)
rows = c.fetchall()

# Column labels are assigned by hand; they are assumed to follow the column
# order of the `ways` table in schema.sql.
df = pd.DataFrame(rows, columns=['id','user','uid','version','changeset','timestamp'])
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(df.head())

        id          user     uid version  changeset             timestamp
0  2557562    Kent Friis  145774      20   18957966  2013-11-17T18:42:57Z
1  2557564  Maarten Deen    9176      19   38933548  2016-04-27T20:32:00Z
2  2862595       MKnight   39774      17   45913518  2017-02-08T11:25:24Z
3  4006063        simlox  795290      18   57804251  2018-04-04T13:44:20Z
4  4586893       MKnight   39774      15   45913518  2017-02-08T11:25:25Z


In [6]:
# Sanity check: load the `ways_tags` table and preview its first rows.
QUERY = "SELECT * FROM ways_tags;"
c.execute(QUERY)
rows = c.fetchall()

# Column labels are assigned by hand; they are assumed to follow the column
# order of the `ways_tags` table in schema.sql.
df = pd.DataFrame(rows, columns=['id','key','value','type'])
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(df.head())

        id       key       value      type
0  2557562      left       track  cycleway
1  2557562   highway    tertiary  cycleway
2  2557562      name  Padborgvej  cycleway
3  2557562   surface     asphalt  cycleway
4  2557564  cycleway        lane   regular


In [7]:
# Sanity check: load the `ways_nodes` table and preview its first rows.
QUERY = "SELECT * FROM ways_nodes;"
c.execute(QUERY)
rows = c.fetchall()

# Column labels are assigned by hand; they are assumed to follow the column
# order of the `ways_nodes` table in schema.sql.
df = pd.DataFrame(rows, columns=['id','node_id','position'])
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(df.head())

        id    node_id  position
0  2557562  441633170         0
1  2557562   11307072         1
2  2557562  420322536         2
3  2557562  499421438         3
4  2557562   11307073         4


Determine which users contributed the most

In [8]:
# Count the rows in `nodes` per user and sort the users by that count,
# largest first.
QUERY = "SELECT user,count() FROM nodes GROUP BY user ORDER BY count() DESC;"
c.execute(QUERY)
rows = c.fetchall()

df = pd.DataFrame(rows, columns=['user', 'count'])
# Show the 20 users with the most node rows.
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(df.head(20))

             user   count
0      Zartbitter  219739
1   Vasco_da_Gama    8168
2        Hubert87    4904
3           Hjart    4097
4      Kent Friis    2931
5       SpaceCafe    2859
6             xld    1925
7        Markus59    1702
8        Daniel S    1607
9          AWSbot    1500
10        JosClag    1325
11     Mario Link    1270
12         phobie    1264
13        bjay999    1256
14         Justus    1183
15       Abella57    1173
16        LennyFL    1171
17         alfa91    1168
18       OSM-Sven    1137
19         SDFL78    1000


Determine all keys used in `ways_tags`

In [10]:
# List every key in `ways_tags`, one row per key, sorted by key.
# NOTE(review): `value` is a bare column under GROUP BY key, so SQLite returns
# the value of an arbitrary row of each group -- treat it as a sample value,
# not as the only value for that key. With one row per key, DISTINCT looks
# redundant here -- confirm before removing it.
QUERY = "SELECT DISTINCT key,value FROM ways_tags GROUP BY key ORDER BY key;"
c.execute(QUERY)
rows = c.fetchall()

df = pd.DataFrame(rows, columns=['key','value'])
# Show only a window of the result (rows 180 to 209) instead of the full list.
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(df[180:210])

                    key                            value
180                note               Gebäude abgerissen
181               notes             Weg geht noch weiter
182          octane_100                              yes
183          octane_102                              yes
184           octane_91                               no
185           octane_95                              yes
186           octane_98                              yes
187                odbl                            clean
188              office                        newspaper
189       official_name                Stille Liebe(Ost)
190            old_name                      Schloßallee
191             old_ref                             B 76
192              oneway                              yes
193       opening_hours  Mo-Fr 07:30-16:30; Sa,Su,PH off
194            operator       Bundesrepublik Deutschland
195             organic                             only
196         orientation        

Determine the distribution of speed limits

In [11]:
# Count how often each `maxspeed` value occurs in `ways_tags`, most frequent
# first. Values are compared as stored, so numeric limits and symbolic ones
# (e.g. 'DE:urban', 'sign') are counted as separate categories.
QUERY = "SELECT key,value, count() FROM ways_tags WHERE key='maxspeed' GROUP BY value ORDER BY count() DESC;"
c.execute(QUERY)
rows = c.fetchall()

df = pd.DataFrame(rows, columns=['key','value','count'])
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(df.head())

        key     value  count
0  maxspeed        30    567
1  maxspeed        50    492
2  maxspeed  DE:urban    113
3  maxspeed      sign     85
4  maxspeed       100     83


In [12]:
# Close the SQLite connection opened by sqlite3.connect() earlier in the notebook.
db.close()