Con este notebook realizamos un par de consultas a una API web para obtener dos ficheros JSON que contienen las coordenadas GPS (latitud y longitud) asociadas a los parquímetros de la ciudad de Seattle.

## Consultas API SDOT Pay Station

In [1]:
import os
import urllib.request

def dl_data(url, output):
    """Download the resource at ``url`` and save it to the file ``output``.

    Missing parent directories of ``output`` are created. If the request
    fails with a ``URLError`` (which includes ``HTTPError``), an error line
    is printed and no file is written.

    Args:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.
        output: Path of the local file the response bytes are written to.
    """
    # Imported locally so the except clause below can resolve the name.
    from urllib.error import URLError

    try:
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(url) as response:
            print("Downloading " + url)
            payload = response.read()
    except URLError:
        print("Error", url)
        return

    # dirname is '' for a bare file name, and os.makedirs('') would raise.
    target_dir = os.path.dirname(output)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(output, "wb") as local_file:
        local_file.write(payload)

In [2]:
# URL for the first 1000 ids. Adjacent string literals are concatenated by
# Python, so this is one URL split up only for readability.
url1 = (
    'http://gisrevprxy.seattle.gov/arcgis/rest/services/SDOT_EXT/'
    'DSG_datasharing/MapServer/54/query'
    '?where=1%3D1&text=&objectIds=&time=&geometry='
    '&geometryType=esriGeometryEnvelope&inSR='
    '&spatialRel=esriSpatialRelIntersects&relationParam='
    '&outFields=*&returnGeometry=false&returnTrueCurves=false'
    '&maxAllowableOffset=&geometryPrecision=&outSR='
    '&returnIdsOnly=false&returnCountOnly=false&orderByFields='
    '&groupByFieldsForStatistics=&outStatistics='
    '&returnZ=false&returnM=false&gdbVersion='
    '&returnDistinctValues=false&resultOffset=&resultRecordCount='
    '&queryByDistance=&returnExtentsOnly=false&datumTransformation='
    '&parameterValues=&rangeValues=&f=pjson'
)

In [3]:
# Destination file for the response to the first query.
output1 = './data/paystations_ids_1_1000.json'
dl_data(url=url1, output=output1)

Downloading http://gisrevprxy.seattle.gov/arcgis/rest/services/SDOT_EXT/DSG_datasharing/MapServer/54/query?where=1%3D1&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentsOnly=false&datumTransformation=&parameterValues=&rangeValues=&f=pjson


In [4]:
# URL for the next 800 ids (resultOffset=1000, resultRecordCount=800):
url2 = 'http://gisrevprxy.seattle.gov/arcgis/rest/services/SDOT_EXT/DSG_datasharing/MapServer/54/query?where=1%3D1&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&resultOffset=1000&resultRecordCount=800&queryByDistance=&returnExtentsOnly=false&datumTransformation=&parameterValues=&rangeValues=&f=pjson'

In [5]:
# Destination file for the response to the second query.
output2 = './data/paystations_ids_1001_1800.json'
dl_data(url=url2, output=output2)

Downloading http://gisrevprxy.seattle.gov/arcgis/rest/services/SDOT_EXT/DSG_datasharing/MapServer/54/query?where=1%3D1&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&resultOffset=1000&resultRecordCount=800&queryByDistance=&returnExtentsOnly=false&datumTransformation=&parameterValues=&rangeValues=&f=pjson


## Consolidación de respuesta en fichero csv

In [6]:
import json
import codecs
import pandas as pd

# Parse both downloaded responses. The files are opened as 'utf-8-sig', and
# each with-block closes its file as soon as it has been parsed.
with open(output1, 'r', encoding='utf-8-sig') as json_file:
    data1 = json.load(json_file)
with open(output2, 'r', encoding='utf-8-sig') as json_file:
    data2 = json.load(json_file)

In [7]:
# Select the relevant variables from each response: the element key and its
# latitude/longitude, one entry per feature, in the order the features appear.

ek1 = [feature['attributes']['ELMNTKEY'] for feature in data1['features']]
lat1 = [feature['attributes']['SHAPE_LAT'] for feature in data1['features']]
long1 = [feature['attributes']['SHAPE_LNG'] for feature in data1['features']]

ek2 = [feature['attributes']['ELMNTKEY'] for feature in data2['features']]
lat2 = [feature['attributes']['SHAPE_LAT'] for feature in data2['features']]
long2 = [feature['attributes']['SHAPE_LNG'] for feature in data2['features']]

In [8]:
# Build one frame per downloaded response and stack them row-wise.
d1 = {'element_key': ek1, 'latitude': lat1, 'longitude': long1}
df1 = pd.DataFrame(data=d1)

d2 = {'element_key': ek2, 'latitude': lat2, 'longitude': long2}
df2 = pd.DataFrame(data=d2)

df = pd.concat([df1, df2], axis=0, sort=True)

# Average the coordinates of every element_key that has several coordinate
# points. The columns are selected with a list: selecting them with a bare
# tuple is rejected by pandas 2.x.
df_group_ek = df.groupby('element_key')[['latitude', 'longitude']].mean()
# Turn element_key back into a regular column so it is written to the CSV.
df_group_ek = df_group_ek.reset_index(level=['element_key'])

df_group_ek.to_csv('./data/Coord_EK.csv', index=False)

In [9]:
# Read the consolidated CSV back and preview its first rows.
coord = pd.read_csv('./data/Coord_EK.csv')
coord.head()

Unnamed: 0,element_key,latitude,longitude
0,1001,47.602862,-122.334703
1,1002,47.602997,-122.334538
2,1005,47.603602,-122.335382
3,1006,47.603725,-122.335171
4,1009,47.60501,-122.336669
