In [1]:
import requests
import json
import pandas as pd
import numpy as np
from pandas.io.json import json_normalize
from datetime import datetime

In [2]:
import os
import geopandas as gpd

In [3]:
import os
from dotenv import load_dotenv
load_dotenv()

USER = os.environ["USER"]
PASSWORD = os.environ["PASSWORD"]
DOMAIN = os.getenv('DOMAIN')

In [4]:
#API access to get bike stations location

url = "https://openapi.emtmadrid.es/v1/mobilitylabs/user/login/"

headers = {
    'email': USER + "@" + DOMAIN,
    'password': PASSWORD 
    }

response = requests.request("GET", url, headers=headers)
#print(response.text)

valid_token = json.loads(response.text)["data"][0]["accessToken"]

In [5]:
stations_url = "https://openapi.emtmadrid.es/v1/transport/bicimad/stations/"

headers = {
    'accesstoken': valid_token

    }

response = requests.request("GET", stations_url, headers=headers)

stations_json_raw = response.json()


In [6]:
df_raw = pd.DataFrame(stations_json_raw['data'])
df_raw['id_'] = df_raw.index

In [7]:
# The dataset with the stations information
df_raw.head()

Unnamed: 0,id,name,light,number,address,activate,no_available,total_bases,dock_bikes,free_bases,reservations_count,geometry,id_
0,1,Puerta del Sol A,2,1a,Puerta del Sol nº 1,1,0,30,12,18,0,"{'type': 'Point', 'coordinates': [-3.7018341, ...",0
1,2,Puerta del Sol B,2,1b,Puerta del Sol nº 1,1,0,30,17,13,0,"{'type': 'Point', 'coordinates': [-3.701602938...",1
2,3,Miguel Moya,0,2,Calle Miguel Moya nº 1,1,0,24,4,19,0,"{'type': 'Point', 'coordinates': [-3.7058415, ...",2
3,4,Plaza Conde Suchil,0,3,Plaza del Conde del Valle de Súchil nº 3,1,0,18,4,12,0,"{'type': 'Point', 'coordinates': [-3.7069171, ...",3
4,5,Malasaña,0,4,Calle Manuela Malasaña nº 5,1,0,24,5,18,0,"{'type': 'Point', 'coordinates': [-3.7025875, ...",4


In [8]:
df_raw["geometry"][0] # latitud longitud

{'type': 'Point', 'coordinates': [-3.7018341, 40.4172137]}

In [9]:
df_geo = pd.DataFrame(df_raw["geometry"].values.tolist(), index=df_raw.index)
df_geo.head(2)

Unnamed: 0,type,coordinates
0,Point,"[-3.7018341, 40.4172137]"
1,Point,"[-3.701602938060457, 40.41731271011562]"


In [22]:
df_geo[['Longitude', 'Latitude']] = pd.DataFrame(df_geo.coordinates.tolist(), index= df_geo.index)
df_geo["id_"] = df_geo.index
df_geo.head(2)

Unnamed: 0,type,coordinates,Longitude,Latitude,id_
0,Point,"[-3.7018341, 40.4172137]",-3.701834,40.417214,0
1,Point,"[-3.701602938060457, 40.41731271011562]",-3.701603,40.417313,1


In [23]:
gdf = gpd.GeoDataFrame(df_raw, geometry=gpd.points_from_xy(df_geo.Longitude, df_geo.Latitude))
gdf.head(2)

Unnamed: 0,id,name,light,number,address,activate,no_available,total_bases,dock_bikes,free_bases,reservations_count,geometry,id_
0,1,Puerta del Sol A,2,1a,Puerta del Sol nº 1,1,0,30,12,18,0,POINT (-3.70183 40.41721),0
1,2,Puerta del Sol B,2,1b,Puerta del Sol nº 1,1,0,30,17,13,0,POINT (-3.70160 40.41731),1


In [24]:
bike_station_df = pd.merge(df_raw, df_geo, on='id_')

In [25]:
bike_station_df.head()

Unnamed: 0,id,name,light,number,address,activate,no_available,total_bases,dock_bikes,free_bases,reservations_count,geometry,id_,type,coordinates,Longitude,Latitude
0,1,Puerta del Sol A,2,1a,Puerta del Sol nº 1,1,0,30,12,18,0,POINT (-3.70183 40.41721),0,Point,"[-3.7018341, 40.4172137]",-3.701834,40.417214
1,2,Puerta del Sol B,2,1b,Puerta del Sol nº 1,1,0,30,17,13,0,POINT (-3.70160 40.41731),1,Point,"[-3.701602938060457, 40.41731271011562]",-3.701603,40.417313
2,3,Miguel Moya,0,2,Calle Miguel Moya nº 1,1,0,24,4,19,0,POINT (-3.70584 40.42059),2,Point,"[-3.7058415, 40.4205886]",-3.705842,40.420589
3,4,Plaza Conde Suchil,0,3,Plaza del Conde del Valle de Súchil nº 3,1,0,18,4,12,0,POINT (-3.70692 40.43029),3,Point,"[-3.7069171, 40.4302937]",-3.706917,40.430294
4,5,Malasaña,0,4,Calle Manuela Malasaña nº 5,1,0,24,5,18,0,POINT (-3.70259 40.42855),4,Point,"[-3.7025875, 40.4285524]",-3.702587,40.428552


In [26]:
air_q_stations = pd.read_csv('../data/informacion_estaciones_red_calidad_aire.csv', error_bad_lines=False, sep = ';', encoding = "ISO-8859-1")
air_q_stations.head(2)

Unnamed: 0,CODIGO,CODIGO_CORTO,ESTACION,DIRECCION,LONGITUD_ETRS89,LATITUD_ETRS89,ALTITUD,COD_TIPO,NOM_TIPO,NO2,...,HC,COD_VIA,VIA_CLASE,VIA_PAR,VIA_NOMBRE,Fecha alta,COORDENADA_X_ETRS89,COORDENADA_Y_ETRS89,LONGITUD,LATITUD
0,28079004,4,Pza. de España,Plaza de España,"3°42'43.91""O","40°25'25.98""N",637,UT,Urbana tráfico,X,...,,273600,PLAZA,DE,ESPAÑA,01/12/1998,4395793291,4475049263,-3.712257,40.423882
1,28079008,8,Escuelas Aguirre,Entre C/ Alcalá y C/ O Donell,"3°40'56.22""O","40°25'17.63""N",672,UT,Urbana tráfico,X,...,X,18900,CALLE,DE,ALCALA,01/12/1998,4421172366,4474770696,-3.682316,40.421553


In [94]:
location = [40.41955949449261,-3.6888147164886242]
air_Map = folium.Map(
                location = location, 
                zoom_start = 14)

latitudes = list(air_q_stations.LONGITUD)
longitudes = list(air_q_stations.LATITUD)
labels = list(air_q_stations.ESTACION)
for lat, lng, label in zip(latitudes, longitudes, labels):
    folium.Marker(
      location = [lng, lat], 
      popup = label,
      icon = folium.Icon()
     ).add_to(air_Map)
#air_Map

In [28]:
#hacer geopoints y hacer listado de puntos a max 15 m de distancia
air_stations_df = gpd.GeoDataFrame(air_q_stations, geometry=gpd.points_from_xy(air_q_stations.LATITUD, air_q_stations.LONGITUD))

In [29]:
air_stations_df.columns

Index(['CODIGO', 'CODIGO_CORTO', 'ESTACION', 'DIRECCION', 'LONGITUD_ETRS89',
       'LATITUD_ETRS89', 'ALTITUD', 'COD_TIPO', 'NOM_TIPO', 'NO2', 'SO2', 'CO',
       'PM10', 'PM2_5', 'O3', 'BTX', 'HC', 'COD_VIA', 'VIA_CLASE', 'VIA_PAR',
       'VIA_NOMBRE', 'Fecha alta', 'COORDENADA_X_ETRS89',
       'COORDENADA_Y_ETRS89', 'LONGITUD', 'LATITUD', 'geometry'],
      dtype='object')

In [30]:
def min_distance(bici_pts, air_pts):
    return bici_pts.distance(air_pts).min()

In [31]:
bikes_geolocation = bike_station_df[["id", "name", "address", "total_bases", "geometry", "Latitude", "Longitude"]]
display(bikes_geolocation.head())

Unnamed: 0,id,name,address,total_bases,geometry,Latitude,Longitude
0,1,Puerta del Sol A,Puerta del Sol nº 1,30,POINT (-3.70183 40.41721),40.417214,-3.701834
1,2,Puerta del Sol B,Puerta del Sol nº 1,30,POINT (-3.70160 40.41731),40.417313,-3.701603
2,3,Miguel Moya,Calle Miguel Moya nº 1,24,POINT (-3.70584 40.42059),40.420589,-3.705842
3,4,Plaza Conde Suchil,Plaza del Conde del Valle de Súchil nº 3,18,POINT (-3.70692 40.43029),40.430294,-3.706917
4,5,Malasaña,Calle Manuela Malasaña nº 5,24,POINT (-3.70259 40.42855),40.428552,-3.702587


In [33]:
air_geolocation = air_stations_df[["CODIGO", "ESTACION", "LONGITUD", "LATITUD","geometry"]]
air_geolocation.head()

Unnamed: 0,CODIGO,ESTACION,LONGITUD,LATITUD,geometry
0,28079004,Pza. de España,-3.712257,40.423882,POINT (40.42388 -3.71226)
1,28079008,Escuelas Aguirre,-3.682316,40.421553,POINT (40.42155 -3.68232)
2,28079011,Avda. Ramón y Cajal,-3.677349,40.451473,POINT (40.45147 -3.67735)
3,28079016,Arturo Soria,-3.639242,40.440046,POINT (40.44005 -3.63924)
4,28079017,Villaverde,-3.713317,40.347147,POINT (40.34715 -3.71332)


In [36]:
from math import sin, cos, sqrt, atan2, radians

# approximate radius of earth in km
R = 6373.0

lat1 = radians(52.2296756)
lon1 = radians(21.0122287)
lat2 = radians(52.406374)
lon2 = radians(16.9251681)

dlon = lon2 - lon1
dlat = lat2 - lat1

a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
c = 2 * atan2(sqrt(a), sqrt(1 - a))

distance = R * c

print("Result:", distance, "km")

def _distance(lat1, lon1, lat2, lon2):
    R = 6373.0 # approximate radius of earth in km

    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)
    
    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    distance = R * c * 1000 # m
    return distance

Result: 278.54558935106695
Should be: 278.546 km


In [48]:
# bikes_geolocation air_geolocation
# for each bike station, which air station it's closer
lat_bike = bikes_geolocation.Latitude.loc[0]
lon_bike = bikes_geolocation.Longitude.loc[0]
lista_ = []
for code, lat, lon in zip(list(air_geolocation.CODIGO), list(air_geolocation.LATITUD), list(air_geolocation.LONGITUD)):
    lista_.append((code, _distance(lat_bike,lon_bike, lat, lon)))
    
min(lista_, key = lambda t: t[1])

(28079035, 248.97012113475435)

In [51]:
all_distances = [(code, _distance(lat_bike,lon_bike, lat, lon)) for code, lat, lon in zip(list(air_geolocation.CODIGO), list(air_geolocation.LATITUD), list(air_geolocation.LONGITUD))]
min_station_distance = min(all_distances, key = lambda t: t[1])
station_code = min_station_distance[0]
distance = min_station_distance[0]


In [80]:
# bikes_geolocation air_geolocation
# ["id", "name", "address", "total_bases", "geometry", "Latitude", "Longitude"]
b_id = list(bikes_geolocation.id)
b_lat = list(bikes_geolocation.Latitude)
b_lon = list(bikes_geolocation.Longitude)
# air_geolocation.CODIGO
# air_geolocation.LATITUD
# air_geolocation.LONGITUD 

def min_dist_airStation(b_lat, b_lon, df_code, df_lat, df_lon):
    #air_geolocation_ = air_geolocation
    all_distances = [(code, _distance(x,y, lat, lon)) for code, lat, lon, x,y in zip(list(df_code), list(df_lat), list(df_lon), list(b_lat), list(b_lon))]#zip(list(air_geolocation_.CODIGO), list(air_geolocation_.LATITUD), list(air_geolocation_.LONGITUD))]
    min_station_distance = min(all_distances, key = lambda t: t[1])
    #station_code = min_station_distance[0]
    #distance = min_station_distance[1]
    return min_station_distance
bikes_geolocation.columns

Index(['id', 'name', 'address', 'total_bases', 'geometry', 'Latitude',
       'Longitude', 'id_', 'air_station_code', 'min_dist'],
      dtype='object')

In [90]:
lat_bike = bikes_geolocation.Latitude#.loc[0:1]
lon_bike = bikes_geolocation.Longitude#.loc[0:1]

df_code = air_geolocation.CODIGO
df_lat = air_geolocation.LATITUD
df_lon = air_geolocation.LONGITUD

air_station_code = []
min_distance = []

for b_lat, b_lon in zip(list(lat_bike), list(lon_bike)):
    _list = [(code, _distance(b_lat,b_lon, lat, lon)) for code, lat, lon in zip(list(df_code), list(df_lat), list(df_lon))]
    
    station_distance = min(_list, key = lambda t: t[1])[1]
    station_code = min(_list, key = lambda t: t[1])[0]
    
    air_station_code.append(station_code)
    min_distance.append(station_distance)

bikes_geolocation["air_station_code"] = air_station_code
bikes_geolocation["min_distance"] = min_distance

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bikes_geolocation["air_station_code"] = air_station_code
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bikes_geolocation["min_distance"] = min_distance


In [95]:
bikes_geolocation.head()

Unnamed: 0,id,name,address,total_bases,geometry,Latitude,Longitude,id_,air_station_code,min_dist,min_distance
0,1,Puerta del Sol A,Puerta del Sol nº 1,30,POINT (-3.70183 40.41721),40.417214,-3.701834,41.417214,28079035,1980.979671,248.970121
1,2,Puerta del Sol B,Puerta del Sol nº 1,30,POINT (-3.70160 40.41731),40.417313,-3.701603,42.417313,28079035,1980.979671,249.035064
2,3,Miguel Moya,Calle Miguel Moya nº 1,24,POINT (-3.70584 40.42059),40.420589,-3.705842,43.420589,28079035,1980.979671,273.619391
3,4,Plaza Conde Suchil,Plaza del Conde del Valle de Súchil nº 3,18,POINT (-3.70692 40.43029),40.430294,-3.706917,44.430294,28079004,1980.979671,844.377449
4,5,Malasaña,Calle Manuela Malasaña nº 5,24,POINT (-3.70259 40.42855),40.428552,-3.702587,45.428552,28079004,1980.979671,969.604147
