In [1]:
from collections import Counter

import folium as f
import geopandas as gpd
import pandas as pd
import requests
from pymongo import MongoClient
from shapely.geometry import Point

In [2]:
# Fetch the GBFS feed index and keep the list of English-language feeds.
root = 'https://saopaulo.publicbikesystem.net/ube/gbfs/v1/'
# A timeout keeps a stalled connection from hanging the kernel forever, and
# raise_for_status() surfaces HTTP errors before .json() is attempted.
feeds_response = requests.get(root, timeout=30)
feeds_response.raise_for_status()
endpoints = feeds_response.json()['data']['en']['feeds']

In [3]:
# Select the station_information feed by name rather than by position, so a
# reordering of the feed index cannot silently hand us a different feed.
# A missing feed raises KeyError naming 'station_information'.
feeds_by_name = {feed['name']: feed for feed in endpoints}
main = feeds_by_name['station_information']
main

{'name': 'station_information',
 'url': 'https://saopaulo-br.publicbikesystem.net/customer/ube/gbfs/v1/en/station_information'}

In [4]:
# Download the station list and load it into a DataFrame (one row per station).
# The timeout and status check make network failures explicit instead of
# hanging or failing later inside .json().
stations_response = requests.get(main['url'], timeout=30)
stations_response.raise_for_status()
raw_station_information = stations_response.json()['data']['stations']
station_information = pd.DataFrame(raw_station_information)
station_information.head(3)

Unnamed: 0,station_id,name,physical_configuration,lat,lon,altitude,address,capacity,rental_methods,groups,obcn,nearby_distance,post_code,planned_date
0,1,1 - Largo da Batata,REGULAR,-23.566831,-46.693741,0.0,"Av. Brigadeiro Faria Lima, esquina R. Teodoro ...",83,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",[Pinheiros],1,1000.0,,
1,3,3 - CPTM Pinheiros,VAULT,-23.566478,-46.701258,0.0,"R. Gilberto Sabino, 138/ ao lado do ponto de ô...",15,"[KEY, TRANSITCARD, PHONE]",[Pinheiros],3,1000.0,3164200.0,
2,4,4 - Rua Diogo Moreira,REGULAR,-23.569145,-46.692003,0.0,"Av. Brigadeiro Faria Lima, na altura do número...",23,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",[Pinheiros],4,1000.0,3164200.0,


In [5]:
# Stations whose capacity is at least 30, shown against the total station count.
is_huge = station_information['capacity'] >= 30
huge_stations = station_information[is_huge]
(huge_stations.shape[0], station_information.shape[0])

(6, 259)

In [6]:
def newMap():
    """Return a fresh folium map using the notebook's fixed centre, zoom level 12 and the cartodbpositron tiles."""
    centre = [-23.549279718284097, -46.63297420696871]
    base_map = f.Map(location=centre, zoom_start=12, tiles="cartodbpositron")
    return base_map

In [7]:
# Pull the (lat, lon) pairs of the huge stations out as a NumPy record array.
coordinate_columns = ['lat', 'lon']
points = huge_stations.loc[:, coordinate_columns].to_records(index=False)
points

rec.array([(-23.56683096, -46.6937412 ), (-23.56616415, -46.69395917),
           (-23.59391757, -46.69126812), (-23.5597721 , -46.7227278 ),
           (-23.59295294, -46.68093275), (-23.592837  , -46.6804    )],
          dtype=[('lat', '<f8'), ('lon', '<f8')])

In [8]:
# Draw one circle marker per huge station on a fresh map.
m = newMap()
for station in points:
    marker = f.CircleMarker(location=[station['lat'], station['lon']])
    marker.add_to(m)
m

In [9]:
# Plot every station, coloured by capacity tier; the popup shows its address.
m = newMap()

# (minimum capacity, colour) pairs, checked from the largest tier down.
capacity_tiers = (
    (30, '#ed1717'),
    (20, '#f24d11'),
    (15, '#f6830c'),
    (10, '#fbb806'),
)
fallback_colour = '#ffee00'  # used when the capacity is below every tier

station_fields = zip(
    station_information['lat'],
    station_information['lon'],
    station_information['capacity'],
    station_information['address'],
)
for lat, lon, capacity, address in station_fields:
    tier_colour = next(
        (colour for minimum, colour in capacity_tiers if capacity >= minimum),
        fallback_colour,
    )
    f.CircleMarker([lat, lon], popup=address, color=tier_colour).add_to(m)
m

In [10]:
# Plot every station, highlighting those that report an altitude above zero;
# the popup shows the altitude value.
m = newMap()
station_fields = zip(
    station_information['lat'],
    station_information['lon'],
    station_information['altitude'],
)
for lat, lon, altitude in station_fields:
    marker_colour = '#ed1717' if altitude > 0 else '#ffee00'
    f.CircleMarker([lat, lon], popup=str(altitude), color=marker_colour).add_to(m)
m

In [11]:
# Count stations per group, using the first entry of each station's `groups`
# list; stations with an empty list are skipped.
groups = [
    station_groups[0]
    for station_groups in station_information['groups']
    if len(station_groups) >= 1
]

# Counter tallies in a single pass (list.count per element is quadratic) and
# keeps keys in first-seen order, so ties sort exactly as before.
groups_dict = dict(Counter(groups))

groups_descending_keys = sorted(groups_dict, key=groups_dict.get, reverse=True)

for key in groups_descending_keys:
    print("{:<20} {:<5}".format(key, groups_dict[key]))

Itaim Bibi           38   
Pinheiros            33   
Moema                26   
Brooklin             23   
Jardim Paulista      21   
Vila Mariana         20   
USP                  20   
Bela Vista           12   
Santa Cecilia        11   
Alto de Pinheiros    11   
Barra Funda          8    
Perdizes             7    
Consolação           6    
República            6    
Saúde                4    
Santana              4    
Liberdade            2    
Paraíso              2    
Casa Verde           2    
Sé                   1    
Campo Belo           1    


In [12]:
# Flatten every station's `rental_methods` list and count how many stations
# offer each method.
rental_methods = [
    method
    for station_methods in station_information['rental_methods']
    for method in station_methods
]

# Single-pass tally; key order is first-seen, so ties sort exactly as before.
rental_methods_dict = dict(Counter(rental_methods))

rental_methods_descending_keys = sorted(rental_methods_dict, key=rental_methods_dict.get, reverse=True)

for key in rental_methods_descending_keys:
    print("{:<15} {:<5}".format(key, rental_methods_dict[key]))

KEY             259  
TRANSITCARD     259  
PHONE           259  
CREDITCARD      113  


In [13]:
# Count, per group, the stations that accept CREDITCARD, using the first
# entry of each station's `groups` list.
district = [
    station_groups[0]
    for station_methods, station_groups in zip(
        station_information['rental_methods'], station_information['groups']
    )
    # Skip stations with an empty `groups` list instead of raising IndexError,
    # matching the guard used when counting stations per group.
    if 'CREDITCARD' in station_methods and len(station_groups) >= 1
]

# Single-pass tally; key order is first-seen, so ties sort exactly as before.
district_dict = dict(Counter(district))

district_descending_keys = sorted(district_dict, key=district_dict.get, reverse=True)

for key in district_descending_keys:
    print("{:<20} {:<5}".format(key, district_dict[key]))

Moema                18   
Vila Mariana         15   
Itaim Bibi           14   
Pinheiros            10   
USP                  9    
Santa Cecilia        7    
Alto de Pinheiros    6    
Consolação           5    
República            5    
Jardim Paulista      4    
Bela Vista           4    
Saúde                4    
Brooklin             3    
Barra Funda          3    
Liberdade            1    
Sé                   1    
Campo Belo           1    
Paraíso              1    
Perdizes             1    
Santana              1    


In [14]:
# Store each station's position as a (lon, lat) tuple in a new `lng_lat` column.
lng_lat_pairs = list(zip(station_information['lon'], station_information['lat']))
station_information['lng_lat'] = pd.Series(lng_lat_pairs, index=station_information.index)
station_information.head(3)

Unnamed: 0,station_id,name,physical_configuration,lat,lon,altitude,address,capacity,rental_methods,groups,obcn,nearby_distance,post_code,planned_date,lng_lat
0,1,1 - Largo da Batata,REGULAR,-23.566831,-46.693741,0.0,"Av. Brigadeiro Faria Lima, esquina R. Teodoro ...",83,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",[Pinheiros],1,1000.0,,,"(-46.6937412022694, -23.5668309564147)"
1,3,3 - CPTM Pinheiros,VAULT,-23.566478,-46.701258,0.0,"R. Gilberto Sabino, 138/ ao lado do ponto de ô...",15,"[KEY, TRANSITCARD, PHONE]",[Pinheiros],3,1000.0,3164200.0,,"(-46.7012582819428, -23.5664776214954)"
2,4,4 - Rua Diogo Moreira,REGULAR,-23.569145,-46.692003,0.0,"Av. Brigadeiro Faria Lima, na altura do número...",23,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",[Pinheiros],4,1000.0,3164200.0,,"(-46.6920025786078, -23.5691446194095)"


In [15]:
# Turn every (lon, lat) tuple into a shapely Point stored in `geometry`.
station_points = station_information['lng_lat'].apply(Point)
station_information['geometry'] = station_points
station_information.head(3)

Unnamed: 0,station_id,name,physical_configuration,lat,lon,altitude,address,capacity,rental_methods,groups,obcn,nearby_distance,post_code,planned_date,lng_lat,geometry
0,1,1 - Largo da Batata,REGULAR,-23.566831,-46.693741,0.0,"Av. Brigadeiro Faria Lima, esquina R. Teodoro ...",83,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",[Pinheiros],1,1000.0,,,"(-46.6937412022694, -23.5668309564147)",POINT (-46.6937412022694 -23.5668309564147)
1,3,3 - CPTM Pinheiros,VAULT,-23.566478,-46.701258,0.0,"R. Gilberto Sabino, 138/ ao lado do ponto de ô...",15,"[KEY, TRANSITCARD, PHONE]",[Pinheiros],3,1000.0,3164200.0,,"(-46.7012582819428, -23.5664776214954)",POINT (-46.7012582819428 -23.5664776214954)
2,4,4 - Rua Diogo Moreira,REGULAR,-23.569145,-46.692003,0.0,"Av. Brigadeiro Faria Lima, na altura do número...",23,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",[Pinheiros],4,1000.0,3164200.0,,"(-46.6920025786078, -23.5691446194095)",POINT (-46.6920025786078 -23.5691446194095)


In [16]:
# Rebuild the point geometry from the raw lon/lat columns on every run, so
# re-executing this cell never buffers an already-buffered polygon.
station_points = gpd.GeoSeries(
    gpd.points_from_xy(station_information['lon'], station_information['lat']),
    index=station_information.index,
)
# The 0.001 buffer is in coordinate units; lon/lat are used directly here, so
# that is degrees (presumably on the order of 100 m at this latitude — confirm
# before relying on it as a distance).
station_information = gpd.GeoDataFrame(
    station_information,
    geometry=station_points.buffer(0.001),
)
station_information.head(3)

Unnamed: 0,station_id,name,physical_configuration,lat,lon,altitude,address,capacity,rental_methods,groups,obcn,nearby_distance,post_code,planned_date,lng_lat,geometry
0,1,1 - Largo da Batata,REGULAR,-23.566831,-46.693741,0.0,"Av. Brigadeiro Faria Lima, esquina R. Teodoro ...",83,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",[Pinheiros],1,1000.0,,,"(-46.6937412022694, -23.5668309564147)","POLYGON ((-46.69274 -23.56683, -46.69275 -23.5..."
1,3,3 - CPTM Pinheiros,VAULT,-23.566478,-46.701258,0.0,"R. Gilberto Sabino, 138/ ao lado do ponto de ô...",15,"[KEY, TRANSITCARD, PHONE]",[Pinheiros],3,1000.0,3164200.0,,"(-46.7012582819428, -23.5664776214954)","POLYGON ((-46.70026 -23.56648, -46.70026 -23.5..."
2,4,4 - Rua Diogo Moreira,REGULAR,-23.569145,-46.692003,0.0,"Av. Brigadeiro Faria Lima, na altura do número...",23,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",[Pinheiros],4,1000.0,3164200.0,,"(-46.6920025786078, -23.5691446194095)","POLYGON ((-46.69100 -23.56914, -46.69101 -23.5..."


In [17]:
# Connect to the MongoDB server on localhost:27017 and select the `bus_stops` database.
client = MongoClient('localhost', 27017)
db = client.bus_stops

In [18]:
# Show one document from `stops` that has no `helper` field.
without_helper_field = {'helper': {'$exists': False}}
db['stops'].find_one(without_helper_field)

{'_id': ObjectId('6109678d5ab6834a5d206e2b'),
 'agency_id': 'saopaulo_sp',
 'stop_id': '18910',
 'address': 'Morumbi',
 'desc': '',
 'gtfs_version': '2021-07-20',
 'lng_lat': [-46.701657, -23.621519],
 'loc': {'lat': -23.621519, 'lng': -46.701657},
 'modal': 'cptm',
 'trips': {'CPTM L09-0': {'route_id': 'CPTM L09',
   'headsign': 'GRAJAU',
   'direction': 0,
   'color': '#01A9A7',
   'angle': 218},
  'CPTM L09-1': {'route_id': 'CPTM L09',
   'headsign': 'OSASCO',
   'direction': 1,
   'color': '#01A9A7',
   'angle': 38}}}

In [19]:
# Load every document of the `stops` collection into a frame.
stops = gpd.GeoDataFrame(list(db.stops.find()))
# Keep only documents that have a `lng_lat` value. The explicit .copy() makes
# the filtered frame independent of the unfiltered one, so adding columns to
# it later does not trigger pandas' SettingWithCopyWarning.
stops = stops[stops.lng_lat.notna()].copy()
type(stops)

pandas.core.frame.DataFrame

In [20]:
# Build a shapely Point from each stop's `lng_lat` pair, then wrap the frame
# in a GeoDataFrame so it can take part in spatial operations.
stop_points = stops['lng_lat'].map(Point)
stops['geometry'] = stop_points
stops_gdf = gpd.GeoDataFrame(stops)
stops_gdf.head(3)

Unnamed: 0,_id,agency_id,stop_id,address,desc,gtfs_version,lng_lat,loc,modal,trips,geometry
0,6109678d5ab6834a5d206e2b,saopaulo_sp,18910,Morumbi,,2021-07-20,"[-46.701657, -23.621519]","{'lat': -23.621519, 'lng': -46.701657}",cptm,"{'CPTM L09-0': {'route_id': 'CPTM L09', 'heads...",POINT (-46.70166 -23.62152)
1,6109678d5ab6834a5d206e2d,saopaulo_sp,18911,Berrini,,2021-07-20,"[-46.696825, -23.604669]","{'lat': -23.604669, 'lng': -46.696825}",cptm,"{'CPTM L09-0': {'route_id': 'CPTM L09', 'heads...",POINT (-46.69682 -23.60467)
2,6109678d5ab6834a5d206e2c,saopaulo_sp,18912,Vila Olímpia,,2021-07-20,"[-46.692871, -23.593185]","{'lat': -23.593185, 'lng': -46.692871}",cptm,"{'CPTM L09-0': {'route_id': 'CPTM L09', 'heads...",POINT (-46.69287 -23.59318)


In [21]:
# Pair every stop with the station buffer polygon it falls within.
stops_with_stations_data = gpd.sjoin(
    left_df=stops_gdf,
    right_df=station_information,
    how='inner',
    predicate='within',
)

# Columns worth previewing; `address_left` comes from the stop and
# `address_right` from the station (suffixes added by the join).
preview_columns = [
    "_id", "stop_id", "address_left", "desc", "modal", "geometry",
    "station_id", "name", "address_right", "capacity", "groups",
]
stops_with_stations_data[preview_columns].head()

Unnamed: 0,_id,stop_id,address_left,desc,modal,geometry,station_id,name,address_right,capacity,groups
1,6109678d5ab6834a5d206e2d,18911,Berrini,,cptm,POINT (-46.69682 -23.60467),145,145 - CPTM Berrini,"Av. das Nações Unidas, em frente à estação da ...",23,[Itaim Bibi]
3,6109678d5ab6834a5d206e2e,18913,Cidade Jardim,,cptm,POINT (-46.69129 -23.58534),20,20 - CPTM Cidade Jardim,Praça Waldomiro Maluhy/ Entrada da estação Cid...,19,[Pinheiros]
11964,6109678f5ab6834a5d208d82,6311375,Ac. Acesso A Ponte,Ref.: R. Prof.artur Ramos / Pça. Waldomiro Maluhi,bus,POINT (-46.69071 -23.58471),20,20 - CPTM Cidade Jardim,Praça Waldomiro Maluhy/ Entrada da estação Cid...,19,[Pinheiros]
13609,610967925ab6834a5d20e819,630015451,"Av. Das Nações Unidas, 9113",Ref.: R Professor Artur Ramos/ Av Cidade Jardim,bus,POINT (-46.69069 -23.58522),20,20 - CPTM Cidade Jardim,Praça Waldomiro Maluhy/ Entrada da estação Cid...,19,[Pinheiros]
173,6109678d5ab6834a5d207268,1211334,Parada Terminal USP,"Av. Prof. Almeida Prado, 58 Ref.: Oposto À Par...",bus,POINT (-46.73181 -23.55251),254,254 - Terminal de Ônibus USP,Av. Prof. Almeida Prado / Terminal de Ônibus USP,15,[USP]


In [22]:
# Draw every station buffer as an orange-filled GeoJSON layer, then drop a
# marker on each stop that matched a station in the spatial join.
def station_style(_feature):
    return {'fillColor': 'orange'}

m = newMap()
for _, station in station_information.iterrows():
    # Simplify the buffer polygon before serialising it to GeoJSON.
    simplified = gpd.GeoSeries(station['geometry']).simplify(tolerance=0.00001)
    f.GeoJson(data=simplified.to_json(), style_function=station_style).add_to(m)

for stop_point in stops_with_stations_data['geometry']:
    # folium expects [lat, lon]; shapely points store x=lon, y=lat.
    f.Marker(location=[stop_point.y, stop_point.x]).add_to(m)

m