# Part 1: Connecting to the CityBikes API

## Imports

In [1]:
from collections import defaultdict
import numpy as np
import pandas as pd

from utils import export, get

## Get a list of cities in the CityBikes API

In [2]:
# Endpoint that returns the list of bike-share networks known to CityBikes.
# HTTPS is used so the request and response are encrypted in transit.
url = 'https://api.citybik.es/v2/networks'
params = {}  # no query parameters are sent with this request
headers = {'accept': 'application/json'}  # ask for a JSON response body

# `get` is the project helper imported from utils; it is called positionally
# as (url, params, headers).
response = get(url, params, headers)

First, I'll get a list of all networks available in the CityBikes API.

In [3]:
# Decode the JSON body of the response and pull out the list stored under
# the 'networks' key; each element is a dict describing one bike network.
data_json: dict = response.json()
networks: list[dict] = data_json['networks']

# Print out the first network dict to see its structure.
networks[0]

{'company': ['ЗАО «СитиБайк»'],
 'href': '/v2/networks/velobike-moscow',
 'id': 'velobike-moscow',
 'location': {'city': 'Moscow',
  'country': 'RU',
  'latitude': 55.75,
  'longitude': 37.616667},
 'name': 'Velobike'}

Extract the cities and corresponding network IDs that can be accessed with the CityBikes API.

In [4]:
# Map each city name to the IDs of every network located in that city.
# A defaultdict(list) lets the first network of a city create its own list.
network_ids = defaultdict(list)
for network in networks:
    city_name = network['location']['city']
    network_ids[city_name].append(network['id'])

# Sorted list of the cities that can be accessed with the CityBikes API
# (iterating a dict yields its keys).
cities = sorted(network_ids)
cities[:11]

['10vorWien',
 'A Coruña',
 'A Illa de Arousa',
 'Aachen',
 'Abu Dhabi',
 'Acquarica del Capo',
 'Acquaviva delle fonti',
 'Agen',
 'Aigialeia',
 'Alba',
 'Albacete']

## Send a request to CityBikes for the city of your choice. 

I chose Santiago, Chile for this study. Santiago is a populous city with many bike stations. Also, at the time of data acquisition, it was early afternoon in Santiago and the weather there was slightly overcast and ~12 °C. Both of these conditions would seem to promote cycling.

In [5]:
# City to study; confirm it is served by CityBikes before requesting its data.
city = 'Santiago'
print(f'{city} in cities: {city in cities}')
# Read with .get(): network_ids is a defaultdict, so indexing it with a city
# that is absent would silently insert an empty entry for that city.
print(f'No. of bike networks in {city}: {len(network_ids.get(city, []))}')

Santiago in cities: True
No. of bike networks in Santiago: 1


In [6]:
# Note that url, headers, and params were defined above.
# Look the network up through `city` (set in the previous cell) instead of
# repeating the literal, so changing the city in one place re-targets the request.
city_network_ids: list[str] = network_ids.get(city, [])
if not city_network_ids:
    # Fail with a clear message rather than an IndexError on the lookup below.
    raise ValueError(f'No bike network found for city {city!r}.')

# The first network listed for the city is the one queried.
network_id: str = city_network_ids[0]
endpoint: str = f'{url}/{network_id}'

response = get(endpoint, params, headers)

In [7]:
# The station records sit under 'network' -> 'stations' in the response body.
network_payload: dict = response.json()['network']
stations: list[dict] = network_payload['stations']

print(f'There are {len(stations)} bike stations in {city}.')

# Display a single station record to reveal the available fields.
stations[0]

There are 230 bike stations in Santiago.


{'empty_slots': 7,
 'extra': {'address': 'Alcalde Dávalos 124',
  'altitude': 0.0,
  'ebikes': 0,
  'has_ebikes': True,
  'last_updated': 1714750092,
  'normal_bikes': 6,
  'payment': ['key', 'transitcard', 'creditcard', 'phone'],
  'payment-terminal': True,
  'post_code': '1111',
  'rental_uris': {},
  'renting': 1,
  'returning': 1,
  'slots': 13,
  'uid': '237'},
 'free_bikes': 6,
 'id': 'e1593acef03a0fd770595370586bc358',
 'latitude': -33.428334,
 'longitude': -70.627312,
 'name': 'P31 - Estación Canal 13',
 'timestamp': '2024-05-03T15:29:29.800000Z'}

## Parse the response to get various details for the bike stations in that city 

In [8]:
# Column-oriented container for the parsed stations: one independent, initially
# empty list per output column, in the order the columns should appear.
station_data = {
    column: []
    for column in (
        # Identification
        'timestamp', 'station_id', 'name',
        # Location
        'latitude', 'longitude', 'altitude',
        # Capacity and availability
        'slots', 'free_bikes', 'empty_slots',
        # Bike types
        'has_ebikes', 'ebikes', 'normal_bikes',
    )
}

In [9]:
# Empty every column first so that re-running this cell rebuilds the data
# instead of appending a second copy of each station to the existing lists.
for column_values in station_data.values():
    column_values.clear()

for station in stations:
    # 'extra' is the nested dict holding altitude, slots and the bike-type
    # counts. Treat a missing or null 'extra' as empty so those fields become
    # None, matching how the top-level fields are read with .get().
    extra = station.get('extra') or {}

    station_data['timestamp'].append(station.get('timestamp'))
    station_data['station_id'].append(station.get('id'))
    station_data['name'].append(station.get('name'))

    station_data['latitude'].append(station.get('latitude'))
    station_data['longitude'].append(station.get('longitude'))
    station_data['altitude'].append(extra.get('altitude'))

    station_data['slots'].append(extra.get('slots'))
    station_data['free_bikes'].append(station.get('free_bikes'))
    station_data['empty_slots'].append(station.get('empty_slots'))

    station_data['has_ebikes'].append(extra.get('has_ebikes'))
    station_data['ebikes'].append(extra.get('ebikes'))
    station_data['normal_bikes'].append(extra.get('normal_bikes'))

## Put the parsed results into a DataFrame

In [10]:
# Each key of station_data becomes a column; each list supplies that column's rows.
stations_df = pd.DataFrame(data=station_data)

# Preview the first five rows.
stations_df.head(n=5)

Unnamed: 0,timestamp,station_id,name,latitude,longitude,altitude,slots,free_bikes,empty_slots,has_ebikes,ebikes,normal_bikes
0,2024-05-03T15:29:29.800000Z,e1593acef03a0fd770595370586bc358,P31 - Estación Canal 13,-33.428334,-70.627312,0.0,13,6,7,True,0,6
1,2024-05-03T15:29:29.533000Z,3983dd515589a80338dd44a28f5ec414,V34 - Mestizo,-33.394,-70.6,0.0,23,8,15,True,0,8
2,2024-05-03T15:29:29.537000Z,405a3a5ca08c7536d3eb286cf8553025,V10 - Casa Costanera,-33.398,-70.598,0.0,11,8,3,True,0,8
3,2024-05-03T15:29:29.538000Z,0e8dfc3f137cb3911bd32f5fd45e0f93,V35 - Municipalidad de Vitacura,-33.398,-70.601,0.0,15,9,3,True,0,9
4,2024-05-03T15:29:29.539000Z,bdf4fc889476008ea8c644640f306bbc,V07 - Bicentenario,-33.401,-70.602,0.0,17,8,8,True,0,8


In [11]:
# Summarise each column's dtype and non-null count to check for missing values.
stations_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 230 entries, 0 to 229
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   timestamp     230 non-null    object 
 1   station_id    230 non-null    object 
 2   name          230 non-null    object 
 3   latitude      230 non-null    float64
 4   longitude     230 non-null    float64
 5   altitude      230 non-null    float64
 6   slots         230 non-null    int64  
 7   free_bikes    230 non-null    int64  
 8   empty_slots   230 non-null    int64  
 9   has_ebikes    230 non-null    bool   
 10  ebikes        230 non-null    int64  
 11  normal_bikes  230 non-null    int64  
dtypes: bool(1), float64(3), int64(5), object(3)
memory usage: 20.1+ KB


## Export the DataFrame to CSV

In [12]:
# Hand the station table to the project's `export` helper (imported from utils).
# NOTE(review): presumably writes stations_df to 'stations.csv' in CSV format;
# confirm the output directory and write options in utils.export.
export(stations_df, 'stations.csv')