# Part 1: Connecting to the CityBikes API

## Imports

In [135]:
from collections import defaultdict
from pathlib import Path

import numpy as np
import pandas as pd
import requests

## Get a list of cities in the CityBikes API

In [130]:
url = 'http://api.citybik.es/v2/networks'
headers = {'accept': 'application/json'}

try:
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
except requests.RequestException as err:
    # RequestException is the base class of HTTPError, ConnectionError and
    # Timeout, so network-level failures are reported here as well instead of
    # escaping the handler and leaving `response` unset (or stale).
    print(f'get request failed, {err}')
    response = None
else:
    print('get request succeeded')

get request succeeded


In [131]:
# Decode the response body as JSON and take the list stored under the
# 'networks' key.
# NOTE: `response` is None when the request cell above printed a failure, in
# which case this line raises AttributeError.
data_json: dict = response.json()
networks: list[dict] = data_json['networks']

# Print out the first network dict to see its structure.
networks[0]

{'company': ['ЗАО «СитиБайк»'],
 'href': '/v2/networks/velobike-moscow',
 'id': 'velobike-moscow',
 'location': {'city': 'Moscow',
  'country': 'RU',
  'latitude': 55.75,
  'longitude': 37.616667},
 'name': 'Velobike'}

Extract the cities and corresponding network IDs that can be accessed with the CityBikes API.

In [132]:
# Map each city name to the IDs of every network operating there. A city can
# host more than one network, hence a list per city.
network_ids = defaultdict(list)
for network in networks:
    network_ids[network['location']['city']].append(network['id'])

# Alphabetically sorted list of the cities that can be accessed with the
# CityBikes API; show the first few as a sanity check.
cities = sorted(network_ids)
cities[:5]

['10vorWien', 'A Coruña', 'A Illa de Arousa', 'Aachen', 'Abu Dhabi']

## Send a request to CityBikes for the city of your choice. 

I chose Santiago, Chile, for this study. Santiago is a populous city with many bike stations. Also, at the time of data acquisition, it was late afternoon in Santiago and the weather there was slightly overcast and ~14 deg. C. Both of these factors would seem to promote cycling.

In [133]:
# City under study; confirm the API knows it and report how many networks
# it has before requesting station data.
city = 'Santiago'
print(
    f'{city} in cities: {city in cities}',
    f'No. of bike networks in {city}: {len(network_ids[city])}',
    sep='\n',
)

Santiago in cities: True
No. of bike networks in Santiago: 1


In [134]:
# Note that url and headers were defined above.
# Look the network up through `city` (chosen above) rather than repeating the
# literal, so the city only has to be changed in one place.
network_id: str = network_ids[city][0]
endpoint: str = url + '/' + network_id

try:
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(endpoint, headers=headers, timeout=30)
    response.raise_for_status()
except requests.RequestException as err:
    # Catch connection errors and timeouts too (RequestException is their
    # common base class). Otherwise a network failure would skip this handler
    # and `response` would still hold the earlier networks-list response.
    print(f'get request failed, {err}')
    response = None
else:
    print('get request succeeded')

get request succeeded


In [141]:
# Decode the response body as JSON and take the station list nested under
# 'network' -> 'stations'.
# NOTE: `response` is None when the request cell above printed a failure, in
# which case this line raises AttributeError.
stations: list[dict] = response.json()['network']['stations']

# Print out the first station to see its structure.
stations[0]

{'empty_slots': 5,
 'extra': {'address': 'Alcalde Dávalos 124',
  'altitude': 0.0,
  'ebikes': 0,
  'has_ebikes': True,
  'last_updated': 1714679595,
  'normal_bikes': 8,
  'payment': ['key', 'transitcard', 'creditcard', 'phone'],
  'payment-terminal': True,
  'post_code': '1111',
  'rental_uris': {},
  'renting': 1,
  'returning': 1,
  'slots': 13,
  'uid': '237'},
 'free_bikes': 8,
 'id': 'e1593acef03a0fd770595370586bc358',
 'latitude': -33.428334,
 'longitude': -70.627312,
 'name': 'P31 - Estación Canal 13',
 'timestamp': '2024-05-02T19:56:09.714000Z'}

## Parse the response to get various details for the bike stations in that city 

In [136]:
# Column-oriented accumulator: one independent empty list per output column,
# keyed in the order the columns should appear.
station_data: dict[str, list] = {
    column: []
    for column in (
        'id',
        'name',
        'latitude',
        'longitude',
        'altitude',
        'free_bikes',
        'has_ebikes',
        'timestamp',
    )
}

In [142]:
# Empty every column first so that re-running this cell (or running it again
# after a partial failure) cannot append duplicate rows to station_data.
for column_values in station_data.values():
    column_values.clear()

for station in stations:
    # Fields under 'extra' are read with .get(): a station lacking one
    # contributes None instead of raising part-way through the loop, which
    # would leave the columns with unequal lengths.
    extra = station.get('extra', {})
    station_data['id'].append(station['id'])
    station_data['name'].append(station['name'])
    station_data['latitude'].append(station['latitude'])
    station_data['longitude'].append(station['longitude'])
    station_data['altitude'].append(extra.get('altitude'))
    station_data['free_bikes'].append(station['free_bikes'])
    station_data['has_ebikes'].append(extra.get('has_ebikes'))
    station_data['timestamp'].append(station['timestamp'])

## Put the parsed results into a pandas.DataFrame

In [143]:
# Each key of station_data becomes a column; each list supplies its values.
stations_df = pd.DataFrame(data=station_data)

In [144]:
# Preview the first five rows to check the parsed columns.
stations_df.head(n=5)

Unnamed: 0,id,name,latitude,longitude,altitude,free_bikes,has_ebikes,timestamp
0,e1593acef03a0fd770595370586bc358,P31 - Estación Canal 13,-33.428334,-70.627312,0.0,8,True,2024-05-02T19:56:09.714000Z
1,3983dd515589a80338dd44a28f5ec414,V34 - Mestizo,-33.394,-70.6,0.0,7,True,2024-05-02T19:56:09.360000Z
2,405a3a5ca08c7536d3eb286cf8553025,V10 - Casa Costanera,-33.398,-70.598,0.0,3,True,2024-05-02T19:56:09.370000Z
3,0e8dfc3f137cb3911bd32f5fd45e0f93,V35 - Municipalidad de Vitacura,-33.398,-70.601,0.0,7,True,2024-05-02T19:56:09.372000Z
4,bdf4fc889476008ea8c644640f306bbc,V07 - Bicentenario,-33.401,-70.602,0.0,13,True,2024-05-02T19:56:09.373000Z


In [145]:
# Print each column's dtype and non-null count to check for missing values.
stations_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229 entries, 0 to 228
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   id          229 non-null    object 
 1   name        229 non-null    object 
 2   latitude    229 non-null    float64
 3   longitude   229 non-null    float64
 4   altitude    229 non-null    float64
 5   free_bikes  229 non-null    int64  
 6   has_ebikes  229 non-null    bool   
 7   timestamp   229 non-null    object 
dtypes: bool(1), float64(3), int64(1), object(3)
memory usage: 12.9+ KB


## Export the DataFrame to CSV

In [148]:
dirname = '../data/'
basename = 'stations.csv'
filename = dirname + basename

# DataFrame.to_csv does not create missing directories, so make sure the
# output directory exists before writing (no-op when it is already there).
Path(dirname).mkdir(parents=True, exist_ok=True)

# NOTE(review): the row index is written as the first column (to_csv
# default); pass index=False if downstream readers do not expect it.
stations_df.to_csv(filename, sep=',')