##### 1. Explore the structure of the API, query the API and understand the data returned.

##### 2. Choose a city covered by the CityBikes API and retrieve all available bike stations in that city.

##### 3. For each bike station, use the API to retrieve the latitude, longitude and number of bikes.

##### 4. Parse the JSON object into a Pandas dataframe.

In [1]:
import pandas as pd
import os # use this to access your environment variables
import requests # this will be used to call the APIs
import json #will be used to save snapshots of citybike data, since data is always live

In [2]:
# Fetch the list of all bike-share networks known to the CityBikes API.
# The timeout keeps the cell from hanging indefinitely if the server stops
# responding, and raise_for_status() surfaces an HTTP error here instead of
# letting a later .json() / key lookup fail in a confusing way.
networks = requests.get('http://api.citybik.es/v2/networks', timeout=30)
networks.raise_for_status()

In [3]:
# Decode the JSON body; `networks` is rebound from the Response object to the parsed payload.
networks = networks.json()

In [5]:
# Inspect the top-level keys of the parsed payload.
networks.keys()

dict_keys(['networks'])

In [7]:
# Peek at the first network entry to see which fields each network carries.
networks['networks'][0]

{'company': ['ЗАО «СитиБайк»'],
 'href': '/v2/networks/velobike-moscow',
 'id': 'velobike-moscow',
 'location': {'city': 'Moscow',
  'country': 'RU',
  'latitude': 55.75,
  'longitude': 37.616667},
 'name': 'Velobike'}

In [8]:
# The CityBikes API is rather straightforward: every entry under 'networks'
# carries its location and an id.
# Collect the city and the network id of each entry into a lookup table.
# (No request headers appear to be needed for this API.)
city = [entry['location']['city'] for entry in networks['networks']]
name = [entry['id'] for entry in networks['networks']]
city_bikes = pd.DataFrame({'city': city, 'name': name})

In [9]:
# Then we can search that dataframe for the city we'd like to study.
# A prefix match is used because the API reports the city as e.g. "Toronto, ON".
city_bikes.loc[city_bikes['city'].str.startswith('Toronto')]

Unnamed: 0,city,name
77,"Toronto, ON",bixi-toronto


In [10]:
# BUT since I'm not as familiar with Toronto's infrastructure as I am with other cities',
# I have elected to take a closer look at one of my absolute favorites, Chicago.
city_bikes.loc[city_bikes['city'].str.startswith('Chicago')]

Unnamed: 0,city,name
101,"Chicago, IL",divvy


In [11]:
def city_bike(name, timeout=30):
    """Fetch the current data for one CityBikes network.

    Parameters
    ----------
    name : str
        Network id as listed by the /v2/networks endpoint,
        e.g. 'divvy' (Chicago) or 'bixi-toronto' (Toronto).
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    requests.Response
        The raw response; call ``.json()`` on it to get the payload.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    url = f"http://api.citybik.es/v2/networks/{name}"

    # No request headers appear to be needed for this API.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error response instead of letting a later
    # .json() / key lookup fail in a confusing way.
    response.raise_for_status()
    return response

In [31]:
# Query the Divvy (Chicago) network and decode the JSON payload in one step.
data = city_bike('divvy').json()

In [32]:
# Now we'll begin going through the data retrieved.
# Already we have some idea of what we're looking for:
# the latitude, longitude and information about the number of bikes present at each station.
print(data.keys())
print(data['network'].keys())
# Show one sample station record.
data['network']['stations'][2]

dict_keys(['network'])
dict_keys(['company', 'ebikes', 'gbfs_href', 'href', 'id', 'location', 'name', 'stations'])


{'empty_slots': 0,
 'extra': {'ebikes': 1,
  'has_ebikes': True,
  'last_updated': 1700481590,
  'payment': ['key', 'creditcard', 'transitcard'],
  'payment-terminal': True,
  'rental_uris': {'android': 'https://chi.lft.to/lastmile_qr_scan',
   'ios': 'https://chi.lft.to/lastmile_qr_scan'},
  'renting': 1,
  'returning': 1,
  'slots': 15,
  'uid': 'a3ac1bc9-a135-11e9-9cda-0a87ae2ba916'},
 'free_bikes': 15,
 'id': 'd510712148d2ace2f7efade8e41b4fa4',
 'latitude': 41.950687,
 'longitude': -87.6687,
 'name': 'Ashland Ave & Grace St',
 'timestamp': '2023-11-20T12:00:40.642000Z'}

In [14]:
# The network id is also stored in the payload itself.
data['network']['id']

'divvy'

In [None]:
#the data we're looking for is within stations, except for the company name which will be the same for all this data.

In [33]:
# Flatten the station records into one list per future dataframe column.
# The network id is read from the payload rather than hard-coded, so comp_id
# stays correct if a different network is queried.
stations = data['network']['stations']
comp_id = [data['network']['id']] * len(stations)
name = [station['name'] for station in stations]
latitude = [station['latitude'] for station in stations]
longitude = [station['longitude'] for station in stations]
# 'slots' and 'renting' live in the nested 'extra' dict of each station.
slots = [station['extra']['slots'] for station in stations]
free_bikes = [station['free_bikes'] for station in stations]
renting = [station['extra']['renting'] for station in stations]
timestamp = [station['timestamp'] for station in stations]

In [34]:
# Assemble the per-column lists into a dataframe. The plan is to pull several
# snapshots and compare them, so for now this has to be repeated manually for
# each one; automating it would be a good idea in the future, to ensure a more
# regular and comparable data set.
# The variable naming scheme is probably too cumbersome, but is kept for
# consistency and legibility.
Chi_6am_mon = pd.DataFrame({
    'comp_id': comp_id,
    'name': name,
    'latitude': latitude,
    'longitude': longitude,
    'slots': slots,
    'free_bikes': free_bikes,
    'renting': renting,
    'timestamp': timestamp,
})

In [35]:
# Parse the ISO-8601 timestamp strings into timezone-aware (UTC) datetimes.
Chi_6am_mon['timestamp'] = pd.to_datetime(Chi_6am_mon['timestamp']) 
# I THINK this sometimes throws errors because the data from the API is incomplete or corrupted;
# re-querying the API seems to fix it.
# NOTE(review): another possible cause is timestamps that differ in format within one
# response (e.g. some without fractional seconds). If the error is a format-mismatch
# ValueError, pandas >= 2.0 accepts format='ISO8601' for this case -- unconfirmed here.

In [36]:
# Display the parsed timestamp column to check its dtype.
Chi_6am_mon['timestamp']

0      2023-11-20 12:00:40.543000+00:00
1      2023-11-20 12:00:37.536000+00:00
2      2023-11-20 12:00:40.642000+00:00
3      2023-11-20 12:00:37.536000+00:00
4      2023-11-20 12:00:38.036000+00:00
                     ...               
1658   2023-11-20 12:00:37.518000+00:00
1659   2023-11-20 12:00:37.694000+00:00
1660   2023-11-20 12:00:39.696000+00:00
1661   2023-11-20 12:00:39.957000+00:00
1662   2023-11-20 12:00:40.232000+00:00
Name: timestamp, Length: 1663, dtype: datetime64[ns, UTC]

In [25]:
# Summarize column dtypes and non-null counts.
# NOTE: the output captured below comes from execution count 25, i.e. before the
# pd.to_datetime cell (count 35) was run, which is why timestamp is still listed as object.
Chi_6am_mon.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1663 entries, 0 to 1662
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   comp_id     1663 non-null   object 
 1   name        1663 non-null   object 
 2   latitude    1663 non-null   float64
 3   longitude   1663 non-null   float64
 4   slots       1663 non-null   int64  
 5   free_bikes  1663 non-null   int64  
 6   renting     1663 non-null   int64  
 7   timestamp   1663 non-null   object 
dtypes: float64(2), int64(3), object(3)
memory usage: 104.1+ KB


In [None]:
#The only issue with the data retrieved so far is that the timestamp is set to GMT. 
#Since the question I'm interested in is about the use, esp. as it relates to public transit, 
#having accurate local times associated with the data is key.

In [28]:
import datetime as dt
import pytz

In [23]:
Chi_6am_mon['timestamp'] = Chi_6am_mon['timestamp'].dt.tz_localize(pytz.timezone('US/Central')).dt.tz_convert(pytz.timezone('UTC'))
# For unknown reasons, at one point the line above converted my data to Central time, but for
# samples taken on the following days the cell below did.
# As far as I know I did not change anything.
# NOTE(review): the AttributeError shown below means the column was not datetime-like when this
# cell ran; its execution count (23) is lower than that of the pd.to_datetime cell (35), so it
# was likely run on the raw timestamp strings.
# tz_localize is meant for naive timestamps; presumably it is rejected once the column is
# already UTC-aware -- confirm against the pandas docs. The tz_convert cell below is the call
# that matches the UTC-aware column produced by pd.to_datetime.

AttributeError: Can only use .dt accessor with datetimelike values

In [37]:
# Convert the UTC-aware timestamps to the US/Central time zone (Chicago local time).
Chi_6am_mon['timestamp'] = Chi_6am_mon['timestamp'].dt.tz_convert('US/Central')
# In any case, all of my data did end up correctly formatted, which is good.

In [38]:
# To maintain records, this snapshot is saved as a .json file so that it can be
# referenced and used later, while also preserving these observations.

# Writing into a folder that does not exist fails, so create it first.
os.makedirs('unprocessed', exist_ok=True)
Chi_6am_mon.to_json(r'unprocessed/Chi_6am_mon(raw).json')

In [None]:
#this process was then repeated for observations on Friday evening, Saturday and Sunday afternoons and Monday morning.