##### 1.Explore the structure of the API, query the API and understand the data returned.

##### 2.Choose a city covered by the CityBikes API and retrieve all available bike stations in that city.

##### 3.For each bike station, use the API to call the latitude, longitude and number of bikes.

##### 4.Parse the JSON object into a Pandas dataframe.

In [1]:
import pandas as pd
import os # use this to access your environment variables
import requests # this will be used to call the APIs
import json #will be used to save snapshots of citybike data, since data is always live

In [2]:
networks = requests.request("GET", 'http://api.citybik.es/v2/networks') # gives list of all networks

In [3]:
networks = networks.json()

In [4]:
networks.keys()

dict_keys(['networks'])

In [5]:
networks['networks'][0]

{'company': ['ЗАО «СитиБайк»'],
 'href': '/v2/networks/velobike-moscow',
 'id': 'velobike-moscow',
 'location': {'city': 'Moscow',
  'country': 'RU',
  'latitude': 55.75,
  'longitude': 37.616667},
 'name': 'Velobike'}

In [6]:
# the citybike API is rather straightforward.
# to get a list of all cities they operate in, as well as the ids of the program in that city:
city = []
name = []
for i in networks['networks']:
    city.append(i['location']['city'])
    name.append(i['id'])
city_bikes = pd.DataFrame()
city_bikes['city'] = city
city_bikes['name'] = name
#headers not needed for this api, it seems.

In [7]:
# then we can search that dataframe for the city we'd like to study:
city_bikes.loc[city_bikes['city'].str.startswith('Toronto')]

Unnamed: 0,city,name
77,"Toronto, ON",bixi-toronto


In [8]:
# BUT since I'm not as familiar with Torontos infrastructure as I am with other city's 
# I have elected to take a closer look at one of my absolute favorites,Chicago
city_bikes.loc[city_bikes['city'].str.startswith('Chicago')]

Unnamed: 0,city,name
101,"Chicago, IL",divvy


In [9]:
def city_bike(name):

    url = f"http://api.citybik.es/v2/networks/{name}"
    
#     headers = { 
#         "name":name
#     }
#headers not needed for this api, it seems.
#toronto = bixi-toronto
#chicago = divvy

    response = requests.request("GET", url)
    return response

In [23]:
data = city_bike('divvy')
data = data.json()

In [11]:
# now we'll begin going through the data retieved. 
# already we have some idea of what we're looking for, 
# the latitude, longitude and information about the number of bikes present at each station
print(data.keys())
print(data['network'].keys())
data['network']['stations'][2]

dict_keys(['network'])
dict_keys(['company', 'ebikes', 'gbfs_href', 'href', 'id', 'location', 'name', 'stations'])


{'empty_slots': 3,
 'extra': {'ebikes': 3,
  'has_ebikes': True,
  'last_updated': 1700576730,
  'payment': ['key', 'creditcard', 'transitcard'],
  'payment-terminal': True,
  'rental_uris': {'android': 'https://chi.lft.to/lastmile_qr_scan',
   'ios': 'https://chi.lft.to/lastmile_qr_scan'},
  'renting': 1,
  'returning': 1,
  'slots': 15,
  'uid': 'a3ac1bc9-a135-11e9-9cda-0a87ae2ba916'},
 'free_bikes': 12,
 'id': 'd510712148d2ace2f7efade8e41b4fa4',
 'latitude': 41.950687,
 'longitude': -87.6687,
 'name': 'Ashland Ave & Grace St',
 'timestamp': '2023-11-21T14:27:19.186000Z'}

In [12]:
data['network']['id']

'divvy'

In [13]:
#the data we're looking for is within stations, except for the company name which will be the same for all this data.

In [24]:
timestamp=[]
comp_id=[]
name=[]
latitude=[]
longitude =[]
slots=[]
free_bikes=[]
renting=[]
for i in data['network']['stations']:
    comp_id.append('divvy') # since we've pulled this data by polling the citybikes site for 'divvy', we can just include this here
    name.append(i['name'])
    latitude.append(i['latitude'])
    longitude.append(i['longitude'])
    slots.append(i['extra']['slots'])
    free_bikes.append(i['free_bikes'])
    renting.append(i['extra']['renting'])
    timestamp.append(i['timestamp'])

In [25]:
# now we'll create a dataframe based off of this. Since my plan is to pull multiple views and compare them,
# this whole process will need to be performed manualled each time. 
# automating this will be a good idea in the future, to ensure a more regular and comprable data set.

Chi_9am_mon = pd.DataFrame() #this naming scheme is probably too cumbersome, but will be maintained for consistency and legibility
Chi_9am_mon['comp_id']=comp_id
Chi_9am_mon['name'] = name
Chi_9am_mon['latitude']=latitude
Chi_9am_mon['longitude']=longitude
Chi_9am_mon['slots']=slots
Chi_9am_mon['free_bikes']=free_bikes
Chi_9am_mon['renting']=renting
Chi_9am_mon['timestamp']=timestamp

In [None]:
import datetime as dt #imported for use with converting timestamps
import pytz

In [29]:
Chi_9am_mon['timestamp'] = pd.to_datetime(Chi_5_mon['timestamp']) 
#I THINK this sometimes throws errors because the data from the API is incomplete or data corrupted at the stations even, 
#see: ValueError: time data "2023-11-20T12:31:04Z" doesn't match format "%Y-%m-%dT%H:%M:%S.%f%z", at position 1169.
#best to re-query which seems to fix the processes, or populate all datestamps from one, since they were retrieved at the same time

ValueError: time data "2023-11-21T14:27:17Z" doesn't match format "%Y-%m-%dT%H:%M:%S.%f%z", at position 1357. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

In [None]:
#The only issue with the data retrieved so far is that the timestamp is set to GMT. 
#Since the question I'm interested in is about the use, esp. as it relates to public transit, 
#having accurate local times associated with the data is key.

In [28]:
Chi_9am_mon['timestamp'] = Chi_9am_mon['timestamp'].dt.tz_convert('US/Central')
#in any case, all of my data did end up correctly formatted, which is good.

AttributeError: Can only use .dt accessor with datetimelike values

In [40]:
# If errors regarding the date persisit, we have two options, 
#removing all lines with corrupted date information
# or overwriting the date with the time of sampling.
#Since the data provided is live and we cannot be sure if missing date information equates to bad data about the stations, 
#I generally elect to retain all data about usage here
#this block can be used to apply a uniform time matching the time of sampling. 
timenow = pd.Timestamp.today()
Chi_9am_mon['timestamp'] = timenow

In [41]:
Chi_9am_mon.head()

Unnamed: 0,comp_id,name,latitude,longitude,slots,free_bikes,renting,timestamp
0,divvy,Lake Park Ave & 56th St,41.793242,-87.587782,19,2,1,2023-11-21 09:38:50.682485
1,divvy,Ada St & Washington Blvd,41.88283,-87.661206,15,8,1,2023-11-21 09:38:50.682485
2,divvy,Ashland Ave & Grace St,41.950687,-87.6687,15,12,1,2023-11-21 09:38:50.682485
3,divvy,Clark St & Wrightwood Ave,41.929546,-87.643118,15,8,1,2023-11-21 09:38:50.682485
4,divvy,Adler Planetarium,41.866095,-87.607267,39,22,1,2023-11-21 09:38:50.682485


In [42]:
Chi_9am_mon.columns

NameError: name 'Chi_9am_mon' is not defined

In [22]:
Chi_9am_mon['timestamp'] = Chi_9am_mon['timestamp'].dt.tz_localize(pytz.timezone('US/Central')).dt.tz_convert(pytz.timezone('UTC'))
#FOR unknown reasons, at one point the above formula converted my data to central time, but in samples from the next day 
#the following days, the below example did.
#as far as I know I did not change anything

AttributeError: Can only use .dt accessor with datetimelike values

In [None]:
# to maintain records, this data will then be re-recorded as a .json file, ensuring it can be referenced and used later
# while also preserving these observation

Chi_9am_mon.to_json(r'unprocessed/Chi_9am_mon(raw).json')

In [None]:
#this process should then repeated for observations at half-hour intervals to capture usage data over time and on subsequent days