## 1. Explore the structure of the API, query the API and understand the data returned.

## 2. Choose a city covered by the CityBikes API and retrieve all available bike stations in that city.

## 3. For each bike station, use the API to retrieve the latitude, longitude and number of bikes.

## 4. Parse the JSON object into a Pandas dataframe.

In [None]:
import pandas as pd
import os # use this to access your environment variables
import requests # this will be used to call the APIs
import json #will be used to save snapshots of citybike data, since data is always live

In [None]:
# Ask CityBikes for the catalogue of every network it tracks.
# The timeout keeps this cell from hanging forever on a stalled connection,
# and raise_for_status() fails loudly on an HTTP error instead of leaving an
# error response to be decoded by a later cell.
networks = requests.get('http://api.citybik.es/v2/networks', timeout=30)
networks.raise_for_status()

In [None]:
# Decode the response body from JSON and keep only the decoded payload.
# NOTE: this rebinds `networks` from the response object to the decoded data,
# so re-running this cell without re-running the request cell will fail.
networks = networks.json()

In [None]:
# Display the decoded payload to get a feel for its structure.
networks

In [None]:
# The CityBikes API is rather straightforward: every entry under 'networks'
# carries the city it serves and the id of the bike-share program there.
# Collect both into a lookup table of city -> program id.
city = [entry['location']['city'] for entry in networks['networks']]
name = [entry['id'] for entry in networks['networks']]
city_bikes = pd.DataFrame({'city': city, 'name': name})
# Request headers do not seem to be needed for this API.

In [None]:
# With the lookup table built, filter it down to the city we'd like to study.
city_bikes[city_bikes['city'].str.startswith('Toronto')]

In [None]:
# Since I'm not as familiar with Toronto's infrastructure as I am with other
# cities', I have elected to take a closer look at one of my absolute
# favorites: Chicago.
city_bikes[city_bikes['city'].str.startswith('Chicago')]

In [None]:
def city_bike(name, timeout=30):
    """Fetch the CityBikes record for a single bike-share network.

    Parameters
    ----------
    name : str
        Network id as listed in the networks catalogue, e.g. 'divvy'
        (Chicago) or 'bixi-toronto' (Toronto).
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook. Defaults to 30.

    Returns
    -------
    requests.Response
        The raw response. The HTTP status is not checked here; callers
        decode the body with ``.json()``.
    """
    url = f"http://api.citybik.es/v2/networks/{name}"

    # Request headers do not seem to be needed for this API.
    return requests.get(url, timeout=timeout)

In [None]:
# Pull the live Divvy (Chicago) snapshot and decode the JSON body in one step.
data = city_bike('divvy').json()

In [None]:
# Now we'll begin going through the data retrieved.
# We already have some idea of what we're looking for:
# the latitude, longitude and information about the number of bikes present at each station.
print(data.keys())  # top-level keys of the decoded response
print(data['network'].keys())  # keys of the nested 'network' object
data['network']['stations'][2]  # show a single station record (index 2) as a sample

In [None]:
# Show the network id as reported inside the payload itself.
data['network']['id']

In [None]:
#the data we're looking for is within stations, except for the company name which will be the same for all this data.

In [None]:
# Flatten the station records into one list per column of interest.
# The company id is read from the payload itself rather than typed in by hand,
# so this cell works unchanged for whichever network was fetched into `data`.
stations = data['network']['stations']
comp_id = [data['network']['id']] * len(stations)
name = [station['name'] for station in stations]
latitude = [station['latitude'] for station in stations]
longitude = [station['longitude'] for station in stations]
free_bikes = [station['free_bikes'] for station in stations]
timestamp = [station['timestamp'] for station in stations]
# 'slots' and 'renting' live inside the per-station 'extra' object. A station
# that lacks 'extra' or one of these keys is recorded as None instead of
# aborting the whole snapshot with a KeyError.
# NOTE(review): assumes the contents of 'extra' can vary between stations --
# confirm against the API documentation.
slots = [(station.get('extra') or {}).get('slots') for station in stations]
renting = [(station.get('extra') or {}).get('renting') for station in stations]

In [None]:
# Assemble the collected columns into a dataframe. Since the plan is to pull
# multiple snapshots and compare them, this whole process has to be performed
# manually each time; automating it would be a good idea in the future, to
# ensure a more regular and comparable data set.
# The naming scheme is probably too cumbersome, but it is maintained for
# consistency and legibility.
Chi_five30_fri = pd.DataFrame({
    'comp_id': comp_id,
    'name': name,
    'latitude': latitude,
    'longitude': longitude,
    'slots': slots,
    'free_bikes': free_bikes,
    'renting': renting,
    'timestamp': timestamp,
})

In [None]:
# Inspect the timestamp column as it came from the API, before any parsing.
Chi_five30_fri['timestamp']

In [None]:
# Parse the timestamp column into pandas datetimes, overwriting it in place.
# NOTE(review): whether the result is tz-aware depends on the format of the
# incoming strings -- check the dtype shown below before any timezone handling.
Chi_five30_fri['timestamp'] = pd.to_datetime(Chi_five30_fri['timestamp'])

In [None]:
# Display the column again to check the result of the datetime conversion.
Chi_five30_fri['timestamp']

In [None]:
# Print the dataframe summary to verify the columns and their dtypes.
Chi_five30_fri.info()

In [None]:
#The only issue with the data retrieved so far is that the timestamp is set to GMT. 
#Since the question I'm interested in is about the use, esp. as it relates to public transit, 
#having accurate local times associated with the data is key.

In [None]:
import datetime as dt
import pytz

In [None]:
# The timestamps are reported in GMT/UTC, so the column has to be *labelled*
# as UTC before it can be converted to Chicago local time in the next cell.
# Depending on how the strings were parsed, the column is either tz-naive
# (tz_convert would raise TypeError) or already tz-aware (tz_localize would
# raise TypeError). That is why one formula "worked" on some days and the
# other on different days. Handle both cases so the result is always
# tz-aware UTC.
# Naive values must be localized as 'UTC', not 'US/Central': localizing them
# as Central would mislabel UTC wall-clock times as local ones.
if Chi_five30_fri['timestamp'].dt.tz is None:
    Chi_five30_fri['timestamp'] = Chi_five30_fri['timestamp'].dt.tz_localize('UTC')
else:
    Chi_five30_fri['timestamp'] = Chi_five30_fri['timestamp'].dt.tz_convert('UTC')

In [None]:
# Convert the timestamps to Chicago local time ('US/Central').
# NOTE(review): tz_convert expects a tz-aware column -- confirm the column
# carries a timezone at this point.
Chi_five30_fri['timestamp'] = Chi_five30_fri['timestamp'].dt.tz_convert('US/Central')
# In any case, all of my data did end up correctly formatted, which is good.

In [None]:
# To maintain records, the snapshot is written out as a .json file so it can
# be referenced and used later, preserving this observation even though the
# live data keeps changing.
# NOTE(review): to_json is called with its default options -- confirm how the
# tz-aware timestamps are serialized (presumably as epoch values) before
# relying on the local times after reloading.
Chi_five30_fri.to_json('Chi_five30_fri(raw).json')

In [None]:
#this process was then repeated for observations on Friday evening, Saturday and Sunday afternoons and Monday morning.