In [1]:
import requests
import pandas as pd
from pymongo import MongoClient, UpdateOne
import json
import requests
import time
from config import api_key_zip, api_key_census

In [2]:
# Read the coordinates of the top 10 start stations and top 10 end stations from MongoDB
# URI of the local MongoDB server
mongo_uri = "mongodb://localhost:27017/"
mongo = MongoClient(mongo_uri)

# Database and the two top-10 station collections
db = mongo["chicago_bikes"]
start_stations_collection = db["Top10StartStations"]
end_stations_collection = db["Top10EndStations"]

# Query every document, projecting only the coordinate fields (no _id)
coordinate_fields = {'_id': 0, 'latitude': 1, 'longitude': 1}
top_start_stations = start_stations_collection.find({}, coordinate_fields)
top_end_stations = end_stations_collection.find({}, coordinate_fields)

# Collect (latitude, longitude) pairs from a cursor of station documents
def extract_lat_long(cursor):
    """Return a list of ``(latitude, longitude)`` tuples, one per document in ``cursor``.

    Each document must provide ``'latitude'`` and ``'longitude'`` keys. The
    cursor (or any iterable of such mappings) is consumed in order.
    """
    coordinates = []
    for station in cursor:
        coordinates.append((station['latitude'], station['longitude']))
    return coordinates

# Materialise both cursors into lists of (latitude, longitude) tuples
start_station_coordinates, end_station_coordinates = (
    extract_lat_long(cursor) for cursor in (top_start_stations, top_end_stations)
)

# Show the coordinate lists for the top 10 start and end stations
print("Start Stations Coordinates:", start_station_coordinates)
print("End Stations Coordinates:", end_station_coordinates)

Start Stations Coordinates: [(41.880958, -87.616743), (41.867888, -87.623041), (41.897448, -87.628722), (41.869265218438194, -87.67373085021973), (41.897448, -87.628722), (41.8810317, -87.62408432), (41.920771, -87.663712), (41.926277, -87.630834), (41.893992, -87.629318), (41.893992, -87.629318)]
End Stations Coordinates: [(41.892278, -87.612043), (41.911722, -87.626804), (41.90096039, -87.62377664), (41.880958, -87.616743), (41.912133, -87.634656), (41.8810317, -87.62408432), (41.926277, -87.630834), (41.902973, -87.63128), (41.88917683258, -87.6385057718), (41.903222, -87.634324)]


In [3]:
# Reverse-geocode each latitude/longitude pair to a ZIP code with the LocationIQ API

# LocationIQ reverse-geocoding endpoint
URL = 'https://us1.locationiq.com/v1/reverse.php'

# Aliases for the start and end station coordinate lists
coordinates_list1, coordinates_list2 = start_station_coordinates, end_station_coordinates

# Function to retrieve postal codes from two lists of coordinates
def get_zip_codes_locationiq(start_station_coordinates, end_station_coordinates, api_key, timeout=10):
    """Reverse-geocode start and end station coordinates to ZIP codes via LocationIQ.

    The two inputs are interleaved (start[0], end[0], start[1], end[1], ...).
    When one input is longer than the other, its remaining coordinates are
    still looked up instead of being silently dropped.

    Args:
        start_station_coordinates: iterable of (latitude, longitude) pairs.
        end_station_coordinates: iterable of (latitude, longitude) pairs.
        api_key: LocationIQ API key, sent as the ``key`` query parameter.
        timeout: seconds to wait for each HTTP request before giving up.

    Returns:
        A list of (latitude, longitude, zip_code) tuples in lookup order.
        zip_code is None when the request fails, the response is not valid
        JSON, or the response contains no postcode.
    """
    starts = list(start_station_coordinates)
    ends = list(end_station_coordinates)

    # Interleave the two lists without truncating to the shorter one.
    ordered_coordinates = []
    for index in range(max(len(starts), len(ends))):
        for coordinates in (starts, ends):
            if index < len(coordinates):
                ordered_coordinates.append(coordinates[index])

    zip_codes = []
    for lat, lon in ordered_coordinates:
        params = {
            'key': api_key,
            'lat': lat,
            'lon': lon,
            'format': 'json'
        }

        zip_code = None
        try:
            # A timeout keeps one stalled request from hanging the whole notebook.
            response = requests.get(URL, params=params, timeout=timeout)
            if response.status_code == 200:
                # The postcode may be absent from the response; default to None.
                zip_code = response.json().get('address', {}).get('postcode', None)
            else:
                print(f"Error for {lat}, {lon}: {response.text}")
        except (requests.exceptions.RequestException, ValueError) as err:
            # Network failures and undecodable bodies are recorded as a missing
            # ZIP code, the same way a non-200 response is.
            print(f"Error for {lat}, {lon}: {err}")
        zip_codes.append((lat, lon, zip_code))

        # Respect the free tier limit of 1 request per second
        time.sleep(1)

    return zip_codes

# Look up the ZIP code for every start and end station coordinate
zip_codes_list = get_zip_codes_locationiq(coordinates_list1, coordinates_list2, api_key_zip)

# Report each coordinate alongside the ZIP code found for it
for geocoded in zip_codes_list:
    lat, lon, zip_code = geocoded
    print(f"Coordinates: ({lat}, {lon}) => ZIP Code: {zip_code}")


Coordinates: (41.880958, -87.616743) => ZIP Code: 60601
Coordinates: (41.892278, -87.612043) => ZIP Code: 60611
Coordinates: (41.867888, -87.623041) => ZIP Code: 60605
Coordinates: (41.911722, -87.626804) => ZIP Code: 60614
Coordinates: (41.897448, -87.628722) => ZIP Code: 60654
Coordinates: (41.90096039, -87.62377664) => ZIP Code: 60611
Coordinates: (41.869265218438194, -87.67373085021973) => ZIP Code: 60612
Coordinates: (41.880958, -87.616743) => ZIP Code: 60601
Coordinates: (41.897448, -87.628722) => ZIP Code: 60654
Coordinates: (41.912133, -87.634656) => ZIP Code: 60614
Coordinates: (41.8810317, -87.62408432) => ZIP Code: 60601
Coordinates: (41.8810317, -87.62408432) => ZIP Code: 60601
Coordinates: (41.920771, -87.663712) => ZIP Code: 60614
Coordinates: (41.926277, -87.630834) => ZIP Code: 60657
Coordinates: (41.926277, -87.630834) => ZIP Code: 60657
Coordinates: (41.902973, -87.63128) => ZIP Code: 60610
Coordinates: (41.893992, -87.629318) => ZIP Code: 60654
Coordinates: (41.88917

In [10]:
# Look up the population of each ZIP code with the American Community Survey (ACS) API.
# The ACS 5-year estimates are used because the 1-year estimates only cover
# areas with populations of 65,000 people or more.

# The ZIP code is the third field of each (lat, lon, zip_code) tuple in zip_codes_list
zip_codes = [entry[2] for entry in zip_codes_list if len(entry) >= 3]

# U.S. Census Bureau API key
api_key = api_key_census

# Base URL of the 2019 ACS 5-Year Estimates endpoint
base_url = 'https://api.census.gov/data/2019/acs/acs5'

# Census variable code for total population
population_variable = 'B01003_001E'

# Population results, keyed by ZIP code
population_data = dict()

# Function to retrieve population by ZIP code
def get_population_by_zip(zip_code, timeout=10):
    """Fetch the total population of one ZIP code tabulation area from the Census API.

    Args:
        zip_code: ZIP code to look up. A falsy value (for example None from a
            failed geocoding lookup) returns None without contacting the API.
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        The population as an int, or None when the ZIP code is missing, the
        request fails, or the response does not contain a usable data row.
        Failures are reported with print() rather than raised.
    """
    if not zip_code:
        # Nothing to look up; avoid sending the literal text "None" to the API.
        return None

    parameters = {
        'get': population_variable,
        'for': f'zip code tabulation area:{zip_code}',
        'in': 'state:17',  # Adding the state code for Illinois
        'key': api_key
    }
    try:
        # A timeout keeps one stalled request from hanging the whole notebook.
        response = requests.get(base_url, params=parameters, timeout=timeout)
        response.raise_for_status()  # This will raise an HTTPError if the HTTP request returned an unsuccessful status code
        data = response.json()
        # Assuming the first element is headers, the second is data
        return int(data[1][0])
    except requests.exceptions.HTTPError as errh:
        print(f"HTTP Error for ZIP code {zip_code}: {errh}")
    except requests.exceptions.ConnectionError as errc:
        print(f"Error Connecting for ZIP code {zip_code}: {errc}")
    except requests.exceptions.Timeout as errt:
        print(f"Timeout Error for ZIP code {zip_code}: {errt}")
    except requests.exceptions.RequestException as err:
        print(f"Error for ZIP code {zip_code}: {err}")
    except (ValueError, TypeError, LookupError) as errp:
        # Body was not JSON, had no data row, or the value was not an integer.
        print(f"Unexpected response for ZIP code {zip_code}: {errp}")
    return None

# Retrieve the population for each ZIP code and store in the dictionary.
# zip_codes holds one entry per station, so the same ZIP code appears several
# times; skip codes that already have a population to avoid repeating the
# same Census API request. Codes whose earlier lookup failed are retried.
for zip_code in zip_codes:
    if population_data.get(zip_code) is not None:
        continue
    population_data[zip_code] = get_population_by_zip(zip_code)

# Print out the population data
for zip_code, population in population_data.items():
    if population is not None:
        print(f"The population for ZIP code {zip_code} is {population}")
    else:
        print(f"Population data not available for ZIP code {zip_code}.")

['60601', '60611', '60605', '60614', '60654', '60611', '60612', '60601', '60654', '60614', '60601', '60601', '60614', '60657', '60657', '60610', '60654', '60654', '60654', '60610']
The population for ZIP code 60601 is 15083
The population for ZIP code 60611 is 33224
The population for ZIP code 60605 is 29060
The population for ZIP code 60614 is 71954
The population for ZIP code 60654 is 20022
The population for ZIP code 60612 is 33735
The population for ZIP code 60657 is 70958
The population for ZIP code 60610 is 40548


In [None]:
# TODO: Merge the population data back with the Top 10 Start Stations and Top 10 End Stations (not yet implemented)


In [None]:
# Close the MongoDB connection opened at the top of the notebook.
# The station coordinates were already read into Python lists, so the
# client is no longer needed by the cells above.
mongo.close()