In [23]:
import requests
import pandas as pd
from pymongo import MongoClient, UpdateOne
import json
import requests
import time
from config import api_key_zip, api_key_census

In [63]:
# Create a new database to hold copies of collections as to not interfere with data in the original collections
# MongoDB connection URI
mongo_uri = "mongodb://localhost:27017/"
mongo = MongoClient(mongo_uri)

# Connect to the original database
db = mongo.chicago_bikes

# Connect to the collections in the original database
start_stations_original = db['Top10StartStations']
end_stations_original = db['Top10EndStations']

# Connect to the new database
db2 = mongo["chicago_bikes_copy"]

# Specify the names of the new collections
new_start_collection_name = 'Top10StartStationsCopy'
new_end_collection_name = 'Top10EndStationsCopy'

# Drop existing collections with the same names in the new database (if needed)
db2[new_start_collection_name].drop()
db2[new_end_collection_name].drop()

# Create new collections in the new database
db2[new_start_collection_name].insert_many(start_stations_original.find())
db2[new_end_collection_name].insert_many(end_stations_original.find())

# Print each document in the new start stations collection to verify the data transfer
print(f"Documents in {new_start_collection_name}:")
for doc in db2[new_start_collection_name].find():
    print(doc)

# Print each document in the new end stations collection to verify the data transfer
print(f"\nDocuments in {new_end_collection_name}:")
for doc in db2[new_end_collection_name].find():
    print(doc)

Documents in Top10StartStationsCopy:
{'_id': 'Streeter Dr & Grand Ave', 'count': 71269, 'latitude': 41.880958, 'longitude': -87.616743}
{'_id': 'DuSable Lake Shore Dr & Monroe St', 'count': 39251, 'latitude': 41.867888, 'longitude': -87.623041}
{'_id': 'DuSable Lake Shore Dr & North Blvd', 'count': 37698, 'latitude': 41.897448, 'longitude': -87.628722}
{'_id': 'Michigan Ave & Oak St', 'count': 37208, 'latitude': 41.869265218438194, 'longitude': -87.67373085021973}
{'_id': 'Wells St & Concord Ln', 'count': 34508, 'latitude': 41.897448, 'longitude': -87.628722}
{'_id': 'Millennium Park', 'count': 32849, 'latitude': 41.8810317, 'longitude': -87.62408432}
{'_id': 'Clark St & Elm St', 'count': 32560, 'latitude': 41.920771, 'longitude': -87.663712}
{'_id': 'Kingsbury St & Kinzie St', 'count': 31614, 'latitude': 41.893992, 'longitude': -87.629318}
{'_id': 'Theater on the Lake', 'count': 31283, 'latitude': 41.926277, 'longitude': -87.630834}
{'_id': 'Wells St & Elm St', 'count': 28978, 'latitu

In [67]:
# Get the latitude and longitude from the top 10 stations and top 10 end stations

# Connect to collections
new_start_stations_collection = db2.Top10StartStationsCopy
new_end_stations_collection = db2.Top10EndStationsCopy

# Fetch the top 10 start and end stations
top_start_stations = new_start_stations_collection.find({}, {'_id': 0, 'latitude': 1, 'longitude': 1})
top_end_stations = new_end_stations_collection.find({}, {'_id': 0, 'latitude': 1, 'longitude': 1})

# Function to iterate over the cursor and extract lat and long data
def extract_lat_long(cursor):
    return [(station['latitude'], station['longitude']) for station in cursor]

# Extracting latitudes and longitudes
start_station_coordinates = extract_lat_long(top_start_stations)
end_station_coordinates = extract_lat_long(top_end_stations)

# Now there are two lists of tuples containing the latitudes and longitudes
# of the top 10 start and end stations, respectively:
print("Start Stations Coordinates:", start_station_coordinates)
print("End Stations Coordinates:", end_station_coordinates)

Start Stations Coordinates: [(41.880958, -87.616743), (41.867888, -87.623041), (41.897448, -87.628722), (41.869265218438194, -87.67373085021973), (41.897448, -87.628722), (41.8810317, -87.62408432), (41.920771, -87.663712), (41.893992, -87.629318), (41.926277, -87.630834), (41.893992, -87.629318)]
End Stations Coordinates: [(41.892278, -87.612043), (41.911722, -87.626804), (41.880958, -87.616743), (41.90096039, -87.62377664), (41.912133, -87.634656), (41.8810317, -87.62408432), (41.902973, -87.63128), (41.926277, -87.630834), (41.88917683258, -87.6385057718), (41.903222, -87.634324)]


In [68]:
# Pull in the data from Top10StartStations from original database
collection_name = 'Top10StartStationsCopy'
collection = db2[collection_name]
# The find() method without any parameters will return all documents in the collection
documents = collection.find()

# Print each document
for doc in documents:
    print(doc)

{'_id': 'Streeter Dr & Grand Ave', 'count': 71269, 'latitude': 41.880958, 'longitude': -87.616743}
{'_id': 'DuSable Lake Shore Dr & Monroe St', 'count': 39251, 'latitude': 41.867888, 'longitude': -87.623041}
{'_id': 'DuSable Lake Shore Dr & North Blvd', 'count': 37698, 'latitude': 41.897448, 'longitude': -87.628722}
{'_id': 'Michigan Ave & Oak St', 'count': 37208, 'latitude': 41.869265218438194, 'longitude': -87.67373085021973}
{'_id': 'Wells St & Concord Ln', 'count': 34508, 'latitude': 41.897448, 'longitude': -87.628722}
{'_id': 'Millennium Park', 'count': 32849, 'latitude': 41.8810317, 'longitude': -87.62408432}
{'_id': 'Clark St & Elm St', 'count': 32560, 'latitude': 41.920771, 'longitude': -87.663712}
{'_id': 'Kingsbury St & Kinzie St', 'count': 31614, 'latitude': 41.893992, 'longitude': -87.629318}
{'_id': 'Theater on the Lake', 'count': 31283, 'latitude': 41.926277, 'longitude': -87.630834}
{'_id': 'Wells St & Elm St', 'count': 28978, 'latitude': 41.893992, 'longitude': -87.6293

In [60]:
# Copy the Top 10 Start and End Station collections to edit them without affecting the originals, and add the zip codes to the collections
# Perform the duplication for Top10StartStations
db['Top10StartStations'].aggregate([
    {"$match": {}},  # This matches all documents and effectively copies them
    {"$out": "Top10StartStationsCopy"}  # The name of the new collection for start stations
])

# Perform the duplication for Top10EndStations
db['Top10EndStations'].aggregate([
    {"$match": {}},  # This matches all documents and effectively copies them
    {"$out": "Top10EndStationsCopy"}  # The name of the new collection for end stations
])

<pymongo.command_cursor.CommandCursor at 0x1d115b1b910>

In [61]:
# Check to see that end stations was copied correctly
collection_name = 'Top10EndStationsCopy'
collection = db2[collection_name]
# The find() method without any parameters will return all documents in the collection
documents = collection.find()

# Print each document
for doc in documents:
    print(doc)

In [31]:
# Find the zip codes corresponding to each latitude and longitude using LocationIQ API

# Endpoint URL for reverse geocoding
URL = 'https://us1.locationiq.com/v1/reverse.php'

# Change variable name for start and end station coordinates
coordinates_list1 = start_station_coordinates
coordinates_list2 = end_station_coordinates

# Function to retrieve postal codes from two lists of coordinates 
def get_zip_codes_locationiq(start_station_coordinates, end_station_coordinates, api_key):
    zip_codes = []
    for (lat1, lon1), (lat2, lon2) in zip(start_station_coordinates, end_station_coordinates):
        for lat, lon in [(lat1, lon1), (lat2, lon2)]:
            params = {
                'key': api_key,
                'lat': lat,
                'lon': lon,
                'format': 'json'
            }
            
            # Make the request to LocationIQ
            response = requests.get(URL, params=params)
            
            if response.status_code == 200:
                data = response.json()
                # Check if postcode is available in the response
                zip_code = data.get('address', {}).get('postcode', None)
                zip_codes.append((lat, lon, zip_code))
            else:
                print(f"Error for {lat}, {lon}: {response.text}")
                zip_codes.append((lat, lon, None))
            
            # Respect the free tier limit of 1 request per second
            time.sleep(1)
    
    return zip_codes

# Retrieve ZIP codes for the provided coordinates
zip_codes_list = get_zip_codes_locationiq(coordinates_list1, coordinates_list2, api_key_zip)

# Output the results
for lat, lon, zip_code in zip_codes_list:
    print(f"Coordinates: ({lat}, {lon}) => ZIP Code: {zip_code}")


Coordinates: (41.880958, -87.616743) => ZIP Code: 60601
Coordinates: (41.892278, -87.612043) => ZIP Code: 60611
Coordinates: (41.867888, -87.623041) => ZIP Code: 60605
Coordinates: (41.911722, -87.626804) => ZIP Code: 60614
Coordinates: (41.897448, -87.628722) => ZIP Code: 60654
Coordinates: (41.880958, -87.616743) => ZIP Code: 60601
Coordinates: (41.869265218438194, -87.67373085021973) => ZIP Code: 60612
Coordinates: (41.90096039, -87.62377664) => ZIP Code: 60611
Coordinates: (41.897448, -87.628722) => ZIP Code: 60654
Coordinates: (41.912133, -87.634656) => ZIP Code: 60614
Coordinates: (41.8810317, -87.62408432) => ZIP Code: 60601
Coordinates: (41.8810317, -87.62408432) => ZIP Code: 60601
Coordinates: (41.920771, -87.663712) => ZIP Code: 60614
Coordinates: (41.902973, -87.63128) => ZIP Code: 60610
Coordinates: (41.893992, -87.629318) => ZIP Code: 60654
Coordinates: (41.926277, -87.630834) => ZIP Code: 60657
Coordinates: (41.926277, -87.630834) => ZIP Code: 60657
Coordinates: (41.88917

In [32]:
# Check to make sure the collections were copied correctly
collection_name = 'Top10StartStationsCopy'
collection = db[collection_name]
# The find() method without any parameters will return all documents in the collection
documents = collection.find()

# Print each document
for doc in documents:
    print(doc)

{'_id': 'Streeter Dr & Grand Ave', 'count': 71269, 'latitude': 41.880958, 'longitude': -87.616743}
{'_id': 'DuSable Lake Shore Dr & Monroe St', 'count': 39251, 'latitude': 41.867888, 'longitude': -87.623041}
{'_id': 'DuSable Lake Shore Dr & North Blvd', 'count': 37698, 'latitude': 41.897448, 'longitude': -87.628722}
{'_id': 'Michigan Ave & Oak St', 'count': 37208, 'latitude': 41.869265218438194, 'longitude': -87.67373085021973}
{'_id': 'Wells St & Concord Ln', 'count': 34508, 'latitude': 41.897448, 'longitude': -87.628722}
{'_id': 'Millennium Park', 'count': 32849, 'latitude': 41.8810317, 'longitude': -87.62408432}
{'_id': 'Clark St & Elm St', 'count': 32560, 'latitude': 41.920771, 'longitude': -87.663712}
{'_id': 'Kingsbury St & Kinzie St', 'count': 31614, 'latitude': 41.893992, 'longitude': -87.629318}
{'_id': 'Theater on the Lake', 'count': 31283, 'latitude': 41.926277, 'longitude': -87.630834}
{'_id': 'Wells St & Elm St', 'count': 28978, 'latitude': 41.893992, 'longitude': -87.6293

In [33]:
# Function to update (and create if doesn't exist) collections with ZIP codes based on matching coordinates
def update_station_zip_codes(collection_name, zip_codes_list):
    collection = db[collection_name]
    for lat, lon, zip_code in zip_codes_list:
        # Ensure the ZIP code is not None
        if zip_code:
            # Build the query to find the matching document by its coordinates
            query = {
                'latitude': lat,
                'longitude': lon
            }
            # Build the update statement to set the ZIP code
            update = {
                '$setOnInsert': query,  # Set the coordinates on insert
                '$set': {'zip_code': zip_code}  # Set the ZIP code on insert or update
            }
            # Update the document in the collection, upserting if it does not exist
            result = collection.update_one(query, update, upsert=False)
            if result.upserted_id is not None:
                print(f"Inserted document with ZIP code {zip_code} for coordinates ({lat}, {lon})")
            else:
                print(f"Updated document with ZIP code {zip_code} for coordinates ({lat}, {lon})")

# Now call the function with the new collection names
update_station_zip_codes('Top10StartStationsCopy', zip_codes_list)
update_station_zip_codes('Top10EndStationsCopy', zip_codes_list)

Updated document with ZIP code 60601 for coordinates (41.880958, -87.616743)
Updated document with ZIP code 60611 for coordinates (41.892278, -87.612043)
Updated document with ZIP code 60605 for coordinates (41.867888, -87.623041)
Updated document with ZIP code 60614 for coordinates (41.911722, -87.626804)
Updated document with ZIP code 60654 for coordinates (41.897448, -87.628722)
Updated document with ZIP code 60601 for coordinates (41.880958, -87.616743)
Updated document with ZIP code 60612 for coordinates (41.869265218438194, -87.67373085021973)
Updated document with ZIP code 60611 for coordinates (41.90096039, -87.62377664)
Updated document with ZIP code 60654 for coordinates (41.897448, -87.628722)
Updated document with ZIP code 60614 for coordinates (41.912133, -87.634656)
Updated document with ZIP code 60601 for coordinates (41.8810317, -87.62408432)
Updated document with ZIP code 60601 for coordinates (41.8810317, -87.62408432)
Updated document with ZIP code 60614 for coordina

In [34]:
# Check to see that end stations was copied correctly
collection_name = 'Top10EndStationsCopy'
collection = db[collection_name]
# The find() method without any parameters will return all documents in the collection
documents = collection.find()

# Print each document
for doc in documents:
    print(doc)

{'_id': 'Streeter Dr & Grand Ave', 'count': 72540, 'latitude': 41.892278, 'longitude': -87.612043, 'zip_code': '60611'}
{'_id': 'DuSable Lake Shore Dr & North Blvd', 'count': 40563, 'latitude': 41.911722, 'longitude': -87.626804, 'zip_code': '60614'}
{'_id': 'DuSable Lake Shore Dr & Monroe St', 'count': 38500, 'latitude': 41.880958, 'longitude': -87.616743, 'zip_code': '60601'}
{'_id': 'Michigan Ave & Oak St', 'count': 38279, 'latitude': 41.90096039, 'longitude': -87.62377664, 'zip_code': '60611'}
{'_id': 'Wells St & Concord Ln', 'count': 34688, 'latitude': 41.912133, 'longitude': -87.634656, 'zip_code': '60614'}
{'_id': 'Millennium Park', 'count': 33705, 'latitude': 41.8810317, 'longitude': -87.62408432, 'zip_code': '60601'}
{'_id': 'Clark St & Elm St', 'count': 32227, 'latitude': 41.902973, 'longitude': -87.63128, 'zip_code': '60610'}
{'_id': 'Theater on the Lake', 'count': 31672, 'latitude': 41.926277, 'longitude': -87.630834, 'zip_code': '60657'}
{'_id': 'Kingsbury St & Kinzie St',

In [35]:
# Check to see that end stations was copied correctly
collection_name = 'Top10StartStationsCopy'
collection = db[collection_name]
# The find() method without any parameters will return all documents in the collection
documents = collection.find()

# Print each document
for doc in documents:
    print(doc)

{'_id': 'Streeter Dr & Grand Ave', 'count': 71269, 'latitude': 41.880958, 'longitude': -87.616743, 'zip_code': '60601'}
{'_id': 'DuSable Lake Shore Dr & Monroe St', 'count': 39251, 'latitude': 41.867888, 'longitude': -87.623041, 'zip_code': '60605'}
{'_id': 'DuSable Lake Shore Dr & North Blvd', 'count': 37698, 'latitude': 41.897448, 'longitude': -87.628722, 'zip_code': '60654'}
{'_id': 'Michigan Ave & Oak St', 'count': 37208, 'latitude': 41.869265218438194, 'longitude': -87.67373085021973, 'zip_code': '60612'}
{'_id': 'Wells St & Concord Ln', 'count': 34508, 'latitude': 41.897448, 'longitude': -87.628722}
{'_id': 'Millennium Park', 'count': 32849, 'latitude': 41.8810317, 'longitude': -87.62408432, 'zip_code': '60601'}
{'_id': 'Clark St & Elm St', 'count': 32560, 'latitude': 41.920771, 'longitude': -87.663712, 'zip_code': '60614'}
{'_id': 'Kingsbury St & Kinzie St', 'count': 31614, 'latitude': 41.893992, 'longitude': -87.629318, 'zip_code': '60654'}
{'_id': 'Theater on the Lake', 'count

In [36]:
# Check to make sure the zip codes were put into the collections correctly
# Collections to check
collections_to_check = ['Top10StartStationsCopy', 'Top10EndStationsCopy']

for collection_name in collections_to_check:
    collection = db[collection_name]
    # Fetch all documents in the collection
    documents = collection.find({})

    # Iterate through the documents and check the 'zip_code' field
    for doc in documents:
        if 'zip_code' in doc and doc['zip_code'] is not None:
            print(f"Document ID {doc['_id']} has ZIP code: {doc['zip_code']}")
        else:
            print(f"Document ID {doc['_id']} does not have a ZIP code or it's set to None.")

Document ID Streeter Dr & Grand Ave has ZIP code: 60601
Document ID DuSable Lake Shore Dr & Monroe St has ZIP code: 60605
Document ID DuSable Lake Shore Dr & North Blvd has ZIP code: 60654
Document ID Michigan Ave & Oak St has ZIP code: 60612
Document ID Wells St & Concord Ln does not have a ZIP code or it's set to None.
Document ID Millennium Park has ZIP code: 60601
Document ID Clark St & Elm St has ZIP code: 60614
Document ID Kingsbury St & Kinzie St has ZIP code: 60654
Document ID Theater on the Lake has ZIP code: 60657
Document ID Wells St & Elm St does not have a ZIP code or it's set to None.
Document ID Streeter Dr & Grand Ave has ZIP code: 60611
Document ID DuSable Lake Shore Dr & North Blvd has ZIP code: 60614
Document ID DuSable Lake Shore Dr & Monroe St has ZIP code: 60601
Document ID Michigan Ave & Oak St has ZIP code: 60611
Document ID Wells St & Concord Ln has ZIP code: 60614
Document ID Millennium Park has ZIP code: 60601
Document ID Clark St & Elm St has ZIP code: 60610

In [37]:
# Find the population for each zip code, using the American Community Survey (ACS) API.  We are using the ACS 5-year estimates as the 1 year estimates only cover areas with populations of 65,000 people or more

# List of ZIP codes from zip_code_list tuple
zip_codes = [tup[2] for tup in zip_codes_list if len(tup)>2]

# Your API key for the U.S. Census Bureau
api_key = api_key_census

# The base URL for the ACS5 5-Year Estimates API
base_url = 'https://api.census.gov/data/2019/acs/acs5'

# The variable code for total population
population_variable = 'B01003_001E'

# Dictionary to store the population data
population_data = {}

# Function to retrieve population by ZIP code
def get_population_by_zip(zip_code):
    parameters = {
        'get': population_variable,
        'for': f'zip code tabulation area:{zip_code}',
        'in': 'state:17',  # Adding the state code for Illinois
        'key': api_key
    }
    try:
        response = requests.get(base_url, params=parameters)
        response.raise_for_status()  # This will raise an HTTPError if the HTTP request returned an unsuccessful status code
        data = response.json()
        # Assuming the first element is headers, the second is data
        return int(data[1][0])
    except requests.exceptions.HTTPError as errh:
        print(f"HTTP Error for ZIP code {zip_code}: {errh}")
    except requests.exceptions.ConnectionError as errc:
        print(f"Error Connecting for ZIP code {zip_code}: {errc}")
    except requests.exceptions.Timeout as errt:
        print(f"Timeout Error for ZIP code {zip_code}: {errt}")
    except requests.exceptions.RequestException as err:
        print(f"Error for ZIP code {zip_code}: {err}")
    return None

# Retrieve the population for each ZIP code and store in the dictionary
for zip_code in zip_codes:
    population_data[zip_code] = get_population_by_zip(zip_code)

# Print out the population data
for zip_code, population in population_data.items():
    if population is not None:
        print(f"The population for ZIP code {zip_code} is {population}")
    else:
        print(f"Population data not available for ZIP code {zip_code}.")

The population for ZIP code 60601 is 15083
The population for ZIP code 60611 is 33224
The population for ZIP code 60605 is 29060
The population for ZIP code 60614 is 71954
The population for ZIP code 60654 is 20022
The population for ZIP code 60612 is 33735
The population for ZIP code 60610 is 40548
The population for ZIP code 60657 is 70958


In [38]:
# Add population data back with Top 10 Start Stations and Top 10 End Stations collections

# Connect to collections
start_stations_collection = db.Top10StartStationsCopy
end_stations_collection = db.Top10EndStationsCopy

# Retrieve the population data for each ZIP code from the census API
def get_population_data(api_key_census, zip_codes):
    population_data = {}
    for zip_code in zip_codes:
        # Construct the API request URL
        response = requests.get(
            f"https://api.census.gov/data/2019/acs/acs5",
            params={
                'get': 'B01003_001E',
                'for': f'zip code tabulation area:{zip_code}',
                'in': 'state:17',  # Illinois state code
                'key': api_key_census
            }
        )
        if response.status_code == 200:
            data = response.json()
            # Extract the population count and update the population_data dictionary
            population_data[zip_code] = int(data[1][0])
        else:
            print(f"Failed to retrieve data for ZIP code {zip_code}: {response.text}")
    
    return population_data

# Function to update the station documents with population data
def update_stations_with_population(collection, population_data):
    for zip_code, population in population_data.items():
        # Update documents in the collection with the matching ZIP code
        result = collection.update_many(
            {'zip_code': zip_code},
            {'$set': {'population': population}}
        )
        print(f"Updated {result.modified_count} documents in collection '{collection.name}' with ZIP code {zip_code} to population {population}")

# Retrieve a list of unique ZIP codes from both collections, safely checking for the 'zip_code' field
relevant_zip_codes = []
for collection in [start_stations_collection, end_stations_collection]:
    for doc in collection.find():
        zip_code = doc.get('zip_code')
        if zip_code:
            relevant_zip_codes.append(zip_code)
relevant_zip_codes = list(set(relevant_zip_codes))  # Remove duplicates

# Retrieve population data for the relevant ZIP codes
population_data = get_population_data(api_key_census, relevant_zip_codes)

# Update the collections with the population data
update_stations_with_population(start_stations_collection, population_data)
update_stations_with_population(end_stations_collection, population_data)

Updated 1 documents in collection 'Top10StartStationsCopy' with ZIP code 60605 to population 29060
Updated 2 documents in collection 'Top10StartStationsCopy' with ZIP code 60601 to population 15083
Updated 1 documents in collection 'Top10StartStationsCopy' with ZIP code 60612 to population 33735
Updated 0 documents in collection 'Top10StartStationsCopy' with ZIP code 60611 to population 33224
Updated 2 documents in collection 'Top10StartStationsCopy' with ZIP code 60654 to population 20022
Updated 1 documents in collection 'Top10StartStationsCopy' with ZIP code 60657 to population 70958
Updated 1 documents in collection 'Top10StartStationsCopy' with ZIP code 60614 to population 71954
Updated 0 documents in collection 'Top10StartStationsCopy' with ZIP code 60610 to population 40548
Updated 0 documents in collection 'Top10EndStationsCopy' with ZIP code 60605 to population 29060
Updated 2 documents in collection 'Top10EndStationsCopy' with ZIP code 60601 to population 15083
Updated 0 docu

In [39]:
# Check to make sure the collections were copied correctly
collection_name = 'Top10StartStationsCopy'
collection = db[collection_name]
# The find() method without any parameters will return all documents in the collection
documents = collection.find()

# Print each document
for doc in documents:
    print(doc)

{'_id': 'Streeter Dr & Grand Ave', 'count': 71269, 'latitude': 41.880958, 'longitude': -87.616743, 'zip_code': '60601', 'population': 15083}
{'_id': 'DuSable Lake Shore Dr & Monroe St', 'count': 39251, 'latitude': 41.867888, 'longitude': -87.623041, 'zip_code': '60605', 'population': 29060}
{'_id': 'DuSable Lake Shore Dr & North Blvd', 'count': 37698, 'latitude': 41.897448, 'longitude': -87.628722, 'zip_code': '60654', 'population': 20022}
{'_id': 'Michigan Ave & Oak St', 'count': 37208, 'latitude': 41.869265218438194, 'longitude': -87.67373085021973, 'zip_code': '60612', 'population': 33735}
{'_id': 'Wells St & Concord Ln', 'count': 34508, 'latitude': 41.897448, 'longitude': -87.628722}
{'_id': 'Millennium Park', 'count': 32849, 'latitude': 41.8810317, 'longitude': -87.62408432, 'zip_code': '60601', 'population': 15083}
{'_id': 'Clark St & Elm St', 'count': 32560, 'latitude': 41.920771, 'longitude': -87.663712, 'zip_code': '60614', 'population': 71954}
{'_id': 'Kingsbury St & Kinzie S

In [40]:
# Check to make sure the collections were copied correctly
collection_name = 'Top10EndStationsCopy'
collection = db[collection_name]
# The find() method without any parameters will return all documents in the collection
documents = collection.find()

# Print each document
for doc in documents:
    print(doc)

{'_id': 'Streeter Dr & Grand Ave', 'count': 72540, 'latitude': 41.892278, 'longitude': -87.612043, 'zip_code': '60611', 'population': 33224}
{'_id': 'DuSable Lake Shore Dr & North Blvd', 'count': 40563, 'latitude': 41.911722, 'longitude': -87.626804, 'zip_code': '60614', 'population': 71954}
{'_id': 'DuSable Lake Shore Dr & Monroe St', 'count': 38500, 'latitude': 41.880958, 'longitude': -87.616743, 'zip_code': '60601', 'population': 15083}
{'_id': 'Michigan Ave & Oak St', 'count': 38279, 'latitude': 41.90096039, 'longitude': -87.62377664, 'zip_code': '60611', 'population': 33224}
{'_id': 'Wells St & Concord Ln', 'count': 34688, 'latitude': 41.912133, 'longitude': -87.634656, 'zip_code': '60614', 'population': 71954}
{'_id': 'Millennium Park', 'count': 33705, 'latitude': 41.8810317, 'longitude': -87.62408432, 'zip_code': '60601', 'population': 15083}
{'_id': 'Clark St & Elm St', 'count': 32227, 'latitude': 41.902973, 'longitude': -87.63128, 'zip_code': '60610', 'population': 40548}
{'_i

# Begin analysis of data

In [57]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from pymongo import MongoClient

# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')
db2 = client['chicago_bikes_copy']  # Replace 'your_database' with your actual database name
collection = db2['Top10StartStationsCopy']  # Replace 'your_collection' with your actual collection name

# Retrieve data from MongoDB collection
cursor = collection.find({}, {'_id': 0, 'ZIP_code': 1, 'population': 1, 'count': 1})
mongo_data = list(cursor)

# Convert MongoDB data to DataFrame
df_pandas = pd.DataFrame(mongo_data)

# Calculate Pearson correlation coefficient and p-value
correlation_coefficient, p_value = pearsonr(df_pandas['population'], df_pandas['count'])

# Visualize the relationship with a scatter plot
plt.scatter(df_pandas['population'], df_pandas['count'])
plt.title(f'Population vs. Bike Usage\nCorrelation: {correlation_coefficient:.2f}, p-value: {p_value:.4f}')
plt.xlabel('Population')
plt.ylabel('Bike Usage')
plt.show()

KeyError: 'population'

In [19]:
# Close the MongoDB connection
mongo.close()