Final queries for visualizations

In [113]:
import pymongo
from pymongo import MongoClient, UpdateOne
import json

In [114]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [115]:
# Confirm list of databases
print(mongo.list_database_names())

['admin', 'chicago_bikes', 'classDB', 'config', 'fruits_db', 'gardenDB', 'local', 'met', 'travel_db', 'uk_food', 'withStationName']


In [116]:
# Assign the database to a variable name
db = mongo.chicago_bikes

In [117]:
# Review collections in database
print(db.list_collection_names())

['divvy_rides_by_season', 'withLatLong', 'withoutStationName', 'Top10Routes', 'Top10EndStations', 'weather_daily', 'Top10StartStations', 'withStationName', 'divvy_ridedata_merged', 'withDistinctStations', 'divvy_ridedata', 'RouteDistance', 'distinct_station_names', 'divvy_rides_by_month']


In [102]:
# Assign collections to variables 
divvy_rides = db['divvy_ridedata']
weather_daily = db['weather_daily']
divvy_ridedata_merged = db['divvy_ridedata_merged']
withoutStation = db['withoutStationName']
withStation = db['withStationName']
withLatLong = db['withLatLong']

In [53]:
# Count the number of documents in the collections 
print(divvy_rides.count_documents({}))
print(weather_daily.count_documents({}))
print(divvy_ridedata_merged.count_documents({}))
print(withoutStation.count_documents({}))
print(withStation.count_documents({}))
print(withLatLong.count_documents({}))

5667717
365
5667717
427449
4369360
5661859


In [57]:
# Review a document in each collection 
print(db.divvy_ridedata.find_one())
print(db.weather_daily.find_one())
print(db.divvy_ridedata_merged.find_one())
print(db.withoutStationName.find_one())
print(db.withStationName.find_one())
print(db.withLatLong.find_one())

{'_id': ObjectId('64e4163d948aee61d19bb3bc'), 'ride_id': 'A6CF8980A652D272', 'rideable_type': 'electric_bike', 'started_at': '2022-01-10 08:41:56', 'ended_at': '2022-01-10 08:46:17', 'start_station_name': 'Glenwood Ave & Touhy Ave', 'start_station_id': 525, 'end_station_name': 'Clark St & Touhy Ave', 'end_station_id': 'RP-007', 'start_lat': 42.012763, 'start_lng': -87.6659675, 'end_lat': 42.01256011541, 'end_lng': -87.6743671152, 'member_casual': 'casual', 'started_at_date': '2022-01-10', 'started_at_time': '08:41:56', 'ended_at_date': '2022-01-10', 'ended_at_time': '08:46:17', 'weather_data': {'_id': ObjectId('64e434c9b6c5bdee4f41aa4e'), 'date': '2022-01-10', 'cloud_cover': 17, 'precipitation': 0.24, 'min_temp': 9.5, 'max_temp': 20.84, 'morning_temp': 13.69, 'afternoon_temp': 9.73, 'evening_temp': 14, 'night_temp': 20.75, 'max_windspeed': 16.35}, 'cloud_cover': 17, 'precipitation': 0.24, 'min_temp': 9.5, 'max_temp': 20.84, 'morning_temp': 13.69, 'afternoon_temp': 9.73, 'evening_temp':

In [15]:
# Use aggregation pipeline to find top ten start stations
pipeline = [
    {
        "$group": {
            "_id": "$start_station_name",
            "count": {"$sum": 1},
            "latitude": {"$first": "$end_lat"},
            "longitude": {"$first": "$end_lng"}
        }
    },
    {
        "$sort": {"count": -1}
    },
    {
        "$limit": 10
    },
    {   "$out": "Top10StartStations"
}
]
# Perform the aggregation
result = list(withStation.aggregate(pipeline))

# Assign results to variable 
Top10StartStations = db['Top10StartStations']

In [37]:
# Count the number of documents in the collection 
print(Top10StartStations.count_documents({}))

10


In [38]:
# Review a document in the collection 
print(db.Top10StartStations.find_one())

{'_id': 'Streeter Dr & Grand Ave', 'count': 71269, 'latitude': 41.880958, 'longitude': -87.616743}


In [18]:
# Use aggregation pipeline to find top ten end stations
pipeline = [
    {
        "$group": {
            "_id": "$end_station_name",
            "count": {"$sum": 1},
            "latitude": {"$first": "$end_lat"},
            "longitude": {"$first": "$end_lng"}
        }
    },
    {
        "$sort": {"count": -1}
    },
    {
        "$limit": 10
    },
    {   "$out": "Top10EndStations"
}
]
# Perform the aggregation
result = list(withStation.aggregate(pipeline))

# Assign to a variable
Top10EndStations = db['Top10EndStations']

In [39]:
# Count the number of documents in the collection 
print(Top10EndStations.count_documents({}))

10


In [40]:
# Review a document in the collection 
print(db.Top10EndStations.find_one())

{'_id': 'Streeter Dr & Grand Ave', 'count': 72540, 'latitude': 41.892278, 'longitude': -87.612043}


In [42]:
# Create a pipeline query to find the top ten bike routes (by start and end station)
pipeline = [
    {
        "$group": {
            "_id": { "Start Station": "$start_station_name", "End Station": "$end_station_name"},
            "count": {"$sum": 1},
            "start latitude": {"$first": "$start_lat"},
            "start longitude": {"$first": "$start_lng"},
            "end latitude": {"$first": "$end_lat"},
            "end longitude": {"$first": "$end_lng"}
        }
    },
    {"$sort": {"count": -1}
},
    {
        "$limit": 10
},
    {   "$out": "Top10Routes"
}
]
# Perform the aggregation
result = list(withStation.aggregate(pipeline))

# Assign to a variable
Top10Routes = db['Top10Routes']

In [43]:
# Count the number of documents in the collection 
print(Top10Routes.count_documents({}))

10


In [44]:
# Review a document in the collection 
print(db.Top10Routes.find_one())

{'_id': {'Start Station': 'Streeter Dr & Grand Ave', 'End Station': 'Streeter Dr & Grand Ave'}, 'count': 12202, 'latitude': 41.892278, 'longitude': -87.612043}


In [24]:
# Create a pipeline query to find docouments that have lat/long  
pipeline = [
    {
        "$match": {
            "$and": [
                { "start_lat": { "$ne": "" } },
                { "start_lng": { "$ne": "" } },
                { "end_lat": { "$ne": "" } },
                { "end_lng": { "$ne": "" } }
            ]
        }
    }, 
    {"$out": "withLatLong"}
]

# Perform the aggregation
result = list(divvy_ridedata_merged.aggregate(pipeline))

In [25]:
# Create a pipeline query to find distance of each route in descending order by length  
pipeline = [
    {
        "$addFields": {
            "start_lat": { "$toDouble": "$start_lat" },
            "start_lng": { "$toDouble": "$start_lng" },
            "end_lat": { "$toDouble": "$end_lat" },
            "end_lng": { "$toDouble": "$end_lng" }
        }
    },
    {
        "$addFields": {
            "distance": {
                "$sqrt": {
                    "$add": [
                        {
                            "$pow": [
                                { "$subtract": ["$end_lat", "$start_lat"] },
                                2
                            ]
                        },
                        {
                            "$pow": [
                                {
                                    "$multiply": [
                                        { "$subtract": ["$end_lng", "$start_lng"] },
                                        { "$cos": { "$avg": ["$start_lat", "$end_lat"] } }
                                    ]
                                },
                                2
                            ]
                        }
                    ]
                }
            }
        }
    },
    {
        "$sort": {"distance": -1}
    },
    {"$out": "RouteDistance"}
]

# Perform the aggregation
result = list(withLatLong.aggregate(pipeline))

# Assign to a variable
RouteDistance = db['RouteDistance']

In [26]:
# Count the number of documents in the collection 
print(RouteDistance.count_documents({}))

5661859


In [27]:
# Find the first 10 documents
documents = RouteDistance.find().sort("distance", -1).limit(10)

# Print the documents
for doc in documents:
    print(doc)

{'_id': ObjectId('64e416a13e678b4792669420'), 'afternoon_temp': 48.54, 'cloud_cover': 75, 'end_lat': 0.0, 'end_lng': 0.0, 'end_station_id': 'chargingstx07', 'end_station_name': 'Green St & Madison Ave*', 'ended_at': '2022-11-09 12:26:18', 'ended_at_date': '2022-11-09', 'ended_at_time': '12:26:18', 'evening_temp': 51.37, 'max_temp': 60.37, 'max_windspeed': 13.8, 'member_casual': 'member', 'min_temp': 47.35, 'morning_temp': 51.44, 'night_temp': 51.82, 'precipitation': 0, 'ride_id': 'E9495F1DC3475D41', 'rideable_type': 'classic_bike', 'start_lat': 41.884114, 'start_lng': -87.654264, 'start_station_id': 18062, 'start_station_name': 'Aberdeen St & Randolph St', 'started_at': '2022-11-09 12:21:55', 'started_at_date': '2022-11-09', 'started_at_time': '12:21:55', 'weather_data': {'_id': ObjectId('64e434c9b6c5bdee4f41ab80'), 'date': '2022-11-09', 'cloud_cover': 75, 'precipitation': 0, 'min_temp': 47.35, 'max_temp': 60.37, 'morning_temp': 51.44, 'afternoon_temp': 48.54, 'evening_temp': 51.37, 'n

In [148]:
db["divvy_rides_by_month"].drop()

In [149]:
# Define the aggregation pipeline to pull rides by month 
pipeline = [
    {
        "$group": {
            "_id": {
                "year": {"$year": {"$toDate": "$started_at"}},
                "month": {"$month": {"$toDate": "$started_at"}}
            },
            "total_rides": {"$sum": 1}
        }
    },
    {
        "$project": {
            "_id": 0,
            "year": "$_id.year",
            "month": "$_id.month",
            "total_rides": 1
        }
    },
    {
        "$sort": {"year": 1, "month": 1}
    }
]

# Execute the aggregation pipeline and write to a new collection
divvy_rides_by_month = db["divvy_rides_by_month"]
divvy_rides_by_month.drop()  # Drop the collection
aggregated_result = divvy_ridedata_merged.aggregate(pipeline, allowDiskUse=True, collation=None)

for doc in aggregated_result:
    divvy_rides_by_month.insert_one(doc)

print("Aggregation result has been written to the new collection.")

Aggregation result has been written to the new collection.


In [29]:
# Define the aggregation pipeline to define divvy rides by season 
pipeline = [
    {
        "$group": {
            "_id": {
                "year": "$year",
                "season": {
                    "$switch": {
                        "branches": [
                            {"case": {"$in": ["$month", [3, 4, 5]]}, "then": "Spring"},
                            {"case": {"$in": ["$month", [6, 7, 8]]}, "then": "Summer"},
                            {"case": {"$in": ["$month", [9, 10, 11]]}, "then": "Autumn"},
                            {"case": {"$in": ["$month", [12, 1, 2]]}, "then": "Winter"}
                        ],
                        "default": "Unknown"
                    }
                }
            },
            "total_rides": {"$sum": "$total_rides"}
        }
    },
    {
        "$sort": {"_id.year": 1, "_id.season": 1}
    }
]

# Execute the aggregation pipeline
divvy_rides_by_season = db["divvy_rides_by_season"]
divvy_rides_by_season.drop()  # Drop the collection
aggregated_result = list(divvy_rides_by_month.aggregate(pipeline, allowDiskUse=True, collation=None))

# Insert the aggregated documents into the new collection
for doc in aggregated_result:
    print("Inserting document:", doc)
    divvy_rides_by_season.insert_one(doc)

print("Aggregation by season result has been written to the new collection.")

Inserting document: {'_id': {'year': 2022, 'season': 'Autumn'}, 'total_rides': 1597759}
Inserting document: {'_id': {'year': 2022, 'season': 'Spring'}, 'total_rides': 1290149}
Inserting document: {'_id': {'year': 2022, 'season': 'Summer'}, 'total_rides': 2378624}
Inserting document: {'_id': {'year': 2022, 'season': 'Winter'}, 'total_rides': 401185}
Aggregation by season result has been written to the new collection.


In [89]:
distinct_station_collection.find_one()


{'_id': ObjectId('64eabe40edcee4fd24d89160'),
 'start_station_name': 'Panama Ave & Grace St'}

In [150]:
print(db.list_collection_names())

['divvy_rides_by_season', 'distinct_station_names', 'withLatLong', 'withoutStationName', 'Top10Routes', 'Top10EndStations', 'divvy_rides_by_month', 'weather_daily', 'Top10StartStations', 'withStationName', 'divvy_ridedata_merged', 'divvy_ridedata', 'RouteDistance']


In [146]:
from bson import ObjectId

# Get distinct station names along with start_lat and start_lng
distinct_station_data = db["withStationName"].aggregate([
    {
        "$group": {
            "_id": "$start_station_name",
            "start_lat": {"$first": "$start_lat"},
            "start_lng": {"$first": "$start_lng"}
        }
    }
])

collection_name = "distinct_station_names"
station_names = db[collection_name]

station_name_documents = [{"start_station_name": name} for name in distinct_station_names]

station_name_documents = []
for data in distinct_station_data:
    station_name_documents.append({
        "start_station_name": data["_id"],
        "start_lat": data["start_lat"],
        "start_lng": data["start_lng"],
        "_id": str(ObjectId())
    })

station_names.insert_many(station_name_documents)

print(f"{len(station_name_documents)} distinct station names imported into '{collection_name}' collection.")

1556 distinct station names imported into 'distinct_station_names' collection.


In [147]:
station_names.find_one()


{'_id': '64eb6ac9edcee4fd24d8d441',
 'start_station_name': 'Eckhart Park',
 'start_lat': 41.89637337,
 'start_lng': -87.66098386}

In [212]:
# Define the aggregation pipeline to pull rides by month 

# Use aggregation pipeline to create a collection that contains start and end station names
pipeline = [
         {"$match": {"weather_data.sig_prcp": {"$exists": True, "$eq": "yes"},
                     }},
         {"$out": "sig_prcp_yes"}
         
]
# Perform the aggregation
result = list(divvy_ridedata_merged.aggregate(pipeline))
# Assign to a variable
sig_prcp_yes = db["sig_prcp_yes"]


In [169]:
# Define the aggregation pipeline to pull rides by month 

# Use aggregation pipeline to create a collection that contains start and end station names
pipeline = [
         {"$match": {"weather_data.sig_prcp": {"$exists": True, "$eq": "no"},
                     }},
         {"$out": "sig_prcp_no"}
         
]
# Perform the aggregation
result = list(divvy_ridedata_merged.aggregate(pipeline))
# Assign to a variable
sig_prcp_no = db["sig_prcp_no"]

In [242]:
# Define the aggregation pipeline to pull rides by month 

pipeline = [
    {
        "$group": {
            "_id": {
                "year": {"$year": {"$toDate": "$started_at"}},
                "month": {"$month": {"$toDate": "$started_at"}}
            },
            "total_rides": {"$sum": 1}
        }
    },
    {
        "$project": {
            "_id": 0,
            "year": "$_id.year",
            "month": "$_id.month",
            "total_rides": 1
        }
    },
    {
        "$sort": {"year": 1, "month": 1}
    }
]

# Execute the aggregation pipeline and write to a new collection
sig_prcp_yes_month = db["sig_prcp_yes_month"]
#divvy_rides_by_month = db["divvy_rides_by_month"]
sig_prcp_yes_month.drop()  # Drop the collection
aggregated_result = sig_prcp_yes.aggregate(pipeline, allowDiskUse=True, collation=None)

for doc in aggregated_result:
    sig_prcp_yes_month.insert_one(doc)

print("Aggregation result has been written to the new collection.")

# Function to insert number of days and sig_prcp count for each month
def update_num_days_and_sig_prcp_count(year, month):
    _, num_days = monthrange(year, month)
    query = {'year': year, 'month': month}
    update_query = {'$set': {'num_days': num_days}}
    sig_prcp_yes_month.update_one(query, update_query)

    # Count the number of days with sig_prcp = 'no' for the given month
# Count the number of days with sig_prcp = 'no' for the given month
    sig_prcp_count = weather_daily.count_documents({
        'date': {'$regex': f'^{year:04d}-{month:02d}'},  # Match the year and month in the date field
        'sig_prcp': 'yes'
    })
    print("Debug: year =", year, "month =", month, "sig_prcp_count =", sig_prcp_count)
    update_query = {'$set': {'sig_prcp_count': sig_prcp_count}}
    sig_prcp_yes_month.update_one(query, update_query)

    # Calculate average rides per day
    total_rides = sig_prcp_yes_month.find_one(query)['total_rides']
    sig_prcp_yes_month.update_one(query, update_query)

    # Calculate and update average rides per day with no significant precipitation
    query = {'year': 2022, 'month': month}
    document = sig_prcp_yes_month.find_one(query)
    
    if document['sig_prcp_count'] > 0:  # To avoid division by zero
        average_rides_per_day = document['total_rides'] / document['sig_prcp_count']
        update_query = {'$set': {'average_rides_per_day': average_rides_per_day}}
        sig_prcp_yes_month.update_one(query, update_query)

# Loop through each month in the year 2022
for month in range(1, 13):
    update_num_days_and_sig_prcp_count(2022, month)

print("Num days for 2022 inserted successfully!")



Aggregation result has been written to the new collection.
Debug: year = 2022 month = 1 sig_prcp_count = 5
Debug: year = 2022 month = 2 sig_prcp_count = 8
Debug: year = 2022 month = 3 sig_prcp_count = 9
Debug: year = 2022 month = 4 sig_prcp_count = 11
Debug: year = 2022 month = 5 sig_prcp_count = 4
Debug: year = 2022 month = 6 sig_prcp_count = 3
Debug: year = 2022 month = 7 sig_prcp_count = 6
Debug: year = 2022 month = 8 sig_prcp_count = 4
Debug: year = 2022 month = 9 sig_prcp_count = 2
Debug: year = 2022 month = 10 sig_prcp_count = 5
Debug: year = 2022 month = 11 sig_prcp_count = 5
Debug: year = 2022 month = 12 sig_prcp_count = 6
Num days for 2022 inserted successfully!


In [266]:
# Define the aggregation pipeline to pull rides by month 

pipeline = [
    {
        "$group": {
            "_id": {
                "year": {"$year": {"$toDate": "$started_at"}},
                "month": {"$month": {"$toDate": "$started_at"}}
            },
            "total_rides": {"$sum": 1}
        }
    },
    {
        "$project": {
            "_id": 0,
            "year": "$_id.year",
            "month": "$_id.month",
            "total_rides": 1
        }
    },
    {
        "$sort": {"year": 1, "month": 1}
    }
]

# Execute the aggregation pipeline and write to a new collection
sig_prcp_no_month = db["sig_prcp_no_month"]
#divvy_rides_by_month = db["divvy_rides_by_month"]
sig_prcp_no_month.drop()  # Drop the collection
aggregated_result = sig_prcp_no.aggregate(pipeline, allowDiskUse=True, collation=None)

for doc in aggregated_result:
    sig_prcp_no_month.insert_one(doc)

print("Aggregation result has been written to the new collection.")

def update_num_days_and_sig_prcp_count(year, month):
    _, num_days = monthrange(year, month)
    query = {'year': year, 'month': month}
    update_query = {'$set': {'num_days': num_days}}
    sig_prcp_no_month.update_one(query, update_query)

    # Count the number of days with sig_prcp = 'no' for the given month
    sig_prcp_count = weather_daily.count_documents({
        'date': {'$regex': f'^{year:04d}-{month:02d}'},
        'sig_prcp': 'no'
    })
    print("Debug: year =", year, "month =", month, "sig_prcp_count =", sig_prcp_count)
    update_query = {'$set': {'sig_prcp_count': sig_prcp_count}}
    sig_prcp_no_month.update_one(query, update_query)

    # Calculate average rides per day
    document = sig_prcp_no_month.find_one(query)
    
    if document['sig_prcp_count'] > 0:
        total_rides = document['total_rides']
        average_rides_per_day = total_rides / document['sig_prcp_count']
        update_query = {'$set': {'average_rides_per_day': average_rides_per_day}}
        sig_prcp_no_month.update_one(query, update_query)

# Loop through each month in the year 2022
for month in range(1, 13):
    update_num_days_and_sig_prcp_count(2022, month)
print("Num days for 2022 inserted successfully!")


Aggregation result has been written to the new collection.
Debug: year = 2022 month = 1 sig_prcp_count = 26
Debug: year = 2022 month = 2 sig_prcp_count = 20
Debug: year = 2022 month = 3 sig_prcp_count = 22
Debug: year = 2022 month = 4 sig_prcp_count = 19
Debug: year = 2022 month = 5 sig_prcp_count = 27
Debug: year = 2022 month = 6 sig_prcp_count = 27
Debug: year = 2022 month = 7 sig_prcp_count = 25
Debug: year = 2022 month = 8 sig_prcp_count = 27
Debug: year = 2022 month = 9 sig_prcp_count = 28
Debug: year = 2022 month = 10 sig_prcp_count = 26
Debug: year = 2022 month = 11 sig_prcp_count = 25
Debug: year = 2022 month = 12 sig_prcp_count = 25
Num days for 2022 inserted successfully!


In [275]:
sig_prcp_no_month.find_one()

{'_id': ObjectId('64eb9f63edcee4fd24d8dbb1'), 'total_rides': 94462, 'year': 2022, 'month': 1, 'num_days': 31, 'sig_prcp_count': 26, 'average_rides_per_day': 3633.153846153846}

In [279]:
# Create a new collection to store documents with string _id
sig_prcp_no_month_string.drop()
sig_prcp_no_month_string = db["sig_prcp_no_month_with_string_id"]

# Iterate through the documents in the original collection
for document in sig_prcp_no_month.find({}):
    document_id = document['_id']
    string_id = str(document_id)
    
    # Create a new document with the string _id and other fields
    new_document = {
        '_id': string_id,
        'year': document['year'],
        'month': document['month'],
        'total_rides': document['total_rides'],
        'num_days': document['num_days'],
        'sig_prcp_count': document['sig_prcp_count'],
        'average_rides_per_day': document['average_rides_per_day']
        # Include other fields from the original document
    }
    
    # Insert the new document into the new collection
    sig_prcp_no_month_string.insert_one(new_document)

print("Documents with string _id inserted into the new collection.")

Documents with string _id inserted into the new collection.


In [285]:
# Create a new collection to store documents with string _id
sig_prcp_yes_month_string.drop()
sig_prcp_yes_month_string = db["sig_prcp_yes_month_with_string_id"]

# Iterate through the documents in the original collection
for document in sig_prcp_yes_month.find({}):
    document_id = document['_id']
    string_id = str(document_id)
    
    # Create a new document with the string _id and other fields
    new_document = {
        '_id': string_id,
        'year': document['year'],
        'month': document['month'],
        'total_rides': document['total_rides'],
        'num_days': document['num_days'],
        'sig_prcp_count': document['sig_prcp_count'],
        'average_rides_per_day': document['average_rides_per_day']
        # Include other fields from the original document
    }
    
    # Insert the new document into the new collection
    sig_prcp_yes_month_string.insert_one(new_document)

print("Documents with string _id inserted into the new collection.")

Documents with string _id inserted into the new collection.


In [286]:
# Retrieve all documents from the collection
all_documents = sig_prcp_yes_month_string.find({})

# Iterate through the documents and print them
for doc in all_documents:
    print(doc)

{'_id': '64eb9a3cedcee4fd24d8db45', 'year': 2022, 'month': 1, 'total_rides': 9308, 'num_days': 31, 'sig_prcp_count': 5, 'average_rides_per_day': 1861.6}
{'_id': '64eb9a3cedcee4fd24d8db46', 'year': 2022, 'month': 2, 'total_rides': 26036, 'num_days': 28, 'sig_prcp_count': 8, 'average_rides_per_day': 3254.5}
{'_id': '64eb9a3cedcee4fd24d8db47', 'year': 2022, 'month': 3, 'total_rides': 71873, 'num_days': 31, 'sig_prcp_count': 9, 'average_rides_per_day': 7985.888888888889}
{'_id': '64eb9a3cedcee4fd24d8db48', 'year': 2022, 'month': 4, 'total_rides': 147085, 'num_days': 30, 'sig_prcp_count': 11, 'average_rides_per_day': 13371.363636363636}
{'_id': '64eb9a3cedcee4fd24d8db49', 'year': 2022, 'month': 5, 'total_rides': 49842, 'num_days': 31, 'sig_prcp_count': 4, 'average_rides_per_day': 12460.5}
{'_id': '64eb9a3cedcee4fd24d8db4a', 'year': 2022, 'month': 6, 'total_rides': 58888, 'num_days': 30, 'sig_prcp_count': 3, 'average_rides_per_day': 19629.333333333332}
{'_id': '64eb9a3cedcee4fd24d8db4b', 'ye

In [287]:
# Retrieve all documents from the collection
all_documents = sig_prcp_no_month.find({})

# Iterate through the documents and print them
for doc in all_documents:
    print(doc)

{'_id': ObjectId('64eb9f63edcee4fd24d8dbb1'), 'total_rides': 94462, 'year': 2022, 'month': 1, 'num_days': 31, 'sig_prcp_count': 26, 'average_rides_per_day': 3633.153846153846}
{'_id': ObjectId('64eb9f63edcee4fd24d8dbb2'), 'total_rides': 89573, 'year': 2022, 'month': 2, 'num_days': 28, 'sig_prcp_count': 20, 'average_rides_per_day': 4478.65}
{'_id': ObjectId('64eb9f63edcee4fd24d8dbb3'), 'total_rides': 212169, 'year': 2022, 'month': 3, 'num_days': 31, 'sig_prcp_count': 22, 'average_rides_per_day': 9644.045454545454}
{'_id': ObjectId('64eb9f63edcee4fd24d8dbb4'), 'total_rides': 224164, 'year': 2022, 'month': 4, 'num_days': 30, 'sig_prcp_count': 19, 'average_rides_per_day': 11798.105263157895}
{'_id': ObjectId('64eb9f63edcee4fd24d8dbb5'), 'total_rides': 585016, 'year': 2022, 'month': 5, 'num_days': 31, 'sig_prcp_count': 27, 'average_rides_per_day': 21667.25925925926}
{'_id': ObjectId('64eb9f63edcee4fd24d8dbb6'), 'total_rides': 710316, 'year': 2022, 'month': 6, 'num_days': 30, 'sig_prcp_count