In [1]:
import pymongo
from pymongo import MongoClient, UpdateOne
import json
from datetime import datetime
from collections import OrderedDict

In [2]:
# Open a client connection to MongoDB on port 27017 (host left at the driver default)
MONGO_PORT = 27017
mongo = MongoClient(port=MONGO_PORT)

In [3]:
# Handle to the `chicago_bikes` database used by every cell below
db = mongo["chicago_bikes"]

In [4]:
# Collection handles: ride data, daily weather, and the merged ride data
divvy_rides, weather_daily, divvy_ridedata_merged = (
    db[collection_name]
    for collection_name in ("divvy_ridedata", "weather_daily", "divvy_ridedata_merged")
)

In [5]:
# Source collection for the monthly roll-up
divvy_ridedata_merged = db["divvy_ridedata_merged"]

# Aggregation pipeline: count documents per (year, month) of `started_at`
pipeline = [
    {
        # Bucket by the year and month extracted from `started_at`;
        # $toDate converts the stored value to a date before the parts are read.
        "$group": {
            "_id": {
                "year": {"$year": {"$toDate": "$started_at"}},
                "month": {"$month": {"$toDate": "$started_at"}}
            },
            "total_rides": {"$sum": 1}
        }
    },
    {
        # Flatten the compound group key into top-level `year` / `month` fields
        # and drop the group `_id` so MongoDB assigns a fresh ObjectId on insert.
        "$project": {
            "_id": 0,
            "year": "$_id.year",
            "month": "$_id.month",
            "total_rides": 1
        }
    },
    {
        # Chronological order
        "$sort": {"year": 1, "month": 1}
    }
]

# Drop any previous copy of the target so it only ever holds this run's results
divvy_rides_by_month = db["divvy_rides_by_month"]
divvy_rides_by_month.drop()

# Materialise the result (one document per month) so it can be written in one batch
aggregated_result = list(divvy_ridedata_merged.aggregate(pipeline, allowDiskUse=True))

# A single insert_many replaces one insert_one round trip per document.
# insert_many rejects an empty list, so skip the write when nothing was aggregated.
if aggregated_result:
    divvy_rides_by_month.insert_many(aggregated_result)

print("Aggregation result has been written to the new collection.")

Aggregation result has been written to the new collection.


In [6]:
# Show the names of all collections currently in the database
collection_names = db.list_collection_names()
print(collection_names)

['weather_daily', 'withLatLong', 'Top10Routes', 'Top10EndStations', 'withStationName', 'withoutStationName', 'Top10StartStations', 'divvy_ridedata', 'divvy_rides_by_month', 'divvy_ridedata_merged', 'RouteDistance']


In [7]:
# Projection: return only the "month" field and suppress "_id"
projection = {"month": 1, "_id": 0}

# Fetch a single document (empty filter matches any) limited to the projected field
result = divvy_rides_by_month.find_one(filter={}, projection=projection)

# Show what came back
print(result)


{'month': 1}


In [8]:
# Cursor over every document in the monthly collection (no filter = match all)
all_documents = divvy_rides_by_month.find()

# Dump each document, one per line
for document in all_documents:
    print(document)

{'_id': ObjectId('64ea1cb2e1f1c38dc7ab9ffe'), 'total_rides': 103770, 'year': 2022, 'month': 1}
{'_id': ObjectId('64ea1cb2e1f1c38dc7ab9fff'), 'total_rides': 115609, 'year': 2022, 'month': 2}
{'_id': ObjectId('64ea1cb2e1f1c38dc7aba000'), 'total_rides': 284042, 'year': 2022, 'month': 3}
{'_id': ObjectId('64ea1cb2e1f1c38dc7aba001'), 'total_rides': 371249, 'year': 2022, 'month': 4}
{'_id': ObjectId('64ea1cb2e1f1c38dc7aba002'), 'total_rides': 634858, 'year': 2022, 'month': 5}
{'_id': ObjectId('64ea1cb2e1f1c38dc7aba003'), 'total_rides': 769204, 'year': 2022, 'month': 6}
{'_id': ObjectId('64ea1cb2e1f1c38dc7aba004'), 'total_rides': 823488, 'year': 2022, 'month': 7}
{'_id': ObjectId('64ea1cb2e1f1c38dc7aba005'), 'total_rides': 785932, 'year': 2022, 'month': 8}
{'_id': ObjectId('64ea1cb2e1f1c38dc7aba006'), 'total_rides': 701339, 'year': 2022, 'month': 9}
{'_id': ObjectId('64ea1cb2e1f1c38dc7aba007'), 'total_rides': 558685, 'year': 2022, 'month': 10}
{'_id': ObjectId('64ea1cb2e1f1c38dc7aba008'), 'to

In [9]:
# Source collection: the per-month ride totals
divvy_rides_by_month = db["divvy_rides_by_month"]

# Aggregation pipeline: roll the monthly totals up into (year, season) totals
pipeline = [
    {
        "$group": {
            "_id": {
                "year": "$year",
                # Map the month number to a season label.
                # NOTE(review): the group key uses the calendar year, so the
                # "Winter" bucket combines December with January/February of the
                # SAME year rather than the following one - confirm this is intended.
                "season": {
                    "$switch": {
                        "branches": [
                            {"case": {"$in": ["$month", [3, 4, 5]]}, "then": "Spring"},
                            {"case": {"$in": ["$month", [6, 7, 8]]}, "then": "Summer"},
                            {"case": {"$in": ["$month", [9, 10, 11]]}, "then": "Autumn"},
                            {"case": {"$in": ["$month", [12, 1, 2]]}, "then": "Winter"}
                        ],
                        # Any month value outside 1-12 (or a missing month) lands here
                        "default": "Unknown"
                    }
                }
            },
            "total_rides": {"$sum": "$total_rides"}
        }
    },
    {
        # Seasons are sorted by label, i.e. alphabetically, not chronologically
        "$sort": {"_id.year": 1, "_id.season": 1}
    }
]

# Drop any previous copy of the target so it only ever holds this run's results
divvy_rides_by_season = db["divvy_rides_by_season"]
divvy_rides_by_season.drop()

# Materialise the result so it can be both reported and bulk-inserted
aggregated_result = list(divvy_rides_by_month.aggregate(pipeline, allowDiskUse=True))

# Report each document that is about to be written
for doc in aggregated_result:
    print("Inserting document:", doc)

# A single insert_many replaces one insert_one round trip per document.
# insert_many rejects an empty list, so skip the write when nothing was aggregated.
if aggregated_result:
    divvy_rides_by_season.insert_many(aggregated_result)

print("Aggregation by season result has been written to the new collection.")

Inserting document: {'_id': {'year': 2022, 'season': 'Autumn'}, 'total_rides': 1597759}
Inserting document: {'_id': {'year': 2022, 'season': 'Spring'}, 'total_rides': 1290149}
Inserting document: {'_id': {'year': 2022, 'season': 'Summer'}, 'total_rides': 2378624}
Inserting document: {'_id': {'year': 2022, 'season': 'Winter'}, 'total_rides': 401185}
Aggregation by season result has been written to the new collection.


In [10]:
# Cursor over every document in the seasonal collection (no filter = match all)
all_documents = divvy_rides_by_season.find()

# Dump each document, one per line
for document in all_documents:
    print(document)

{'_id': {'year': 2022, 'season': 'Autumn'}, 'total_rides': 1597759}
{'_id': {'year': 2022, 'season': 'Spring'}, 'total_rides': 1290149}
{'_id': {'year': 2022, 'season': 'Summer'}, 'total_rides': 2378624}
{'_id': {'year': 2022, 'season': 'Winter'}, 'total_rides': 401185}


In [11]:
# Cursor over the whole Top10StartStations collection
# (not consumed in the cells visible here)
Top10StartStations = db.Top10StartStations.find()

# Display one document from the collection as the cell output
db["Top10StartStations"].find_one()

{'_id': 'Streeter Dr & Grand Ave',
 'count': 71269,
 'latitude': 41.880958,
 'longitude': -87.616743}

In [12]:
# Display one document from the Top10StartStations collection
db["Top10StartStations"].find_one()

{'_id': 'Streeter Dr & Grand Ave',
 'count': 71269,
 'latitude': 41.880958,
 'longitude': -87.616743}