In [10]:
import pymongo
from pymongo import MongoClient, UpdateOne
import json
from datetime import datetime
from collections import OrderedDict

In [11]:
# Connect to MongoDB on port 27017 (no host is passed, so PyMongo's default host applies)
MONGO_PORT = 27017
mongo = MongoClient(port=MONGO_PORT)

In [12]:
# Select the "chicago_bikes" database (subscript access, same result as attribute access)
db = mongo["chicago_bikes"]

In [13]:
# Bind a handle for each collection this notebook works with
divvy_rides, weather_daily, divvy_ridedata_merged = (
    db[collection_name]
    for collection_name in ("divvy_ridedata", "weather_daily", "divvy_ridedata_merged")
)

In [60]:
# Build the `divvy_rides_by_month` collection: one document per (year, month)
# holding the number of rides that started in that month.
divvy_ridedata_merged = db["divvy_ridedata_merged"]

# Define the aggregation pipeline
pipeline = [
    # Bucket every ride by the year and month of its `started_at` value
    # ($toDate converts the stored value to a date first) and count the rides.
    {
        "$group": {
            "_id": {
                "year": {"$year": {"$toDate": "$started_at"}},
                "month": {"$month": {"$toDate": "$started_at"}}
            },
            "total_rides": {"$sum": 1}
        }
    },
    # Flatten the compound group key into top-level `year` / `month` fields.
    {
        "$project": {
            "_id": 0,
            "year": "$_id.year",
            "month": "$_id.month",
            "total_rides": 1
        }
    },
    # Return the buckets in chronological order.
    {
        "$sort": {"year": 1, "month": 1}
    }
]

# Run the aggregation and materialise the result BEFORE touching the target
# collection: if the pipeline fails, the previously written collection is
# left intact instead of being dropped and never refilled.
aggregated_result = list(
    divvy_ridedata_merged.aggregate(pipeline, allowDiskUse=True, collation=None)
)

# Replace the previous contents of the target collection so that re-running
# this cell does not accumulate duplicate month documents.
divvy_rides_by_month = db["divvy_rides_by_month"]
divvy_rides_by_month.drop()

# Write all month documents in a single bulk call rather than one round trip
# per document; insert_many rejects an empty list, hence the guard.
if aggregated_result:
    divvy_rides_by_month.insert_many(aggregated_result)

print("Aggregation result has been written to the new collection.")

Aggregation result has been written to the new collection.


In [52]:
# Show which collections currently exist in the database
existing_collections = db.list_collection_names()
print(existing_collections)

['divvy_rides_by_month', 'weather_daily', 'divvy_ridedata_merged', 'divvy_rides_by_season', 'divvy_ridedata']


In [65]:
# Projection: keep only the "month" field and suppress "_id"
projection = dict(month=1, _id=0)

# Fetch a single document (the empty filter matches any) with the projection applied
result = divvy_rides_by_month.find_one(filter={}, projection=projection)

# Show what came back
print(result)


{'month': 1}


In [64]:
# Open a cursor over every document in the monthly collection (no filter)
all_documents = divvy_rides_by_month.find()

# Echo the documents, one per line
for document in all_documents:
    print(document)

{'_id': ObjectId('64e7ffcbf51c9daca89e4364'), 'total_rides': 103770, 'year': 2022, 'month': 1}
{'_id': ObjectId('64e7ffcbf51c9daca89e4365'), 'total_rides': 115609, 'year': 2022, 'month': 2}
{'_id': ObjectId('64e7ffcbf51c9daca89e4366'), 'total_rides': 284042, 'year': 2022, 'month': 3}
{'_id': ObjectId('64e7ffcbf51c9daca89e4367'), 'total_rides': 371249, 'year': 2022, 'month': 4}
{'_id': ObjectId('64e7ffcbf51c9daca89e4368'), 'total_rides': 634858, 'year': 2022, 'month': 5}
{'_id': ObjectId('64e7ffcbf51c9daca89e4369'), 'total_rides': 769204, 'year': 2022, 'month': 6}
{'_id': ObjectId('64e7ffcbf51c9daca89e436a'), 'total_rides': 823488, 'year': 2022, 'month': 7}
{'_id': ObjectId('64e7ffcbf51c9daca89e436b'), 'total_rides': 785932, 'year': 2022, 'month': 8}
{'_id': ObjectId('64e7ffcbf51c9daca89e436c'), 'total_rides': 701339, 'year': 2022, 'month': 9}
{'_id': ObjectId('64e7ffcbf51c9daca89e436d'), 'total_rides': 558685, 'year': 2022, 'month': 10}
{'_id': ObjectId('64e7ffcbf51c9daca89e436e'), 'to

In [72]:
# Build the `divvy_rides_by_season` collection by rolling the monthly ride
# counts up into (year, season) totals.
divvy_rides_by_month = db["divvy_rides_by_month"]

# Define the aggregation pipeline
pipeline = [
    # Group the monthly documents by their `year` and by a season label derived
    # from `month`, summing the monthly `total_rides` into a seasonal total.
    # NOTE(review): December is grouped with January/February of the SAME
    # calendar year, so "Winter" is not a contiguous season - confirm intended.
    {
        "$group": {
            "_id": {
                "year": "$year",
                "season": {
                    "$switch": {
                        "branches": [
                            {"case": {"$in": ["$month", [3, 4, 5]]}, "then": "Spring"},
                            {"case": {"$in": ["$month", [6, 7, 8]]}, "then": "Summer"},
                            {"case": {"$in": ["$month", [9, 10, 11]]}, "then": "Autumn"},
                            {"case": {"$in": ["$month", [12, 1, 2]]}, "then": "Winter"}
                        ],
                        # Any month value outside 1-12 falls through to this label.
                        "default": "Unknown"
                    }
                }
            },
            "total_rides": {"$sum": "$total_rides"}
        }
    },
    # Order by year, then by season label (string order, i.e. alphabetical).
    {
        "$sort": {"_id.year": 1, "_id.season": 1}
    }
]

# Run the aggregation and materialise the result BEFORE touching the target
# collection: if the pipeline fails, the previously written collection is
# left intact instead of being dropped and never refilled.
aggregated_result = list(divvy_rides_by_month.aggregate(pipeline, allowDiskUse=True, collation=None))

# Replace the previous contents of the target collection; the documents carry
# a compound `_id`, so re-inserting without dropping would hit duplicate keys.
divvy_rides_by_season = db["divvy_rides_by_season"]
divvy_rides_by_season.drop()

# Log each document that is about to be written
for doc in aggregated_result:
    print("Inserting document:", doc)

# Write all season documents in a single bulk call rather than one round trip
# per document; insert_many rejects an empty list, hence the guard.
if aggregated_result:
    divvy_rides_by_season.insert_many(aggregated_result)

print("Aggregation by season result has been written to the new collection.")

Inserting document: {'_id': {'year': 2022, 'season': 'Autumn'}, 'total_rides': 1597759}
Inserting document: {'_id': {'year': 2022, 'season': 'Spring'}, 'total_rides': 1290149}
Inserting document: {'_id': {'year': 2022, 'season': 'Summer'}, 'total_rides': 2378624}
Inserting document: {'_id': {'year': 2022, 'season': 'Winter'}, 'total_rides': 401185}
Aggregation by season result has been written to the new collection.


In [73]:
# Open a cursor over every document in the seasonal collection (no filter)
all_documents = divvy_rides_by_season.find()

# Echo the documents, one per line
for document in all_documents:
    print(document)

{'_id': {'year': 2022, 'season': 'Autumn'}, 'total_rides': 1597759}
{'_id': {'year': 2022, 'season': 'Spring'}, 'total_rides': 1290149}
{'_id': {'year': 2022, 'season': 'Summer'}, 'total_rides': 2378624}
{'_id': {'year': 2022, 'season': 'Winter'}, 'total_rides': 401185}
