# Advanced MongoDB Aggregations
Demonstrating complex aggregation pipelines across sharded collections

In [None]:
import sys
!{sys.executable} -m pip install pandas pymongo --quiet

from pymongo import MongoClient
import pandas as pd
from datetime import datetime, timedelta
import json

def print_mongo(obj):
    """Pretty-print *obj* to stdout as JSON with a two-space indent.

    Args:
        obj: Any structure ``json.dumps`` can walk, e.g. a list of documents.

    Values the ``json`` module cannot encode natively are rendered with
    ``str()``. Non-ASCII text is written verbatim rather than as escaped
    code points, so names containing accented characters stay readable.
    """
    print(json.dumps(obj, indent=2, default=str, ensure_ascii=False))

# NOTE(review): the credentials are hard-coded in the URI. That is workable
# for a throwaway demo cluster, but they should come from configuration
# before this notebook is shared or pointed at a real deployment.
client = MongoClient('mongodb://admin:admin@router1:27017/businessdb?authSource=admin')
db = client.businessdb
# MongoClient connects lazily, so constructing it proves nothing. Round-trip
# a cheap command first; an unreachable server raises here instead of the
# success message being printed for a connection that was never made.
client.admin.command('ping')
print("Connected to MongoDB")

## Example 1: Industry Analysis with Multi-Stage Pipeline
Two-stage analysis: organizations are first grouped by industry and country, then the per-country company and employee counts are rolled up into one summary per industry.

In [None]:
# Stage 1: one bucket per (industry, country) pair, carrying the company
# count, the employee total and mean, and the list of member companies.
group_by_industry_and_country = {
    "$group": {
        "_id": {"industry": "$industry", "country": "$country"},
        "totalCompanies": {"$sum": 1},
        "totalEmployees": {"$sum": "$numberOfEmployees"},
        "avgEmployees": {"$avg": "$numberOfEmployees"},
        "companies": {
            "$push": {"name": "$name", "employees": "$numberOfEmployees"},
        },
    },
}

# Stage 2: collapse the stage-1 buckets into one document per industry and
# keep the per-country detail as a nested array.
# NOTE: "avgEmployees" here averages the per-country averages, so every
# country weighs the same regardless of how many companies it contains.
group_by_industry = {
    "$group": {
        "_id": "$_id.industry",
        "totalCountries": {"$sum": 1},
        "totalCompanies": {"$sum": "$totalCompanies"},
        "totalEmployees": {"$sum": "$totalEmployees"},
        "avgEmployees": {"$avg": "$avgEmployees"},
        "countries": {
            "$push": {
                "country": "$_id.country",
                "totalCompanies": "$totalCompanies",
                "totalEmployees": "$totalEmployees",
                "avgEmployees": "$avgEmployees",
                "companies": "$companies",
            },
        },
    },
}

pipeline = [group_by_industry_and_country, group_by_industry]

# Run the pipeline, drain the cursor into a list, and pretty-print it.
result = db["organizations"].aggregate(pipeline)
print_mongo(list(result))