# Advanced MongoDB Aggregations
Demonstrating complex aggregation pipelines across sharded collections

In [None]:
import sys
!{sys.executable} -m pip install pandas pymongo --quiet

from pymongo import MongoClient
import pandas as pd
from datetime import datetime, timedelta
import json

def print_mongo(obj):
    """Pretty-print *obj* to stdout as JSON indented by two spaces.

    Values the JSON encoder cannot serialize natively are rendered through
    ``str()`` (via ``default=str``), so the call does not fail on them.
    """
    rendered = json.dumps(obj, default=str, indent=2)
    print(rendered)

import os

# Connection string for the demo cluster. It can be overridden through the
# MONGODB_URI environment variable so credentials do not have to live in the
# notebook; when the variable is unset the original demo URI is used.
mongo_uri = os.environ.get(
    'MONGODB_URI',
    'mongodb://admin:admin@router1:27017/businessdb?authSource=admin',
)
client = MongoClient(mongo_uri)
db = client.businessdb

# MongoClient does not contact the server when it is constructed, so run a
# cheap command first: this raises if the deployment is unreachable instead of
# printing a success message that has not been verified.
client.admin.command('ping')
print("Connected to MongoDB")

## 1. Basic Company Statistics
Simple analysis of company sizes:
- Count companies per industry
- Calculate average employee count
- Show the top 10 industries by number of companies

In [None]:
# Stage 1: collapse the collection to one document per industry, carrying the
# number of companies plus the average and total employee counts.
industry_totals = {
    "$group": {
        "_id": "$industry",
        "companyCount": {"$sum": 1},
        "avgEmployees": {"$avg": "$numberOfEmployees"},
        "totalEmployees": {"$sum": "$numberOfEmployees"},
    }
}

# Stage 2: industries with the most companies come first.
most_companies_first = {"$sort": {"companyCount": -1}}

# Stage 3: keep only the first ten industries of that ranking.
first_ten = {"$limit": 10}

pipeline_basic = [industry_totals, most_companies_first, first_ten]

# Run the pipeline on the `organizations` collection, drain the cursor into a
# list and pretty-print every result document.
print_mongo(list(db["organizations"].aggregate(pipeline_basic)))

## 2. Company Age Analysis
Groups companies by the decade in which they were founded:
- Shows company establishment timeline
- Counts companies per decade
- Lists companies in each period

In [None]:
# Companies grouped by founding decade.
#
# Each group's `_id` is the first year of the decade (1987 -> 1980), obtained
# by subtracting the year's remainder modulo 10. The human-readable `decade`
# label is built from that same value, so it is correct for any century.
# (The previous version keyed groups by an offset from the 1970s and prefixed
# the label with a literal "19", which produced labels such as "1910s" for
# companies founded in the 1980s.)
# NOTE(review): assumes `founded` holds a numeric year - confirm against the
# collection schema.
pipeline_age = [
    {"$group": {
        "_id": {"$subtract": ["$founded", {"$mod": ["$founded", 10]}]},
        "count": {"$sum": 1},
        "companies": {"$push": {
            "name": "$name",
            "founded": "$founded"
        }}
    }},
    # Oldest decade first.
    {"$sort": {"_id": 1}},
    {"$project": {
        # Convert to an integer type before stringifying so the label is
        # built from a whole number however the year is stored.
        "decade": {"$concat": [{"$toString": {"$toLong": "$_id"}}, "s"]},
        "count": 1,
        "companies": 1
    }}
]

# Run the pipeline on the `organizations` collection, drain the cursor into a
# list and pretty-print every result document.
print_mongo(list(db["organizations"].aggregate(pipeline_age)))

## 3. Industry Performance Analysis
This aggregation analyzes industry performance by:
- Grouping companies by industry and country
- Calculating total employees and average company size
- Ranking industries by employment impact
- Showing which industries are dominant in which countries

In [None]:
# Stage 1: statistics for every (industry, country) pair.
per_industry_and_country = {
    "$group": {
        "_id": {"industry": "$industry", "country": "$country"},
        "companyCount": {"$sum": 1},
        "totalEmployees": {"$sum": "$numberOfEmployees"},
        "avgCompanySize": {"$avg": "$numberOfEmployees"},
    }
}

# Shape of one entry in an industry's `countries` array; the average company
# size is rounded to a whole number.
country_entry = {
    "country": "$_id.country",
    "companies": "$companyCount",
    "employees": "$totalEmployees",
    "avgSize": {"$round": ["$avgCompanySize", 0]},
}

# Stage 3: roll the pairs up to one document per industry, collecting the
# per-country entries and summing the employees across countries.
per_industry = {
    "$group": {
        "_id": "$_id.industry",
        "countries": {"$push": country_entry},
        "totalEmployees": {"$sum": "$totalEmployees"},
    }
}

pipeline_industry = [
    per_industry_and_country,
    # Sorted before the roll-up, presumably so each industry's `countries`
    # entries arrive largest workforce first.
    {"$sort": {"totalEmployees": -1}},
    per_industry,
    # Final ranking: industries with the most employees overall come first.
    {"$sort": {"totalEmployees": -1}},
]

# Run the pipeline on the `organizations` collection, drain the cursor into a
# list and pretty-print every result document.
print_mongo(list(db["organizations"].aggregate(pipeline_industry)))

## 4. Customer Subscription Trends
Analyzes customer subscription patterns by:
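- Grouping new subscriptions by month and country
- Building a month-by-month trend per country, from which growth rates can be calculated
- Making peak subscription periods visible in the monthly counts
- Showing geographic expansion patterns through per-country totals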

In [None]:
# Stage 1: tag every customer with the "YYYY-MM" of their subscription date.
tag_year_month = {
    "$addFields": {
        "yearMonth": {
            "$dateToString": {"format": "%Y-%m", "date": "$subscriptionDate"}
        }
    }
}

# Stage 2: count new subscriptions per (month, country) and remember the full
# name of every customer in that cell.
per_month_and_country = {
    "$group": {
        "_id": {"yearMonth": "$yearMonth", "country": "$country"},
        "newSubscriptions": {"$sum": 1},
        "customers": {
            "$push": {"name": {"$concat": ["$firstName", " ", "$lastName"]}}
        },
    }
}

# Stage 4: one document per country holding its monthly entries and the
# overall number of subscriptions.
per_country = {
    "$group": {
        "_id": "$_id.country",
        "subscriptionTrend": {
            "$push": {
                "month": "$_id.yearMonth",
                "count": "$newSubscriptions",
                "customers": "$customers",
            }
        },
        "totalSubscriptions": {"$sum": "$newSubscriptions"},
    }
}

pipeline_subscriptions = [
    tag_year_month,
    per_month_and_country,
    # Sorted by month before the roll-up, presumably so each country's
    # `subscriptionTrend` entries arrive in chronological order.
    {"$sort": {"_id.yearMonth": 1}},
    per_country,
    # Countries with the most subscriptions come first.
    {"$sort": {"totalSubscriptions": -1}},
]

# Run the pipeline on the `customers` collection, drain the cursor into a
# list and pretty-print every result document.
print_mongo(list(db["customers"].aggregate(pipeline_subscriptions)))

## 5. Employee Age Distribution Analysis
Complex analysis of workforce demographics:
- Groups employees by age ranges and job titles
- Calculates age distribution in different roles
- Identifies career progression patterns
- Shows demographic trends in different positions

In [None]:
# Workforce demographics: employee counts and average age for every
# (job title, age group) combination, with the matching employees listed.
#
# `age` and `ageGroup` are computed in two separate $addFields stages on
# purpose: expressions inside one stage are evaluated against the incoming
# document, so an `ageGroup` defined next to `age` would not see the freshly
# computed value.
# NOTE(review): assumes `dateOfBirth` is stored as a BSON date - confirm
# against the collection schema.
pipeline_demographics = [
    {"$addFields": {
        "age": {"$floor": {
            "$divide": [
                {"$subtract": ["$$NOW", "$dateOfBirth"]},
                # Milliseconds in an average year (365.25 days), so leap days
                # do not make people appear older than they are.
                365.25 * 24 * 60 * 60 * 1000
            ]
        }}
    }},
    {"$addFields": {
        "ageGroup": {
            "$switch": {
                "branches": [
                    # A missing date of birth yields a null age; null sorts
                    # below every number, so without this branch such people
                    # would be counted in the youngest group.
                    {"case": {"$eq": ["$age", None]}, "then": "unknown"},
                    # Upper bounds are inclusive so they agree with the
                    # labels (a 25-year-old belongs to "18-25").
                    {"case": {"$lte": ["$age", 25]}, "then": "18-25"},
                    {"case": {"$lte": ["$age", 35]}, "then": "26-35"},
                    {"case": {"$lte": ["$age", 45]}, "then": "36-45"},
                    {"case": {"$lte": ["$age", 55]}, "then": "46-55"}
                ],
                # Everyone older than 55.
                "default": "55+"
            }
        }
    }},
    {"$group": {
        "_id": {"jobTitle": "$jobTitle", "ageGroup": "$ageGroup"},
        "count": {"$sum": 1},
        "avgAge": {"$avg": "$age"},
        "employees": {"$push": {
            "name": {"$concat": ["$firstName", " ", "$lastName"]},
            "age": "$age"
        }}
    }},
    {"$sort": {"_id.jobTitle": 1, "_id.ageGroup": 1}}
]

# Run the pipeline on the `people` collection, drain the cursor into a list
# and pretty-print every result document.
print_mongo(list(db["people"].aggregate(pipeline_demographics)))

## 6. Temporal Market Analysis
Analyzes market evolution over time:
- Tracks company growth by founding date
- Measures industry expansion rates
- Shows market maturity by sector
- Identifies emerging business trends

In [None]:
# Lower bounds of the founding-year buckets; the last value is the exclusive
# upper bound of the final bucket (2020 up to, but not including, 2025).
founding_year_boundaries = [1970, 1980, 1990, 2000, 2010, 2020, 2025]

# What every bucket reports: size and workforce figures, the distinct
# industries represented, and the member companies themselves.
bucket_summary = {
    "companyCount": {"$sum": 1},
    "totalEmployees": {"$sum": "$numberOfEmployees"},
    "industries": {"$addToSet": "$industry"},
    "avgCompanySize": {"$avg": "$numberOfEmployees"},
    "companies": {
        "$push": {
            "name": "$name",
            "industry": "$industry",
            "employees": "$numberOfEmployees",
        }
    },
}

pipeline_market_evolution = [
    {
        "$bucket": {
            "groupBy": "$founded",
            "boundaries": founding_year_boundaries,
            # Catch-all bucket for documents whose `founded` value falls
            # outside the boundaries above.
            "default": "unknown",
            "output": bucket_summary,
        }
    },
    {"$sort": {"_id": 1}},
]

# Run the pipeline on the `organizations` collection, drain the cursor into a
# list and pretty-print every result document.
print_mongo(list(db["organizations"].aggregate(pipeline_market_evolution)))