In [None]:
# Install the notebook's third-party dependencies with the same interpreter
# that backs this kernel (sys.executable), rather than whichever `pip`
# happens to be first on PATH. NOTE(review): versions are not pinned.
import sys
!{sys.executable} -m pip install pandas pymongo --quiet

import json
import os
import time
import warnings
from datetime import datetime

import pandas as pd
from pymongo import MongoClient
# Blanket filter: silences every Python warning for the rest of the kernel
# session, including deprecation notices from the libraries imported above.
warnings.filterwarnings('ignore')

def print_mongo(obj):
    """Write a MongoDB result to stdout as 2-space-indented JSON.

    Values the JSON encoder cannot serialise natively are converted
    with ``str`` (``default=str``) instead of raising ``TypeError``.
    """
    rendered = json.dumps(obj, default=str, indent=2)
    print(rendered)

def get_mongo_client(max_retries=5, retry_delay=5, uri=None):
    """Connect to MongoDB, retrying until a ``ping`` succeeds.

    Args:
        max_retries: Total number of connection attempts (must be >= 1).
        retry_delay: Seconds to sleep between failed attempts.
        uri: MongoDB connection string. When omitted, the ``MONGO_URI``
            environment variable is used if set; otherwise the notebook's
            original default URI is used.

    Returns:
        A ``MongoClient`` that has answered an ``admin`` ``ping`` command.

    Raises:
        ValueError: If ``max_retries`` is less than 1 (previously the loop
            was skipped and ``None`` was returned silently).
        Exception: The error from the final attempt is re-raised once all
            attempts have failed.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    if uri is None:
        # Let deployments supply credentials without editing the notebook;
        # the literal is kept only as a backward-compatible fallback.
        uri = os.environ.get(
            "MONGO_URI",
            'mongodb://admin:admin@router1:27017/businessdb?authSource=admin',
        )

    for attempt in range(max_retries):
        client = None
        try:
            client = MongoClient(uri)
            # Constructing the client does not prove the server is reachable;
            # the ping is what actually validates the connection.
            client.admin.command('ping')
            print("Successfully connected to MongoDB")
            return client
        except Exception as e:
            # Release the failed client so retries do not accumulate
            # half-open clients.
            if client is not None:
                client.close()
            print(f"Connection attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                raise

# Open one shared connection for the whole notebook and select the
# `businessdb` database that every later cell queries.
client = get_mongo_client()
db = client["businessdb"]

In [None]:
# Handle to the `organizations` collection used by the cells below.
org_collection = db.organizations

In [None]:
# Count how many organizations exist by industry.
# Each result document has the shape {"_id": <industry>, "count": <n>}.
pipeline = [
    {"$group": {"_id": "$industry", "count": {"$sum": 1}}},
    # Largest groups first. `count` is not unique, so `_id` is added as a
    # tie-breaker: without it, industries with equal counts may come back
    # in a different order from one run to the next.
    {"$sort": {"count": -1, "_id": 1}}
]

results = list(org_collection.aggregate(pipeline))
print("Number of organizations by industry:")
for r in results:
    print_mongo(r)

In [None]:
# Calculate the average number of employees for organizations in each country.
# Each result document has the shape {"_id": <country>, "avgEmployees": <avg>}.
pipeline = [
    {"$group": {"_id": "$country", "avgEmployees": {"$avg": "$numberOfEmployees"}}},
    # Highest average first. Averages can coincide, so `_id` is added as a
    # tie-breaker to keep the output order stable across runs.
    {"$sort": {"avgEmployees": -1, "_id": 1}}
]

results = list(org_collection.aggregate(pipeline))
print("\nAverage employees by country:")
for r in results:
    print_mongo(r)

In [None]:
# Handle to the `people` collection used by the cells below.
people_collection = db.people

In [None]:
# Count how many people by sex.
# Each result document has the shape {"_id": <sex>, "count": <n>}.
pipeline = [
    {"$group": {"_id": "$sex", "count": {"$sum": 1}}},
    # Largest groups first. `count` is not unique, so `_id` is added as a
    # tie-breaker to keep the output order stable across runs.
    {"$sort": {"count": -1, "_id": 1}}
]

results = list(people_collection.aggregate(pipeline))
print("Number of people by sex:")
for r in results:
    print_mongo(r)

In [None]:
# Bucket people by the calendar year of `dateOfBirth` and count each bucket.
birth_year = {"$year": "$dateOfBirth"}
group_stage = {"$group": {"_id": birth_year, "count": {"$sum": 1}}}
sort_stage = {"$sort": {"_id": 1}}  # earliest year first
pipeline = [group_stage, sort_stage]

results = [doc for doc in people_collection.aggregate(pipeline)]
print("\nPeople grouped by year of birth:")
for r in results:
    print_mongo(r)

In [None]:
# Handle to the `customers` collection used by the cells below.
customers_collection = db.customers

In [None]:
# Count how many customers by country.
# Each result document has the shape {"_id": <country>, "count": <n>}.
pipeline = [
    {"$group": {"_id": "$country", "count": {"$sum": 1}}},
    # Largest groups first. `count` is not unique, so `_id` is added as a
    # tie-breaker to keep the output order stable across runs.
    {"$sort": {"count": -1, "_id": 1}}
]

results = list(customers_collection.aggregate(pipeline))
print("Number of customers by country:")
for r in results:
    print_mongo(r)

In [None]:
# Bucket customers by the calendar year of `subscriptionDate` and count
# how many subscribed in each year.
subscription_year = {"$year": "$subscriptionDate"}
group_stage = {"$group": {"_id": subscription_year, "count": {"$sum": 1}}}
sort_stage = {"$sort": {"_id": 1}}  # earliest year first
pipeline = [group_stage, sort_stage]

results = [doc for doc in customers_collection.aggregate(pipeline)]
print("\nCustomers grouped by subscription year:")
for r in results:
    print_mongo(r)