In [3]:
import sys
!{sys.executable} -m pip install pandas pymongo --quiet

import pandas as pd
from pymongo import MongoClient
from datetime import datetime
import json
import time
import warnings
warnings.filterwarnings('ignore')

def print_mongo(obj):
    """Write ``obj`` to stdout as JSON indented by two spaces.

    Values the ``json`` module cannot encode natively are passed through
    ``str`` before serialisation.
    """
    rendered = json.dumps(obj, default=str, indent=2)
    print(rendered)

def get_mongo_client(max_retries=5, retry_delay=5, uri=None):
    """Connect to MongoDB, retrying when an attempt fails.

    Each attempt builds a ``MongoClient`` and issues an ``admin`` ``ping``
    to confirm the server is reachable before the client is handed back.

    Args:
        max_retries: Total number of connection attempts; must be >= 1.
        retry_delay: Seconds to sleep between failed attempts.
        uri: MongoDB connection string. When omitted, the ``MONGO_URI``
            environment variable is used, falling back to the notebook's
            original development URI.

    Returns:
        A ``MongoClient`` whose ``ping`` command succeeded.

    Raises:
        ValueError: If ``max_retries`` is less than 1 (previously this
            silently returned ``None``).
        Exception: Whatever the final attempt raised, once every attempt
            has failed.
    """
    import os  # local import: only needed for the MONGO_URI override

    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    if uri is None:
        # NOTE(review): the fallback embeds credentials; prefer setting
        # MONGO_URI (or passing ``uri``) outside of local development.
        uri = os.environ.get(
            "MONGO_URI",
            "mongodb://admin:admin@router1:27017/businessdb?authSource=admin",
        )

    for attempt in range(max_retries):
        client = None
        try:
            client = MongoClient(uri)
            client.admin.command('ping')
            print("Successfully connected to MongoDB")
            return client
        except Exception as e:
            # Release the half-open client so retries do not pile up
            # unused connections.
            if client is not None:
                client.close()
            print(f"Connection attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                raise

# Open the connection once and keep a handle to the "businessdb" database.
client = get_mongo_client()
db = client.get_database("businessdb")

Successfully connected to MongoDB


In [4]:
# Handle to the "organizations" collection used by the queries below.
org_collection = db.get_collection("organizations")

In [6]:
# Tally organizations per industry, largest groups first
group_by_industry = {"$group": {"_id": "$industry", "count": {"$sum": 1}}}
largest_first = {"$sort": {"count": -1}}
pipeline = [group_by_industry, largest_first]

results = list(org_collection.aggregate(pipeline))
print("Number of organizations by industry:")
for doc in results:
    print_mongo(doc)

Number of organizations by industry:
{
  "_id": "Technology",
  "count": 101
}
{
  "_id": "Plastics",
  "count": 4
}
{
  "_id": "Military Industry",
  "count": 3
}
{
  "_id": "Primary / Secondary Education",
  "count": 3
}
{
  "_id": "Printing",
  "count": 3
}
{
  "_id": "Facilities Services",
  "count": 3
}
{
  "_id": "Transportation",
  "count": 2
}
{
  "_id": "Human Resources / HR",
  "count": 2
}
{
  "_id": "Consumer Electronics",
  "count": 2
}
{
  "_id": "Broadcast Media",
  "count": 2
}
{
  "_id": "Museums / Institutions",
  "count": 2
}
{
  "_id": "Pharmaceuticals",
  "count": 2
}
{
  "_id": "Online Publishing",
  "count": 2
}
{
  "_id": "Software",
  "count": 2
}
{
  "_id": "Investment Management / Hedge Fund / Private Equity",
  "count": 2
}
{
  "_id": "Capital Markets / Hedge Fund / Private Equity",
  "count": 2
}
{
  "_id": "Civic / Social Organization",
  "count": 2
}
{
  "_id": "Luxury Goods / Jewelry",
  "count": 2
}
{
  "_id": "Legislative Office",
  "count": 2
}
{
  "_

In [7]:
# Mean of numberOfEmployees per country, highest average first
avg_by_country = {
    "$group": {
        "_id": "$country",
        "avgEmployees": {"$avg": "$numberOfEmployees"},
    }
}
highest_first = {"$sort": {"avgEmployees": -1}}
pipeline = [avg_by_country, highest_first]

results = list(org_collection.aggregate(pipeline))
print("\nAverage employees by country:")
for doc in results:
    print_mongo(doc)


Average employees by country:
{
  "_id": "Guatemala",
  "avgEmployees": 9995.0
}
{
  "_id": "Uzbekistan",
  "avgEmployees": 9698.0
}
{
  "_id": "Germany",
  "avgEmployees": 9443.0
}
{
  "_id": "Brazil",
  "avgEmployees": 9315.0
}
{
  "_id": "Vietnam",
  "avgEmployees": 9097.0
}
{
  "_id": "United Arab Emirates",
  "avgEmployees": 9079.0
}
{
  "_id": "Tonga",
  "avgEmployees": 9069.0
}
{
  "_id": "Grenada",
  "avgEmployees": 9067.0
}
{
  "_id": "Denmark",
  "avgEmployees": 9011.0
}
{
  "_id": "Monaco",
  "avgEmployees": 8987.0
}
{
  "_id": "Palau",
  "avgEmployees": 8741.0
}
{
  "_id": "Honduras",
  "avgEmployees": 8508.0
}
{
  "_id": "Liechtenstein",
  "avgEmployees": 8433.0
}
{
  "_id": "Marshall Islands",
  "avgEmployees": 8245.0
}
{
  "_id": "El Salvador",
  "avgEmployees": 8172.0
}
{
  "_id": "Kuwait",
  "avgEmployees": 8167.0
}
{
  "_id": "Pakistan",
  "avgEmployees": 8156.0
}
{
  "_id": "Botswana",
  "avgEmployees": 7961.0
}
{
  "_id": "Mauritius",
  "avgEmployees": 7870.0
}
{
 

In [8]:
# Handle to the "people" collection used by the queries below.
people_collection = db.get_collection("people")

In [9]:
# Tally people per value of the "sex" field, largest group first
group_by_sex = {"$group": {"_id": "$sex", "count": {"$sum": 1}}}
largest_first = {"$sort": {"count": -1}}
pipeline = [group_by_sex, largest_first]

results = list(people_collection.aggregate(pipeline))
print("Number of people by sex:")
for doc in results:
    print_mongo(doc)

Number of people by sex:
{
  "_id": "Female",
  "count": 53
}
{
  "_id": "Male",
  "count": 47
}


In [10]:
# Bucket people by the year of dateOfBirth and count each bucket,
# listing the earliest year first
birth_year = {"$year": "$dateOfBirth"}
pipeline = [
    {"$group": {"_id": birth_year, "count": {"$sum": 1}}},
    {"$sort": {"_id": 1}},
]

results = list(people_collection.aggregate(pipeline))
print("\nPeople grouped by year of birth:")
for doc in results:
    print_mongo(doc)


People grouped by year of birth:
{
  "_id": 1908,
  "count": 3
}
{
  "_id": 1909,
  "count": 1
}
{
  "_id": 1910,
  "count": 2
}
{
  "_id": 1911,
  "count": 2
}
{
  "_id": 1915,
  "count": 1
}
{
  "_id": 1916,
  "count": 1
}
{
  "_id": 1921,
  "count": 1
}
{
  "_id": 1924,
  "count": 2
}
{
  "_id": 1925,
  "count": 1
}
{
  "_id": 1926,
  "count": 1
}
{
  "_id": 1927,
  "count": 1
}
{
  "_id": 1930,
  "count": 3
}
{
  "_id": 1931,
  "count": 2
}
{
  "_id": 1932,
  "count": 2
}
{
  "_id": 1933,
  "count": 1
}
{
  "_id": 1938,
  "count": 3
}
{
  "_id": 1941,
  "count": 1
}
{
  "_id": 1942,
  "count": 1
}
{
  "_id": 1943,
  "count": 1
}
{
  "_id": 1944,
  "count": 2
}
{
  "_id": 1945,
  "count": 1
}
{
  "_id": 1946,
  "count": 1
}
{
  "_id": 1947,
  "count": 2
}
{
  "_id": 1948,
  "count": 1
}
{
  "_id": 1949,
  "count": 2
}
{
  "_id": 1952,
  "count": 1
}
{
  "_id": 1953,
  "count": 1
}
{
  "_id": 1954,
  "count": 2
}
{
  "_id": 1955,
  "count": 1
}
{
  "_id": 1956,
  "count": 1
}
{
  "_

In [11]:
# Handle to the "customers" collection used by the queries below.
customers_collection = db.get_collection("customers")

In [12]:
# Tally customers per country, largest groups first
group_by_country = {"$group": {"_id": "$country", "count": {"$sum": 1}}}
largest_first = {"$sort": {"count": -1}}
pipeline = [group_by_country, largest_first]

results = list(customers_collection.aggregate(pipeline))
print("Number of customers by country:")
for doc in results:
    print_mongo(doc)

Number of customers by country:
{
  "_id": "Solomon Islands",
  "count": 4
}
{
  "_id": "Canada",
  "count": 2
}
{
  "_id": "Oman",
  "count": 2
}
{
  "_id": "South Georgia and the South Sandwich Islands",
  "count": 2
}
{
  "_id": "Togo",
  "count": 2
}
{
  "_id": "Netherlands",
  "count": 2
}
{
  "_id": "Dominican Republic",
  "count": 2
}
{
  "_id": "Zimbabwe",
  "count": 2
}
{
  "_id": "Belarus",
  "count": 2
}
{
  "_id": "Bulgaria",
  "count": 2
}
{
  "_id": "Sri Lanka",
  "count": 2
}
{
  "_id": "Saint Vincent and the Grenadines",
  "count": 2
}
{
  "_id": "United Arab Emirates",
  "count": 2
}
{
  "_id": "Iran",
  "count": 1
}
{
  "_id": "Monaco",
  "count": 1
}
{
  "_id": "Burkina Faso",
  "count": 1
}
{
  "_id": "Morocco",
  "count": 1
}
{
  "_id": "Aruba",
  "count": 1
}
{
  "_id": "Timor-Leste",
  "count": 1
}
{
  "_id": "Saint Barthelemy",
  "count": 1
}
{
  "_id": "Uzbekistan",
  "count": 1
}
{
  "_id": "Guyana",
  "count": 1
}
{
  "_id": "Madagascar",
  "count": 1
}
{
  "

In [None]:
# Bucket customers by the year of subscriptionDate and count each bucket,
# listing the earliest year first
subscription_year = {"$year": "$subscriptionDate"}
pipeline = [
    {"$group": {"_id": subscription_year, "count": {"$sum": 1}}},
    {"$sort": {"_id": 1}},
]

results = list(customers_collection.aggregate(pipeline))
print("\nCustomers grouped by subscription year:")
for doc in results:
    print_mongo(doc)