In [1]:
import sys
!{sys.executable} -m pip install pandas pymongo --quiet

import pandas as pd
from pymongo import MongoClient
from datetime import datetime
import json
import time
import warnings
warnings.filterwarnings('ignore')

def print_mongo(obj):
    """Write *obj* to stdout as JSON indented by two spaces.

    Any value the JSON encoder cannot serialise on its own is converted
    with ``str()`` before being emitted.
    """
    rendered = json.dumps(obj, default=str, indent=2)
    print(rendered)

def get_mongo_client(max_retries=5, retry_delay=5,
                     uri='mongodb://admin:admin@router1:27017/businessdb?authSource=admin'):
    """Connect to MongoDB, retrying on failure.

    Args:
        max_retries: Total number of connection attempts; must be at least 1.
        retry_delay: Seconds to sleep between failed attempts.
        uri: MongoDB connection string. Defaults to the URI this notebook
            has always used; pass a different one to avoid relying on the
            hard-coded credentials.

    Returns:
        A ``MongoClient`` that has answered a ``ping`` command.

    Raises:
        ValueError: If ``max_retries`` is less than 1 (previously the loop
            was skipped and ``None`` was returned silently).
        Exception: Whatever the final failed attempt raised.
    """
    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    for attempt in range(max_retries):
        client = None
        try:
            client = MongoClient(uri)
            # Constructing a MongoClient does not by itself prove the server
            # is reachable; the ping forces a real round trip.
            client.admin.command('ping')
            print("Successfully connected to MongoDB")
            return client
        except Exception as e:
            # Release the failed client so retries do not accumulate
            # connection pools and background monitor threads.
            if client is not None:
                client.close()
            print(f"Connection attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                raise

# Open the shared connection (with retries) and select the working database.
client = get_mongo_client()
db = client["businessdb"]

Successfully connected to MongoDB


In [2]:
# Fetch the serverStatus document from the connected server and pretty-print
# it. Any failure is reported as a message rather than raised, so the
# notebook keeps running.
try:
    server_status = client["admin"].command("serverStatus")
    print("\nServer status (mongos):")
    print_mongo(server_status)
except Exception as exc:
    print(f"Error retrieving server status: {exc}")


Server status (mongos):
{
  "host": "router1",
  "version": "6.0.20",
  "process": "mongos",
  "pid": 1,
  "uptime": 66.0,
  "uptimeMillis": 65628,
  "uptimeEstimate": 65,
  "localTime": "2025-01-18 19:25:26.335000",
  "asserts": {
    "regular": 0,
    "msg": 0,
    "user": 114,
    "tripwire": 0,
    "rollovers": 0
  },
  "connections": {
    "current": 11,
    "available": 838849,
    "totalCreated": 39,
    "active": 5,
    "threaded": 11,
    "exhaustIsMaster": 0,
    "exhaustHello": 2,
    "awaitingTopologyChanges": 4,
    "loadBalanced": 0
  },
  "defaultRWConcern": {
    "defaultReadConcern": {
      "level": "local"
    },
    "defaultWriteConcern": {
      "w": "majority",
      "wtimeout": 0
    },
    "defaultWriteConcernSource": "implicit",
    "defaultReadConcernSource": "implicit",
    "localUpdateWallClockTime": "2025-01-18 19:25:17.556000"
  },
  "extra_info": {
    "note": "fields vary by platform",
    "user_time_us": 617968,
    "system_time_us": 269115,
    "maxim

In [3]:
# buildInfo: version, git commit and other build details of the server
# binary we are connected to.
build_info = client["admin"].command("buildInfo")
print("\nBuild Info:")
print_mongo(build_info)


Build Info:
{
  "version": "6.0.20",
  "gitVersion": "ef4efd469fb0c5626c629c4f0f21398c9b5dd08f",
  "modules": [],
  "allocator": "tcmalloc",
  "javascriptEngine": "mozjs",
  "sysInfo": "deprecated",
  "versionArray": [
    6,
    0,
    20,
    0
  ],
  "openssl": {
    "running": "OpenSSL 3.0.2 15 Mar 2022",
    "compiled": "OpenSSL 3.0.2 15 Mar 2022"
  },
  "buildEnvironment": {
    "distmod": "ubuntu2204",
    "distarch": "x86_64",
    "cc": "/opt/mongodbtoolchain/v3/bin/gcc: gcc (GCC) 8.5.0",
    "ccflags": "-Werror -include mongo/platform/basic.h -ffp-contract=off -fasynchronous-unwind-tables -ggdb -Wall -Wsign-compare -Wno-unknown-pragmas -Winvalid-pch -fno-omit-frame-pointer -fno-strict-aliasing -O2 -march=sandybridge -mtune=generic -mprefer-vector-width=128 -Wno-unused-local-typedefs -Wno-unused-function -Wno-deprecated-declarations -Wno-unused-const-variable -Wno-unused-but-set-variable -Wno-missing-braces -fstack-protector-strong -fdebug-types-section -Wa,--nocompress-debug-

In [4]:
# hostInfo: describes the machine/VM the server process runs on
# (CPU architecture, operating system, memory, ...).
host_info = client["admin"].command("hostInfo")
print("\nHost Info:")
print_mongo(host_info)


Host Info:
{
  "system": {
    "currentTime": "2025-01-18 19:25:33.506000",
    "hostname": "router1",
    "cpuAddrSize": 64,
    "memSizeMB": 12874,
    "memLimitMB": 12874,
    "numCores": 16,
    "numPhysicalCores": 8,
    "numCpuSockets": 1,
    "cpuArch": "x86_64",
    "numaEnabled": false,
    "numNumaNodes": 1
  },
  "os": {
    "type": "Linux",
    "name": "Ubuntu",
    "version": "22.04"
  },
  "extra": {
    "extra": {
      "versionString": "Linux version 5.15.167.4-microsoft-standard-WSL2 (root@f9c826d3017f) (gcc (GCC) 11.2.0, GNU ld (GNU Binutils) 2.37) #1 SMP Tue Nov 5 00:21:55 UTC 2024",
      "libcVersion": "2.35",
      "kernelVersion": "5.15.167.4-microsoft-standard-WSL2",
      "cpuString": "AMD Ryzen 7 8840U w/ Radeon 780M Graphics",
      "cpuFrequencyMHz": "3293.607",
      "cpuFeatures": "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl tsc

In [5]:
# View Databases
# Ask the connected client for the names of the databases it can see and
# pretty-print the resulting list as JSON.
databases = client.list_database_names()
print("\nDatabases accessible via this router:")
print_mongo(databases)


Databases accessible via this router:
[
  "admin",
  "businessdb",
  "config"
]


In [6]:
# List Collections in businessdb
# `db` is the businessdb handle created in the first cell; only the
# collection names are fetched here, not their options or contents.
collections = db.list_collection_names()
print("\nCollections in 'businessdb':")
print_mongo(collections)


Collections in 'businessdb':
[
  "people",
  "organizations",
  "customers"
]


In [7]:
# listShards: one entry per shard registered with the cluster, plus the
# command's own status metadata.
list_shards = client["admin"].command("listShards")
print("Current shards in the cluster:")
print_mongo(list_shards)

Current shards in the cluster:
{
  "shards": [
    {
      "_id": "shard1rs",
      "host": "shard1rs/shard1-1:27017,shard1-2:27017,shard1-3:27017",
      "state": 1,
      "topologyTime": "Timestamp(1737228144, 3)"
    },
    {
      "_id": "shard2rs",
      "host": "shard2rs/shard2-1:27017,shard2-2:27017,shard2-3:27017",
      "state": 1,
      "topologyTime": "Timestamp(1737228145, 1)"
    },
    {
      "_id": "shard3rs",
      "host": "shard3rs/shard3-1:27017,shard3-2:27017,shard3-3:27017",
      "state": 1,
      "topologyTime": "Timestamp(1737228145, 7)"
    }
  ],
  "ok": 1.0,
  "$clusterTime": {
    "clusterTime": "Timestamp(1737228337, 2)",
    "signature": {
      "hash": "b'\\xe3\\xafL\\xb7\\x7f\\x8f\\x13\\x89\\xa0k\\x13\\x9e\\x12QU|j\\xd9\\xd5\\x05'",
      "keyId": 7461337961091563545
    }
  },
  "operationTime": "Timestamp(1737228337, 2)"
}


In [8]:
# Chunk distribution: count the documents in config.chunks per shard, so we
# can see how many chunks each shard holds across all sharded collections.
count_chunks_per_shard = {"$group": {"_id": "$shard", "numChunks": {"$sum": 1}}}
largest_first = {"$sort": {"numChunks": -1}}
pipeline = [count_chunks_per_shard, largest_first]

chunk_distribution = list(client["config"]["chunks"].aggregate(pipeline))

print("\nChunk distribution across shards:")
# One JSON document per shard, in the order the pipeline returned them.
for shard_info in chunk_distribution:
    print_mongo(shard_info)



Chunk distribution across shards:
{
  "_id": "shard1rs",
  "numChunks": 3
}
{
  "_id": "shard2rs",
  "numChunks": 3
}
{
  "_id": "shard3rs",
  "numChunks": 3
}


In [None]:
# Chunk balancer settings
# Look up the "balancer" document in config.settings. find_one() returns None
# when no such document exists, so say that explicitly instead of printing a
# bare "null".
# NOTE(review): a missing document presumably means the balancer settings
# were never changed from their defaults - confirm for this cluster.
balancer_settings = client.config.settings.find_one({"_id": "balancer"})
print("\nBalancer settings:")
if balancer_settings is None:
    print("No 'balancer' document found in config.settings.")
else:
    print_mongo(balancer_settings)

In [None]:
# Find all collections inside businessdb and print their validation rules
# (e.g. a JSON Schema validator), if any.
#
# list_collections() returns a cursor that is iterated to exhaustion here.
# Reading only cursor.firstBatch from the raw listCollections reply would
# silently drop any collections that did not fit in the first batch.
# collections_info is therefore a list of collection-definition documents.
collections_info = list(db.list_collections(filter={"type": "collection"}))

print("Schema (validation rules) for all collections in 'businessdb':\n")
for coll_def in collections_info:
    coll_name = coll_def["name"]
    # Default to an empty dict when the definition carries no "options".
    options = coll_def.get("options", {})

    # If there's a validator, it shows up under options.validator
    validator = options.get("validator")

    print(f"Collection: {coll_name}")
    if validator:
        print("Validator (JSON Schema):")
        print_mongo(validator)
    else:
        print("No validator or JSON Schema found.")
    print("-" * 50)
