In [1]:
import sys
!{sys.executable} -m pip install pandas pymongo --quiet

import pandas as pd
from pymongo import MongoClient
from datetime import datetime
import json
import time
import warnings
warnings.filterwarnings('ignore')

def print_mongo(obj):
    """Write a MongoDB result to stdout as 2-space-indented JSON.

    Values the json module cannot encode natively are rendered with str().
    """
    rendered = json.dumps(obj, default=str, indent=2)
    print(rendered)

def get_mongo_client(max_retries=5, retry_delay=5, uri=None):
    """Connect to MongoDB, retrying on failure, and return a verified client.

    Args:
        max_retries: Maximum number of connection attempts; must be >= 1.
        retry_delay: Seconds to sleep between failed attempts.
        uri: Connection string. When None, the MONGO_URI environment variable
            is used, falling back to the notebook's default router URI.

    Returns:
        A MongoClient that has answered a ``ping`` on the admin database.

    Raises:
        ValueError: If max_retries is less than 1.
        Exception: The error from the final attempt is re-raised once all
            attempts are exhausted.
    """
    import os  # local import so this cell does not rely on another cell's imports

    # Without this guard the loop would not run and None would be returned silently.
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    if uri is None:
        # Allow credentials to be supplied from the environment instead of the notebook.
        uri = os.environ.get(
            "MONGO_URI",
            'mongodb://admin:admin@router1:27017/businessdb?authSource=admin',
        )

    for attempt in range(max_retries):
        client = None
        try:
            client = MongoClient(uri)
            # The ping confirms the server is actually reachable before the client is handed back.
            client.admin.command('ping')
            print("Successfully connected to MongoDB")
            return client
        except Exception as e:
            # Release the failed client's resources before retrying or giving up.
            if client is not None:
                client.close()
            print(f"Connection attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                raise

# Open the shared connection once; the rest of the notebook reuses `client` and `db`.
client = get_mongo_client()
db = client["businessdb"]

Successfully connected to MongoDB


In [2]:
# Ask the server for its serverStatus document and print it.
# Any failure is reported as a message so the notebook keeps running.
try:
    server_status = client["admin"].command("serverStatus")
    print("\nServer status (mongos):")
    print_mongo(server_status)
except Exception as err:
    print(f"Error retrieving server status: {err}")


Server status (mongos):
{
  "host": "router1",
  "version": "6.0.19",
  "process": "mongos",
  "pid": 1,
  "uptime": 4260.0,
  "uptimeMillis": 4259827,
  "uptimeEstimate": 4259,
  "localTime": "2024-12-30 21:29:37.302000",
  "asserts": {
    "regular": 0,
    "msg": 0,
    "user": 55886,
    "tripwire": 0,
    "rollovers": 0
  },
  "connections": {
    "current": 84,
    "available": 838776,
    "totalCreated": 15618,
    "active": 26,
    "threaded": 84,
    "exhaustIsMaster": 0,
    "exhaustHello": 17,
    "awaitingTopologyChanges": 25,
    "loadBalanced": 0
  },
  "defaultRWConcern": {
    "defaultReadConcern": {
      "level": "local"
    },
    "defaultWriteConcern": {
      "w": "majority",
      "wtimeout": 0
    },
    "defaultWriteConcernSource": "implicit",
    "defaultReadConcernSource": "implicit",
    "localUpdateWallClockTime": "2024-12-30 21:29:32.385000"
  },
  "extra_info": {
    "note": "fields vary by platform",
    "user_time_us": 56153193,
    "system_time_us": 37

In [3]:
# Build info: MongoDB version, git commit, and other details of the server build
admin_db = client["admin"]
build_info = admin_db.command("buildInfo")
print("\nBuild Info:")
print_mongo(build_info)


Build Info:
{
  "version": "6.0.19",
  "gitVersion": "a7ada5ff3a4d8a1e2ed88f86bd6b3d1d16cb43c6",
  "modules": [],
  "allocator": "tcmalloc",
  "javascriptEngine": "mozjs",
  "sysInfo": "deprecated",
  "versionArray": [
    6,
    0,
    19,
    0
  ],
  "openssl": {
    "running": "OpenSSL 3.0.2 15 Mar 2022",
    "compiled": "OpenSSL 3.0.2 15 Mar 2022"
  },
  "buildEnvironment": {
    "distmod": "ubuntu2204",
    "distarch": "x86_64",
    "cc": "/opt/mongodbtoolchain/v3/bin/gcc: gcc (GCC) 8.5.0",
    "ccflags": "-Werror -include mongo/platform/basic.h -ffp-contract=off -fasynchronous-unwind-tables -ggdb -Wall -Wsign-compare -Wno-unknown-pragmas -Winvalid-pch -fno-omit-frame-pointer -fno-strict-aliasing -O2 -march=sandybridge -mtune=generic -mprefer-vector-width=128 -Wno-unused-local-typedefs -Wno-unused-function -Wno-deprecated-declarations -Wno-unused-const-variable -Wno-unused-but-set-variable -Wno-missing-braces -fstack-protector-strong -fdebug-types-section -Wa,--nocompress-debug-

In [None]:
# Host info: run the hostInfo admin command, which describes the machine
# the server process runs on (CPU architecture, operating system, etc.)
host_info = client["admin"].command("hostInfo")
print("\nHost Info:")
print_mongo(host_info)

In [None]:
# View databases: fetch the names of all databases visible through this
# connection and print them as a JSON list.
databases = client.list_database_names()
print("\nDatabases accessible via this router:")
print_mongo(databases)

In [None]:
# List collections: fetch the names of the collections in `db`
# (the businessdb handle created in the first cell) and print them as a JSON list.
collections = db.list_collection_names()
print("\nCollections in 'businessdb':")
print_mongo(collections)

In [8]:
# List all shards: run the listShards admin command and print the full reply
list_shards = client["admin"].command("listShards")
print("Current shards in the cluster:")
print_mongo(list_shards)

Current shards in the cluster:
{
  "shards": [
    {
      "_id": "shard1rs",
      "host": "shard1rs/shard1-1:27017,shard1-2:27017,shard1-3:27017",
      "state": 1,
      "topologyTime": "Timestamp(1735589939, 4)"
    },
    {
      "_id": "shard2rs",
      "host": "shard2rs/shard2-1:27017,shard2-2:27017,shard2-3:27017",
      "state": 1,
      "topologyTime": "Timestamp(1735589939, 10)"
    },
    {
      "_id": "shard3rs",
      "host": "shard3rs/shard3-1:27017,shard3-2:27017,shard3-3:27017",
      "state": 1,
      "topologyTime": "Timestamp(1735589940, 1)"
    }
  ],
  "ok": 1.0,
  "$clusterTime": {
    "clusterTime": "Timestamp(1735594680, 48)",
    "signature": {
      "hash": "b'\\xe2\\x9b\\x808\\xaeY\\xe2\\xca]\\x06,\\xca\\xf8i0\\x8d|\\xaddJ'",
      "keyId": 7454301898422616089
    }
  },
  "operationTime": "Timestamp(1735594680, 48)"
}


In [10]:
# Chunk distribution: count the documents in config.chunks per shard,
# i.e. how many chunks each shard holds across all sharded collections.
pipeline = [
    # One output document per shard, with a running count of its chunk documents
    {"$group": {"_id": "$shard", "numChunks": {"$sum": 1}}},
    # Shards holding the most chunks come first
    {"$sort": {"numChunks": -1}},
]
chunks_coll = client["config"]["chunks"]
chunk_distribution = list(chunks_coll.aggregate(pipeline))
print("\nChunk distribution across shards:")
for shard_info in chunk_distribution:
    print_mongo(shard_info)



Chunk distribution across shards:
{
  "_id": "shard1rs",
  "numChunks": 1027
}
{
  "_id": "shard2rs",
  "numChunks": 3
}
{
  "_id": "shard3rs",
  "numChunks": 3
}


In [11]:
# Chunk balancer settings: read the document with _id "balancer" from config.settings
# (find_one returns None when no such document exists)
settings_coll = client["config"]["settings"]
balancer_settings = settings_coll.find_one({"_id": "balancer"})
print("\nBalancer settings:")
print_mongo(balancer_settings)


Balancer settings:
{
  "_id": "balancer",
  "mode": "full",
  "stopped": false
}


In [None]:
# Find all collections inside businessdb and print their JSON Schema

# list_collections() returns a cursor that is iterated to exhaustion here.
# Reading only cursor.firstBatch from the raw listCollections command reply
# would drop any collections the server returns in later batches.
# `collections_info` is therefore a list of collection definition documents.
collections_info = list(db.list_collections(filter={"type": "collection"}))

print("Schema (validation rules) for all collections in 'businessdb':\n")
for coll_def in collections_info:
    coll_name = coll_def["name"]
    options = coll_def.get("options", {})

    # If there's a validator, it shows up under options.validator
    validator = options.get("validator")

    print(f"Collection: {coll_name}")
    if validator:
        print("Validator (JSON Schema):")
        print_mongo(validator)
    else:
        print("No validator or JSON Schema found.")
    print("-" * 50)
