In [None]:
import sys
!{sys.executable} -m pip install pandas pymongo --quiet

import pandas as pd
from pymongo import MongoClient
from datetime import datetime
import json
import time
import warnings
warnings.filterwarnings('ignore')

def print_mongo(obj):
    """Write a MongoDB result to stdout as 2-space indented JSON.

    Any value the json module cannot encode natively is converted with
    ``str()`` (via ``default=str``) instead of raising.
    """
    rendered = json.dumps(obj, default=str, indent=2)
    print(rendered)

def get_mongo_client(max_retries=5, retry_delay=5, uri=None):
    """Connect to MongoDB, retrying a fixed number of times on failure.

    Args:
        max_retries: Total number of connection attempts; must be >= 1.
        retry_delay: Seconds to sleep between two failed attempts.
        uri: MongoDB connection string. When omitted, the ``MONGO_URI``
            environment variable is used if it is set and non-empty;
            otherwise the notebook's original router1 URI is used.

    Returns:
        A ``MongoClient`` that has successfully answered a ``ping``
        command on the ``admin`` database.

    Raises:
        ValueError: If ``max_retries`` is less than 1. Previously the loop
            was skipped and ``None`` was returned silently.
        Exception: Whatever the final attempt raised is re-raised unchanged.
    """
    import os  # local import: the notebook's import cell does not bring in os

    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    if uri is None:
        # Allow credentials/host to be overridden without editing the notebook.
        uri = os.environ.get('MONGO_URI') or (
            'mongodb://admin:admin@router1:27017/businessdb?authSource=admin'
        )

    for attempt in range(max_retries):
        client = None
        try:
            client = MongoClient(uri)
            # Constructing the client does not prove the server is reachable;
            # the ping is what actually validates the connection.
            client.admin.command('ping')
            print("Successfully connected to MongoDB")
            return client
        except Exception as e:
            # Release the failed client so retries do not pile up open clients.
            if client is not None:
                client.close()
            print(f"Connection attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                # Out of attempts: surface the last error to the caller.
                raise

# Shared connection and database handle used by the cells below.
client = get_mongo_client()
db = client["businessdb"]

In [None]:
# Server status: run the serverStatus admin command over the shared client
# and print the reply. Any failure is reported rather than raised, so the
# rest of the notebook can still be run.
try:
    admin_db = client["admin"]
    server_status = admin_db.command("serverStatus")
    print("\nServer status (mongos):")
    print_mongo(server_status)
except Exception as e:
    print(f"Error retrieving server status: {e}")

In [None]:
# Build info: run the buildInfo admin command (MongoDB version, git commit,
# and other build details) and print the reply.
build_info = client["admin"].command("buildInfo")
print("\nBuild Info:")
print_mongo(build_info)

In [None]:
# Host info: run the hostInfo admin command, which describes the underlying
# host/VM/hardware where MongoDB runs (CPU arch, operating system, etc.).
host_info = client["admin"].command("hostInfo")
print("\nHost Info:")
print_mongo(host_info)

In [None]:
# Databases: list the names of all databases visible through this connection.
databases = client.list_database_names()
print("\nDatabases accessible via this router:")
print_mongo(databases)

In [None]:
# Collections: list the collection names of the businessdb handle (`db`).
collections = db.list_collection_names()
print("\nCollections in 'businessdb':")
print_mongo(collections)

In [None]:
# Shards: run the listShards admin command and print the reply.
list_shards = client["admin"].command("listShards")
print("Current shards in the cluster:")
print_mongo(list_shards)

In [None]:
# Example: bypass the router and talk to one shard member directly, then ask
# it for its replica set status. Failures are printed instead of raised.
shard_primary_client = MongoClient("mongodb://shardA-primary:27017")
shard_primary_admin = shard_primary_client["admin"]

try:
    repl_status = shard_primary_admin.command("replSetGetStatus")
    print("\nReplica Set Status on shardA-primary:")
    print_mongo(repl_status)
except Exception as e:
    print(f"Error retrieving replSetGetStatus: {e}")

In [None]:
# Check which collections in the entire cluster are sharded, and see the shard key info.
# The filter excludes only documents explicitly flagged dropped=True. Matching
# on {"dropped": False} would also exclude every document that has no
# "dropped" field at all, which leaves the listing empty on server versions
# whose config.collections documents do not carry that field.
sharded_collections = list(
    client.config.collections.find({"dropped": {"$ne": True}})
)
print("\nSharded collections metadata (from config.collections):")
for coll in sharded_collections:
    print_mongo(coll)

In [None]:
# Chunk distribution: count the documents in config.chunks per "shard" value
# (across all sharded collections) and list the shards with the most chunks first.
count_per_shard = {"$group": {"_id": "$shard", "numChunks": {"$sum": 1}}}
most_chunks_first = {"$sort": {"numChunks": -1}}
pipeline = [count_per_shard, most_chunks_first]

chunks_collection = client["config"]["chunks"]
chunk_distribution = list(chunks_collection.aggregate(pipeline))

print("\nChunk distribution across shards:")
for shard_info in chunk_distribution:
    print_mongo(shard_info)


In [None]:
# Balancer settings: fetch the document with _id "balancer" from
# config.settings. find_one returns None when no such document exists,
# in which case print_mongo prints "null".
settings_collection = client["config"]["settings"]
balancer_settings = settings_collection.find_one({"_id": "balancer"})
print("\nBalancer settings:")
print_mongo(balancer_settings)