In [None]:
import sys
!{sys.executable} -m pip install pandas pymongo --quiet

import pandas as pd
from pymongo import MongoClient
import json
import time
import warnings
import threading

warnings.filterwarnings('ignore')

def print_mongo(obj):
    """Print *obj* as 2-space-indented JSON.

    Values the JSON encoder cannot handle natively are rendered with
    ``str()`` (via ``default=str``) instead of raising.
    """
    rendered = json.dumps(obj, default=str, indent=2)
    print(rendered)

def get_mongo_router_client(max_retries=5, retry_delay=5):
    """Connect to MongoDB via the mongos router, retrying on failure.

    Each attempt builds a client for ``router1:27017`` and issues a ``ping``
    against the admin database to confirm the connection actually works.

    Args:
        max_retries: Maximum number of connection attempts.
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        The connected ``MongoClient``. If ``max_retries`` is less than 1 no
        attempt is made and ``None`` is returned.

    Raises:
        Exception: Whatever the final attempt raised, once all attempts
            have failed.
    """
    for attempt in range(max_retries):
        client = None
        try:
            # Connect to the mongos router on 'router1:27017'.
            # NOTE: credentials are hard-coded in the URI.
            client = MongoClient('mongodb://admin:admin@router1:27017/businessdb?authSource=admin')
            client.admin.command('ping')
            print("Successfully connected to MongoDB (via router1)")
            return client
        except Exception as e:
            # Release the failed client before retrying or re-raising, so
            # unsuccessful attempts do not accumulate unclosed clients.
            if client is not None:
                client.close()
            print(f"Connection attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                raise

def get_shard_primary_client(shard_primary_host='shard1-1:27017'):
    """
    Connects directly to the shard's primary node (or your best guess),
    so we can run 'rs.stepDown()' and forcibly trigger a failover.

    NOTE: Adjust 'shard_primary_host' to match your actual primary node.

    Args:
        shard_primary_host: ``host:port`` of the shard node to connect to.

    Returns:
        A ``MongoClient`` for that node, verified with a ``ping``.

    Raises:
        Exception: Whatever the ``ping`` raised if the node could not be
            reached; the client is closed before the error propagates.
    """
    # If the shard requires authentication, specify the same admin credentials,
    # but point to the shard node directly:
    uri = f'mongodb://admin:admin@{shard_primary_host}/admin?authSource=admin'
    shard_client = MongoClient(uri)
    # Check connection; on failure close the client so it is not leaked.
    try:
        shard_client.admin.command('ping')
    except Exception:
        shard_client.close()
        raise
    print(f"Successfully connected to shard primary: {shard_primary_host}")
    return shard_client

In [None]:
# Open the router connection (retries are handled inside the helper) and
# select the "businessdb" database used by the cells below.
router_client = get_mongo_router_client()
db = router_client["businessdb"]

In [None]:
# Verify shards: run the "listShards" command against the admin database
# through the router and pretty-print the reply.
try:
    shards_info = router_client.admin.command("listShards")
    print("\nSharded cluster details (listShards):")
    print_mongo(shards_info)
except Exception as e:
    # Best-effort check: a failure is reported but does not stop the notebook.
    print(f"Could not list shards: {e}")

In [None]:
# Handle to the "organizations" collection of businessdb; the failover worker
# inserts into and reads from this collection.
organizations = db["organizations"]

In [None]:
# Stop flag polled by continuous_operations() at the top of every loop
# iteration; setting it to False lets the worker thread exit.
keep_running = True

def continuous_operations():
    """Insert and read back one test organization every 2 seconds.

    Loops for as long as the module-level ``keep_running`` flag is truthy.
    Each iteration inserts a document keyed ``failover-demo-<i>`` into
    ``organizations`` and looks it up again by ``organizationId``, printing
    whether it was found. Any exception is printed and the same iteration
    number is retried after the sleep, since the counter only advances on
    success.
    """
    i = 0
    while keep_running:
        try:
            doc_id = f"failover-demo-{i}"
            # Write the document, then immediately read it back.
            organizations.insert_one({
                "organizationId": doc_id,
                "name": f"Testing org {i}",
                "industry": "FailoverTest",
                "founded": 2024,
                "numberOfEmployees": i * 10,
            })
            if organizations.find_one({"organizationId": doc_id}):
                print(f"[Worker] Iter {i}: Inserted+found doc {doc_id}")
            else:
                print(f"[Worker] Iter {i}: Could NOT find doc {doc_id}")
            i += 1
        except Exception as e:
            # If the shard's primary goes down, we might see a brief exception
            print(f"[Worker] Error on iteration {i}: {e}")
        time.sleep(2)

# Run the insert/read loop on a background thread so it keeps issuing
# operations while the following cells trigger the failover. daemon=True means
# the thread will not keep the interpreter alive on exit.
worker_thread = threading.Thread(target=continuous_operations, daemon=True)
worker_thread.start()

# Let the worker run for a few iterations (it sleeps 2 seconds per iteration)
# before announcing the failover.
time.sleep(6)
print("\nNow we'll trigger a failover on shard1 by stepping down its primary...")

In [None]:
# Connect to the node we believe is shard1's primary. A failure here means no
# step-down was ever issued, so it is reported separately from the (often
# benign) errors that the step-down command itself can raise.
try:
    shard_primary_client = get_shard_primary_client('shard1-1:27017')  # or whichever node is currently primary
except Exception as e:
    print(f"Could not connect to the shard node; no step-down was issued: {e}")
else:
    try:
        # The 'replSetStepDown' command steps down the current primary for N seconds
        # forcing a new election in that replica set
        stepdown_result = shard_primary_client.admin.command({
            "replSetStepDown": 30,  # step down for 30s
            "force": True
        })
        print("\nStep down result:")
        print_mongo(stepdown_result)

    except Exception as e:
        # Typically, the step-down command might throw an exception even when successful,
        # because the primary is no longer primary right after step-down
        print(f"Caught exception while stepping down primary (often normal): {e}")

    # We wait a bit to let the replica set elect a new primary. This only
    # makes sense when a step-down was actually attempted.
    print("\nWaiting 10 seconds for the replica set to elect a new primary...")
    time.sleep(10)

In [None]:
# Give the worker another 10 seconds of iterations before stopping it.
time.sleep(10)

# Stop the worker thread: clear the flag it polls, then wait for it to finish.
# The worker only re-checks the flag after its current iteration and 2-second
# sleep, so join() may block briefly.
keep_running = False
worker_thread.join()