In [None]:
!pip install pymongo

In [None]:
import random
from datetime import datetime, timedelta, timezone
from itertools import islice

from pymongo import MongoClient

In [None]:
def generate_patient_data(num_records, start_time, patient_id_start, patient_id_end):
    """Create a list of synthetic patient vital-sign documents.

    One document is produced per second, beginning at ``start_time``.

    Args:
        num_records: Number of documents to generate.
        start_time: ``datetime`` of the first reading; reading ``i`` is
            stamped ``start_time + i`` seconds.
        patient_id_start: Lowest patient number (inclusive).
        patient_id_end: Highest patient number (inclusive).

    Returns:
        A list of dicts with the keys ``patient_id``, ``time``, ``date``,
        ``heart_rate``, ``blood_pressure`` (``systolic`` / ``diastolic``)
        and ``temperature``.
    """

    def _build_reading(offset_seconds):
        # Timestamp of this reading, and the same day truncated to midnight
        # (kept as a datetime rather than a date object).
        timestamp = start_time + timedelta(seconds=offset_seconds)
        day_start = timestamp.replace(hour=0, minute=0, second=0, microsecond=0)

        # Draw the random values in a fixed order so a seeded run is reproducible.
        patient_number = random.randint(patient_id_start, patient_id_end)
        pulse = random.randint(60, 100)
        systolic_mm = random.randint(110, 140)
        diastolic_mm = random.randint(70, 90)
        body_temp = round(random.uniform(36.0, 37.5), 1)

        return {
            "patient_id": f"patient_{patient_number}",
            "time": timestamp,
            "date": day_start,
            "heart_rate": pulse,
            "blood_pressure": {
                "systolic": systolic_mm,
                "diastolic": diastolic_mm,
            },
            "temperature": body_temp,
        }

    return [_build_reading(offset) for offset in range(num_records)]

In [None]:
def insert_data_in_batches(collection, data, batch_size=5000):
    """Insert ``data`` into ``collection`` in consecutive batches.

    Each batch is passed to ``collection.insert_many`` as a list, and a
    progress line is printed after every successful batch.

    Args:
        collection: Object exposing ``insert_many(documents)``.
        data: Iterable of documents. Lists as well as lazy iterables such as
            generators are accepted; empty input results in no inserts.
        batch_size: Maximum number of documents per ``insert_many`` call.
            Must be at least 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. Without this check a
            negative value would silently insert nothing.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    remaining = iter(data)
    batch_number = 0
    while True:
        # Pull at most `batch_size` documents; an empty list means we are done.
        batch = list(islice(remaining, batch_size))
        if not batch:
            break
        batch_number += 1
        collection.insert_many(batch)
        print(f"Inserted batch {batch_number} with {len(batch)} records.")

In [None]:
# --- Data-generation settings ---
num_days = 10            # days of history to generate
patient_id_start = 30    # lowest patient number (inclusive)
patient_id_end = 50      # highest patient number (inclusive)

# --- MongoDB target ---
db_name = "healthcare"
collection_name = "patient_data"
# Replace the placeholder with the address of the router before running.
connection_string = "mongodb://<Replace ROUTER IP HERE>:27017/"

In [None]:
# Generate the synthetic dataset: 80,000 readings per day of history,
# with the first reading stamped `num_days` days before now.
num_records = num_days * 80000
# Use an explicit UTC timestamp. PyMongo interprets naive datetimes as UTC, so
# a naive local `datetime.now()` would be stored shifted by the local UTC offset.
start_time = datetime.now(timezone.utc) - timedelta(days=num_days)
patient_data = generate_patient_data(num_records, start_time, patient_id_start, patient_id_end)

# Connect to the cluster and set up handles used by the following cells.
client = MongoClient(connection_string)
admin_db = client.admin
# Despite its name, `config_db` is the `settings` collection of the `config`
# database. The upsert below sets the `chunksize` setting to 2
# (NOTE(review): presumably megabytes, kept small to force chunk splits - confirm).
config_db = client.config.settings
config_db.update_one({"_id": "chunksize"}, {"$set": {"value": 2}}, upsert=True)
db = client[db_name]
collection = db[collection_name]

In [None]:
# Enable sharding for the database, then shard the collection on a hashed
# `patient_id` key. The second call is the cell's last expression, so the
# server's reply is shown as the cell output.
client.admin.command("enableSharding", db_name)
client.admin.command(
    "shardCollection",
    f"{db_name}.{collection_name}",
    key={"patient_id": "hashed"},
)

{'collectionsharded': 'healthcare.patient_data',
 'ok': 1.0,
 '$clusterTime': {'clusterTime': Timestamp(1733177666, 38),
  'signature': {'hash': b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
   'keyId': 0}},
 'operationTime': Timestamp(1733177666, 34)}

In [None]:
# Bulk-load the generated records into the collection, 5000 documents per batch.
insert_data_in_batches(
    collection,
    patient_data,
    batch_size=5000,
)