In [2]:
!pip install faker pymongo pandas tqdm

Collecting faker
  Downloading faker-37.4.0-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m0m
Collecting pandas
  Downloading pandas-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting tzdata
  Using cached tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Collecting pytz>=2020.1
  Using cached pytz-2025.2-py2.py3-none-any.whl (509 kB)
Installing collected packages: pytz, tzdata, pandas, faker
Successfully installed faker-37.4.0 pandas-2.3.0 pytz-2025.2 tzdata-2025.2


In [None]:
from faker import Faker
import pandas as pd
import random
from tqdm import tqdm
from pymongo import MongoClient
from datetime import datetime, time,timedelta
from dotenv import load_dotenv

import uuid
import os

# Load variables from a local .env file (if present) into the process
# environment before the connection string is read.
load_dotenv()

fake = Faker()

# Fail fast when the connection string is missing: MongoClient(None) falls
# back to its default host (localhost), and the seeding cell below drops
# collections in whatever database it ends up connected to.
uri = os.getenv("MONGODB_URI")
if not uri:
    raise RuntimeError(
        "MONGODB_URI is not set; define it in the environment or in a .env file."
    )
client = MongoClient(uri)
db = client['qest_db']

# Default dataset sizes. NUM_CLIENTS, NUM_CLASSES, NUM_ORDERS and NUM_PAYMENTS
# are reassigned by the seeding cell before use; NUM_COURSES is not referenced
# by the visible cells (courses are built from a fixed title list).
NUM_CLIENTS = 500
NUM_ORDERS = 1000
NUM_PAYMENTS = 1200
NUM_COURSES = 20
NUM_CLASSES = 100

In [52]:
from datetime import datetime, time, timedelta
from faker import Faker
from pymongo import MongoClient
from uuid import uuid4
import random

# Start from a clean slate: remove every collection this cell repopulates.
collections = [
    'clients',
    'courses',
    'classes',
    'orders',
    'payments',
    'attendance',
]
for coll in collections:
    db.drop_collection(coll)

# Helper: midnight datetime for a calendar date
def dt(d):
    """Return a naive ``datetime`` at 00:00:00 on the calendar day of ``d``.

    ``d`` may be a ``date`` or a ``datetime``; only its year, month and day
    are kept. Presumably needed because the documents are stored through
    pymongo, which encodes ``datetime`` values but not plain ``date`` objects.
    """
    return datetime(d.year, d.month, d.day)

# ↪ 1️⃣ Clients
# Each client gets a UUID string id, fake contact details, a sign-up date
# within the last two years and a last-activity date within the last 90 days.
NUM_CLIENTS = 200
activity_floor = datetime.today().date() - timedelta(days=90)
clients = []
for _ in range(NUM_CLIENTS):
    signed_up = fake.date_between('-2y', 'today')
    # A client cannot have been active before signing up, so the activity
    # window starts at the later of the sign-up date and 90 days ago.
    last_seen = fake.date_between(max(signed_up, activity_floor), 'today')
    clients.append({
        '_id': str(uuid4()),
        'name': fake.name(),
        'email': fake.unique.email(),
        'phone': fake.phone_number(),
        'created_at': dt(signed_up),
        'last_active': dt(last_seen)
    })
db.clients.insert_many(clients)
print("Inserted clients:", len(clients))

# ↪ 2️⃣ Courses
# One course document is created per title in the pool below.
field_pool = [
    "Yoga", "Pilates", "Aerial Yoga", "Tai Chi",
    "Karate", "Kickboxing", "Zumba", "Barre Fitness",
    "HIIT", "Circuit Training", "Strength Training", "CrossFit",
]

courses = []
for title in field_pool:
    # Every course gets its own fake instructor, a short blurb and a
    # randomly chosen lifecycle status.
    courses.append({
        '_id': str(uuid4()),
        'title': title,
        'instructor': fake.name(),
        'description': fake.text(max_nb_chars=150),
        'status': random.choice(['upcoming', 'ongoing', 'completed'])
    })

db.courses.insert_many(courses)
print("Inserted courses:", len(courses))

# ↪ 3️⃣ Classes
# Each class is a dated run of a randomly chosen course and inherits that
# course's instructor.
NUM_CLASSES = 60
classes = []
seed_day = datetime.today().date()
for _ in range(NUM_CLASSES):
    c = random.choice(courses)
    sd = fake.date_between('-90d','+90d')
    ed = sd + timedelta(days=random.randint(5,30))
    # Derive the status from the dates instead of drawing it at random, so a
    # class that has not started yet is never stored as 'finished' (and a
    # class that already ended is never stored as 'scheduled').
    if sd > seed_day:
        status = 'scheduled'
    elif ed < seed_day:
        status = 'finished'
    else:
        status = 'in_progress'
    classes.append({
        '_id': str(uuid4()),
        'course_id': c['_id'],
        'instructor': c['instructor'],
        'start_date': dt(sd),
        'end_date': dt(ed),
        'capacity': random.randint(5,40),
        'status': status
    })
db.classes.insert_many(classes)
print("Inserted classes:", len(classes))

# ↪ 4️⃣ Orders
# Every order links one client to one purchasable service (a course or a
# class) with a random amount and a 'paid' / 'pending' status.
NUM_ORDERS = 500
# Tag each service with its type once, up front. This avoids rebuilding
# `courses + classes` on every iteration and no longer relies on the presence
# of a 'title' key to tell courses from classes.
services = [('course', c) for c in courses] + [('class', k) for k in classes]
order_floor = datetime.today().date() - timedelta(days=180)
orders = []
for _ in range(NUM_ORDERS):
    cust = random.choice(clients)
    service_type, service = random.choice(services)
    # An order cannot predate the client's sign-up, so the window starts at
    # the later of the sign-up date and 180 days ago.
    earliest = max(cust['created_at'].date(), order_floor)
    ordered_on = fake.date_between(earliest, 'today')
    orders.append({
        '_id': str(uuid4()),
        'client_id': cust['_id'],
        'service_id': service['_id'],
        'service_type': service_type,
        'amount': round(random.uniform(50,500),2),
        'status': random.choice(['paid','pending']),
        'created_at': dt(ordered_on)
    })
db.orders.insert_many(orders)
print("Inserted orders:", len(orders))

# ↪ 5️⃣ Payments
# Up to NUM_PAYMENTS orders receive exactly one payment each: the full amount
# for 'paid' orders, a random partial amount for 'pending' ones.
NUM_PAYMENTS = 400
payments = []
p_orders = [o for o in orders if o['status'] in ('paid','pending')]
# Sample WITHOUT replacement: picking with random.choice in a loop could hand
# the same 'paid' order several full payments, overpaying it.
for o in random.sample(p_orders, k=min(NUM_PAYMENTS, len(p_orders))):
    # Named `paid_on` rather than `pd` so the `pandas as pd` alias imported in
    # the setup cell is not clobbered.
    paid_on = fake.date_between(start_date=o['created_at'], end_date='today')
    amt = o['amount'] if o['status']=='paid' else round(random.uniform(0,o['amount']),2)
    payments.append({
        '_id': str(uuid4()),
        'order_id': o['_id'],
        'amount': amt,
        'method': random.choice(['card','cash','bank']),
        'paid_at': dt(paid_on)
    })
# insert_many rejects an empty document list, so skip the call in that case.
if payments:
    db.payments.insert_many(payments)
print("Inserted payments:", len(payments))

# ↪ 6️⃣ Attendance (new)
# For every class that has already started, enrol a random group of clients
# and record attendance on a random set of distinct days between the class
# start and min(class end, today).
attendance = []
today = datetime.today().date()

for cl in classes:
    # Ensure valid class date range
    cls_start = cl['start_date'].date()
    cls_end = min(cl['end_date'].date(), today)
    if cls_start >= cls_end:
        continue  # skip classes with no valid attendance window

    # Enrol between 5 clients and the class capacity, clamped to the number of
    # clients available so randint never receives an inverted range.
    max_enrolled = min(len(clients), cl['capacity'])
    enrolled_clients = random.sample(
        clients, k=random.randint(min(5, max_enrolled), max_enrolled)
    )

    # Every calendar day in the attendance window, both ends included.
    span_days = (cls_end - cls_start).days
    session_days = [cls_start + timedelta(days=i) for i in range(span_days + 1)]

    for cust in enrolled_clients:
        # Choose number of attendance entries per class
        wanted = random.randint(3, max(3, min(cl['capacity'], span_days)))
        # Draw distinct days: repeated dates would produce several rows for
        # the same client/class/date, possibly with conflicting 'present'.
        chosen_days = random.sample(session_days, k=min(wanted, len(session_days)))
        for day in sorted(chosen_days):
            attendance.append({
                '_id': str(uuid4()),
                'client_id': cust['_id'],
                'class_id': cl['_id'],
                'date': dt(day),
                'present': random.random() < 0.8
            })

# insert_many rejects an empty document list (e.g. when every class starts in
# the future), so skip the call in that case.
if attendance:
    db.attendance.insert_many(attendance)
print("Inserted attendance records:", len(attendance))

Inserted clients: 200
Inserted courses: 12
Inserted classes: 60
Inserted orders: 500
Inserted payments: 400
Inserted attendance records: 2550
