# In [None]:

# Python code - adapted data collection & querying with MongoDB and UCI Student Performance data

# 1. Import necessary packages
import pandas as pd
from pymongo import MongoClient
import json
from datetime import datetime
import random
import time

# 2. Load UCI Student Performance data
# The file is semicolon-separated, so the delimiter is passed explicitly.
# NOTE(review): verify this URL serves the CSV directly -- the dataset may
# only be published as a zip archive, in which case read_csv will fail here.
url_math = "https://archive.ics.uci.edu/ml/machine-learning-databases/00320/student-mat.csv"
data = pd.read_csv(url_math, delimiter=";")

# Preview the first 5 rows
print(data.head(5))

# Explanation:
# The dataset has info about student grades and demographics.
# We'll use some columns and convert each row into a 'StudentPerformance' record.

# 3. Define a data class to hold student info (like Scala's case class)
class StudentPerformance:
    """One student's record, ready to be stored as a MongoDB document.

    Holds a subset of the student-performance columns plus a caller-supplied
    timestamp. Attribute names double as the document's field names.
    """

    def __init__(self, school, sex, age, address, famsize, Medu, Fedu, G1, G2, G3, timestamp):
        """Store the given field values on the instance unchanged."""
        self.school = school
        self.sex = sex
        self.age = age
        self.address = address
        self.famsize = famsize
        self.Medu = Medu  # Mother's education
        self.Fedu = Fedu  # Father's education
        self.G1 = G1      # First period grade
        self.G2 = G2      # Second period grade
        self.G3 = G3      # Final grade
        self.timestamp = timestamp  # Supplied by the caller (the script passes epoch ms)

    @staticmethod
    def _to_native(value):
        """Return *value* as a built-in Python scalar when it is a numpy scalar.

        Values pulled out of a pandas row are typically numpy scalars
        (e.g. ``numpy.int64``), which PyMongo's BSON encoder rejects.
        numpy scalars expose ``.item()`` to obtain the equivalent built-in
        value; anything without that method is returned untouched.
        """
        item = getattr(value, "item", None)
        return item() if callable(item) else value

    def to_dict(self):
        """Return a new dict of this record's fields for storage in MongoDB.

        A fresh dict is built on every call rather than exposing
        ``self.__dict__`` directly: PyMongo adds an ``_id`` key to the
        document it is given, and that must not leak back onto the instance.
        numpy scalar values are converted to built-in Python types so the
        document is BSON-encodable.
        """
        return {
            name: self._to_native(value)
            for name, value in self.__dict__.items()
        }

# Helpers shared by the single and bulk inserts below.
def _now_ms():
    """Return the current time as integer milliseconds since the epoch."""
    return int(time.time() * 1000)


def _student_from_row(row, timestamp):
    """Build a StudentPerformance from one dataset row and a timestamp.

    Numeric columns are cast to built-in ``int`` because values read from a
    pandas row are typically numpy scalars, which PyMongo cannot encode.
    """
    return StudentPerformance(
        school=row["school"],
        sex=row["sex"],
        age=int(row["age"]),
        address=row["address"],
        famsize=row["famsize"],
        Medu=int(row["Medu"]),
        Fedu=int(row["Fedu"]),
        G1=int(row["G1"]),
        G2=int(row["G2"]),
        G3=int(row["G3"]),
        timestamp=timestamp,
    )


# 4. Connect to MongoDB (must be running locally or use a cloud service)
client = MongoClient("mongodb://localhost:27017/")
try:
    db = client["student_performance_db"]
    collection = db["performance"]

    # 5. Insert a single student record (first row of the dataset)
    row = data.iloc[0]
    student = _student_from_row(row, _now_ms())  # current timestamp in ms
    result = collection.insert_one(student.to_dict())
    print("Inserted single document with id:", result.inserted_id)

    # 6. Bulk insert 100 randomly sampled rows; each record's timestamp is
    #    pushed i seconds into the future to simulate different timestamps.
    records = [
        _student_from_row(data.sample(n=1).iloc[0], _now_ms() + i * 1000).to_dict()
        for i in range(100)
    ]
    bulk_result = collection.insert_many(records)
    print(f"Inserted {len(bulk_result.inserted_ids)} documents in bulk")

    # 7. Query: Find all students with school 'GP' (like sensorId query in Scala)
    query_school = "GP"
    found_docs = collection.find({"school": query_school})

    print(f"Students from school {query_school}:")
    for doc in found_docs.limit(5):  # show first 5 results
        print(doc)

    # 8. Query: Find last 10 records sorted by timestamp descending
    #    (like your by_timestamp view)
    last_10 = list(collection.find().sort("timestamp", -1).limit(10))
    print("Last 10 inserted student records by timestamp:")
    for rec in last_10:
        print(rec)
finally:
    # 9. Close the MongoDB client connection, even if an insert or query failed
    client.close()
