# Load data into Cosmos DB using the MongoDB API

This notebook demonstrates how to load data from the Cosmic Works JSON files into Cosmos DB using **PyMongo** and the MongoDB API.

In [2]:
import os
import json
import pymongo
from pymongo import UpdateOne, DeleteMany
from models import CalendarCourse, Degree, Course, Department, User
from dotenv import load_dotenv
load_dotenv()

True

### Load data

In [3]:
def load_json(path):
    """Read the file at ``path`` and return its parsed JSON content."""
    # JSON files are UTF-8; passing the encoding explicitly avoids depending on
    # the platform's default text encoding when reading them.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


# Raw, unvalidated JSON payloads. The insert cell below builds model objects
# from these, so keep them untouched.
courses = load_json('../data/courses.json')
degrees = load_json('../data/programs.json')  # programs.json holds the degree records
departments = load_json('../data/departments.json')
sections = load_json('../data/sections.json')
users = load_json('../data/users.json')

### Connect to the Cosmos DB database

In [4]:
# Connection string comes from the environment (load_dotenv() in the imports
# cell lets a local .env file supply it).
CONNECTION_STRING = os.getenv("DB_CONNECTION_STRING")
if not CONNECTION_STRING:
    # os.getenv returns None when the variable is missing, and
    # pymongo.MongoClient(None) falls back to localhost instead of failing.
    # Stop here so the destructive "clear collections" cell below can never run
    # against an unintended server.
    raise RuntimeError(
        "DB_CONNECTION_STRING is not set; define it in the environment or in a .env file"
    )

db_client = pymongo.MongoClient(CONNECTION_STRING)
db = db_client["db"]

# Handles to the collections used by the rest of the notebook.
cal_course_coll = db["calendar_courses"]
course_coll = db["courses"]
degree_coll = db["degrees"]
department_coll = db["departments"]
users_coll = db["users"]
user_courses_coll = db["user_courses"]

  db_client = pymongo.MongoClient(CONNECTION_STRING)


### Clear collections

In [5]:
# Remove every document from each of these collections, in this order.
# NOTE(review): calendar_courses (cal_course_coll) is not cleared here -
# confirm that leaving its existing documents in place is intentional.
collections_to_clear = (
    course_coll,
    degree_coll,
    department_coll,
    users_coll,
    user_courses_coll,
)
for collection in collections_to_clear:
    # An empty filter matches all documents in the collection.
    collection.bulk_write([DeleteMany({})])
print("Database cleared")

Database cleared


### Insert users, departments, calendar courses, degrees, and 2024/2025 courses and sections

It might look like a lot, but it's just the same pattern repeated for each collection.

In [None]:
def upsert_models(collection, models, id_attr="code"):
    """Upsert every model in ``models`` into ``collection`` in one bulk write.

    Each model is written with ``$set`` of ``model_dump(by_alias=True)`` and
    matched on ``_id`` equal to the model attribute named by ``id_attr``.

    Args:
        collection: pymongo collection to write to.
        models: iterable of model objects exposing ``model_dump`` and ``id_attr``.
        id_attr: name of the model attribute used as the document ``_id``.

    Returns:
        The result of ``collection.bulk_write``, or ``None`` when ``models`` is
        empty (pymongo raises ``InvalidOperation`` for an empty request list).
    """
    operations = [
        UpdateOne(
            {"_id": getattr(obj, id_attr)},
            {"$set": obj.model_dump(by_alias=True)},
            upsert=True,
        )
        for obj in models
    ]
    if not operations:
        return None
    return collection.bulk_write(operations)


# The model lists get their own names instead of overwriting the raw JSON
# variables (users, departments, degrees), so this cell can be re-run safely.

# User
user_models = [User(**data) for data in users]
upsert_models(users_coll, user_models, id_attr="id")

# Department
department_models = [Department(**data) for data in departments.values()]
upsert_models(department_coll, department_models)

# CalendarCourse (only course entries that carry an 'embedding' key are loaded)
calendar_course = [CalendarCourse(**data) for data in courses.values() if 'embedding' in data]
upsert_models(cal_course_coll, calendar_course)
cal_course_coll.create_index([("name", pymongo.ASCENDING)], unique=False)

# Degree
degree_models = [Degree(**data) for data in degrees.values()]
upsert_models(degree_coll, degree_models)

# Course (sections are written in ascending order of their 'code')
courses_sections = [Course(**data) for data in sorted(sections.values(), key=lambda x: x['code'])]
upsert_models(course_coll, courses_sections)

In [8]:
# Close the MongoDB client now that all writes above have been issued.
db_client.close()