# MongoDB

### Setting things up

Remember to restart the kernel after running this code cell if you didn't already have pymongo installed (you only need to do this once)

In [27]:
pip install pymongo

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pymongo
from pymongo import MongoClient
import pandas as pd

In [4]:
client = MongoClient('localhost', 27017) 

In [5]:
client

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)

In [6]:
db = client.lms # "example" is the database name

In [7]:
collection = db.students ## collection within database

### Inserting/deleting data

MongoDB is made up of, in a simplified way, massive JSON objects

### Querying data

## courses

Let's explore some example data that might be similar to the type you are creating in your projects

In [8]:
courses = pd.read_csv('https://raw.githubusercontent.com/methinky/forge_project2/main/test_forge%20-%20Courses.csv')
courses

Unnamed: 0,Course_Name,Code,Description,Instructor
0,COMM_2020,12736,Introduction to Management Accounting,Roger Martin
1,STAT_1601,12564,Introduction to Data Science with R,Prince Afriyie
2,STAT_1601,12645,Introduction to Data Science with R,Richard Ross
3,STAT_1601,21031,Introduction to Data Science with R,Prince Afriyie
4,COMM_2020,14295,Introduction to Management Accounting,Roger Martin
5,COMM_3410,14730,Commercial Law I,Sherri Moore
6,COMM_3410,14731,Commercial Law I,Sherri Moore
7,SLAV_2360,11386,Dracula,Stanley Stephanic
8,APMA_3110,15413,Applied Statistics and Probability,Gary Koenig
9,MATH_3350,15762,Applied Linear Algebra,Jeffrey Holt


We can easily convert flat data to dictionary format with `to_dict()`

We can then insert it into our MongoDB

## Students

In [9]:
students = pd.read_csv("https://raw.githubusercontent.com/methinky/forge_project2/main/test_forge%20-%20Students.csv")
students

Unnamed: 0,Student_ID,First_Name,Last_Name,Email
0,rqb6bc,Melissa,Zheng,rqb6bc@virginia.edu
1,pfg9yq,Anya,Hariharan,pfg9yq@virginia.edu
2,zds3st,Isaac,Tabor,zds3st@virginia.edu
3,jrp86y,Peter,Park,jrp86y@virginia.edu
4,mcf5yc,Aryan,Prabhudesai,mcf5yc@virginia.edu
5,ppz2ry,Alex,Jones,ppz2ry@virginia.edu
6,rin7tx,Cheryl,Abernathy,rin7tx@virginia.edu
7,vip4my,Colleen,Lindstrom,vip4my@virginia.edu
8,eru9fh,Robin,Park,eru9fh@virginia.edu
9,fmp6yj,Laura,Peters,fmp6yj@virginia.edu


## StudentClasses

In [10]:
StudentClasses2 = pd.read_csv("https://raw.githubusercontent.com/methinky/forge_project2/main/test_forge%20-%20StudentClasses%20(2).csv")
StudentClasses2

Unnamed: 0,Code,Student_ID
0,12736,rqb6bc
1,12736,svw7gw
2,12736,zds3st
3,12736,fmp6yj
4,12736,clw3dh
5,12564,ppz2ry
6,12564,pfg9yq
7,12564,zds3st
8,12564,jrp86y
9,12564,mcf5yc


In [11]:
Assignments = pd.read_csv("https://raw.githubusercontent.com/methinky/forge_project2/main/test_forge%20-%20Assignments%20(1).csv")
Assignments

Unnamed: 0,Assignment_Id,Code,Assignment_Title,Weight,Date_Due
0,12736E1,12736,Exam 1,0.1,02/29/24
1,12736E2,12736,Exam 2,0.1,03/21/24
2,12736M,12736,Midterm,0.2,04/14/24
3,12736F,12736,Final,0.6,05/04/24
4,12564E1,12564,Exam 1,0.1,02/20/24
5,12564E2,12564,Exam 2,0.1,03/28/24
6,12564M,12564,Midterm,0.2,04/11/24
7,12564F,12564,Final,0.6,05/07/24
8,12645E1,12645,Exam 1,0.1,05/08/24
9,12645E2,12645,Exam 2,0.1,03/15/24


In [12]:
Grades = pd.read_csv("https://raw.githubusercontent.com/methinky/forge_project2/main/test_forge%20-%20Grades%20(1).csv")
Grades

Unnamed: 0,Student_ID,Assignment_Id,Score,Date_Submitted
0,rqb6bc,12736E1,78,02/29/24
1,rqb6bc,12736E2,92,03/21/24
2,rqb6bc,12736M,85,04/14/24
3,rqb6bc,12736F,64,05/04/24
4,svw7gw,12736E1,47,02/29/24
...,...,...,...,...
235,wdk0je,15764F,100,02/03/24
236,clw3dh,15764E1,93,03/24/24
237,clw3dh,15764E2,100,05/24/24
238,clw3dh,15764M,88,04/22/24


Just like how we create data with dictionaries, we query data the same way

In [13]:
# Merge student_courses with students to get student names
student_courses_merged = pd.merge(StudentClasses2, students, on='Student_ID')

# Merge the result with courses to get course names
full_merge = pd.merge(student_courses_merged, courses, left_on='Code', right_on='Code')

# Merge assignments with courses to get course names
assignments_merged = pd.merge(Assignments, courses, on='Code')

# Merge grades with assignments to get assignment names
grades_merged = pd.merge(Grades, assignments_merged, on='Assignment_Id')

# Merge grades with students to get student names
grades_merged = pd.merge(grades_merged, students, on='Student_ID')


In [14]:
assignments_merged.head()

Unnamed: 0,Assignment_Id,Code,Assignment_Title,Weight,Date_Due,Course_Name,Description,Instructor
0,12736E1,12736,Exam 1,0.1,02/29/24,COMM_2020,Introduction to Management Accounting,Roger Martin
1,12736E2,12736,Exam 2,0.1,03/21/24,COMM_2020,Introduction to Management Accounting,Roger Martin
2,12736M,12736,Midterm,0.2,04/14/24,COMM_2020,Introduction to Management Accounting,Roger Martin
3,12736F,12736,Final,0.6,05/04/24,COMM_2020,Introduction to Management Accounting,Roger Martin
4,12564E1,12564,Exam 1,0.1,02/20/24,STAT_1601,Introduction to Data Science with R,Prince Afriyie


In [15]:
nested_data = []

for course_code, group in full_merge.groupby('Code'):
    course_name = group['Course_Name'].values[0]
    students = group[['Student_ID', 'First_Name','Last_Name','Email']].drop_duplicates().to_dict('records')
    
    # Get assignments for the course
    course_assignments = assignments_merged[assignments_merged['Code'] == course_code]
    assignments_list = []
    
    for _, assignment in course_assignments.iterrows():
        assignment_id = assignment['Assignment_Id']
        assignment_name = assignment['Assignment_Title']
        assignment_weight = assignment['Weight']
        
        # Get grades for the assignment
        assignment_grades = grades_merged[grades_merged['Assignment_Id'] == assignment_id]
        grades_list = assignment_grades[['Student_ID', 'Score']].to_dict('records')
        
        assignments_list.append({
            'Assignment_Id': assignment_id,
            'Assignment_Title': assignment_name,
            'Weight': assignment_weight,
            'Grades': grades_list
        })
    
    nested_data.append({
        'Code': course_code,
        'Course_Name': course_name,
        'Students': students,
        'Assignments': assignments_list
    })


In [16]:
nested_data

[{'Code': 11386,
  'Course_Name': 'SLAV_2360',
  'Students': [{'Student_ID': 'rqb6bc',
    'First_Name': 'Melissa',
    'Last_Name': 'Zheng',
    'Email': 'rqb6bc@virginia.edu'},
   {'Student_ID': 'zds3st',
    'First_Name': 'Isaac',
    'Last_Name': 'Tabor',
    'Email': 'zds3st@virginia.edu'},
   {'Student_ID': 'ppz2ry',
    'First_Name': 'Alex',
    'Last_Name': 'Jones',
    'Email': 'ppz2ry@virginia.edu'},
   {'Student_ID': 'pfg9yq',
    'First_Name': 'Anya',
    'Last_Name': 'Hariharan',
    'Email': 'pfg9yq@virginia.edu'},
   {'Student_ID': 'jrp86y',
    'First_Name': 'Peter',
    'Last_Name': 'Park',
    'Email': 'jrp86y@virginia.edu'}],
  'Assignments': [{'Assignment_Id': '11386E1',
    'Assignment_Title': 'Exam 1',
    'Weight': 0.1,
    'Grades': [{'Student_ID': 'rqb6bc', 'Score': 80},
     {'Student_ID': 'zds3st', 'Score': 77},
     {'Student_ID': 'pfg9yq', 'Score': 68},
     {'Student_ID': 'jrp86y', 'Score': 51}]},
   {'Assignment_Id': '11386E2',
    'Assignment_Title': 'Ex

In [17]:
print(student_courses_merged[student_courses_merged['Code'] == 11386])


     Code Student_ID First_Name  Last_Name                Email
2   11386     rqb6bc    Melissa      Zheng  rqb6bc@virginia.edu
11  11386     zds3st      Isaac      Tabor  zds3st@virginia.edu
21  11386     ppz2ry       Alex      Jones  ppz2ry@virginia.edu
25  11386     pfg9yq       Anya  Hariharan  pfg9yq@virginia.edu
30  11386     jrp86y      Peter       Park  jrp86y@virginia.edu


In [18]:
# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')
db = client['mydatabase']
courses_collection = db['courses']

# Insert the nested data
courses_collection.insert_many(nested_data)

InsertManyResult([ObjectId('6658c29812cb5103b79ca931'), ObjectId('6658c29812cb5103b79ca932'), ObjectId('6658c29812cb5103b79ca933'), ObjectId('6658c29812cb5103b79ca934'), ObjectId('6658c29812cb5103b79ca935'), ObjectId('6658c29812cb5103b79ca936'), ObjectId('6658c29812cb5103b79ca937'), ObjectId('6658c29812cb5103b79ca938'), ObjectId('6658c29812cb5103b79ca939'), ObjectId('6658c29812cb5103b79ca93a'), ObjectId('6658c29812cb5103b79ca93b'), ObjectId('6658c29812cb5103b79ca93c')], acknowledged=True)

## QUERY

In [19]:
# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')
db = client['mydatabase']

In [20]:
# Aggregation pipeline
pipeline = [
    # Match documents with the course code 12736
    {"$match": {"Code": 12736}},
    
    # Unwind the Students array
    {"$unwind": "$Students"},
    
    # Project to include the required fields
    {"$project": {
        "_id": 0,
        "Course_Name": 1,
        "Student_First_Name": "$Students.First_Name",
        "Student_Last_Name": "$Students.Last_Name"
    }}
]

# Perform aggregation
result = db.courses.aggregate(pipeline)

# Print the results
for doc in result:
    print(doc)


{'Course_Name': 'COMM_2020', 'Student_First_Name': 'Melissa', 'Student_Last_Name': 'Zheng'}
{'Course_Name': 'COMM_2020', 'Student_First_Name': 'Edward', 'Student_Last_Name': 'Lee'}
{'Course_Name': 'COMM_2020', 'Student_First_Name': 'Isaac', 'Student_Last_Name': 'Tabor'}
{'Course_Name': 'COMM_2020', 'Student_First_Name': 'Laura', 'Student_Last_Name': 'Peters'}
{'Course_Name': 'COMM_2020', 'Student_First_Name': 'Felipe', 'Student_Last_Name': 'Mendez'}


In [25]:
pipeline = [
    # Unwind the Students array
    {"$unwind": "$Students"},
    
    # Unwind the Assignments array
    {"$unwind": "$Assignments"},
    
    # Unwind the Grades array
    {"$unwind": "$Assignments.Grades"},
    
    # Group by necessary fields to calculate the total score and total weight for each student in each course
    {"$group": {
        "_id": {
            "Student_ID": "$Students.Student_ID",
            "Course_Code": "$Code",
            "Course_Name": "$Course_Name",
            "First_Name": "$Students.First_Name",
            "Last_Name": "$Students.Last_Name"
        },
        "Total_Score": {
            "$sum": {
                "$multiply": ["$Assignments.Grades.Score", "$Assignments.Weight"]
            }
        },
        "Total_Weight": {
            "$sum": "$Assignments.Weight"
        }
    }},
    
    # Project intermediate results to debug
    {"$project": {
        "_id": 0,
        "Student_ID": "$_id.Student_ID",
        "Course_Code": "$_id.Course_Code",
        "First_Name": "$_id.First_Name",
        "Last_Name": "$_id.Last_Name",
        "Course_Name": "$_id.Course_Name",
        "CourseGrade": {
            "$cond": {
                "if": {"$eq": ["$Total_Weight", 0]},
                "then": None,  # or you could use 0 or another default value
                "else": {"$divide": ["$Total_Score", "$Total_Weight"]}
            }
        }
    }}
]
# Perform aggregation
result = db.courses.aggregate(pipeline)
# Print the results
for doc in result:
    print(doc)


{'Student_ID': 'imc4bv', 'Course_Code': 21031, 'First_Name': 'Douglas', 'Last_Name': 'Polansky', 'Course_Name': 'STAT_1601', 'CourseGrade': 78.12}
{'Student_ID': 'ppz2ry', 'Course_Code': 11386, 'First_Name': 'Alex', 'Last_Name': 'Jones', 'Course_Name': 'SLAV_2360', 'CourseGrade': 80.325}
{'Student_ID': 'jrp86y', 'Course_Code': 12564, 'First_Name': 'Peter', 'Last_Name': 'Park', 'Course_Name': 'STAT_1601', 'CourseGrade': 70.675}
{'Student_ID': 'svw7gw', 'Course_Code': 12736, 'First_Name': 'Edward', 'Last_Name': 'Lee', 'Course_Name': 'COMM_2020', 'CourseGrade': 67.14}
{'Student_ID': 'fpn8kb', 'Course_Code': 12645, 'First_Name': 'Nicholas', 'Last_Name': 'Spielberg', 'Course_Name': 'STAT_1601', 'CourseGrade': 62.02}
{'Student_ID': 'mcf5yc', 'Course_Code': 15762, 'First_Name': 'Aryan', 'Last_Name': 'Prabhudesai', 'Course_Name': 'MATH_3350', 'CourseGrade': 85.875}
{'Student_ID': 'wdk0je', 'Course_Code': 15413, 'First_Name': 'Tyrone', 'Last_Name': 'Twain', 'Course_Name': 'APMA_3110', 'CourseGr

In [26]:
pipeline = [
    # Match the specific Student_ID
    {"$match": {"Students.Student_ID": "zds3st"}},
    
    # Unwind the Students array
    {"$unwind": "$Students"},
    
    # Further filter for the specific Student_ID after unwinding
    {"$match": {"Students.Student_ID": "zds3st"}},

    # Lookup to join Assignments and unwind Grades
    {
        "$lookup": {
            "from": "courses",
            "let": {"courseCode": "$Code"},
            "pipeline": [
                {"$match": {"$expr": {"$eq": ["$Code", "$$courseCode"]}}},
                {"$unwind": "$Assignments"},
                {"$unwind": "$Assignments.Grades"},
                {
                    "$project": {
                        "_id": 0,
                        "Assignment_Weight": "$Assignments.Weight",
                        "Grade_Score": "$Assignments.Grades.Score"
                    }
                }
            ],
            "as": "AssignmentsWithGrades"
        }
    },
    
    # Unwind the joined array
    {"$unwind": "$AssignmentsWithGrades"},
    
    # Group by necessary fields to calculate the total score and total weight for each student in each course
    {
        "$group": {
            "_id": {
                "Student_ID": "$Students.Student_ID",
                "Course_Code": "$Code",
                "Course_Name": "$Course_Name",
                "First_Name": "$Students.First_Name",
                "Last_Name": "$Students.Last_Name"
            },
            "Total_Score": {
                "$sum": {
                    "$multiply": [
                        "$AssignmentsWithGrades.Grade_Score",
                        "$AssignmentsWithGrades.Assignment_Weight"
                    ]
                }
            },
            "Total_Weight": {"$sum": "$AssignmentsWithGrades.Assignment_Weight"}
        }
    },
    
    # Project the final results
    {
        "$project": {
            "_id": 0,
            "Student_ID": "$_id.Student_ID",
            "Course_Code": "$_id.Course_Code",
            "First_Name": "$_id.First_Name",
            "Last_Name": "$_id.Last_Name",
            "Course_Name": "$_id.Course_Name",
            "CourseGrade": {
                "$cond": {
                    "if": {"$eq": ["$Total_Weight", 0]},
                    "then": None,  # or you could use 0 or another default value
                    "else": {"$divide": ["$Total_Score", "$Total_Weight"]}
                }
            }
        }
    }
]

# Perform aggregation
result = db.courses.aggregate(pipeline)

# Print the results
for doc in result:
    print(doc)



{'Student_ID': 'zds3st', 'Course_Code': 12736, 'First_Name': 'Isaac', 'Last_Name': 'Tabor', 'Course_Name': 'COMM_2020', 'CourseGrade': 67.14}
{'Student_ID': 'zds3st', 'Course_Code': 14730, 'First_Name': 'Isaac', 'Last_Name': 'Tabor', 'Course_Name': 'COMM_3410', 'CourseGrade': 73.44}
{'Student_ID': 'zds3st', 'Course_Code': 12564, 'First_Name': 'Isaac', 'Last_Name': 'Tabor', 'Course_Name': 'STAT_1601', 'CourseGrade': 70.675}
{'Student_ID': 'zds3st', 'Course_Code': 11386, 'First_Name': 'Isaac', 'Last_Name': 'Tabor', 'Course_Name': 'SLAV_2360', 'CourseGrade': 80.325}
