In [1]:
import xml.etree.ElementTree as ET

#Extracting the custom classes from the Components module, See components folder for class definitions
from Components.Class import Class
from Components.Course import Course
from Components.HardConstraint import HardConstraint
from Components.SoftConstraint import SoftConstraint
from Components.Student import Student
from Components.Subpart import Subpart
from Components.Timing import Timing

# Path of the XML dataset to load. Change this value to read a different file.
# The default file is the output of "trim_dataset.ipynb".
inputFile = "agh-fis-spr17_trimmed.xml"

In [2]:
# Parse the XML file named by inputFile into an ElementTree.
tree = ET.parse(inputFile)
# Keep the document's top-level element; the cells below run all of their
# findall() queries against it.
root = tree.getroot()

In [3]:
# Every <course> element found under <courses>
coursesXML = root.findall('./courses/course')

# Lookup tables keyed by the 'id' attribute of the corresponding XML element
courses, subparts, classes = {}, {}, {}

for courseNode in coursesXML:
    # Only the first <config> of a course is read; any further configs are ignored
    firstConfig = courseNode.find('config')
    courseID = courseNode.get('id')

    subpartNodes = firstConfig.findall('subpart')
    for subpartNode in subpartNodes:
        subpartID = subpartNode.get('id')

        classNodes = subpartNode.findall('class')
        for classNode in classNodes:
            classID = classNode.get('id')
            # One Timing per <time> child; attribute values are passed through as read
            timings = [
                Timing(
                    timeNode.get('days'),
                    timeNode.get('length'),
                    timeNode.get('start'),
                    timeNode.get('weeks'),
                    timeNode.get('penalty'),
                )
                for timeNode in classNode.findall('time')
            ]
            # Register the class in the class table
            classes[classID] = Class(
                classID,
                classNode.get('limit'),
                classNode.get('parent'),
                subpartID,
                courseID,
                timings,
            )

        # Register the subpart in the subpart table, together with the IDs of its classes
        subparts[subpartID] = Subpart(
            subpartID,
            courseID,
            [classNode.get('id') for classNode in classNodes],
        )

    # Register the course in the course table, together with the IDs of its subparts
    courses[courseID] = Course(
        courseID,
        [subpartNode.get('id') for subpartNode in subpartNodes],
    )

# Summary of what was extracted
print("%d Classes extracted" % len(classes))
print("%d Subparts extracted" % len(subparts))
print("%d Courses extracted" % len(courses))

1239 Classes extracted
665 Subparts extracted
340 Courses extracted


In [4]:
# Extracted distribution constraints: those marked required="true" are hard,
# all others are soft.
hardConstraints = []
softConstraints = []

# Every <distribution> element found under <distributions>
distributionXML = root.findall('./distributions/distribution')

# Only these constraint types are extracted; distributions of any other type are skipped
requiredTypes = ['Precedence', 'SameAttendees', 'NotOverlap', 'SameTime', 'DifferentTime', 'MinGap', 'DifferentDays']

for dist in distributionXML:
    # Element.get returns None when the attribute is absent. Fall back to an
    # empty string so a <distribution> without a 'type' is skipped like any
    # other non-required type instead of raising AttributeError on None.split.
    rawType = dist.get('type') or ''
    # Keep only the part before '(' so that "MinGap(G)" compares equal to "MinGap".
    # NOTE(review): the value inside the parentheses is dropped here and is not
    # passed to the constraint object - confirm downstream code does not need it.
    constraintType = rawType.split('(')[0]

    if constraintType not in requiredTypes:
        continue

    # IDs of all classes the constraint applies to
    classList = [singleClass.get('id') for singleClass in dist.findall('class')]

    if dist.get('required') == 'true':
        hardConstraints.append(HardConstraint(constraintType, classList))
    else:
        # Soft constraints additionally carry the 'penalty' attribute, passed through as read
        softConstraints.append(SoftConstraint(constraintType, classList, dist.get('penalty')))

print("%d Hard Constraints extracted" % len(hardConstraints))
print("%d Soft Constraints extracted" % len(softConstraints))

664 Hard Constraints extracted
7 Soft Constraints extracted


In [5]:
# Student objects, in document order
students = []
# Every <student> element found under <students>
studentXML = root.findall('./students/student')

for studentNode in studentXML:
    studentId = studentNode.get('id')
    # IDs of the courses this student is enrolled in
    courseList = [enrolment.get('id') for enrolment in studentNode.findall('course')]
    # Register the student on each matching Course object from the courses table.
    # NOTE(review): an ID missing from `courses` raises KeyError here, and
    # re-running this cell calls addStudent again on the same Course objects -
    # confirm both are acceptable.
    for courseID in courseList:
        courses[courseID].addStudent(studentId)
    students.append(Student(studentId, courseList))

print("%d Students extracted" % len(students))


1641 Students extracted
