In [1]:
import xml.etree.ElementTree as ET

In [2]:
# Parse the problem instance from disk into an ElementTree.
mytree = ET.parse('Downloaded/wbg-fal10.xml')
# Root element of the document. NOTE: the cells below pick sections out of
# `myroot` by child position (myroot[1] .. myroot[4]), so they depend on the
# order of the root's children in the XML file.
myroot = mytree.getroot()

### Rooms
Each **room** is specified by its **id**(int) and **capacity**(int). A room may not be available at certain times, which are defined by **unavailable**(list) elements using the **days**(str) of the week, the **start**(int) time slot, and a **length**(int) in slots for a set of **weeks**(str) during the semester. <br>
* *No travel time constraint for this test set*

In [3]:
# Build one record per room element (the children of myroot[1]):
#   {'id': int, 'capacity': int,
#    'unavailable': [{'days': str, 'start': int, 'length': int, 'weeks': str}, ...]}
rooms = []

for room_el in myroot[1]:
    room = {
        'id': int(room_el.attrib['id']),
        'capacity': int(room_el.attrib['capacity']),
        'unavailable': [],
    }

    # Select the <unavailable> children by tag instead of assuming that every
    # child of a room is one. Any other child element (presumably e.g. travel
    # time entries, which this test set does not contain) would otherwise
    # raise KeyError on the missing 'days' attribute.
    for ua_el in room_el.findall('unavailable'):
        room['unavailable'].append({
            'days': ua_el.attrib['days'],      # stored as the raw attribute string
            'start': int(ua_el.attrib['start']),
            'length': int(ua_el.attrib['length']),
            'weeks': ua_el.attrib['weeks'],    # stored as the raw attribute string
        })

    rooms.append(room)

In [4]:
# Bare expression: shows the parsed room records as this cell's output.
rooms

[{'id': 1, 'capacity': 1, 'unavailable': []},
 {'id': 2, 'capacity': 1, 'unavailable': []},
 {'id': 3,
  'capacity': 2,
  'unavailable': [{'days': '1000000',
    'start': 240,
    'length': 24,
    'weeks': '0000000001000000'},
   {'days': '0010000',
    'start': 240,
    'length': 24,
    'weeks': '0010000000000000'}]},
 {'id': 4,
  'capacity': 1,
  'unavailable': [{'days': '0100000',
    'start': 240,
    'length': 24,
    'weeks': '0000000001000000'}]},
 {'id': 5, 'capacity': 1, 'unavailable': []},
 {'id': 6,
  'capacity': 4,
  'unavailable': [{'days': '0100000',
    'start': 240,
    'length': 24,
    'weeks': '0000000001000000'}]},
 {'id': 7,
  'capacity': 2,
  'unavailable': [{'days': '1000000',
    'start': 240,
    'length': 24,
    'weeks': '0000010000000000'},
   {'days': '1000000',
    'start': 240,
    'length': 24,
    'weeks': '0000000001000000'},
   {'days': '1000000',
    'start': 240,
    'length': 24,
    'weeks': '0000000000010000'},
   {'days': '0100000',
    'start

### Courses
Courses may have a very complex hierarchical structure of classes. Since there are *no config constraints (only 1 config for every course)*, the config level is skipped.<br>
* Each **course** has an **id**, and 1 or many **subpart**.
* Each **subpart** has an **id**, and 1 or many **class**.
* Each **class** has an **id** and **limit**. It may or may not have a **parent**. It has 1 or many **room** & **time** option(s), of which it can only pick one
    * Each **room constraint** has an **id** and a **penalty**
    * Each **time constraint** has **days**, **start**, **length**, **weeks**, **penalty**

In [5]:
# Flatten the course hierarchy into nested dicts and lists:
#   course -> subpart -> class -> room / time options.
# Only the first child of each course element is walked (the single config).
courses = []

for course_el in myroot[2]:
    course = {'id': int(course_el.attrib['id']), 'subpart': []}
    courses.append(course)

    # Subparts are the children of the course's first child element.
    for subpart_el in course_el[0]:
        subpart = {'id': int(subpart_el.attrib['id']), 'class': []}
        course['subpart'].append(subpart)

        for class_el in subpart_el:
            class_attrs = class_el.attrib
            class_rec = {
                'id': int(class_attrs['id']),
                'limit': int(class_attrs['limit']),
            }
            subpart['class'].append(class_rec)

            # 'parent' is optional; the key is only stored when the attribute
            # is present and non-zero.
            parent_id = int(class_attrs.get('parent', 0))
            if parent_id:
                class_rec['parent'] = parent_id

            class_rec['room'] = []
            class_rec['time'] = []

            # Children tagged 'room' are room options; every other child is
            # read as a time option.
            for option_el in class_el:
                option = option_el.attrib
                if option_el.tag == 'room':
                    class_rec['room'].append({
                        'id': int(option['id']),
                        'penalty': int(option['penalty']),
                    })
                else:
                    class_rec['time'].append({
                        'days': option['days'],
                        'start': int(option['start']),
                        'length': int(option['length']),
                        'weeks': option['weeks'],
                        'penalty': int(option['penalty']),
                    })

#courses

In [6]:
# Bare expression: shows the parsed course records as this cell's output.
courses

[{'id': 1,
  'subpart': [{'id': 1,
    'class': [{'id': 1,
      'limit': 2,
      'room': [{'id': 6, 'penalty': 4},
       {'id': 7, 'penalty': 0},
       {'id': 3, 'penalty': 0}],
      'time': [{'days': '1010100',
        'start': 102,
        'length': 10,
        'weeks': '1111111111111111',
        'penalty': 16},
       {'days': '1010100',
        'start': 114,
        'length': 10,
        'weeks': '1111111111111111',
        'penalty': 0},
       {'days': '1010100',
        'start': 126,
        'length': 10,
        'weeks': '1111111111111111',
        'penalty': 0},
       {'days': '1010100',
        'start': 138,
        'length': 10,
        'weeks': '1111111111111111',
        'penalty': 0},
       {'days': '1010100',
        'start': 150,
        'length': 10,
        'weeks': '1111111111111111',
        'penalty': 0},
       {'days': '1010100',
        'start': 162,
        'length': 10,
        'weeks': '1111111111111111',
        'penalty': 0},
       {'days': '101010

### Students
Each **student** has a unique **id** and a list of courses that he or she needs to attend. Each **course** is specified by its course **id**.

In [7]:
# One record per student element (the children of myroot[4]): the student's id
# plus the ids of all of its child elements, in document order.
students = [
    {
        'id': int(student_el.attrib['id']),
        'course': [int(course_el.attrib['id']) for course_el in student_el],
    }
    for student_el in myroot[4]
]

students

[{'id': 1, 'course': [1, 19, 20, 7, 8]},
 {'id': 2, 'course': [1, 19, 7, 8, 11]},
 {'id': 3, 'course': [19, 4, 5, 8, 9]},
 {'id': 4, 'course': [3, 5, 6, 9, 15]},
 {'id': 5, 'course': [3, 5, 6, 9, 15]},
 {'id': 6, 'course': [3, 5, 6, 9, 15]},
 {'id': 7, 'course': [17, 2, 5, 6, 7, 10]},
 {'id': 8, 'course': [3, 5, 6, 7, 10]},
 {'id': 9, 'course': [17, 5, 6, 9, 10]},
 {'id': 10, 'course': [17, 4, 5, 6, 9]},
 {'id': 11, 'course': [18, 19, 3, 21, 7]},
 {'id': 12, 'course': [16, 2, 7, 9, 11, 12]},
 {'id': 13, 'course': [16, 17, 20, 12, 13]},
 {'id': 14, 'course': [3, 5, 6, 9, 14]},
 {'id': 15, 'course': [3, 5, 6, 11, 14]},
 {'id': 16, 'course': [3, 5, 21, 6, 14]},
 {'id': 17, 'course': [18, 3, 21, 6, 9]},
 {'id': 18, 'course': [3, 6, 9, 11, 12]},
 {'id': 19, 'course': [17, 18, 4, 5, 7]}]

### Distributions Constraints
Two types of constraints in this test set: **SameAttendees**(hard) and **NotOverlap**(soft). 
* They mean the same thing: class i & class j are not allowed to overlap each other, for all pairs i, j of classes listed in the constraint. 
* The only difference is the **required** flag: a hard constraint must not be violated, while a soft constraint may be violated at the cost of a **penalty**. 
* Each constraint includes a list of **class**.

In [8]:
# Parse the distribution constraints (the children of myroot[3]) into records:
#   {'type': str, 'required': bool, ['penalty': int,] 'class': [int, ...]}
# 'penalty' is only present on constraints that are not required.
constraints = []

for dist_el in myroot[3]:
    # The attribute value is text, so bool() on it would be True for ANY
    # non-empty string, including "false". Compare the text itself instead.
    # A missing attribute means the constraint is not required.
    required_text = dist_el.attrib.get('required', 'false')
    is_required = required_text.strip().lower() in ('true', '1')

    constraint = {'type': dist_el.attrib['type'], 'required': is_required}

    # A constraint that is not required must state its penalty; a missing
    # 'penalty' attribute raises KeyError rather than being silently ignored.
    if not is_required:
        constraint['penalty'] = int(dist_el.attrib['penalty'])

    # Ids of the child elements (the classes this constraint applies to).
    constraint['class'] = [int(class_el.attrib['id']) for class_el in dist_el]
    constraints.append(constraint)

constraints

[{'type': 'SameAttendees', 'required': True, 'class': [1, 31, 32, 37, 38, 39]},
 {'type': 'SameAttendees', 'required': True, 'class': [3, 4, 5, 28, 29, 30]},
 {'type': 'SameAttendees',
  'required': True,
  'class': [40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51]},
 {'type': 'SameAttendees', 'required': True, 'class': [40, 37]},
 {'type': 'SameAttendees', 'required': True, 'class': [41, 37]},
 {'type': 'SameAttendees', 'required': True, 'class': [42, 37]},
 {'type': 'SameAttendees', 'required': True, 'class': [43, 37]},
 {'type': 'SameAttendees', 'required': True, 'class': [44, 38]},
 {'type': 'SameAttendees', 'required': True, 'class': [45, 38]},
 {'type': 'SameAttendees', 'required': True, 'class': [46, 38]},
 {'type': 'SameAttendees', 'required': True, 'class': [47, 38]},
 {'type': 'SameAttendees', 'required': True, 'class': [48, 39]},
 {'type': 'SameAttendees', 'required': True, 'class': [49, 39]},
 {'type': 'SameAttendees', 'required': True, 'class': [50, 39]},
 {'type': 'SameAtt

### Export data to .py
Set the destination file path with the `filename = ...` assignment below.

In [9]:
filename = 'dataset_01.py'

# Write each parsed structure as a `name = <str(value)>` line, separated by
# newlines (no trailing newline after the last one).
# The `with` block closes the file even if building or writing a line raises,
# which the manual open()/close() pair did not guarantee.
with open(filename, 'w') as file:
    file.write('\n'.join([
        'rooms = ' + str(rooms),
        'courses = ' + str(courses),
        'students = ' + str(students),
        'constraints = ' + str(constraints),
    ]))