In [1]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import re
import pickle 
from urllib.parse import urlparse, parse_qs

In [45]:
def getAllCours():
    URL_ROOT = 'https://edu.epfl.ch/'
    shs = ['https://edu.epfl.ch/studyplan/fr/bachelor/programme-sciences-humaines-et-sociales/', 'https://edu.epfl.ch/studyplan/fr/master/programme-sciences-humaines-et-sociales/']
    page = requests.get(URL_ROOT)
    soup = BeautifulSoup(page.content, "html.parser")
    cards = soup.findAll("div", class_="card-title")
    annees = [card.find('a').get('href') for card in cards]
    courses = []
    for annee in annees:
        page = requests.get(URL_ROOT + annee)
        soup = BeautifulSoup(page.content, "html.parser")
        sections = [x.get('href') for x in soup.find('main').find('ul').findAll('a')]
        for section in sections:
            page = requests.get(URL_ROOT + section)
            soup = BeautifulSoup(page.content, "html.parser")
            for cours in soup.find('main').findAll('div', class_="cours-name"):
                if cours.find('a') != None:
                    courses.append(cours.find('a').get('href').split('/').pop())
    courses.remove('programme-sciences-humaines-et-sociales')
    for url in shs:
        page = requests.get(url)
        soup = BeautifulSoup(page.content, "html.parser")
        for cours in soup.findAll("div", class_="cours-name"):
            if cours.find('a') != None:
                courses.append(cours.find('a').get('href').split('/').pop())
                
    return courses

In [17]:
rooms_filter = [
    'POL.N3.E',
    'POL315.1',
    'PHxx',
    'Max412',
    'STCC - Garden Full',
    'ELG124',
    'EXTRANEF126',
    'CHCIGC'
]

rooms_map = {
    'CE1': 'CE11',
    'CM4': 'CM14',
    'BC07-08': ['BC07','BC08'],
    'CM3': 'CM13',
    'CE4': 'CE14',
    'CM2': 'CM12',
    'SG1': 'SG1138',
    'CE6': 'CE13',
    'CM5': 'CM15',
    'CE5': 'CE15',
    'CM1': 'CM11',
    'CE2': 'CE12',
    'CE3': 'CE13',
    'RLC E1 240': 'RLCE1240'
}

In [83]:
def parseCours(url):
    page = requests.get(url)
    if (page.status_code == 404):
        print(url)
        return
        
    soup = BeautifulSoup(page.content, "html.parser")
    
    schedule = dict()
    title = soup.find('main').find('h1').text
    if (soup.find('div', class_="course-summary") == None):
        print(url)
    code = soup.find('div', class_="course-summary").findAll('p')[0].text.split('/')[0].strip()
    credits = int(re.findall(r'\d+', soup.find('div', class_="course-summary").findAll('p')[0].text.split('/')[1])[0])
    teachers = [(x.text, x.get('href')) for x in soup.find('div', class_="course-summary").findAll('p')[1].findAll('a')]

    semester = soup.find('div', class_="study-plans").findAll('div', class_="collapse-item")[0].findAll('li')[0].text.split(':')[1].strip()
    if (semester != 'Printemps' and semester != 'Automne'):
        semester = None
        
    if (semester == None):
        # Ecole doctorale
        #print(f'Ecole doctorale : {url}')
        schedule = dict()
        iframe_soup = BeautifulSoup(requests.get(soup.find("iframe").attrs['src']).content, "html.parser")
        if (iframe_soup.find('table') == None):
            #print(f'\033[91m SKIP (no schedule) \033[0m')
            return
        semester = 'Printemps'
        rows = iframe_soup.findAll('tr')
        creneaux = []
        for i, row in enumerate(rows):
            if (i == 0):
                continue
            if (row.find('th') != None):
                day = row.find('th').text.split('\xa0')[0][:2]
                if ('2023' not in row.find('th').text.split('\xa0')[1]):
                    year = row.find('th').text.split('\xa0')[1]
                    #print(f'\033[91m SKIP (not 2023 -> {year}) \033[0m')
                    day = None
                    continue
                if (int(row.find('th').text.split('\xa0')[1].split('.')[1]) > 5):
                    year = row.find('th').text.split('\xa0')[1]
                    #print(f'\033[91m SKIP (during summer -> {year}) \033[0m')
                    day = None
                    continue
            elif (row.get("class") != None and 'grisleger' in row.get("class") and day != None):
                time = [x.split(':')[0] for x in row.findAll('td')[0].text.split('-')]
                duration = int(time[1]) - int(time[0])
                time = f"{int(time[0])}-{int(time[0]) + 1}"
                rooms_found = [room.text for room in row.findAll('td')[1].findAll('a')]
                rooms = []
                for room in rooms_found:
                    if (room in rooms_map):
                        if (isinstance(rooms_map[room], list)):
                            rooms.append([x for x in rooms_map[room]])
                        else:
                            rooms.append(rooms_map[room])
                    elif (room not in rooms_filter):
                        rooms.append(room)
                label = row.findAll('td')[2].text
                if (label == 'L'):
                    label = 'cours'
                elif(label == 'E'):
                    label = 'exercice'
                elif(label == 'P'):
                    label = 'projet'
                else:
                    print(label)
                creneau = {
                    'day': day,
                    'time': time,
                    'label': label,
                    'rooms': rooms,
                    'duration': duration
                }
                if (len(rooms) > 0):
                    creneaux.append(creneau)
                creneau = {}
        if len(creneaux) == 0:
            #print(f'\033[91m SKIP (no creneaux) \033[0m')
            return
        for creneau in creneaux:
            day = creneau['day']
            time = creneau['time']
            if (time not in schedule):
                schedule[time] = dict()
            if (day not in schedule[time]):
                schedule[time][day] = {
                    'duration': creneau['duration'],
                    'rooms': creneau['rooms'],
                    'label': creneau['label']
                }
            elif (schedule[time][day]['duration'] == creneau['duration']):
                old_rooms = schedule[time][day]['rooms']
                new_rooms = creneau['rooms']
                schedule[time][day]['rooms'] = list(set(old_rooms + new_rooms))
        #print(f'\033[92m {schedule} \033[0m')

    else:
        # Not ecole doctorale
        if (soup.find("table", class_="semaineDeRef") != None):
            rows = soup.find("table", class_="semaineDeRef").findAll("tr")
            days = []
            for i, row in enumerate(rows):
                col = row.findAll('td')
                skip_days = 0
                for j, col in enumerate(col):
                    if (i == 0):
                        if (j > 0):
                            days.append(col.text)
                    else:
                        if (j == 0):
                            time = col.text
                        else:
                            day = days[j-1]
                            if (time in schedule):
                                if (day in schedule[time]):
                                    if ('skip' in schedule[time][day]):
                                        skip_days += 1
                            classes = col.get('class')
                            if (classes != None and "taken" in classes):
                                if (col.get('rowspan') == None):
                                    duration = 1
                                else:
                                    duration = int(col.get('rowspan'))
                                classes.remove('taken')
                                if (len(classes) != 0):
                                    label = classes[0]
                                    day = days[j-1 + skip_days]
                                    rooms_found = [room.text for room in col.findAll('a')]
                                    rooms = []
                                    for room in rooms_found:
                                        if (room in rooms_map):
                                            if (isinstance(rooms_map[room], list)):
                                                for x in rooms_map[room]:
                                                    rooms.append(x)
                                            else:
                                                rooms.append(rooms_map[room])
                                        elif (room not in rooms_filter):
                                            rooms.append(room)
                                    if (len(rooms) > 0):
                                        if (time not in schedule):
                                            schedule[time] = dict()
                                        schedule[time][day] = {
                                            'label': label,
                                            'duration': duration,
                                            'rooms': rooms
                                        }
                                        if (duration > 1):
                                            for k in range(duration):
                                                k_time = '-'.join(list(map(lambda x: str(int(x)+k), time.split('-'))))
                                                if (k_time not in schedule):
                                                    schedule[k_time] = dict()
                                                    schedule[k_time][day] = {
                                                        'skip': True
                                                    }
        if (len(schedule.keys()) == 0):
            print(f'\033[91m NO SCHEDULE ({code}) \033[0m')
            return


    
    course = {
        'name': title,
        'code': code,
        'credits': credits,
        'semester': semester,
        'teachers': teachers,
        'schedule': schedule
    }

    return course

In [46]:
def URL_ISA(isa_course):
    return ('https://isa.epfl.ch/imoniteur_ISAP/!gedpublicreports.html?'
    'ww_i_reportModel=2096516283&'
    'ww_i_reportModelXsl=2096516327&'
    'ww_x_GPS=3206846895&'
    'ww_x_MATIERE={course}&'
    'ww_x_DATE_DEBUT=01.09.2022&'
    'ww_x_DATE_FIN=31.08.2023&'
    'ww_x_LANGUE=fr&'
    'ww_x_AFFICHE_ENTETE=1&'
    'ww_x_AFFICHE_PROF=1').format(course=isa_course)

In [44]:
parseCours("https://edu.epfl.ch/coursebook/fr/elements-de-statistiques-pour-les-data-sciences-EE-209")

adding creneau
('9-10', 'Ma', 1)
adding creneau


{'name': 'Eléments de statistiques pour les data sciences',
 'code': 'EE-209',
 'credits': 3,
 'semester': 'Printemps',
 'teachers': [('Obozinski Guillaume Romain',
   'https://people.epfl.ch/307123?lang=fr')],
 'schedule': {'8-9': {'Ma': {'label': 'cours',
    'duration': 2,
    'rooms': ['RLCE1240']}},
  '9-10': {'Ma': {'skip': True},
   'Je': {'label': 'exercice', 'duration': 1, 'rooms': ['CM12', 'CM1105']}}}}

In [28]:
print('-'.join(list(map(lambda x: str(int(x)+2), '15-16'.split('-')))))

17-18


In [47]:
courses_url = getAllCours()

In [84]:
URL_ROOT = 'https://edu.epfl.ch/coursebook/fr/'
courses_url = list(set(courses_url))

courses = []
for url in courses_url:
    courses.append(parseCours(URL_ROOT + url))



[91m NO SCHEDULE (CS-596) [0m
[91m NO SCHEDULE (CS-599) [0m
[91m NO SCHEDULE (EE-492(g)) [0m
[91m NO SCHEDULE (AR-302(h)) [0m
[91m NO SCHEDULE (AR-201(a)) [0m
[91m NO SCHEDULE (AR-202(m)) [0m
[91m NO SCHEDULE (PHYS-320) [0m
[91m NO SCHEDULE (COM-307) [0m
[91m NO SCHEDULE (MATH-596) [0m
[91m NO SCHEDULE (AR-302(ai)) [0m
[91m NO SCHEDULE (MGT-589) [0m
[91m NO SCHEDULE (BIO-503) [0m
[91m NO SCHEDULE (AR-201(r)) [0m
[91m NO SCHEDULE (CS-358) [0m
[91m NO SCHEDULE (AR-599) [0m
[91m NO SCHEDULE (ME-468) [0m
[91m NO SCHEDULE (MICRO-499) [0m
[91m NO SCHEDULE (ChE-599) [0m
[91m NO SCHEDULE (EE-589) [0m
[91m NO SCHEDULE (COM-416) [0m
[91m NO SCHEDULE (PHYS-319) [0m
[91m NO SCHEDULE (EE-490(a)) [0m
[91m NO SCHEDULE (CH-599) [0m
[91m NO SCHEDULE (HUM-202) [0m
[91m NO SCHEDULE (MATH-598) [0m
[91m NO SCHEDULE (AR-301(ae)) [0m
[91m NO SCHEDULE (MATH-595) [0m
[91m NO SCHEDULE (EE-492(d)) [0m
[91m NO SCHEDULE (PENS-211) [0m
[91m NO SCHEDULE (MAT

In [87]:
data = list(filter(lambda x: x != None, courses))

with open('./data/data.pkl', 'wb') as f:
        pickle.dump(data, f)

In [61]:
len(data)

1317

In [62]:
def save_files_entities():
    data = load_file('./data/data.pkl')

    # Get list of unique teachers
    teachers = sum([x['teachers'] for x in data], [])
    teachers = list(set(teachers))
    teachers = [{
        'name': x[0],
        'people_url': x[1]
    } for x in teachers]

    # Get list of unique rooms
    #rooms = []
    #for x in data:
    #    for time in x['schedule'].values():
    #        for day in time.values():
    #            rooms.append(day['rooms'])

    #rooms = list(set(sum(rooms, [])))
    #rooms = [{ 'name': room } for room in rooms]

    # Get list of unique courses
    codes = []
    courses = []
    for course in data:
        if (course['code'] not in codes):
            courses.append({
                'name': course['name'],
                'code': course['code'],
                'credits': course['credits'],
                'semester': course['semester']
            })
            codes.append(course['code'])

    with open('./data/teachers.pkl', 'wb') as f:
        pickle.dump(teachers, f)
    
    # with open('./data/rooms.pkl', 'wb') as f:
    #    pickle.dump(rooms, f)

    with open('./data/courses.pkl', 'wb') as f:
        pickle.dump(courses, f)

In [86]:
def save_files_relations():
    data = load_file('./data/data.pkl')

    # Get list of unique teach_in
    teach_in = []
    for course in data:
        for teacher in course['teachers']:
            teach_in.append((
                course['code'],
                teacher[0]
            ))
    teach_in = list(set(teach_in))
    
    # Get list of unique booking
    booking = []
    for course in data:
        for time, row in course['schedule'].items():
            for day, creneau in row.items():
                if ('skip' not in creneau):
                    for room in creneau['rooms']:
                        booking.append({
                            'room': room,
                            'course': course['code'],
                            'label': creneau['label'],
                            'duration': creneau['duration'],
                            'time': time,
                            'day': day,
                            'semester': course['semester']
                        })
    
    with open('./data/teach_in.pkl', 'wb') as f:
        pickle.dump(teach_in, f)

    with open('./data/booking.pkl', 'wb') as f:
        pickle.dump(booking, f)

In [64]:
def update_db_entities(db):
    teachers = load_file('./data/teachers.pkl')
    #rooms = load_file('./data/rooms.pkl')
    courses = load_file('./data/courses.pkl')
    
    #for x in rooms:
    #    db.rooms.update_one({'name': x['name']}, {"$set": x}, upsert=True)
    for x in teachers:
        db.teachers.update_one({'name': x['name']}, {"$set": x}, upsert=True)
    for x in courses:
        db.courses.update_one({'code': x['code']}, {"$set": x}, upsert=True)

In [88]:
def update_db_relations(db):
    queried_rooms = db.rooms.find()
    queried_teachers = db.teachers.find()
    queried_courses = db.courses.find()

    map_room = dict()
    for room in queried_rooms:
        map_room[room['name']] = room['_id']

    map_teacher = dict()
    for teacher in queried_teachers:
        map_teacher[teacher['name']] = teacher['_id'] 

    map_course = dict()
    for course in queried_courses:
        map_course[course['code']] = course['_id']

    # Load the relations
    teach_in = load_file('./data/teach_in.pkl')
    booking = load_file('./data/booking.pkl')

    # Map the DB ids
    teach_in = [{
        'teacher': map_teacher[x[1]],
        'course': map_course[x[0]]
    } for x in teach_in]

    booking = [{
        **x,
        'room': map_room[x['room']],
        'course': map_course[x['course']],
    } for x in booking]

    # Insert in DB

    db.teach_in.drop()
    db.booking.drop()

    db.teach_in.insert_many(teach_in)
    db.booking.insert_many(booking)

    

In [66]:
def load_file(name):
    with open(name, 'rb') as handle:
        data = pickle.load(handle)
    return data

In [238]:
def DB_indices(db):
    try:
        db.rooms.create_index([("name", pymongo.ASCENDING)], name="room_name", unique=True)
        db.teachers.create_index([("name", pymongo.ASCENDING)], name="teacher_unique", unique=True)
        db.courses.create_index([("code", pymongo.ASCENDING)], name="course_unique", unique=True)
        db.teach_in.create_index([("teacher", pymongo.ASCENDING), ("course", pymongo.ASCENDING)], name="teach_in_unique", unique=True)
        db.booking.create_index([("room", pymongo.ASCENDING), ("time", pymongo.ASCENDING), ("day", pymongo.ASCENDING), ("semester", pymongo.ASCENDING)], name="booking_unique", unique=True)
        db.plans.create_index([("promo", pymongo.ASCENDING), ("course", pymongo.ASCENDING), ("section", pymongo.ASCENDING)], name="unique_plan", unique=True)
    except Exception as err:
        print(err)

In [239]:
import pymongo
from pymongo import MongoClient
import config

client = MongoClient(f"mongodb+srv://{config.DB_USER}:{config.DB_PASSWORD}@{config.DB_URL}/?retryWrites=true&w=majority")

db = client[config.DB_NAME]

DB_indices(db)

Index build failed: 0820e267-ab45-47a2-9074-4a6b2368839f: Collection occupancy-epfl.booking ( 91a8a261-ad0d-4d22-ab4b-690feb61b6d0 ) :: caused by :: E11000 duplicate key error collection: occupancy-epfl.booking index: booking_unique dup key: { room: ObjectId('63eac65e5cdd0d4aa102a4cf'), time: "9-10", day: "Ma", semester: "Printemps" }, full error: {'ok': 0.0, 'errmsg': 'Index build failed: 0820e267-ab45-47a2-9074-4a6b2368839f: Collection occupancy-epfl.booking ( 91a8a261-ad0d-4d22-ab4b-690feb61b6d0 ) :: caused by :: E11000 duplicate key error collection: occupancy-epfl.booking index: booking_unique dup key: { room: ObjectId(\'63eac65e5cdd0d4aa102a4cf\'), time: "9-10", day: "Ma", semester: "Printemps" }', 'code': 11000, 'codeName': 'DuplicateKey', 'keyPattern': {'room': 1, 'time': 1, 'day': 1, 'semester': 1}, 'keyValue': {'room': ObjectId('63eac65e5cdd0d4aa102a4cf'), 'time': '9-10', 'day': 'Ma', 'semester': 'Printemps'}, '$clusterTime': {'clusterTime': Timestamp(1676746157, 81), 'signat

In [69]:
save_files_entities()
update_db_entities(db)

In [89]:
save_files_relations()
update_db_relations(db)

In [102]:
xml = '<GetFeature xmlns="http://www.opengis.net/wfs" service="WFS" version="1.1.0" outputFormat="GML3" maxFeatures="100000" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.opengis.net/wfs http://schemas.opengis.net/wfs/1.1.0/wfs.xsd">'
        '<Query typeName="feature:batiments_wmsquery" srsName="EPSG:2056" xmlns:feature="http://mapserver.gis.umn.edu/mapserver"><Filter xmlns="http://www.opengis.net/ogc"><BBOX><PropertyName>the_geom</PropertyName><Envelope xmlns="http://www.opengis.net/gml" srsName="EPSG:2056"><lowerCorner>{low1} {low2}</lowerCorner><upperCorner>{up1} {up2}</upperCorner></Envelope></BBOX></Filter></Query></GetFeature>'

IndentationError: unexpected indent (1460820677.py, line 2)

In [129]:
def list_rooms(low, up, floor, max=1000):
    low1, low2 = low
    up1, up2 = up
    request_url = f"https://plan.epfl.ch/mapserv_proxy?ogcserver=source+for+image%2Fpng&cache_version=9fe661ce469e4692b9e402b22d8cb420&floor={floor}"
    xml = f'<GetFeature xmlns="http://www.opengis.net/wfs" service="WFS" version="1.1.0" outputFormat="GML3" maxFeatures="{max}" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.opengis.net/wfs http://schemas.opengis.net/wfs/1.1.0/wfs.xsd"><Query typeName="feature:batiments_wmsquery" srsName="EPSG:2056" xmlns:feature="http://mapserver.gis.umn.edu/mapserver"><Filter xmlns="http://www.opengis.net/ogc"><BBOX><PropertyName>the_geom</PropertyName><Envelope xmlns="http://www.opengis.net/gml" srsName="EPSG:2056"><lowerCorner>{low1} {low2}</lowerCorner><upperCorner>{up1} {up2}</upperCorner></Envelope></BBOX></Filter></Query></GetFeature>'

    r = requests.post(request_url, data=xml)
    room_xml = BeautifulSoup(r.text, 'xml')
    if (room_xml.find('gml:Null') != None):
        return None
    return room_xml.findAll('gml:featureMember')

def parse_room(room_xml):
    room_name = BeautifulSoup(room_xml.find('ms:room_abr_link').text, 'html.parser').find('div', class_="room").text.replace(" ", "")
    room_type = room_xml.find('ms:room_uti_a').text
    return { 'name': room_name, 'type': room_type }

In [159]:
rooms_xml = list_rooms((2533565.4081416847, 1152107.9784703811), (2532650.4135850836, 1152685.3502971812), -1, max=5000)

In [160]:
len(rooms_xml)

1511

In [None]:
rooms = []
types = []

In [161]:
for room_xml in rooms_xml:
    room = parse_room(room_xml)
    if (room == None):
        continue
    if (room not in rooms):
        rooms.append(room)
    if (room['type'] not in types):
        types.append(room['type'])

In [269]:
query_rooms = db.rooms.find()

for room in query_rooms:
    room_name = room['name']
    found = False
    for x in rooms:
        if (x['name'] == room_name):
            db.rooms.update_one({'name': x['name']}, {"$set": {
                'type': x['type']
            }}, upsert=True)
            found = True
            break
    if (found == False):
        print(f'{room["name"]}')

In [231]:
map_semester = {
    'Bachelor 1': 'BA1',
    'Bachelor 2' : 'BA2',
    'Bachelor 3' : 'BA3',
    'Bachelor 4' : 'BA4',
    'Bachelor 5' : 'BA5',
    'Bachelor 6' : 'BA6',
}

map_section = {
    'Génie mécanique': 'GM',
    'Architecture': 'AR',
    'Chimie et génie chimique': 'CGC',
    'Génie civil': 'GC',
    'Génie électrique et électronique ': 'EL',
    'Informatique': 'IN',
    'Ingénierie des sciences du vivant': 'SV',
    'Mathématiques': 'MA',
    'Microtechnique': 'MT',
    'Physique': 'PH',
    'Science et génie des matériaux': 'MX',
    "Sciences et ingénierie de l'environnement": 'SIE',
    'Systèmes de communication': 'SC'
 }

In [245]:
URL_BA = "https://edu.epfl.ch/studyplan/fr/bachelor/"
URL_ROOT = 'https://edu.epfl.ch/'

page = requests.get(URL_BA)
soup = BeautifulSoup(page.content, "html.parser")
sections = [x.get('href') for x in soup.find('main').find('ul').findAll('a')]
plans_etudes = []
for section in sections:
    page = requests.get(URL_ROOT + section)
    soup = BeautifulSoup(page.content, "html.parser")
    section_name = ' '.join(soup.find('main').find('header').find('h2').text.split(' ')[:-1])
    for cours in soup.find('main').findAll('div', class_="line"):
        if (cours != None):
            code = cours.find('div', class_='cours-info').text.split('/')[0].replace(" ", "")
            if (code != ''):
                for sem in cours.findAll('div', class_='bachlor'):
                    issemester = False
                    for cep in sem.findAll('div', class_='cep'):
                        if (cep.text != '-'):
                            issemester = True
                    if (issemester == True):
                        semester = sem.attrs['data-title']
                if (section_name in map_section):
                    plans_etudes.append({
                        "code": code,
                        "promo": map_semester[semester],
                        "section": map_section[section_name]
                    })

In [250]:
for course in plans_etudes:
    course_db = db.courses.find_one({ 'code' : course['code']})
    if (course_db != None):
        db.plans.update_one({'section': course['section'], 'course': course_db['_id'], 'promo': course['promo']}, {'$set' : {'section': course['section'], 'course': course_db['_id'], 'promo': course['promo']}}, upsert=True)
    else:
        print(course)


{'code': 'AR-301(al)', 'promo': 'BA5', 'section': 'AR'}
{'code': 'AR-301(ab)', 'promo': 'BA5', 'section': 'AR'}
{'code': 'AR-301(ac)', 'promo': 'BA5', 'section': 'AR'}
{'code': 'AR-301(p)', 'promo': 'BA5', 'section': 'AR'}
{'code': 'AR-301(ad)', 'promo': 'BA5', 'section': 'AR'}
{'code': 'AR-301(j)', 'promo': 'BA5', 'section': 'AR'}
{'code': 'AR-301(b)', 'promo': 'BA5', 'section': 'AR'}
{'code': 'AR-301(k)', 'promo': 'BA5', 'section': 'AR'}
{'code': 'AR-301(n)', 'promo': 'BA5', 'section': 'AR'}
{'code': 'AR-301(ae)', 'promo': 'BA5', 'section': 'AR'}
{'code': 'AR-301(af)', 'promo': 'BA5', 'section': 'AR'}
{'code': 'AR-301(h)', 'promo': 'BA5', 'section': 'AR'}
{'code': 'AR-302(al)', 'promo': 'BA6', 'section': 'AR'}
{'code': 'AR-302(ab)', 'promo': 'BA6', 'section': 'AR'}
{'code': 'AR-302(ad)', 'promo': 'BA6', 'section': 'AR'}
{'code': 'AR-302(j)', 'promo': 'BA6', 'section': 'AR'}
{'code': 'AR-302(ag)', 'promo': 'BA6', 'section': 'AR'}
{'code': 'AR-302(b)', 'promo': 'BA6', 'section': 'AR'}


In [248]:
for course in plans_etudes:
    if (course == None):
        print(course)

In [212]:
cours_semester = db.courses.find({ 'code' : { '$in' : results} })
booking_semester = list(db.booking.find({ 'course': {'$in' : list(map(lambda x: x['_id'], cours_semester))}}))
for schedule in booking_semester:
    course = db.courses.find_one({ '_id': schedule['course'] })
    schedule['course'] = course
    room = db.rooms.find_one({ '_id': schedule['room'] })
    schedule['room'] = room

In [277]:
st = time.time()

section = 'GM'
promo = 'BA6'
courses_ids = list(map(lambda x: x['course'], list(db.plans.find({ 'section': section, 'promo': promo }))))
print('courses_id DB' + (time.time() - st))
courses = list(db.courses.find({ '_id' : { '$in' : courses_ids} }))
print('courses DB' + (time.time() - st))
if (len(courses) == 0):
    print('no courses')

bookings = list(db.booking.find({ 'course': {'$in' : list(map(lambda x: x['_id'], courses))}}))
print('bookings DB' + (time.time() - st))
for schedule in bookings:
    course = db.courses.find_one({ '_id': schedule['course'] })
    schedule['course'] = course
    room = db.rooms.find_one({ '_id': schedule['room'] })
    schedule['room'] = room

days = ['Lu', 'Ma', 'Me', 'Je', 'Ve']
times = range(8, 20)
timetable = dict()
colspan = {'Lu' : 1, 'Ma' : 1, 'Me' : 1, 'Je' : 1, 'Ve' : 1}

print('start timetable' + (time.time() - st))
for i, time in enumerate(times):
    timetable[f'{time}-{time+1}'] = dict()
    for j, day in enumerate(days):
        timetable[f'{time}-{time+1}'][day] = []
        for schedule in bookings:
            if (schedule['day'] == day and schedule['time'] == f'{time}-{time+1}'):
                solved = False
                # List all slots at that time-day
                for i_slot, slot in enumerate(timetable[f'{time}-{time+1}'][day]):
                    # If the same course then add room to list
                    if ('course' in slot and schedule['course']['code'] == slot['course']['code']):
                        timetable[f'{time}-{time+1}'][day][i_slot]['rooms'].append(schedule['room']['name'])
                        solved = True

                # If conflict not solved then append new slot
                if (solved == False):
                    timetable[f'{time}-{time+1}'][day].append({
                            'course' : {
                                'code': schedule['course']['code'],
                                'name': schedule['course']['name']
                            },
                            'time': schedule['time'],
                            'day': schedule['day'],
                            'label': schedule['label'],
                            'duration': schedule['duration'],
                            'rooms': [schedule['room']['name']],
                            'colspan' : 1
                        })

        for k in range(i):
            k_time = f'{times[k]}-{times[k]+1}'
            if (len(timetable[k_time][day]) > 0):
                for slot in timetable[k_time][day]:
                    if ('duration' in slot and slot['duration'] > i - k):
                        timetable[f'{time}-{time+1}'][day].append({'skip': True})
print('end_timetable' + (time.time() - st))
# Find max colspan per day
for time in times:
    for day in days:
        cols = len(timetable[f'{time}-{time+1}'][day])
        if (cols > colspan[day]):
            colspan[day] = cols

# Fill colspan
for time in times:
    for day in days:
        cols = len(timetable[f'{time}-{time+1}'][day])
        if (cols > 0 and cols < colspan[day]):
            max_cols = cols
            if ('skip' not in timetable[f'{time}-{time+1}'][day][0]):
                for k in range(timetable[f'{time}-{time+1}'][day][0]['duration']):
                    if (len(timetable[f'{k+time}-{k+time+1}'][day]) > max_cols):
                        max_cols = len(timetable[f'{k+time}-{k+time+1}'][day])
            timetable[f'{time}-{time+1}'][day][0]['colspan'] = colspan[day] - max_cols + 1
print('END' + (time.time() - st))

CPU times: total: 0 ns
Wall time: 2.32 s


In [267]:
colspan

{'Lu': 3, 'Ma': 1, 'Me': 2, 'Je': 2, 'Ve': 1}

In [273]:
timetable['15-16']

{'Lu': [{'skip': True, 'colspan': 3}],
 'Ma': [{'course': {'code': 'ME-351',
    'name': 'Thermodynamics and energetics II'},
   'time': '15-16',
   'day': 'Ma',
   'label': 'cours',
   'duration': 2,
   'rooms': ['MAA112'],
   'colspan': 1}],
 'Me': [{'skip': True}, {'skip': True}],
 'Je': [{'course': {'code': 'ME-311',
    'name': 'Dynamique des systèmes mécaniques'},
   'time': '15-16',
   'day': 'Je',
   'label': 'projet',
   'duration': 1,
   'rooms': ['CM1105', 'CM1120'],
   'colspan': 1},
  {'skip': True}],
 'Ve': []}