# 1. HTTP Request with Postman
<br>
Querrying IS-Academia for "Informatique, 2007-2008, Bachelor semestre 1" gives the following parameters on Postman :<br>
ww_x_GPS : 71297531<br>
ww_i_reportModel : 133685247<br>
ww_i_reportModelXsl : 133685270<br>
ww_x_UNITE_ACAD : 249847<br>
ww_x_PERIODE_ACAD : 978181<br>
ww_x_PERIODE_PEDAGO : 249108<br>
ww_x_HIVERETE : null<br>


So here are the parameters that we are mostly interesting in :<br>
ww_x_UNITE_ACAD  <- Informatique<br>
ww_x_PERIODE_ACAD  <- 2007 - 2016<br>
ww_x_PERIODE_PEDAGO  <- Bachelor semestre 1 and Bachelor semestre 6<br>

In [1]:
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup

sns.set_context('notebook')

In [2]:
form_url = "http://isa.epfl.ch/imoniteur_ISAP/!GEDPUBLICREPORTS.filter"
base_url = 'http://isa.epfl.ch/imoniteur_ISAP/!GEDPUBLICREPORTS.html'
get_parameters = {
    'ww_i_reportModel': '133685247',  # Report Model for registered students by section and semester
    'ww_i_reportModelXsl': '133685270',  # HTML output
}
r  = requests.get(form_url, get_parameters)
soup = BeautifulSoup(r.text, 'html.parser')

In [3]:
# Extract the appropriate parameters from the html
academic_unit = {'ww_x_UNITE_ACAD': soup.find('option', string='Informatique')['value']}
print('Academic unit:', academic_unit, '\n')

academic_period_select = soup.find('select', attrs={'name': 'ww_x_PERIODE_ACAD'})
academic_period_dict = {option.string: option['value']
                        for option in academic_period_select
                        if option.string is not None}
print('Academic periods:', academic_period_dict, '\n')

pedag_period_select = soup.find('select', attrs={'name': 'ww_x_PERIODE_PEDAGO'})
searched_pedag_periods = {'Bachelor semestre 1', 'Bachelor semestre 6'}
pedag_period = {option.string: option['value']
                for option in pedag_period_select
                if option.string in searched_pedag_periods}
print('Pedagogic period:', pedag_period)

Academic unit: {'ww_x_UNITE_ACAD': '249847'} 

Academic periods: {'2015-2016': '213638028', '2013-2014': '213637754', '2012-2013': '123456101', '2009-2010': '978195', '2016-2017': '355925344', '2008-2009': '978187', '2010-2011': '39486325', '2011-2012': '123455150', '2007-2008': '978181', '2014-2015': '213637922'} 

Pedagogic period: {'Bachelor semestre 1': '249108', 'Bachelor semestre 6': '942175'}


In [4]:
get_parameters.update(academic_unit)  # Add academic unit to get parameters
get_parameters.update({'ww_x_GPS': '-1'})  # This parameters represents the "Tous" ("All") link returned by the form.

In [5]:
def build_dataframe(pedagogic_period: str) -> pd.DataFrame:
    """This function takes a list of academic periods (eg: ['2007-2008', '2008-2009', ...])
    and a pedagogic period (eg: 'Bachelor semestre 1') and builds a dataframe with all
    concerned students.
    """
    df = pd.DataFrame()
    for i, academic_period in enumerate(sorted(academic_period_dict.keys())):  # 2007 until 2016
        # Request GET parameters
        request_params = {**get_parameters,
                          'ww_x_PERIODE_ACAD': academic_period_dict.get(academic_period),
                          'ww_x_PERIODE_PEDAGO': pedag_period.get(pedagogic_period)}
        r = requests.get(base_url, request_params)
        temp_df = pd.read_html(r.text, header=1, index_col=10)[0]  # User sciper nº as index
        temp_df = temp_df[['Civilité', 'Nom Prénom']]  # Keep relevant columns only
        temp_df[pedagogic_period] = i + 2007  # Annotate the corresponding year for the pedagogic period
        df = pd.concat([df, temp_df])
    return df

# Load all CS students that did their first and last bachelor semesters
starting = build_dataframe('Bachelor semestre 1')
ending = build_dataframe('Bachelor semestre 6')

In [6]:
starting = starting[~starting.index.duplicated(keep='first')]  # Ignore repeated first years
ending = ending[~ending.index.duplicated(keep='last')]  # Keep last 6th semester only

# Merge both dataframes.
students = pd.merge(starting, ending, how='inner')
# The 6th semester is always in spring (year + 1)
students['Bachelor semestre 6'] = students['Bachelor semestre 6'] + 1
students.sample(10)

Unnamed: 0,Civilité,Nom Prénom,Bachelor semestre 1,Bachelor semestre 6
141,Monsieur,Coppex Gilles,2010,2013
346,Monsieur,Dunant Raphaël Paul,2013,2016
395,Monsieur,Vaucher Timoté Pascal,2014,2017
313,Madame,Sbai Marion Fadoi,2012,2015
305,Monsieur,Raemy Baptiste,2012,2015
387,Monsieur,Lamonato Yves,2014,2017
340,Monsieur,Collaud Jonathan,2013,2016
375,Monsieur,Zbinden Boris Adrien,2013,2016
241,Monsieur,Séguy Louis Marie James,2011,2015
41,Monsieur,Arnfred Jonas,2008,2011


In [7]:
students['Delta'] = (students['Bachelor semestre 6'] - students['Bachelor semestre 1']) * 12 # months in a year
print('Male (Monsieur):', students[students['Civilité'] == 'Monsieur'].shape[0])
print('Female (Madame):', students[students['Civilité'] == 'Madame'].shape[0])
students.groupby('Civilité')[['Delta']].mean()

Male (Monsieur): 368
Female (Madame): 29


Unnamed: 0_level_0,Delta
Civilité,Unnamed: 1_level_1
Madame,39.724138
Monsieur,41.771739


# 2

We consider that every entry corresponding to a semester corresponds to six months spent at EPFL. Therefore, for each master student, we compute the total number of semesters spent at EPFL, then multiply it to have the duration of the stay in months.

In [None]:
#Doing as previous but with master pedagogic periods
#Params of master periods
master_searched_pedag_periods = {'Master semestre 1', 'Master semestre 2', 'Master semestre 3', 'Projet Master automne', 'Projet Master printemps'}
master_pedag_period = {option.string: option['value']
                for option in pedag_period_select
                if option.string in master_searched_pedag_periods}
#print('Master Pedagogic period:', pedag_period)

#Redefining build_dataframe with the new columns that we are interested in
def build_master_dataframe(pedagogic_period: str) -> pd.DataFrame:
    """This function takes a list of academic periods (eg: ['2007-2008', '2008-2009', ...])
    and a pedagogic period (eg: 'Master semestre 1') and builds a dataframe with all
    concerned students.
    """
    df = pd.DataFrame()
    for i, academic_period in enumerate(sorted(academic_period_dict.keys())):  # 2007 until 2016
        # Request GET parameters
        request_params = {**get_parameters,
                          'ww_x_PERIODE_ACAD': academic_period_dict.get(academic_period),
                          'ww_x_PERIODE_PEDAGO': master_pedag_period.get(pedagogic_period)}
        r = requests.get(base_url, request_params)
        if('Civilité' in r.text): #check if there is a header i.e. any entries
            temp_df = pd.read_html(r.text, header=1, index_col=10)[0]  # User sciper nº as index
            temp_df = temp_df[['Civilité', 'Nom Prénom', 'Mineur']]  # Keep relevant columns only
            temp_df[pedagogic_period] = i + 2007  # Annotate the corresponding year for the pedagogic period
            df = pd.concat([df, temp_df])
    return df

# Load all CS students that did their first and last bachelor semesters
ma_1 = build_master_dataframe('Master semestre 1')
ma_2 = build_master_dataframe('Master semestre 2')
ma_3 = build_master_dataframe('Master semestre 3')
pdm_1 = build_master_dataframe('Projet Master automne')
#pdm_2 = build_master_dataframe('Projet Master printemps')

In [None]:
ma_1.head()

Here, we count the number of entries for each student in each semester dataframe, then combine and sum to find the total number of semsters per student.

In [43]:
# Merge dataframes.

ma_1_count = ma_1.groupby('Nom Prénom').count()['Master semestre 1']
ma_2_count = ma_2.groupby('Nom Prénom').count()['Master semestre 2']
ma_3_count = ma_3.groupby('Nom Prénom').count()['Master semestre 3']
pdm_1_count = pdm_1.groupby('Nom Prénom').count()['Projet Master automne']
pdm_1_count = pdm_1.groupby('Nom Prénom').count()['Projet Master printemps']

#Concatenate data from all semesters
students = pd.concat([ma_1_count, ma_2_count, ma_3_count, pdm_1_count], axis = 1)

#Sum all columns to find the total number of semesters per student
students_sem_count = students.sum(axis = 1)

students_sem_count.head()

Abbadi Hajar        3.0
Abelenda Diego      4.0
Abi Akar Nora       3.0
Adamsons Valdis     1.0
Adams Marc Alain    1.0
dtype: float64

We then average over all students to find the average number of semesters, and multiply by 6 to find the average number of months spent at EPFL for master students.

In [41]:
students_sem_count.mean()*6

17.20777891504606