# 1. HTTP Request with Postman
<br>
Querrying IS-Academia for "Informatique, 2007-2008, Bachelor semestre 1" gives the following parameters on Postman :<br>
ww_x_GPS : 71297531<br>
ww_i_reportModel : 133685247<br>
ww_i_reportModelXsl : 133685270<br>
ww_x_UNITE_ACAD : 249847<br>
ww_x_PERIODE_ACAD : 978181<br>
ww_x_PERIODE_PEDAGO : 249108<br>
ww_x_HIVERETE : null<br>


So here are the parameters that we are mostly interesting in :<br>
ww_x_UNITE_ACAD  <- Informatique<br>
ww_x_PERIODE_ACAD  <- 2007 - 2016<br>
ww_x_PERIODE_PEDAGO  <- Bachelor semestre 1 and Bachelor semestre 6<br>

In [16]:
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup

sns.set_context('notebook')

In [17]:
form_url = "http://isa.epfl.ch/imoniteur_ISAP/!GEDPUBLICREPORTS.filter"
base_url = 'http://isa.epfl.ch/imoniteur_ISAP/!GEDPUBLICREPORTS.html'
get_parameters = {
    'ww_i_reportModel': '133685247',  # Report Model for registered students by section and semester
    'ww_i_reportModelXsl': '133685270',  # HTML output
}
r  = requests.get(form_url, get_parameters)
soup = BeautifulSoup(r.text, 'html.parser')

In [18]:
# Extract the appropriate parameters from the html
academic_unit = {'ww_x_UNITE_ACAD': soup.find('option', string='Informatique')['value']}
print('Academic unit:', academic_unit, '\n')

academic_period_select = soup.find('select', attrs={'name': 'ww_x_PERIODE_ACAD'})
academic_period_dict = {option.string: option['value']
                        for option in academic_period_select
                        if option.string is not None}
print('Academic periods:', academic_period_dict, '\n')

pedag_period_select = soup.find('select', attrs={'name': 'ww_x_PERIODE_PEDAGO'})
searched_pedag_periods = {'Bachelor semestre 1', 'Bachelor semestre 6'}
pedag_period = {option.string: option['value']
                for option in pedag_period_select
                if option.string in searched_pedag_periods}
print('Pedagogic period:', pedag_period)

Academic unit: {'ww_x_UNITE_ACAD': '249847'} 

Academic periods: {'2012-2013': '123456101', '2015-2016': '213638028', '2016-2017': '355925344', '2013-2014': '213637754', '2014-2015': '213637922', '2008-2009': '978187', '2007-2008': '978181', '2010-2011': '39486325', '2011-2012': '123455150', '2009-2010': '978195'} 

Pedagogic period: {'Bachelor semestre 1': '249108', 'Bachelor semestre 6': '942175'}


In [19]:
get_parameters.update(academic_unit)  # Add academic unit to get parameters
get_parameters.update({'ww_x_GPS': '-1'})  # This parameters represents the "Tous" ("All") link returned by the form.

In [20]:
def build_dataframe(pedagogic_period: str) -> pd.DataFrame:
    """This function takes a list of academic periods (eg: ['2007-2008', '2008-2009', ...])
    and a pedagogic period (eg: 'Bachelor semestre 1') and builds a dataframe with all
    concerned students.
    """
    df = pd.DataFrame()
    for i, academic_period in enumerate(sorted(academic_period_dict.keys())):  # 2007 until 2016
        # Request GET parameters
        request_params = {**get_parameters,
                          'ww_x_PERIODE_ACAD': academic_period_dict.get(academic_period),
                          'ww_x_PERIODE_PEDAGO': pedag_period.get(pedagogic_period)}
        r = requests.get(base_url, request_params)
        temp_df = pd.read_html(r.text, header=1, index_col=10)[0]  # User sciper nº as index
        temp_df = temp_df[['Civilité', 'Nom Prénom']]  # Keep relevant columns only
        temp_df[pedagogic_period] = i + 2007  # Annotate the corresponding year for the pedagogic period
        df = pd.concat([df, temp_df])
    return df

# Load all CS students that did their first and last bachelor semesters
starting = build_dataframe('Bachelor semestre 1')
ending = build_dataframe('Bachelor semestre 6')

In [21]:
starting = starting[~starting.index.duplicated(keep='first')]  # Ignore repeated first years
ending = ending[~ending.index.duplicated(keep='last')]  # Keep last 6th semester only

# Merge both dataframes.
students = pd.merge(starting, ending, how='inner')
# The 6th semester is always in spring (year + 1)
students['Bachelor semestre 6'] = students['Bachelor semestre 6'] + 1
students.sample(10)

Unnamed: 0,Civilité,Nom Prénom,Bachelor semestre 1,Bachelor semestre 6
134,Monsieur,Beguet Eric,2010,2013
315,Monsieur,Steinmann Raphaël Benjamin,2012,2016
234,Monsieur,Robert Arnaud,2011,2014
298,Monsieur,Ottet Loïc,2012,2015
307,Monsieur,Resin David,2012,2017
55,Monsieur,Gardiol Loïc,2008,2011
279,Monsieur,Haprian Vlad Ioan,2012,2015
287,Monsieur,Le Bail-Collet Simon Pierre Yvick,2012,2016
90,Monsieur,Bourban Fabien,2009,2013
320,Monsieur,Torche Jérôme William,2012,2015


In [22]:
students['Delta'] = (students['Bachelor semestre 6'] - students['Bachelor semestre 1']) * 12 # months in a year
print('Male (Monsieur):', students[students['Civilité'] == 'Monsieur'].shape[0])
print('Female (Madame):', students[students['Civilité'] == 'Madame'].shape[0])
students.groupby('Civilité')[['Delta']].mean()

Male (Monsieur): 368
Female (Madame): 29


Unnamed: 0_level_0,Delta
Civilité,Unnamed: 1_level_1
Madame,39.724138
Monsieur,41.771739


# 2

In [23]:
#Doing as previous but with master pedagogic periods
#Params of master periods
master_searched_pedag_periods = {'Master semestre 1', 'Master semestre 2', 'Master semestre 3', 'Projet Master automne', 'Projet Master printemps'}
master_pedag_period = {option.string: option['value']
                for option in pedag_period_select
                if option.string in master_searched_pedag_periods}
#print('Master Pedagogic period:', pedag_period)

#Redefining build_dataframe with the new columns that we are interested in
def build_master_dataframe(pedagogic_period: str) -> pd.DataFrame:
    """This function takes a list of academic periods (eg: ['2007-2008', '2008-2009', ...])
    and a pedagogic period (eg: 'Master semestre 1') and builds a dataframe with all
    concerned students.
    """
    df = pd.DataFrame()
    for i, academic_period in enumerate(sorted(academic_period_dict.keys())):  # 2007 until 2016
        # Request GET parameters
        request_params = {**get_parameters,
                          'ww_x_PERIODE_ACAD': academic_period_dict.get(academic_period),
                          'ww_x_PERIODE_PEDAGO': master_pedag_period.get(pedagogic_period)}
        r = requests.get(base_url, request_params)
        temp_df = pd.read_html(r.text, header=1, index_col=10)[0]  # User sciper nº as index
        temp_df = temp_df[['Civilité', 'Nom Prénom', 'Mineur']]  # Keep relevant columns only
        temp_df[pedagogic_period] = i + 2007  # Annotate the corresponding year for the pedagogic period
        df = pd.concat([df, temp_df])
    return df

# Load all CS students that did their first and last bachelor semesters
#ma_1 = build_master_dataframe('Master semestre 1')
#ma_2 = build_master_dataframe('Master semestre 2')
#ma_3 = build_master_dataframe('Master semestre 3')
pdm_1 = build_master_dataframe('Projet Master automne')
#pdm_2 = build_master_dataframe('Projet Master printemps')

In [24]:
pdm_1.head()

Unnamed: 0_level_0,Civilité,Nom Prénom,Mineur,Projet Master automne
No Sciper,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
173527,Monsieur,Stewart Conail,,2007
180027,Madame,Agarwal Megha,,2008
159852,Monsieur,Brutsche Florian,,2008
166805,Monsieur,Fleury Marc-Olivier,,2008
172264,Monsieur,Garg Nikhil,,2008


In [None]:
starting = starting[~starting.index.duplicated(keep='first')]  # Ignore repeated first years
ending = ending[~ending.index.duplicated(keep='last')]  # Keep last 6th semester only

# Merge both dataframes.
students = pd.merge(starting, ending, how='inner')
# The 6th semester is always in spring (year + 1)
students['Bachelor semestre 6'] = students['Bachelor semestre 6'] + 1
students.sample(10)