### **Installing dependencies**
This will install the following dependencies:
- requests
- openpyxl
- pandas

In [16]:
pip install -r requirements.txt

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


### **Importing dependencies**

In [17]:
from openpyxl import Workbook
from datetime import datetime
import os
import requests
import pandas as pd

### **Declaring API links**
These variables hold the API endpoints that will be used to extract the data. Here is what each one returns:
- **institutions_uri**: returns all institutions which may have vacancies.
- **institution_offers_uri**: returns all course offerings of each institution found previously.
- **institution_offer_selected_uri**: returns all acceptances (selected candidates) for each offer.

In [18]:
# Endpoint queried without parameters to list the institutions.
institutions_uri = 'https://sisu-api.sisu.mec.gov.br/api/v1/oferta/instituicoes'
# Endpoint template for one institution's offers; the "{co_ies}" placeholder is
# replaced with institution["co_ies"] before the request is made.
institution_offers_uri = 'https://sisu-api.sisu.mec.gov.br/api/v1/oferta/instituicao/{co_ies}'
# Endpoint template for the acceptances of one offer; the "{co_oferta}" placeholder
# is replaced with institution_offer["co_oferta"] before the request is made.
institution_offer_selected_uri = 'https://sisu-api.sisu.mec.gov.br/api/v1/oferta/{co_oferta}/selecionados'

### **Requesting data**

Using the variables above, each function below requests data, converts/processes it if necessary, and finally returns it.

In [19]:
def get_institutions_data(timeout=30):
  """Fetch the list of institutions from the SISU API.

  Args:
    timeout: Seconds to wait for the server before giving up. Without a
      timeout, requests may block forever on an unresponsive server.

  Returns:
    The decoded JSON payload of `institutions_uri`.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.RequestException: On connection problems or timeouts.
  """
  response = requests.get(institutions_uri, timeout=timeout)
  # Fail loudly on an error status instead of handing an error payload to the caller.
  response.raise_for_status()
  return response.json()

def get_institution_offers_data(institution, timeout=30):
  """Fetch the course offers of a single institution.

  Args:
    institution: Mapping describing one institution; its "co_ies" entry is
      substituted into `institution_offers_uri`.
    timeout: Seconds to wait for the server before giving up.

  Returns:
    A dict with every top-level entry of the JSON response except the first.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.RequestException: On connection problems or timeouts.
  """
  # str() keeps the substitution working even if the code is delivered as a number.
  url = institution_offers_uri.replace("{co_ies}", str(institution["co_ies"]))
  response = requests.get(url, timeout=timeout)
  response.raise_for_status()
  # The first top-level entry is skipped on purpose.
  # NOTE(review): presumably it is metadata rather than an offer — confirm against the API.
  return dict(list(response.json().items())[1:])

def get_institution_offer_selected_data(institution_offer, timeout=30):
  """Fetch the acceptances registered for a single offer.

  Args:
    institution_offer: Mapping describing one offer; its "co_oferta" entry is
      substituted into `institution_offer_selected_uri`.
    timeout: Seconds to wait for the server before giving up.

  Returns:
    The decoded JSON payload of the request.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.RequestException: On connection problems or timeouts.
  """
  # str() keeps the substitution working even if the code is delivered as a number.
  url = institution_offer_selected_uri.replace("{co_oferta}", str(institution_offer["co_oferta"]))
  response = requests.get(url, timeout=timeout)
  response.raise_for_status()
  return response.json()

### **Running**

This function extracts the data, processes and concatenates it, and finally saves it to a CSV file. Keep in mind that SiSU takes place every year, so the information served by the API may be renewed or removed.

In [29]:
def run():
  """Download every acceptance record and write them to out/SISUv<year>.csv.

  For each institution, each of its offers and each acceptance of that offer,
  one row is written that concatenates the values of the three records. The
  header row is built from the keys of the first combination encountered, so
  later records are assumed to share the same keys in the same order.

  Rows are written as they are fetched. If the run is interrupted, the rows
  collected so far are kept in the output file instead of being lost. An
  existing file for the same year is overwritten.

  Raises:
    Whatever the request helpers raise, and OSError if the output file
    cannot be created.
  """
  import csv  # local import: only this function needs it

  # Fetch the institutions before touching the disk, so a failure on the very
  # first request does not truncate a previously generated file.
  institutions_data = get_institutions_data()
  total_institutions = len(institutions_data)

  # The output directory may not exist on a fresh checkout.
  os.makedirs("out", exist_ok=True)
  output_path = f"out/SISUv{datetime.now().year}.csv"

  header_written = False
  # newline="" is required by the csv module to avoid blank lines on Windows.
  with open(output_path, "w", newline="", encoding="utf-8") as output_file:
    writer = csv.writer(output_file)

    for i, institution in enumerate(institutions_data, start=1):
      print(f"Processing institution {i}/{total_institutions}")

      institution_offers_data = get_institution_offers_data(institution)
      total_offers = len(institution_offers_data)
      for j, institution_offer in enumerate(institution_offers_data.values(), start=1):
        print(f"   Processing offer {j}/{total_offers}")

        institution_offer_selected_data = get_institution_offer_selected_data(institution_offer)
        for institution_offer_selected in institution_offer_selected_data:
          if not header_written:
            # Column names come from the keys of the first complete record.
            writer.writerow(
              list(institution.keys())
              + list(institution_offer.keys())
              + list(institution_offer_selected.keys())
            )
            header_written = True

          writer.writerow(
            list(institution.values())
            + list(institution_offer.values())
            + list(institution_offer_selected.values())
          )

# Start the full extraction. One HTTP request is issued per institution and per
# offer, so expect this cell to run for a long time.
run()

Processing institution 1/127
  Processing offer 1/24
  Processing offer 2/24
  Processing offer 3/24
  Processing offer 4/24
  Processing offer 5/24
  Processing offer 6/24
  Processing offer 7/24
  Processing offer 8/24
  Processing offer 9/24
  Processing offer 10/24
  Processing offer 11/24
  Processing offer 12/24
  Processing offer 13/24
  Processing offer 14/24
  Processing offer 15/24
  Processing offer 16/24
  Processing offer 17/24
  Processing offer 18/24
  Processing offer 19/24
  Processing offer 20/24
  Processing offer 21/24
  Processing offer 22/24
  Processing offer 23/24
  Processing offer 24/24
Processing institution 2/127
  Processing offer 1/31
  Processing offer 2/31
  Processing offer 3/31
  Processing offer 4/31
  Processing offer 5/31
  Processing offer 6/31
  Processing offer 7/31
  Processing offer 8/31
  Processing offer 9/31
  Processing offer 10/31
  Processing offer 11/31
  Processing offer 12/31
  Processing offer 13/31
  Processing offer 14/31
  Processi

KeyboardInterrupt: 