In [16]:
%pip install selenium bs4 tqdm sqlalchemy tabula-py psycopg2-binary lxml python-dotenv

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [17]:
import os
import requests
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from urllib.parse import urlparse
from bs4 import BeautifulSoup 
from datetime import datetime
import uuid

# Create the "tables" folder if it doesn't exist
tables_folder_path = "./tables"
if not os.path.exists(tables_folder_path):
    os.makedirs(tables_folder_path)


# Create the "matrizes" folder if it doesn't exist
pdfs_folder_path = "./courses_pdfs"
if not os.path.exists(pdfs_folder_path):
    os.makedirs(pdfs_folder_path)


In [21]:
from sqlalchemy import create_engine, inspect
from dotenv import load_dotenv
import os

load_dotenv()

# Create a connection to the PostgreSQL database
conn_str = os.getenv('DB_URL')
print(conn_str)
engine = create_engine(conn_str)

# create inspector
inspector = inspect(engine)

# get table names
unwanted = {'Session', 'Account', 'User', 'VerificationToken'}
table_names = [e for e in inspector.get_table_names() if e not in unwanted]
print(table_names)

# initialize an empty dictionary to hold the data
existing_data = {}

# iterate over all table names
for table in table_names:
    # read the data from the table and save it to the dictionary
    existing_data[table] = pd.read_sql(f'SELECT * FROM {table}', engine)


postgresql://postgres:12345678@localhost:5432/gradeufop_db
['prerequisite', 'discipline_class', 'discipline_class_schedule', 'course', 'discipline_course', 'department', 'discipline']


In [22]:
import re
import unicodedata

def store_df(df, table):
    # Store the DataFrame in database
    df.to_sql(table, engine, index=False, if_exists='append')
    
    # Append the DataFrame to its CSV file
    df.to_csv(
        f"{tables_folder_path}/{table}.csv",
        mode="a",
        header=not os.path.exists(f"{tables_folder_path}/{table}.csv"),
        index=False,
    )


def format_course_name(text):
    # Remove accent marks
    text = ''.join(c for c in unicodedata.normalize('NFD', text) if unicodedata.category(c) != 'Mn')
    
    # Replace symbols with a hyphen
    text = re.sub(r'[^a-zA-Z0-9]+', '-', text)
    
    # Remove leading and trailing hyphens
    text = text.strip('-')
    
    # Convert to lowercase
    text = text.lower()
    
    return text

### Busca cursos e salva os .pdfs na pasta /matrizes

In [23]:
from tqdm import tqdm

# Set up Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run Chrome in headless mode
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")

# Set up Chrome driver service
chromedriver_path = (
    "./chromedriver"  # Replace with the path to your chromedriver executable
)
service = Service()

# Set up Chrome driver
driver = webdriver.Chrome(service=service, options=chrome_options)

# Navigate to the URL
url = "https://www.escolha.ufop.br/cursos"
driver.get(url)


# Find elements with class "ufop-glossary-row"
elements = driver.find_elements(By.CLASS_NAME, "ufop-glossary-row")

# Extract the href links from child anchor 'a' tags
links = []
courses_dict = {"id": [], "code": [], "name": []}

print("Buscando a lista de cursos da UFOP...")
with tqdm(total=len(elements), desc="Progresso", ascii=True) as pbar:
    for element in elements:
        link_element = element.find_element(By.TAG_NAME, "a")
        href = link_element.get_attribute("href")
        links.append(href)

        courses_dict["id"].append(str(uuid.uuid4()))
        courses_dict["code"].append(format_course_name(link_element.text))
        courses_dict["name"].append(link_element.text)
        pbar.set_postfix(curso=f"{link_element.text}")
        pbar.update(1)

indexes_to_delete = []
course_pdfs = []

# Navigate to each link and download PDF files
print("\nBuscando os links .pdf ...")
with tqdm(total=len(links), desc="Progresso", ascii=True) as pbar:
    for i, link in enumerate(links):
        driver.get(link)

        matriz_elements = driver.find_elements(
            By.CLASS_NAME, "field-name-field-matriz-curricular"
        )

        for element in matriz_elements:
            link_elements = element.find_elements(By.TAG_NAME, "a")

            if len(link_elements) == 1:
                href = link_elements[0].get_attribute("href")
                course_pdfs.append({"course": courses_dict["code"][i], "link": href})
                continue

            for link_element in link_elements:
                href = link_element.get_attribute("href")
                course_type = link_element.text
                course_name = f"{courses_dict['name'][i]} ({course_type})"
                course_code = format_course_name(course_name)
                courses_dict["id"].append(str(uuid.uuid4()))
                courses_dict["name"].append(course_name)
                courses_dict["code"].append(course_code)

                course_pdfs.append({"course": course_code, "link": href})
            indexes_to_delete.append(i)

        pbar.set_postfix(curso=f"{courses_dict['name'][i]}")
        pbar.update(1)

courses_dict["id"] = [
    item for i, item in enumerate(courses_dict["id"]) if i not in indexes_to_delete
]
courses_dict["code"] = [
    item for i, item in enumerate(courses_dict["code"]) if i not in indexes_to_delete
]
courses_dict["name"] = [
    item for i, item in enumerate(courses_dict["name"]) if i not in indexes_to_delete
]

valid_pdf_substrings = [".pdf", "codCurso="]
for pdf in course_pdfs:
    import os

print("\nSalvando os arquivos .pdf dos cursos")
with tqdm(total=len(course_pdfs), desc="Progresso", ascii=True) as pbar:

    for pdf in course_pdfs:
        if any(text in pdf["link"] for text in valid_pdf_substrings):
            response = requests.get(pdf["link"])
            parsed_url = urlparse(pdf["link"])
            filename = f"{pdf['course']}.pdf"
            file_path = os.path.join(pdfs_folder_path, filename)

            # Check if the file already exists in the folder
            if not os.path.exists(file_path):
                with open(file_path, "wb") as file:
                    file.write(response.content)
        
        pbar.set_postfix(salvando=f"{pdf['course']}.pdf")
        pbar.update(1)
        
# Quit the driver
driver.quit()


Buscando a lista de cursos da UFOP...


Progresso: 100%|##########| 44/44 [00:00<00:00, 55.80it/s, curso=Turismo]                           



Buscando os links .pdf ...


Progresso: 100%|##########| 44/44 [00:45<00:00,  1.04s/it, curso=Turismo]                           



Salvando os arquivos .pdf dos cursos


Progresso: 100%|##########| 52/52 [00:21<00:00,  2.40it/s, salvando=turismo.pdf]                           


### Salva os cursos encontrados no banco de dados

In [24]:
course_df = pd.DataFrame(courses_dict).sort_values(by='code', ascending=True)
course_df['created_at'] = datetime.now()

existing_course_codes = []
# Check if 'course' key exists in existing_data
if 'course' in existing_data:
    existing_course_codes = set(existing_data['course']['code'])

course_df = course_df[~course_df['code'].isin(existing_course_codes)]

store_df(course_df, 'course')
print("Cursos salvos com sucesso!")


Cursos salvos com sucesso!


In [25]:
from datetime import datetime

URL = "https://zeppelin10.ufop.br/HorarioAulas/"

desired_departments = ["DECSI", "DECEA", "DEELT", "DEENP", "DEETE"]
semester = "23.2"


def get_HTML_content(URL, department):
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)
    driver.get(URL)
    elem = driver.find_element(By.XPATH, "//*[text()='{}']".format(department))
    elem.click()
    URL = driver.current_url
    html_source = driver.page_source
    soup = BeautifulSoup(html_source, "lxml")
    driver.quit()
    return soup


def parse_schedule_string(schedule_string):
    entries = []
    
    if schedule_string == '':
        return entries
    
    schedule_parts = schedule_string.split(" / ")
    
    if len(schedule_parts) == 0:
        schedule_parts.append(schedule_string)

    for part in schedule_parts:
        day, time_info = part.split(" ")
        start_time, end_time = time_info.split("-")
        class_type = end_time[-2]  # T for theoretical, P for practical
        end_time = end_time[:-3]  # Remove the class type from end_time

        entry = {
            "day_of_week": day,
            "start_time":  datetime.strptime(start_time, "%H:%M").time(),
            "end_time": datetime.strptime(end_time, "%H:%M").time(),
            "class_type": class_type,
        }
        entries.append(entry)

    return entries


def get_field_list(html_content, field):
    field_list = []
    table = html_content.find("table", {"id": "formPrincipal:tabela"})
    if table:
        tbody = table.find("tbody")
        tr_elements = tbody.find_all("tr")

        for i, tr in enumerate(tr_elements):
            if field == "descricao":
                span = tr.find(
                    "span", {"id": "formPrincipal:tabela:{}:{}".format(i, "disciplina")}
                )
                title = span.find_parent("a").get(
                    "title"
                )  # Extract the 'title' attribute of the parent <a> tag
                field_list.append(title)
                continue

            span = tr.find(
                "span", {"id": "formPrincipal:tabela:{}:{}".format(i, field)}
            )
            field_list.append(span.text)

    return field_list


def get_departments():
    r = requests.get(URL)
    departments_list = []
    soup = BeautifulSoup(r.text, "html.parser")  # Use 'html.parser' as the parser

    # Find the table with the specified id
    table = soup.find("table", {"id": "formPrincipal:tabela"})
    if table:
        tbody = table.find("tbody")

        # Find all <tr> elements within <tbody>
        tr_elements = tbody.find_all("tr")

        for i, tr in enumerate(tr_elements):
            tableCode = tr.find(
                "span", {"id": "formPrincipal:tabela:{}:codigoDepartamento".format(i)}
            )
            tableName = tr.find(
                "span", {"id": "formPrincipal:tabela:{}:descricao".format(i)}
            )

            
            departments_list.append(
                {"id": uuid.uuid4(), "code": tableCode.text.strip(), "name": tableName.text.strip()}
            )
            

        departments_df = pd.DataFrame(departments_list)
        departments_df["created_at"] = datetime.now()
        return departments_df, departments_list


def get_discipline_tables(departments_list):
    discipline_dict = {"id": [], "code": [], "name": [], "description": [], "department_id": []}
    class_dict = {
        "id": [],
        "class_number": [],
        "discipline_id": [],
        "professor": [],
    }
    schedule_dict = {
        "id": [],
        "discipline_class_id": [],
        "day_of_week": [],
        "start_time": [],
        "end_time": [],
        "class_type": [],
    }

    discipline_code_to_id = {}  # To store unique IDs for each discipline code
    
    print("\nBuscando disciplinas")
    with tqdm(total=len(departments_list), desc="Progresso", ascii=True) as pbar:
        for department in departments_list:
            if department['code'] not in desired_departments:
                continue

            html_content = get_HTML_content(URL, department['code'])
            columns_list = [
                "codigo",
                "disciplina",
                "descricao",
                "turma",
                "horario",
                "professores",
            ]
            columns_dict_list = {column_name: get_field_list(html_content, column_name) for column_name in columns_list}

            for i in range(len(columns_dict_list["codigo"])):
                code = columns_dict_list["codigo"][i]
                if code not in discipline_code_to_id:
                    discipline_id = str(uuid.uuid4())
                    discipline_code_to_id[code] = discipline_id
                    discipline_dict["id"].append(discipline_id)
                    discipline_dict["code"].append(code)
                    discipline_dict["name"].append(columns_dict_list["disciplina"][i])
                    discipline_dict["description"].append(columns_dict_list["descricao"][i])
                    discipline_dict["department_id"].append(department['id'])

                discipline_class_id = str(uuid.uuid4())
                class_dict["id"].append(discipline_class_id)
                class_dict["class_number"].append(columns_dict_list["turma"][i])
                class_dict["discipline_id"].append(discipline_code_to_id[code])
                class_dict["professor"].append(columns_dict_list["professores"][i])

                schedule_entries = parse_schedule_string(columns_dict_list["horario"][i])
                for entry in schedule_entries:
                    schedule_dict["id"].append(str(uuid.uuid4()))
                    schedule_dict["discipline_class_id"].append(discipline_class_id)
                    schedule_dict["day_of_week"].append(entry["day_of_week"])
                    schedule_dict["start_time"].append(entry["start_time"])
                    schedule_dict["end_time"].append(entry["end_time"])
                    schedule_dict["class_type"].append(entry["class_type"])
            
            pbar.set_postfix(info=f"Salvando disciplinas de {department['code']}...")
            pbar.update(1)
            
    discipline_df = pd.DataFrame(discipline_dict)
    class_df = pd.DataFrame(class_dict)
    schedule_df = pd.DataFrame(schedule_dict)

    currentTime = datetime.now()

    discipline_df["created_at"] = currentTime
    class_df["created_at"] = currentTime
    class_df["semester"] = semester
    schedule_df["created_at"] = currentTime
    return discipline_df, class_df, schedule_df


department_df, departments_list = get_departments()
existing_department_codes = []
# Check if 'department' key exists in existing_data
if 'department' in existing_data:
    existing_department_codes = set(existing_data['department']['code'])
department_df = department_df[~department_df['code'].isin(existing_department_codes)]
store_df(department_df, 'department')
print("Departamentos salvos com sucesso!")

(
    discipline_df,
    discipline_class_df,
    discipline_class_schedule_df,
) = get_discipline_tables(departments_list)

existing_discipline_codes = []
# Check if 'discipline' key exists in existing_data
if 'discipline' in existing_data:
    existing_discipline_codes = set(existing_data['discipline']['code'])
discipline_df = discipline_df[~discipline_df['code'].isin(existing_discipline_codes)]
store_df(discipline_df, 'discipline')

remaining_discipline_ids = set(discipline_df['id'])
discipline_class_df = discipline_class_df[discipline_class_df['discipline_id'].isin(remaining_discipline_ids)]
store_df(discipline_class_df, 'discipline_class')

remaining_discipline_class_ids = set(discipline_class_df['id'])
discipline_class_schedule_df = discipline_class_schedule_df[discipline_class_schedule_df['discipline_class_id'].isin(remaining_discipline_class_ids)]
store_df(discipline_class_schedule_df, 'discipline_class_schedule')

print("\nDados armazenados com sucesso!")


Departamentos salvos com sucesso!

Buscando disciplinas


Progresso:  11%|#         | 5/46 [00:06<00:49,  1.20s/it, info=Salvando disciplinas de DEETE...]


Dados armazenados com sucesso!





In [26]:
import tabula
import pandas as pd
import uuid
import re

code_pattern = r"[A-Z]{3}\d{3}"
subject_pattern = r"\b[A-Z]+\b"
classes_pattern = r"^(T P|T|P)$"
prerequisite_pattern = r"[A-Z]{3}\d{3}|\d+\s+horas"
chs_che_pattern = r"^\d+\/\d+$"

discipline_course_dict = {"id": [], "discipline_id": [], "course_id": [], "period": [], "mandatory": [], "created_at": []}
prerequisite_dict = {"id": [], "discipline_course_id": [], "prerequisite_discipline_id": [], "created_at": []}
empty_discipline_course_df = pd.DataFrame(data=discipline_course_dict)
empty_prerequisite_df = pd.DataFrame(data=prerequisite_dict)

def get_col_idx(df_value, pattern):
    indexes = []
    for i, item in enumerate(df_value):
        if i == 0:
            continue
        match = re.search(pattern, str(item), re.UNICODE)
        if match is not None:
            indexes.append(i)

    return indexes


def get_prerequisites(df_value):
    prerequisites = []
    prereq_idx = get_col_idx(df_value, prerequisite_pattern)
    if len(prereq_idx) > 0:
        prerequisites = [df_value[i] for i in prereq_idx]

    return format_prerequisites(prerequisites)


def get_discipline(df_value):
    subject_idx = get_col_idx(df_value, subject_pattern)
    if len(subject_idx) > 0:
        return re.sub(code_pattern, "", df_value[subject_idx[0]])
    return ""


def get_chs_che(df_value):
    chs_che_idx = get_col_idx(df_value, chs_che_pattern)

    if len(chs_che_idx) > 0:
        chs_che_list = df_value[chs_che_idx[0]].split("/")
        return tuple(map(int, chs_che_list))

    return ("", "")


def get_classes(df_value, classes_idx):
    classes = []
    if len(classes_idx) > 0:
        classes = [df_value[i] for i in classes_idx]
        return " ".join(classes)

    return ""

def get_period(df_value, mandatory):
    if not mandatory or not df_value[0]:
        return ""

    for item in reversed(df_value):
        if item:
            return int(item)
    return ""


def get_cha(df_value, chs):
    if not chs:
        return ""

    chs_alt = chs * 1.2

    if len(df_value) == 0:
        return ""

    for text in df_value:
        try:
            formatted_text = int(text)
            if formatted_text == chs or formatted_text == chs_alt:
                return formatted_text
        except ValueError:
            continue

    return ""


def format_prerequisites(df_value):
    if len(df_value) == 0:
        return ""

    requisites = []
    for text in df_value:
        if not text:
            continue

        matches = re.findall(code_pattern, text)
        requisites.extend(matches)

    joined_matches = " ".join(requisites)
    return joined_matches


def get_prerequisite_df(discipline_course_df):
    prerequisite_dict = {"id": [], "discipline_course_id": [], "prerequisite_discipline_id": []}
    discipline_course_dict = discipline_course_df.to_dict('records')

    for discipline_course in discipline_course_dict:   
        prerequisites = discipline_course['prerequisites'].split()
         # create a dictionary where the keys are the codes and the values are the ids
        id_map = discipline_df.set_index('code')['id'].to_dict()

        # use the dictionary to map the codes to ids
        discipline_ids = [id_map[code] for code in prerequisites if code in id_map]
        if len(discipline_ids) > 0:
            for prerequisite in discipline_ids:
                prerequisite_id = uuid.uuid4()
                prerequisite_dict['id'].append(prerequisite_id)
                prerequisite_dict['discipline_course_id'].append(discipline_course['id'])
                prerequisite_dict['prerequisite_discipline_id'].append(prerequisite)
   

    prerequisite_df = pd.DataFrame(prerequisite_dict)
    return prerequisite_df


def get_discipline_course_tables(df, course_id, mandatory):
    df = df.replace({r"\r": " "}, regex=True)
    df_struct = {"id": [], "discipline_id": [], "period": [], "prerequisites": []}
    discipline_course_df = pd.DataFrame(data=df_struct)

    iterIdx = -1

    for idx, value in enumerate(df.values):
        if idx == 0:
            iterIdx = -1
            discipline_course_df.at[0, "id"] = ""
            discipline_course_df.at[0, "discipline_id"] = ""
            discipline_course_df.at[0, "period"] = ""
            discipline_course_df.at[0, "prerequisites"] = ""
            continue

        discipline_ids = discipline_df[discipline_df['code'] == value[0]]['id']
        discipline_course_id = ''
        discipline_id = ''

        if(len(discipline_ids.values) > 0):
            discipline_id = discipline_ids.values[0]
        
        if value[0]:
            discipline_course_id = str(uuid.uuid4())

        discipline_course_df.at[idx, "id"] = discipline_course_id
        discipline_course_df.at[idx, "discipline_id"] = discipline_id
        discipline_course_df.at[idx, "period"] = get_period(value, mandatory)
        discipline_course_df.at[idx, "prerequisites"] = get_prerequisites(value)

        if value[0] != "":
            iterIdx = -1
            continue

        if iterIdx == -1:
            iterIdx = idx - 1

        discipline_course_df.loc[iterIdx, :] = [
            f"{item1} {item2}".strip()
            for item1, item2 in zip(
                discipline_course_df.values[iterIdx], discipline_course_df.values[idx]
            )
        ]

    discipline_course_df.drop(
        discipline_course_df[discipline_course_df["discipline_id"] == ""].index,
        inplace=True,
    )
    discipline_course_df["mandatory"] = mandatory
    discipline_course_df["course_id"] = course_id
    discipline_course_df['period'] = discipline_course_df['period'].replace("", 0)

    prerequisite_df = pd.DataFrame()
    if not discipline_course_df.empty:
        prerequisite_df = get_prerequisite_df(discipline_course_df)

    discipline_course_df.drop("prerequisites", axis=1, inplace=True)
    return discipline_course_df, prerequisite_df


def scrape_table_from_pdf(pdf_path):
    
    # Read the table from the PDF file
    df_list = tabula.read_pdf(pdf_path, pages="all")

    course_code = os.path.splitext(os.path.basename(pdf_path))[0]
    course_ids = course_df[course_df['code'] == course_code]['id']

    
    if(len(course_ids.values) == 0):
        
        return empty_discipline_course_df, empty_prerequisite_df

    course_id = course_ids.values[0]
    discipline_course_dfs = []
    prerequisite_dfs = []

    for df in df_list:
        df.fillna("", inplace=True)
        header = df.columns.to_list()
       
        # Filter and select the desired columns based on the header
        if "DISCIPLINAS OBRIGATÓRIAS" in header:
            discipline_course_df, prerequisite_df = get_discipline_course_tables(
                df, course_id, True
            )
            discipline_course_dfs.append(discipline_course_df)
            prerequisite_dfs.append(prerequisite_df)

        elif (
            "DISCIPLINAS ELETIVAS" in header
            or "DISCIPLINAS ELETIVAS PRÉ-REQUISITO" in header
        ):
            discipline_course_df, prerequisite_df = get_discipline_course_tables(
                df, course_id, False
            )
            discipline_course_dfs.append(discipline_course_df)
            prerequisite_dfs.append(prerequisite_df)
        else:
            continue

    combined_discipline_course_df = pd.concat(discipline_course_dfs, ignore_index=True)
    combined_prerequisite_df = pd.concat(prerequisite_dfs, ignore_index=True)

    discipline_course_df = combined_discipline_course_df[
        combined_discipline_course_df["discipline_id"].isin(discipline_df["id"])
    ]

    prerequisite_df = combined_prerequisite_df
    if not combined_prerequisite_df.empty:
        prerequisite_df = combined_prerequisite_df[
            combined_prerequisite_df["discipline_course_id"].isin(
                discipline_course_df["id"]
            )
        ]
    discipline_course_df["created_at"] = datetime.now()
    prerequisite_df["created_at"] = datetime.now()

    return discipline_course_df, prerequisite_df


files = os.listdir(pdfs_folder_path)
pdf_files = [
    os.path.join(pdfs_folder_path, file) for file in files if file.endswith(".pdf")
]

for pdf_file in [
    "./courses_pdfs/engenharia-de-computacao.pdf",
    "./courses_pdfs/engenharia-de-producao-jm.pdf",
    "./courses_pdfs/sistemas-de-informacao.pdf",
    "./courses_pdfs/engenharia-eletrica.pdf",
]:
    print(f"Buscando disciplinas de {pdf_file}")
    discipline_course_df, prerequisite_df = scrape_table_from_pdf(pdf_file)
    
    discipline_course_df = discipline_course_df[discipline_course_df['discipline_id'].isin(remaining_discipline_ids)]
    store_df(discipline_course_df, 'discipline_course')
    print("Disciplinas do curso armazenadas com sucesso!")

    remaining_discipline_course_ids = set(discipline_course_df['id'])
    prerequisite_df = prerequisite_df[prerequisite_df['discipline_course_id'].isin(remaining_discipline_course_ids)]
    store_df(prerequisite_df, 'prerequisite')
    print("Prerequisitos armazenados com sucesso!\n")



Buscando disciplinas de ./courses_pdfs/engenharia-de-computacao.pdf


Got stderr: May 05, 2024 4:53:15 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 05, 2024 4:53:15 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 05, 2024 4:53:15 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>
May 05, 2024 4:53:16 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>
May 05, 2024 4:53:17 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>
May 05, 2024 4:53:17 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>
May 05, 2024 4:53:17 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>

  df.fillna("", inplace=True)
  discipline_course_df.at[0, "id"] = ""
  discipline_course_df.at[0, "discipline_id"] = ""
  discipline_course_df.at[0, "period"] = ""
  discipline_course_df.at[0, "prerequisites"] = ""
  df.fillna("", inplace=True)
  discipline_course_df.at[0, "id"] = ""
  discipline_course_df.at[0, "discipline_id"] = ""
  discipline_course_df.at[0, "period"] = ""
  discipline_course_df.at[0, "prerequisites"] = ""
  df.fillna("", inplac

Disciplinas do curso armazenadas com sucesso!
Prerequisitos armazenados com sucesso!

Buscando disciplinas de ./courses_pdfs/engenharia-de-producao-jm.pdf


Got stderr: May 05, 2024 4:53:18 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 05, 2024 4:53:18 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 05, 2024 4:53:18 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>
May 05, 2024 4:53:18 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>
May 05, 2024 4:53:19 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>
May 05, 2024 4:53:19 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>
May 05, 2024 4:53:19 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>

  df.fillna("", inplace=True)
  discipline_course_df.at[0, "id"] = ""
  discipline_course_df.at[0, "discipline_id"] = ""
  discipline_course_df.at[0, "period"] = ""
  discipline_course_df.at[0, "prerequisites"] = ""
  df.fillna("", inplace=True)
  discipline_course_df.at[0, "id"] = ""
  discipline_course_df.at[0, "discipline_id"] = ""
  discipline_course_df.at[0, "period"] = ""
  discipline_course_df.at[0, "prerequisites"] = ""
  df.fillna("", inplac

Disciplinas do curso armazenadas com sucesso!
Prerequisitos armazenados com sucesso!

Buscando disciplinas de ./courses_pdfs/sistemas-de-informacao.pdf


Got stderr: May 05, 2024 4:53:20 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 05, 2024 4:53:20 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 05, 2024 4:53:20 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>
May 05, 2024 4:53:20 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>
May 05, 2024 4:53:21 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>

  df.fillna("", inplace=True)
  discipline_course_df.at[0, "id"] = ""
  discipline_course_df.at[0, "discipline_id"] = ""
  discipline_course_df.at[0, "period"] = ""
  discipline_course_df.at[0, "prerequisites"] = ""
  df.fillna("", inplace=True)
  discipline_course_df.at[0, "id"] = ""
  discipline_course_df.at[0, "discipline_id"] = ""
  discipline_course_df.at[0, "period"] = ""
  discipline_course_df.at[0, "prerequisites"] = ""
  df.fillna("", inplace=True)
  discipline_course_df.at[0, "id"] = ""
  discipline_course_df.at[0, "discipline_id"] = ""
  discipline_course_df.at[0, "period"] = ""
  disciplin

Disciplinas do curso armazenadas com sucesso!
Prerequisitos armazenados com sucesso!

Buscando disciplinas de ./courses_pdfs/engenharia-eletrica.pdf


Got stderr: May 05, 2024 4:53:21 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 05, 2024 4:53:21 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 05, 2024 4:53:21 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>
May 05, 2024 4:53:22 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>
May 05, 2024 4:53:22 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>
May 05, 2024 4:53:22 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>



Disciplinas do curso armazenadas com sucesso!
Prerequisitos armazenados com sucesso!



  df.fillna("", inplace=True)
  discipline_course_df.at[0, "id"] = ""
  discipline_course_df.at[0, "discipline_id"] = ""
  discipline_course_df.at[0, "period"] = ""
  discipline_course_df.at[0, "prerequisites"] = ""
  df.fillna("", inplace=True)
  discipline_course_df.at[0, "id"] = ""
  discipline_course_df.at[0, "discipline_id"] = ""
  discipline_course_df.at[0, "period"] = ""
  discipline_course_df.at[0, "prerequisites"] = ""
  df.fillna("", inplace=True)
  discipline_course_df.at[0, "id"] = ""
  discipline_course_df.at[0, "discipline_id"] = ""
  discipline_course_df.at[0, "period"] = ""
  discipline_course_df.at[0, "prerequisites"] = ""
  df.fillna("", inplace=True)
  discipline_course_df.at[0, "id"] = ""
  discipline_course_df.at[0, "discipline_id"] = ""
  discipline_course_df.at[0, "period"] = ""
  discipline_course_df.at[0, "prerequisites"] = ""
  df.fillna("", inplace=True)
