In [None]:
#Manipulação de Dados
import pandas as pd
import numpy as np
#Visualização de Dados
import matplotlib.pyplot as plt
import seaborn as sns

#Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

#Alteração de nomes de arquivos e leitura de diretórios
import os

In [14]:
# Load the raw feedback export.
df = pd.read_csv("data/feedback.csv")

# Columns that must be filled in; rows missing any of them are dropped below.
critical_columns = [
    "Nome Completo",
    "Endereço de e-mail",
    "Em qual grupo você está?"
]

# Drop every row with a missing value in at least one critical column and
# renumber the index from zero.
df_normalized = df.dropna(subset=critical_columns).reset_index(drop=True)

# Count removed rows rather than missing cells: summing the per-column NaN
# counts would count a row with several empty critical fields more than once.
lost_data = len(df) - len(df_normalized)

print(f"Número de dados removidos: {lost_data}")

df_normalized.to_csv("data/feedback_normalized.csv", index=False)
print("Arquivo salvo")

Número de dados removidos: 36
Arquivo salvo


Uses `data/classworks` as the base directory, with subfolders `turma 1`, `turma 2`, and `turma 3` representing different class groups.

In [None]:
# Root directory that contains one subfolder per class group.
base_path = "data/classworks"

# Class-group subfolder names, numbered 1 through 3.
subfolders = [f"turma {number}" for number in range(1, 4)]

Checking subfolders for `.xls` files and creating corresponding `.csv` files if they do not already exist.

In [None]:

# For each class folder, write a same-named `.csv` next to every `.xls` file
# that does not have one yet; existing CSV files are left untouched.
for subfolder in subfolders:
    folder_path = os.path.join(base_path, subfolder)
    if not os.path.exists(folder_path):
        print(f"Pasta não encontrada: {folder_path}")
        continue

    # Directory listing is taken once; CSVs created below are not re-listed.
    existing_names = os.listdir(folder_path)

    for workbook_name in existing_names:
        if not workbook_name.endswith(".xls"):
            continue

        csv_name = os.path.splitext(workbook_name)[0] + ".csv"

        if csv_name in existing_names:
            print(f"Arquivo já existe: {csv_name}")
            continue

        workbook_path = os.path.join(folder_path, workbook_name)
        target_path = os.path.join(folder_path, csv_name)

        # Best effort: a workbook that fails to convert is reported and skipped.
        try:
            pd.read_excel(workbook_path).to_csv(target_path, index=False)
            print(f"Arquivo criado: {target_path}")
        except Exception as error:
            print(f"Erro ao processar {workbook_path}: {error}")

Checking for `.xls` files that do not have a corresponding `.csv` file (for verification purposes).

In [None]:

# Report, for each class folder, the `.xls` files that have no same-named `.csv`.
for subfolder in subfolders:
    folder_path = os.path.join(base_path, subfolder)
    if not os.path.exists(folder_path):
        print(f"Pasta não encontrada: {folder_path}")
        continue

    listing = os.listdir(folder_path)

    # Collect workbooks whose expected CSV counterpart is absent from the listing.
    unmatched = []
    for name in listing:
        if not name.endswith(".xls"):
            continue
        expected_csv = os.path.splitext(name)[0] + ".csv"
        if expected_csv not in listing:
            unmatched.append(name)

    print(f"\nArquivos .xls sem correspondência em .csv na pasta '{subfolder}':")
    if not unmatched:
        print("Nenhum arquivo encontrado.")
    else:
        for name in unmatched:
            print(name)

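Processes `.csv` and `.xls(x)` files in each class folder by skipping the first two rows and overwriting the originals. **Caution:** this step is destructive and not idempotent — every run removes two more rows from each file, so run it only once on freshly exported files.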

In [None]:

# WARNING: destructive and not idempotent. Each file is re-read without its
# first two lines and written back to the same path, so every run of this cell
# removes two more lines from every matching file.
for subfolder in subfolders:
    folder_path = os.path.join(base_path, subfolder)
    if not os.path.exists(folder_path):
        print(f"Pasta não encontrada: {folder_path}")
        continue

    entries = os.listdir(folder_path)

    # First pass: CSV files.
    for entry in entries:
        if not entry.endswith(".csv"):
            continue
        target = os.path.join(folder_path, entry)
        try:
            pd.read_csv(target, skiprows=2).to_csv(target, index=False)
            print(f"Processado: {target}")
        except Exception as error:
            print(f"Erro ao processar {target}: {error}")

    # Second pass: Excel workbooks (`.xls` and `.xlsx`), overwritten in place.
    for entry in entries:
        if not (entry.endswith(".xls") or entry.endswith(".xlsx")):
            continue
        target = os.path.join(folder_path, entry)
        try:
            pd.read_excel(target, skiprows=2).to_excel(target, index=False)
            print(f"Processado: {target}")
        except Exception as error:
            print(f"Erro ao processar {target}: {error}")

Extracts metadata and student grades from `.csv` files, restructures and cleans the data, then saves it as a corrected file.

In [None]:

# Restructure each raw classwork CSV into a tidy table with one row per student
# and save it next to the source as "<name>-Corrigido.csv".
for subfolder in subfolders:
    folder_path = os.path.join(base_path, subfolder)
    if not os.path.exists(folder_path):
        print(f"Pasta não encontrada: {folder_path}")
        continue

    files_in_folder = os.listdir(folder_path)

    # Skip "-Corrigido.csv" files: they are outputs of this cell, and feeding
    # them back in on a re-run would produce "-Corrigido-Corrigido.csv" garbage.
    csv_files = [
        file
        for file in files_in_folder
        if file.endswith(".csv") and not file.endswith("-Corrigido.csv")
    ]

    for csv_file in csv_files:
        csv_path = os.path.join(folder_path, csv_file)
        try:
            # Read without a header row so metadata cells can be addressed by position.
            df_raw = pd.read_csv(csv_path, header=None)

            # Fixed cell positions of the metadata in the raw export
            # (layout assumed by this cell — TODO confirm against the export format).
            titulo = df_raw.iloc[0, 1]
            pontuacao = df_raw.iloc[0, 4]
            grupo = df_raw.iloc[2, 1]
            data = df_raw.iloc[2, 3] if not pd.isna(df_raw.iloc[2, 3]) else None

            # Student rows start at row index 5; column 0 is the name, column 4 the grade.
            # .copy() detaches the slice from df_raw so the column assignments below
            # act on an independent frame and do not raise SettingWithCopyWarning.
            alunos_notas = df_raw.iloc[5:, [0, 4]].copy()
            alunos_notas.columns = ["ALUNO", "NOTA"]

            # Non-numeric grades become NaN instead of raising.
            alunos_notas["NOTA"] = pd.to_numeric(alunos_notas["NOTA"], errors="coerce")

            # Standardize student names (title case).
            alunos_notas["ALUNO"] = alunos_notas["ALUNO"].str.title()

            # Repeat the file-level metadata on every student row.
            alunos_notas["TÍTULO"] = titulo
            alunos_notas["PONTUAÇÃO"] = pontuacao
            alunos_notas["GRUPO"] = grupo
            alunos_notas["DATA"] = data

            final_df = alunos_notas[["TÍTULO", "PONTUAÇÃO", "GRUPO", "DATA", "ALUNO", "NOTA"]].reset_index(drop=True)

            output_path = os.path.join(folder_path, f"{os.path.splitext(csv_file)[0]}-Corrigido.csv")
            final_df.to_csv(output_path, index=False)
            print(f"Arquivo processado e salvo: {output_path}")

        except Exception as e:
            # Best effort: report the failing file and continue with the next one.
            print(f"Erro ao processar {csv_path}: {e}")

Renames `-Corrigido.csv` files by replacing `'Atvdd'` with `'Atividade'` in their filenames across all class folders. 

In [None]:

# Expand the abbreviation 'Atvdd' to 'Atividade' in the names of corrected files.
for subfolder in subfolders:
    folder_path = os.path.join(base_path, subfolder)
    if not os.path.exists(folder_path):
        print(f"Pasta não encontrada: {folder_path}")
        continue

    files_in_folder = os.listdir(folder_path)

    # Only files whose name actually contains 'Atvdd' need renaming; the others
    # would be "renamed" onto themselves and logged as if something had changed.
    corrigido_files = [
        file
        for file in files_in_folder
        if file.endswith("-Corrigido.csv") and "Atvdd" in file
    ]

    for corrigido_file in corrigido_files:
        old_path = os.path.join(folder_path, corrigido_file)
        # Replace 'Atvdd' with 'Atividade' in the file name.
        new_file_name = corrigido_file.replace("Atvdd", "Atividade")
        new_path = os.path.join(folder_path, new_file_name)

        try:
            os.rename(old_path, new_path)
            print(f"Renomeado: {old_path} -> {new_path}")
        except Exception as e:
            # Best effort: report the failure and continue with the next file.
            print(f"Erro ao renomear {old_path}: {e}")

Renames files starting with `Atividade` and ending with `-Corrigido.csv` by removing the `-Corrigido` suffix.  

In [None]:

# Drop the "-Corrigido" marker from the names of corrected 'Atividade' files.
for subfolder in subfolders:
    folder_path = os.path.join(base_path, subfolder)
    if not os.path.exists(folder_path):
        print(f"Pasta não encontrada: {folder_path}")
        continue

    for entry in os.listdir(folder_path):
        # Only files that start with "Atividade" and end with "-Corrigido.csv".
        if not (entry.startswith("Atividade") and entry.endswith("-Corrigido.csv")):
            continue

        source = os.path.join(folder_path, entry)
        destination = os.path.join(folder_path, entry.replace("-Corrigido", ""))

        # NOTE(review): when a file with the target name already exists, the
        # outcome of os.rename is platform-dependent (silently replaced on Unix,
        # FileExistsError on Windows) — confirm that overwriting is intended.
        try:
            os.rename(source, destination)
            print(f"Renomeado: {source} -> {destination}")
        except Exception as error:
            print(f"Erro ao renomear {source}: {error}")