In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set one global seaborn theme up front so every figure drawn in the cells
# below uses the same grid style, colour palette and font scale.
sns.set_theme(style="whitegrid", palette="muted", font_scale=1.1)

Importando base de dados

In [None]:
# Load the student dataset. The path is relative, so it is resolved against
# the kernel's current working directory.
df_students = pd.read_csv("student-db/student-final.csv")
# Render the loaded frame as this cell's rich output.
display(df_students)

Tabela de medidas de tendência central e dispersão

In [None]:
def descritive_measures(df: pd.DataFrame, numeric_cols=None) -> pd.DataFrame:
    """Build a table of central-tendency and dispersion statistics.

    Args:
        df: Frame that contains every column named in ``numeric_cols``.
        numeric_cols: Iterable of column names to summarise. When ``None``
            (the default) the failures, absences and G1/G2/G3 columns of both
            subjects (``*_mat`` and ``*_por``) are used, which keeps existing
            single-argument calls working unchanged.

    Returns:
        A frame indexed by the summarised column names, with one column per
        statistic: "Média", "Mediana", "Moda", "Variância", "Desvio Padrão"
        and "Amplitude" (max minus min).

    Raises:
        KeyError: If any requested column is missing from ``df``.
    """
    if numeric_cols is None:
        numeric_cols = [
            "failures_mat", "failures_por",
            "absences_mat", "absences_por",
            "G1_mat", "G1_por",
            "G2_mat", "G2_por",
            "G3_mat", "G3_por",
        ]
    numeric_df = df[list(numeric_cols)]

    # DataFrame.mode() returns one row per tied mode; row 0 holds the smallest
    # mode of each column. With no rows (or only NaN values) there is no row 0,
    # so report NaN instead of letting .iloc[0] raise IndexError.
    modes = numeric_df.mode()
    if modes.empty:
        mode_row = pd.Series(float("nan"), index=numeric_df.columns)
    else:
        mode_row = modes.iloc[0]

    # First three entries: central tendency. Last three: dispersion.
    description = pd.DataFrame({
        "Média": numeric_df.mean(),
        "Mediana": numeric_df.median(),
        "Moda": mode_row,
        "Variância": numeric_df.var(),
        "Desvio Padrão": numeric_df.std(),
        "Amplitude": numeric_df.max() - numeric_df.min(),
    })

    return description

# Compute the summary table for the loaded dataset using the function's
# default column set.
description = descritive_measures(df_students)
# Plain-text caption printed above the rendered table.
print("Medidas Descritivas")
display(description)


Histogramas

In [None]:
def plot_histogram(df: pd.DataFrame, col: str):
    """Show a 15-bin histogram with a KDE overlay for one column of ``df``.

    Args:
        df: Frame that holds the column to plot.
        col: Name of the column; also used for the x-axis label and the title.
    """
    values = df[col]

    # Explicit Axes handle instead of relying on pyplot's "current axes".
    _, ax = plt.subplots(figsize=(6, 4))
    sns.histplot(values, kde=True, bins=15, ax=ax)
    ax.set_title(f"Histograma de {col}")
    ax.set_xlabel(col)
    ax.set_ylabel("Frequência")
    plt.show()

# One histogram per column, in the same order as before: absences first,
# then the Portuguese G1-G3 columns, then the math G1-G3 columns.
for hist_col in (
    "absences_mat",
    "absences_por",
    "G1_por",
    "G2_por",
    "G3_por",
    "G1_mat",
    "G2_mat",
    "G3_mat",
):
    plot_histogram(df_students, hist_col)

Boxplots das notas por variáveis categóricas

In [None]:
def plot_boxplot_all_exams(df: pd.DataFrame, group_col: str):
    """Show box plots of all six grade columns, split by ``group_col``.

    The frame is reshaped to long form (one row per original row and grade
    column) so that the grade column name can go on the x axis while
    ``group_col`` drives the hue.

    Args:
        df: Frame holding the G1/G2/G3 ``*_mat`` and ``*_por`` columns and
            ``group_col``.
        group_col: Column whose values define the hue groups; also used in
            the figure title and as the legend title.
    """
    exam_columns = ["G1_mat", "G2_mat", "G3_mat", "G1_por", "G2_por", "G3_por"]
    long_df = df.melt(
        id_vars=[group_col],
        value_vars=exam_columns,
        var_name="Exam",
        value_name="Grade",
    )

    _, ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(data=long_df, x="Exam", y="Grade", hue=group_col, ax=ax)
    ax.set_title(f"Grades x {group_col}")
    # Anchor the legend below the axes, laid out in up to three columns.
    ax.legend(
        title=group_col,
        loc="upper center",
        bbox_to_anchor=(0.5, -0.12),
        ncol=3,
    )
    plt.show()

# One grouped box-plot figure per grouping column, in the original order.
for grouping_col in ("sex", "studytime", "reason", "higher", "paid_mat", "paid_por"):
    plot_boxplot_all_exams(df_students, grouping_col)

Gráficos de dispersão entre notas

In [None]:
def plot_scatterplot_simple(df: pd.DataFrame, x: str, y: str):
    """Show a square scatter plot of column ``y`` against column ``x``.

    Args:
        df: Frame that holds both columns.
        x: Column plotted on the horizontal axis.
        y: Column plotted on the vertical axis.
    """
    _, ax = plt.subplots(figsize=(5, 5))
    sns.scatterplot(data=df, x=x, y=y, ax=ax)
    ax.set_title(f"{y} x {x}")
    plt.show()

# (x, y) column pairs, in the original order: grade columns within math,
# then within Portuguese, then the same G column across the two subjects.
grade_pairs = (
    ("G1_mat", "G2_mat"),
    ("G1_mat", "G3_mat"),
    ("G2_mat", "G3_mat"),
    ("G1_por", "G2_por"),
    ("G1_por", "G3_por"),
    ("G2_por", "G3_por"),
    ("G1_mat", "G1_por"),
    ("G2_mat", "G2_por"),
    ("G3_mat", "G3_por"),
)
for x_col, y_col in grade_pairs:
    plot_scatterplot_simple(df_students, x_col, y_col)

Gráficos de dispersão de notas por faltas e reprovações

In [None]:
def _plot_grades_scatter(df: pd.DataFrame, x: str, grade_cols: list, subject: str):
    """Scatter the given grade columns against ``x``, one colour per column.

    Shared implementation for the per-subject wrappers so the reshaping and
    styling live in one place.

    Args:
        df: Frame holding ``x`` and every column in ``grade_cols``.
        x: Column plotted on the horizontal axis.
        grade_cols: Grade columns stacked into a single "Grade" series; the
            originating column name becomes the "Exam" hue.
        subject: Word placed at the start of the figure title.
    """
    # Long form: one row per (original row, grade column) pair.
    df_melted = df.melt(id_vars=[x], value_vars=grade_cols, var_name="Exam", value_name="Grade")

    _, ax = plt.subplots(figsize=(7, 5))
    sns.scatterplot(x=x, y="Grade", hue="Exam", data=df_melted, palette="Set1", ax=ax)
    ax.set_title(f"{subject} grade x {x}")
    ax.legend(title="Exam")
    plt.show()


def plot_scatterplot_all_math(df: pd.DataFrame, x: str):
    """Show the three math grade columns (G1-G3 ``*_mat``) against column ``x``.

    Args:
        df: Frame holding ``x`` and the ``G1_mat``, ``G2_mat``, ``G3_mat`` columns.
        x: Column plotted on the horizontal axis.
    """
    _plot_grades_scatter(df, x, ["G1_mat", "G2_mat", "G3_mat"], "Math")


def plot_scatterplot_all_portuguese(df: pd.DataFrame, x: str):
    """Show the three Portuguese grade columns (G1-G3 ``*_por``) against column ``x``.

    Args:
        df: Frame holding ``x`` and the ``G1_por``, ``G2_por``, ``G3_por`` columns.
        x: Column plotted on the horizontal axis.
    """
    _plot_grades_scatter(df, x, ["G1_por", "G2_por", "G3_por"], "Portuguese")

# Pair each subject's plotter with that subject's own x column, keeping the
# original order: absences (math, Portuguese), then failures (math, Portuguese).
for plot_subject, x_col in (
    (plot_scatterplot_all_math, "absences_mat"),
    (plot_scatterplot_all_portuguese, "absences_por"),
    (plot_scatterplot_all_math, "failures_mat"),
    (plot_scatterplot_all_portuguese, "failures_por"),
):
    plot_subject(df_students, x_col)
