In [8]:
import pandas as pd
import altair as alt

In [9]:
# Read the cleaned student dataset from the working directory.
df = pd.read_csv("student_cleaned.csv")

# Columns that are converted to numeric dtype below.
numeric_cols = [
    "Admission grade",
    "Previous qualification (grade)",
    "Curricular units 1st sem (enrolled)",
    "Curricular units 1st sem (evaluations)",
    "Curricular units 1st sem (approved)",
    "Curricular units 1st sem (grade)",
]

# Convert every listed column in one pass; errors="coerce" turns any value
# that cannot be parsed as a number into NaN instead of raising.
df = df.assign(
    **{name: pd.to_numeric(df[name], errors="coerce") for name in numeric_cols}
)

In [10]:
# Fixed outcome -> colour mapping so every layer that uses this scale draws
# the same outcome in the same colour. Insertion order defines the domain order.
outcome_colors = {
    "Graduate": "#2ca02c",
    "Dropout": "#d62728",
    "Enrolled": "#ff7f0e",
}
color_scale = alt.Scale(
    domain=list(outcome_colors.keys()),
    range=list(outcome_colors.values()),
)



In [None]:
# Scatter layer: one circle per row of `df`, positioned by 1st-semester
# evaluations (x) against 1st-semester approved units (y), coloured by Target.
evals_col = "Curricular units 1st sem (evaluations)"
approved_col = "Curricular units 1st sem (approved)"

points = alt.Chart(df).mark_circle(size=35, opacity=0.7).encode(
    x=alt.X(
        f"{evals_col}:Q",
        title="Courses Evaluated in 1st Semester",
        # The X encoding channel has no `font` property (passing one fails
        # schema validation); the axis title typeface is set on the Axis.
        axis=alt.Axis(titleFont="Georgia"),
        # Small padding on both ends of the domain so circles at 0 and at the
        # maximum are not drawn right on the plot edge.
        scale=alt.Scale(domain=(-0.2, df[evals_col].max() + 0.5)),
    ),
    y=alt.Y(
        f"{approved_col}:Q",
        title="Courses Passed in 1st Semester",
        scale=alt.Scale(domain=(-0.2, df[approved_col].max() + 0.5)),
    ),
    color=alt.Color("Target:N", scale=color_scale, title="Student Outcome"),
    # Fields shown when hovering over a circle.
    tooltip=[
        alt.Tooltip("Target:N", title="Outcome"),
        alt.Tooltip(f"{evals_col}:Q", title="Evaluations"),
        alt.Tooltip(f"{approved_col}:Q", title="Approved"),
        alt.Tooltip("Admission grade:Q", title="Admission Grade"),
        alt.Tooltip("Debtor:N", title="Debtor"),
        alt.Tooltip("Tuition fees up to date:N", title="Tuition"),
        alt.Tooltip("Scholarship holder:N", title="Scholarship"),
    ],
)

In [12]:
# Trend layer: for each Target value, a line through the mean number of
# approved units at every evaluations count.
lines = (
    alt.Chart(df)
    .mark_line(size=3)
    .encode(
        x=alt.X("Curricular units 1st sem (evaluations):Q"),
        y="mean(Curricular units 1st sem (approved)):Q",
        color=alt.Color("Target:N", scale=color_scale),
        detail=alt.Detail("Target:N"),
    )
)

In [13]:
# Overlay the scatter and trend layers, then size, title and enable pan/zoom.
chart_title = {
    "text": "1st Semester Performance: Evaluations vs Approved Units by Target",
    "subtitle": "Each dot = a student; lines show average trend per outcome",
}

chart = (
    alt.layer(points, lines)
    .properties(title=chart_title, width=650, height=450)
    .configure_title(anchor='start', font='Georgia', fontSize=16)
    .interactive()
)

In [14]:
# Write the layered chart to an HTML file in the working directory.
output_html = "scatter_outcomes.html"
chart.save(output_html)