In [1]:
import pandas as pd
import numpy as np
import os

# Show every column when printing wide frames instead of eliding the middle ones.
pd.set_option("display.max_columns", None)

# Load the raw dataset.
unilorin_df = pd.read_csv("../../Data/UNILORIN.csv")
print("Shape:", unilorin_df.shape)

# Quick look: first rows, dtypes / non-null counts, and summary statistics.
print("\nPreview:")
print(unilorin_df.head())

print("\nInfo:")
unilorin_df.info()

print("\nDescription:")
print(unilorin_df.describe())

# Check missing values and duplicates
print("\nMissing values:\n", unilorin_df.isnull().sum())
print("Duplicates:", unilorin_df.duplicated().sum())

# Encode Admission Status: admitted → 1, not admitted → 0.
# Labels are trimmed and lower-cased before mapping so that variants such as
# "Admitted" or " admitted " are encoded instead of silently turning into NaN.
# astype(str) keeps the .str accessor usable even if the column holds missing
# values; those become the text "nan", which is not in the mapping and so
# still ends up as NaN in the encoded column.
_status_labels = unilorin_df["Admission_Status"].astype(str).str.strip().str.lower()
unilorin_df["admitted_numeric"] = _status_labels.map({
    "admitted": 1,
    "not admitted": 0
})

# Surface anything the mapping did not recognise rather than leaving silent
# NaNs in the target column. This only warns; it does not stop the notebook.
_unmapped = unilorin_df["admitted_numeric"].isna()
if _unmapped.any():
    print(
        f"\nWarning: {int(_unmapped.sum())} row(s) have an unrecognised Admission_Status:",
        unilorin_df.loc[_unmapped, "Admission_Status"].unique(),
    )

print("\nAdmission numeric counts:")
print(unilorin_df["admitted_numeric"].value_counts(dropna=False))

# Convert O'Level validity (True/False) to 1/0.
# astype(int) raises if the column contains missing values, which is the
# desired loud failure: a missing validity flag should not be guessed.
unilorin_df["Olevel_Valid"] = unilorin_df["Olevel_Valid"].astype(int)
print("\nOlevel_Valid sample:")
print(unilorin_df[["Olevel_Valid"]].head())

# Average O'level points for one applicant (default scale: A1=4.0 ... C6=2.0).
def olevel_avg(grades_str, scale=None):
    """Return the mean grade point of a comma-separated O'level grade string.

    Parameters
    ----------
    grades_str : str
        Grades separated by commas, e.g. ``"C6, C4, C4, A1, C5"``. Whitespace
        around each grade and letter case are ignored.
    scale : dict, optional
        Mapping from upper-case grade label to grade point. When omitted, the
        scale below is used (A1=4.0 down to C6=2.0 in steps of 0.4).

    Returns
    -------
    float
        Mean of the mapped grade points, or ``nan`` when ``grades_str`` is not
        a string (e.g. a missing cell) or contains no grades.

    Raises
    ------
    KeyError
        If a grade is not present in ``scale``. The default scale has no entry
        for grades below C6, so such grades are reported rather than guessed.
    """
    if scale is None:
        scale = {
            "A1": 4.0, "B2": 3.6, "B3": 3.2,
            "C4": 2.8, "C5": 2.4, "C6": 2.0
        }

    # A missing cell arrives from pandas as NaN/None, not as text: propagate
    # it as "no value" instead of failing inside .apply().
    if not isinstance(grades_str, str):
        return np.nan

    # Split on the bare comma and strip each token so "A1,C6" and "A1 , C6"
    # are handled the same as "A1, C6"; empty tokens are dropped.
    grades = [token.strip().upper() for token in grades_str.split(",")]
    grades = [grade for grade in grades if grade]
    if not grades:
        return np.nan

    return np.mean([scale[grade] for grade in grades])

# Derive each applicant's average O'level points from the raw grade string.
unilorin_df["olevel_avg_points"] = unilorin_df["Olevel_Grades"].apply(olevel_avg)
print("\nOlevel average sample:")
print(unilorin_df[["Olevel_Grades", "olevel_avg_points"]].head())

# Keep only relevant processed columns.
# .copy() makes processed_df an independent frame, so later edits to it
# neither raise SettingWithCopyWarning nor depend on unilorin_df.
processed_df = unilorin_df[[
    "Faculty", "Department", "UTME_Score", "Post_UTME_Score",
    "Aggregate_Score", "Olevel_Valid", "olevel_avg_points", "admitted_numeric"
]].copy()

print("\nProcessed dataset preview:")
print(processed_df.head())

# Save processed dataset.
# The output directory is derived from the output path so the two cannot
# drift apart if the path is changed later.
processed_path = "../../Data/UNILORIN_processed.csv"
os.makedirs(os.path.dirname(processed_path), exist_ok=True)
processed_df.to_csv(processed_path, index=False)

print(f"\n✅ Processed dataset saved to {processed_path}")


Shape: (10000, 9)

Preview:
                  Faculty                   Department  UTME_Score  \
0     Management Sciences                      Finance         226   
1  Environmental Sciences  Urban and Regional Planning         224   
2       Clinical Sciences         Medicine and Surgery         218   
3     Veterinary Medicine          Veterinary Medicine         322   
4     Veterinary Medicine          Veterinary Medicine         296   

   Post_UTME_Score  Aggregate_Score  Olevel_Valid  Sittings Admission_Status  \
0            27.08            69.33          True         1         admitted   
1             6.76            50.76          True         1         admitted   
2             7.56            49.61          True         2     not admitted   
3             8.82            65.07          True         2         admitted   
4             6.93            55.53          True         1         admitted   

        Olevel_Grades  
0  C6, C4, C4, A1, C5  
1  C5, C5, A1, A1, B3 