In [1]:
import os

import numpy as np
import pandas as pd

# Show every column when a DataFrame is rendered instead of eliding the middle ones.
pd.set_option("display.max_columns", None)

# Load the raw LASU admissions data (path is relative to the kernel's working directory).
lasu_df = pd.read_csv("../../Data/LASU.csv")
print("Shape:", lasu_df.shape)

# Jupyter only renders the *last* expression of a cell automatically, so the
# intermediate previews have to be displayed explicitly or they are discarded.
display(lasu_df.head())
lasu_df.info()  # prints its report directly; it returns None
display(lasu_df.describe())

# Basic data-quality checks: per-column null counts and fully duplicated rows.
print("Missing values:\n", lasu_df.isnull().sum())
print("Duplicates:", lasu_df.duplicated().sum())

Shape: (10000, 7)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   faculty          10000 non-null  object 
 1   department       10000 non-null  object 
 2   utme_score       10000 non-null  int64  
 3   screening_score  10000 non-null  float64
 4   olevel_passed    10000 non-null  bool   
 5   admitted         10000 non-null  object 
 6   olevel_grades    10000 non-null  object 
dtypes: bool(1), float64(1), int64(1), object(4)
memory usage: 478.6+ KB
Missing values:
 faculty            0
department         0
utme_score         0
screening_score    0
olevel_passed      0
admitted           0
olevel_grades      0
dtype: int64
Duplicates: 0


In [3]:
# Encode the admission decision as a binary target: 1 = admitted, 0 = not admitted.
# Labels are trimmed and lower-cased before mapping so that stray whitespace or
# capitalisation differences do not silently turn into NaN.
admission_codes = {"admitted": 1, "not admitted": 0}
lasu_df['admitted_numeric'] = (
    lasu_df['admitted'].str.strip().str.lower().map(admission_codes)
)

# Series.map yields NaN for any label missing from the mapping; fail loudly here
# instead of letting NaN targets leak into the processed dataset.
unmapped_labels = lasu_df.loc[lasu_df['admitted_numeric'].isna(), 'admitted'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(f"Unmapped values in 'admitted': {unmapped_labels.tolist()}")
lasu_df['admitted_numeric'] = lasu_df['admitted_numeric'].astype("int64")

print(lasu_df['admitted_numeric'].value_counts(dropna=False))

# Convert True/False to 1/0
lasu_df['olevel_passed'] = lasu_df['olevel_passed'].astype(int)

# Example: compute average O'level points
def olevel_avg(grades_str):
    """Return the mean point value of a comma-separated list of O'level grades.

    Each grade is converted to points with the scale
    A1=10, B2=9, B3=8, C4=7, C5=6, C6=5 and the arithmetic mean is returned.

    Parameters
    ----------
    grades_str : str
        Grades separated by commas, e.g. ``"A1, B2, C4"``. Whitespace around
        each grade and letter case are ignored, and empty entries (such as
        the one produced by a trailing comma) are skipped.

    Returns
    -------
    float
        Mean of the grade points, or NaN when the string contains no grades.

    Raises
    ------
    KeyError
        If a grade is not part of the scale above.
    """
    scale = {'A1': 10, 'B2': 9, 'B3': 8, 'C4': 7, 'C5': 6, 'C6': 5}
    # Split on the bare comma and normalise each token, so "A1,B2" and
    # "A1 , b2 " parse the same way as the canonical "A1, B2".
    grades = [token.strip().upper() for token in grades_str.split(",")]
    grades = [grade for grade in grades if grade]
    if not grades:
        # No grades to average: report a missing value rather than failing.
        return np.nan
    return np.mean([scale[grade] for grade in grades])

# Average O'level points per applicant, derived from the raw grade string.
lasu_df['olevel_avg_points'] = lasu_df['olevel_grades'].apply(olevel_avg)

# Keep only the model-ready columns. .copy() makes processed_df an independent
# frame, so later edits to it cannot trigger SettingWithCopyWarning or touch lasu_df.
processed_df = lasu_df[[
    "faculty", "department", "utme_score", "screening_score",
    "olevel_passed", "olevel_avg_points", "admitted_numeric"
]].copy()

# Save the processed dataset.
processed_path = "../../Data/LASU_processed.csv"
# Derive the directory from processed_path so the folder that gets created is
# always the one the file is written to.
os.makedirs(os.path.dirname(processed_path), exist_ok=True)
processed_df.to_csv(processed_path, index=False)
print(f"Processed dataset saved to {processed_path}")


admitted_numeric
0    5064
1    4936
Name: count, dtype: int64
Processed dataset saved to ../../Data/LASU_processed.csv
