In [None]:
# Section 13: Visualizations and simple admissions dashboard
import matplotlib.pyplot as plt
import seaborn as sns

# Assume 'final_df' is available in the notebook environment
try:
    # Resolve final_df before creating any figure, so a missing dataframe is
    # reported without leaving an empty two-panel figure behind.
    plot_df = final_df
except NameError:
    print('Run the processing cells first to create final_df')
else:
    # Left panel: number of students per assigned choice.
    # Right panel: distribution of aggregate scores.
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    sns.countplot(x='AssignedChoice', data=plot_df, ax=ax[0])
    ax[0].set_title('Assigned Choices')
    sns.histplot(plot_df['Aggregate'], bins=15, ax=ax[1])
    ax[1].set_title('Aggregate distribution')
    plt.tight_layout()
    plt.show()

In [None]:
# Section 12: Unit tests and edge-case handling
import subprocess
import sys

# Run the test suite with the interpreter backing this kernel instead of a
# hard-coded, machine-specific virtualenv path, so the cell works on any checkout.
print('Running pytest for package tests...')
res = subprocess.run([sys.executable, '-m', 'pytest', '-q'], capture_output=True, text=True)
print(res.stdout)
print(res.stderr)
if res.returncode != 0:
    # Make a failing (or empty) test run obvious even when the output is long.
    print('pytest exited with code', res.returncode)

In [None]:
# Section 11: Interactive single-student checker (simple CLI/input version)
from process_admissions import load_data, validate_and_clean, attach_choices_and_evaluate
from pathlib import Path

data_file = Path('admissions_data.xlsx')
if not data_file.exists():
    print('Run data generation cell to create admissions_data.xlsx first')
else:
    # Load students, choices and programs, clean the student records,
    # then evaluate every student's choices into final_df.
    students, choices, programs = load_data(data_file)
    students = validate_and_clean(students)
    final_df = attach_choices_and_evaluate(students, choices, programs)


def check_student(student_id=None, df=None):
    """Print the admission outcome for a single student.

    Parameters
    ----------
    student_id : str, optional
        ID to look up. When omitted, the user is prompted via ``input()``.
    df : pandas.DataFrame, optional
        Results table to search. It must provide the ``StudentID``, ``Name``,
        ``Aggregate``, ``AssignedChoice`` and ``SuggestedProgram`` columns.
        Defaults to the notebook-level ``final_df``.

    Returns
    -------
    None
        The outcome (or a "not found" / "not ready" message) is printed.
    """
    if df is None:
        try:
            df = final_df
        except NameError:
            # The loading cell only defines final_df when the data file exists.
            print('Run the processing cells first to create final_df')
            return
    if student_id is None:
        student_id = input('Enter StudentID (e.g., UG20250001): ')
    if isinstance(student_id, str):
        # Tolerate stray whitespace for IDs passed in directly, not only typed ones.
        student_id = student_id.strip()
    row = df[df['StudentID'] == student_id]
    if row.empty:
        print('Student not found')
        return
    row = row.iloc[0]  # first matching record
    print(f"Student: {row['Name']} | Aggregate: {row['Aggregate']}")
    print('AssignedChoice:', row['AssignedChoice'])
    print('SuggestedProgram:', row['SuggestedProgram'])

# Example: check_student('UG20250001')

In [None]:
# Section 10: Export final student sheet to Excel with basic formatting
try:
    # Check for final_df before opening the writer: if the NameError were raised
    # inside the `with` block, the writer would still try to save a workbook
    # with no sheets on exit instead of showing the message below.
    export_df = final_df
except NameError:
    print('final_df not found — run previous cells to compute results')
else:
    out_file = Path('admissions_result.xlsx')
    # Build the per-choice counts first so nothing is written if this step fails.
    summary = export_df['AssignedChoice'].value_counts().rename_axis('Choice').reset_index(name='Count')
    with pd.ExcelWriter(out_file, engine='openpyxl') as writer:
        export_df.to_excel(writer, sheet_name='final', index=False)
        summary.to_excel(writer, sheet_name='summary', index=False)
    print('Wrote', out_file)

In [None]:
# Section 9: Add result columns (First/Second/Third = 'Yes'/'No' and SuggestedProgram)
# Use the processing module to compute final_df if needed
if 'final_df' not in globals():
    # final_df has not been computed in this session yet: build it from the workbook.
    from process_admissions import load_data, validate_and_clean, attach_choices_and_evaluate
    students, choices, programs = load_data(Path('admissions_data.xlsx'))
    students = validate_and_clean(students)
    final_df = attach_choices_and_evaluate(students, choices, programs)

# Preview the identifying columns next to the result columns
result_columns = [
    'StudentID', 'Name', 'Aggregate', 'AssignedChoice',
    'FirstChoice', 'SecondChoice', 'ThirdChoice', 'SuggestedProgram',
]
final_df[result_columns].head()

In [None]:
# Section 8: Suggest alternative programs that match student aggregate
from admissions import suggest_programs
progs = pd.read_excel('admissions_data.xlsx', sheet_name='programs')
# Convert the programs sheet to records once; it is the same for every student.
program_records = progs.to_dict('records')

# Example: list suggested programs for students who got 'None'
none_students = final_df[final_df['AssignedChoice']=='None']
if none_students.empty:
    print('No students without an assignment in the sample')
else:
    # Only the first five unassigned students are shown to keep the output short.
    for _, r in none_students.head(5).iterrows():
        matches = suggest_programs(r['Aggregate'], program_records)
        if matches:
            # NOTE(review): takes the last match as the best one — confirm the
            # ordering returned by suggest_programs.
            best = matches[-1]
            print(r['StudentID'], r['Aggregate'], '->', best.get('Program') or best.get('name'))
        else:
            print(r['StudentID'], r['Aggregate'], '-> No direct match; consider fee-paying options')

In [None]:
# Section 7: Qualification logic using if / elif (explicit example)
from admissions import qualify_choice

# Example: explicit if/elif/else for a single student
def decide_for_student(aggregate, choice_cutoffs):
    """Return which of a student's choices the aggregate qualifies for.

    ``choice_cutoffs`` is expected as ``[first_cutoff, second_cutoff, third_cutoff]``.
    The cutoffs are tested in that order with ``aggregate >= cutoff`` and the
    first one that is met wins. Returns ``'First'``, ``'Second'``, ``'Third'``,
    or the string ``'None'`` when no cutoff is met.
    """
    # Deliberately written as an explicit if / elif / else chain.
    if aggregate >= choice_cutoffs[0]:
        decision = 'First'
    elif aggregate >= choice_cutoffs[1]:
        decision = 'Second'
    elif aggregate >= choice_cutoffs[2]:
        decision = 'Third'
    else:
        decision = 'None'
    return decision

# Row-wise (not vectorized): apply the if/elif helper to a small sample
# Work on a copy so adding 'Decision' does not write into a slice of final_df
# (which raises SettingWithCopyWarning).
sample = final_df.head(5).copy()


def _cutoffs_for(row):
    """Look up in ``progs`` the cutoff of each of the row's three choices, in order."""
    return [
        progs.loc[progs['Program'] == row[choice_col], 'Cutoff'].values[0]
        for choice_col in ('Choice1', 'Choice2', 'Choice3')
    ]


sample['Decision'] = sample.apply(
    lambda r: decide_for_student(r['Aggregate'], _cutoffs_for(r)), axis=1
)
sample[['StudentID','Aggregate','Choice1','Decision']]

In [None]:
# Section 6: Compute WASSCE aggregates / normalize grades if needed
from admissions import compute_aggregate
# Scores are treated as numeric: the aggregate is the sum of the six subject
# scores (higher is better)
subject_columns = ['Eng', 'Math', 'Sci', 'Soc', 'Rel', 'Elec']
final_df['RecomputedAggregate'] = final_df[subject_columns].sum(axis=1)
# True only when every stored aggregate equals the recomputed sum
final_df['Aggregate'].eq(final_df['RecomputedAggregate']).all()

In [None]:
# Section 5: Data validation & preprocessing
import re
# No earlier-numbered section creates final_df or progs, so build them here when
# they are missing; this lets the notebook run top-to-bottom on a fresh kernel.
try:
    final_df
except NameError:
    from process_admissions import load_data, validate_and_clean, attach_choices_and_evaluate
    _students, _choices, _programs = load_data(Path('admissions_data.xlsx'))
    final_df = attach_choices_and_evaluate(validate_and_clean(_students), _choices, _programs)
try:
    progs
except NameError:
    progs = pd.read_excel('admissions_data.xlsx', sheet_name='programs')

# Validate IDs
final_df['ID_valid_format'] = final_df['StudentID'].astype(str).str.match(r'^UG2025\d{4}$')
# Flag missing scores
score_cols = ['Eng','Math','Sci','Soc','Rel','Elec']
final_df['MissingScores'] = final_df[score_cols].isnull().any(axis=1)
# Convert program cutoffs to numeric; values that cannot be parsed become NaN
progs['Cutoff'] = pd.to_numeric(progs['Cutoff'], errors='coerce')
final_df[['StudentID','ID_valid_format','MissingScores']].head()

In [None]:
# Section 4: Load prepared Excel files with pandas
from pathlib import Path
file = Path('admissions_data.xlsx')
if file.exists():
    # Read all three sheets first, then preview each one.
    students_df = pd.read_excel(file, sheet_name='students')
    choices_df = pd.read_excel(file, sheet_name='choices')
    programs_df = pd.read_excel(file, sheet_name='programs')
    previews = (
        ('Students:', students_df),
        ('\nChoices:', choices_df),
        ('\nPrograms:', programs_df),
    )
    for heading, sheet_df in previews:
        print(heading, sheet_df.shape)
        display(sheet_df.head())
else:
    print('Data file not found — run the data generation cell to create it')

In [None]:
# Section 3: Create choices sheet and programs + cutoffs sheet
from generate_data import generate_students, generate_choices, generate_programs
# Use the same seeded call as Section 2 so the workbook is reproducible across
# runs and is not built from a different, unseeded sample.
students = generate_students(n=60, seed=2025)
choices = generate_choices(students)
programs = generate_programs()
# Write to Excel: one sheet each for students, choices and programs
with pd.ExcelWriter('admissions_data.xlsx', engine='openpyxl') as writer:
    students.to_excel(writer, sheet_name='students', index=False)
    choices.to_excel(writer, sheet_name='choices', index=False)
    programs.to_excel(writer, sheet_name='programs', index=False)
print('Created admissions_data.xlsx with students, choices, programs')
display(students.head())

In [None]:
# Section 2: Generate synthetic student dataset (>=50) and export Excel sheet
from generate_data import generate_students
students = generate_students(seed=2025, n=60)
# Sanity check: every ID must be 'UG2025' followed by four digits
id_format_ok = students['StudentID'].str.match(r'^UG2025\d{4}$')
assert id_format_ok.all()
students.head()

In [None]:
# Section 1: Environment setup & imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Seed NumPy's global generator, then report the environment in use
np.random.seed(2025)
environment_report = (
    ('Pandas version:', pd.__version__),
    ('NumPy version:', np.__version__),
    ('Python executable:', sys.executable),
)
for caption, value in environment_report:
    print(caption, value)

# Admissions matching notebook

This notebook implements the WASSCE admissions matching algorithm required in the assignment.

- It generates a synthetic dataset of 60 students (IDs as UG2025xxxx).
- It creates choices and program cutoff sheets and saves them to an Excel workbook.
- It computes aggregates, validates data, applies if/elif-based qualification logic for first/second/third choices, adds Yes/No result columns, suggests alternative programs, and outputs a final Excel report with highlights.

Note: Run the cells in order, from Section 1 through Section 13; later cells rely on variables created by earlier ones. The qualification logic explicitly uses if / elif / else statements in code cells.