In [1]:
import pandas as pd
import re

# ---------------- Load Raw Excel Data ----------------
# header=None: the first sheet row is read as data, not as column names.
file_path = "student_results.xlsx"

# ---------------- Clean Data ----------------
# Read the sheet and immediately discard rows in which every cell is empty.
df = pd.read_excel(file_path, header=None).dropna(how="all")

# Keep rows up to and including the last one pandas reports as valid
# (label-based slice, so the end label is inclusive).
df = df.loc[: df.last_valid_index()]

# ---------------- Combine Multi-line Records ----------------
# Rows are consumed in pairs from the first column: rows 0-1 form the first
# record, rows 2-3 the second, and so on. A trailing unpaired row is joined
# with an empty second half.
row_count = len(df)
records = []
for start in range(0, row_count, 2):
    first_half = str(df.iloc[start, 0])
    second_half = ""
    if start + 1 < row_count:
        second_half = str(df.iloc[start + 1, 0])
    records.append(f"{first_half} {second_half}")

# ---------------- Parse Combined Records into Structured Data ----------------
# Each record is parsed as "<roll> CODE(grade),CODE(grade),...": the text
# before the first space is the roll number and the remainder is split on
# commas. Entries that do not start with CODE(grade) are skipped.
subject_pattern = re.compile(r"(\w+)\((.*?)\)")

data = []
for rec in records:
    roll, separator, remainder = rec.partition(" ")
    entries = remainder.split(",") if separator else []

    matches = (subject_pattern.match(entry.strip()) for entry in entries)
    # A code that appears twice in one record keeps its last grade.
    subj_results = {m.group(1): m.group(2) for m in matches if m}
    subj_results["Roll No"] = roll
    data.append(subj_results)

# ---------------- Create Clean DataFrame ----------------
# One row per record, one column per subject code, indexed by roll number.
df_clean = pd.DataFrame(data).set_index("Roll No")

# ---------------- Pass/Fail Helper Function ----------------
def is_pass(grade):
    """Classify a grade as pass, fail, or excluded.

    The grade is converted to a string and stripped of surrounding
    whitespace before comparison; comparisons are case-sensitive.

    Returns:
        0 if the grade is exactly "F" or "FE",
        None if it is exactly "Absent" or contains "Withheld"
        (such grades are left out of percentage calculations),
        1 for anything else.
    """
    text = str(grade).strip()
    if text == "F" or text == "FE":
        return 0
    excluded = text == "Absent" or "Withheld" in text
    return None if excluded else 1

# ---------------- Student-wise Summary ----------------
# For every student (one row of df_clean) count the passed, failed and
# excluded (absent/withheld) subjects and derive a pass percentage.
#
# Each summary entry is:
#   [total subjects, passed, failed, withheld/absent, pass %]
# where
#   total subjects = passed + failed + withheld/absent
#   pass %         = passed / (passed + failed) * 100, or None when the
#                    student has no graded subject at all.
# Failed subjects are counted once only, and excluded subjects stay out of
# the percentage denominator, matching the subject-wise calculation.
summary_cols = []
for _roll_no, row in df_clean.iterrows():
    passed = 0
    failed = 0
    withheld_absent = 0
    # dropna() skips subjects this student has no entry for.
    for grade in row.dropna():
        status = is_pass(grade)
        if status is None:
            withheld_absent += 1
        elif status == 1:
            passed += 1
        else:
            failed += 1

    total = passed + failed  # graded subjects only
    pass_percent = round((passed / total) * 100, 2) if total > 0 else None
    summary_cols.append(
        [total + withheld_absent, passed, failed, withheld_absent, pass_percent]
    )

# Columns are attached only after all rows are summarised, so the summary
# values never feed back into the grade counts above.
summary_names = [
    "Total Subjects",
    "Passed Subjects",
    "Failed Subjects",
    "Withheld/Absent",
    "Pass %",
]
for position, column_name in enumerate(summary_names):
    df_clean[column_name] = [entry[position] for entry in summary_cols]

# ---------------- Subject-wise Pass Percentage ----------------
# The last five columns of df_clean are the per-student summary columns;
# everything before them is a subject.
subject_columns = df_clean.columns[:-5]

result_stats = {}
for col in subject_columns:
    outcomes = [is_pass(grade) for grade in df_clean[col].dropna()]
    # Absent/withheld grades (None) are left out of the percentage.
    graded = [outcome for outcome in outcomes if outcome is not None]
    if graded:
        result_stats[col] = round((graded.count(1) / len(graded)) * 100, 2)
    else:
        result_stats[col] = None

# One row per subject, best pass rate first.
df_subjectwise = pd.DataFrame.from_dict(
    result_stats, orient="index", columns=["Pass %"]
).sort_values("Pass %", ascending=False)

# ---------------- Save Outputs ----------------
# Per-student grades plus summary columns, with the roll number as the index.
df_clean.to_excel("parsed_student_results.xlsx")
# Pass percentage per subject.
df_subjectwise.to_excel("subjectwise_pass_percentage.xlsx")

# Same per-student table, but with the roll number as an ordinary column
# and no index written.
df_summary = df_clean.reset_index()
df_summary.to_excel("student_summary.xlsx", index=False)

for message in (
    "✅ Project completed!",
    "Student-wise results saved in 'parsed_student_results.xlsx'",
    "Subject-wise pass percentage saved in 'subjectwise_pass_percentage.xlsx'",
    "Student summary saved in 'student_summary.xlsx'",
):
    print(message)


✅ Project completed!
Student-wise results saved in 'parsed_student_results.xlsx'
Subject-wise pass percentage saved in 'subjectwise_pass_percentage.xlsx'
Student summary saved in 'student_summary.xlsx'
