In [1]:
import pandas as pd
import numpy as np


In [2]:
# Load the student roster from disk; the checks below all filter this frame.
df = pd.read_csv(filepath_or_buffer='school_students.csv')
# Report the row count (the trailing "\n" leaves a blank line after the message).
print(f"Loaded {len(df)} student records\n")

Loaded 200 student records



Check ghost students

In [3]:
# Section banner, printed as three lines: rule, title, rule.
print("="*60, "Check 1 Ghos students", "="*60, sep="\n")

# A row is selected only when the quarterly average, the final grade and the
# days-present count are all exactly zero.
ghost_mask = (
    df['avg_quarterly_grade'].eq(0)
    & df['final_grade'].eq(0)
    & df['days_present'].eq(0)
)
ghost = df.loc[ghost_mask]

print(f"Found {len(ghost)} ghost student")
# Only print the detail table when at least one row matched.
if not ghost.empty:
    print(ghost.loc[:, [
        'student_id',
        'full_name',
        'grade_level',
        'avg_quarterly_grade',
        'days_present',
        'has_guardian',
    ]])





Check 1 Ghos students
Found 4 ghost student
    student_id         full_name  grade_level  avg_quarterly_grade  \
184   STU-0185       Liza Flores            8                  0.0   
187   STU-0188  Carlos Dela Cruz           10                  0.0   
192   STU-0193        Pedro Cruz           10                  0.0   
193   STU-0194       Juan Torres           12                  0.0   

     days_present  has_guardian  
184             0         False  
187             0         False  
192             0         False  
193             0         False  


In [4]:
# Rows sharing both name and birthdate with another row are flagged;
# keep=False marks every member of a duplicate group, not just the repeats.
identity_keys = ['full_name', 'birthdate']
shares_identity = df.duplicated(subset=identity_keys, keep=False)
duplicates = df.loc[shares_identity]
# Sort by the same keys so members of each group appear on adjacent rows.
duplicate_sorted = duplicates.sort_values(by=identity_keys)
print(f"Found: {len(duplicates)} potential duplicate rocords")

if not duplicates.empty:
    print(duplicate_sorted.loc[:, ['student_id', 'full_name', 'birthdate', 'grade_level']])

Found: 10 potential duplicate rocords
    student_id        full_name   birthdate  grade_level
137   STU-0138  Isabella Garcia  2010-12-26           10
188   STU-0189  Isabella Garcia  2010-12-26           11
34    STU-0035        Juan Cruz  2008-07-26           12
179   STU-0180        Juan Cruz  2008-07-26            8
57    STU-0058      Juan Flores  2009-11-12           11
191   STU-0192      Juan Flores  2009-11-12            9
89    STU-0090    Miguel Rivera  2009-03-06           11
186   STU-0187    Miguel Rivera  2009-03-06            7
63    STU-0064      Rosa Rivera  2008-06-23           12
198   STU-0199      Rosa Rivera  2008-06-23           11


In [5]:
# Add a column to df: final grade minus quarterly average
# (positive means the final grade is higher than the average).
df['grade_jump'] = df['final_grade'].sub(df['avg_quarterly_grade'])


In [6]:
# Preview the first three rows of df.
print(df.iloc[:3])

  student_id     full_name   birthdate  age  grade_level section  \
0   STU-0001   Diego Reyes  2013-04-08   12            7       C   
1   STU-0002    Ana Garcia  2009-09-07   16           11       A   
2   STU-0003  Juan Mendoza  2012-06-09   13            8       D   

   avg_quarterly_grade  final_grade  days_present  total_days  \
0                 69.2         65.2           184         200   
1                 86.5         88.5           176         200   
2                 69.7         74.3           171         200   

   attendance_rate  tuition_fee  amount_paid  has_scholarship  has_guardian  \
0             92.0     18042.86     14913.80             True          True   
1             88.0     22715.42     19916.42            False          True   
2             85.5     18577.36     15121.57            False          True   

   grade_jump  
0        -4.0  
1         2.0  
2         4.6  


In [7]:
# Select rows whose final grade exceeds the quarterly average by more than
# 20 points; reads the 'grade_jump' column of df.
jumped_over_20 = df['grade_jump'].gt(20)
grade_manip = df.loc[jumped_over_20]
print(f'Found:{len(grade_manip)} suspicious grade jumps (>20 points)')
if not grade_manip.empty:
    print(grade_manip.loc[:, [
        'student_id',
        'full_name',
        'avg_quarterly_grade',
        'final_grade',
        'grade_jump',
    ]])

Found:2 suspicious grade jumps (>20 points)
    student_id          full_name  avg_quarterly_grade  final_grade  \
181   STU-0182      Carlos Flores                 64.9         92.2   
194   STU-0195  Rafael Villanueva                 56.9         95.9   

     grade_jump  
181        27.3  
194        39.0  


In [8]:
# Section banner, printed as three lines: rule, title, rule.
print("=" * 60, "Check 5: Financial Anomalies", "=" * 60, sep="\n")

Check 5: Financial Anomalies


In [9]:
# Rows with amount_paid exactly zero and has_scholarship equal to False.
paid_nothing = df['amount_paid'].eq(0)
no_scholarship = df['has_scholarship'].eq(False)
financial_fraud = df.loc[paid_nothing & no_scholarship]

In [10]:
# Report the rows collected in `financial_fraud`.
# NOTE(review): the "P" in the message may be a truncated amount such as "P0" - confirm.
print(f"Found: {len(financial_fraud)} student paid P with no scholarship")
if not financial_fraud.empty:
    print(financial_fraud.loc[:, [
        'student_id',
        'full_name',
        'tuition_fee',
        'amount_paid',
        'has_scholarship',
    ]])

Found: 2 student paid P with no scholarship
    student_id     full_name  tuition_fee  amount_paid  has_scholarship
190   STU-0191    Elena Cruz     31062.01          0.0            False
196   STU-0197  Miguel Ramos     22075.49          0.0            False


In [11]:
# Section banner, printed as three lines: rule, title, rule.
print("=" * 100, "Check 6: Age Anomalies", "=" * 100, sep="\n")

# Expected age is modelled as grade level + 5; both helper columns are
# written onto df itself.
df["expected_age"] = df["grade_level"].add(5)
df["age_diff"] = df["age"].sub(df["expected_age"]).abs()

# Select rows more than 3 years away from the expected age, in either direction.
age_fraud = df.loc[df["age_diff"].gt(3)]
print(f"Found: {len(age_fraud)} student with suspicious age")

if not age_fraud.empty:
    print(age_fraud.loc[:, [
        'student_id',
        'full_name',
        'age',
        'grade_level',
        'expected_age',
        'age_diff',
    ]])

Check 6: Age Anomalies
Found: 2 student with suspicious age
    student_id       full_name  age  grade_level  expected_age  age_diff
180   STU-0181  Miguel Mendoza   30           11            16        14
185   STU-0186      Liza Lopez   35            9            14        21


In [13]:
# Section banner, printed as three lines: rule, title, rule.
print("=" * 100, "check 7: Statistical Outliers (Z-score)", "=" * 100, sep="\n")


def _zscore(values):
    """Return (values - mean) / std for a Series, using pandas' default std()."""
    return (values - values.mean()) / values.std()


# Work on a copy restricted to rows with final_grade > 0, so the z-score
# columns are not added to df.
active = df.loc[df['final_grade'].gt(0)].copy()
active['grade_zscore'] = _zscore(active['final_grade'])
# Note: 'tuition_zscore' is computed from amount_paid, not tuition_fee.
active['tuition_zscore'] = _zscore(active['amount_paid'])

# A row is an outlier when either z-score is more than 2 in absolute value.
grade_is_extreme = active['grade_zscore'].abs().gt(2)
payment_is_extreme = active['tuition_zscore'].abs().gt(2)
outliers = active.loc[grade_is_extreme | payment_is_extreme]
print(f"Found: {len(outliers)} statistical outliers (|z| > 2)")
if not outliers.empty:
    report_columns = [
        'student_id',
        'full_name',
        'final_grade',
        'grade_zscore',
        "amount_paid",
        'tuition_zscore',
    ]
    print(outliers[report_columns].round(2))




check 7: Statistical Outliers (Z-score)
Found: 4 statistical outliers (|z| > 2)
    student_id       full_name  final_grade  grade_zscore  amount_paid  \
4     STU-0005   Rafael Flores         63.7         -1.75     47334.97   
22    STU-0023  Rafael Mendoza         98.7          2.05     32288.07   
190   STU-0191      Elena Cruz         87.6          0.85         0.00   
196   STU-0197    Miguel Ramos         71.7         -0.88         0.00   

     tuition_zscore  
4              2.04  
22             0.45  
190           -2.96  
196           -2.96  
