In [1]:
import pandas as pd
import numpy as np
import re

directory_path = "/Users/bao.vo/PycharmProjects/PythonProject/Data"

In [4]:
# Load the necessary data
hs_grades_cleaned = pd.read_csv("/Users/bao.vo/PycharmProjects/PythonProject/Final_DF.csv")
courses_active = pd.read_csv("/Users/bao.vo/PycharmProjects/PythonProject/Data/Courses.csv")
grad_summary_filtered = pd.read_csv("/Users/bao.vo/PycharmProjects/PythonProject/Data/GraduationAreaSummary.csv")

# Clean up the 'Mark' column by converting it to a numeric type
hs_grades_cleaned['Mark'] = pd.to_numeric(hs_grades_cleaned['Mark'], errors='coerce')

  hs_grades_cleaned = pd.read_csv("/Users/bao.vo/PycharmProjects/PythonProject/Final_DF.csv")


In [8]:
grad_summary_filtered = grad_summary_filtered.rename(columns={'mask_studentpersonkey': 'student_id'})

In [7]:
display(grad_summary_filtered.head())

Unnamed: 0,mask_studentpersonkey,CurrentSchoolDetailFCSId,SchoolId,SchoolYearNumberFall,SubjectArea,SubjectAreaCreditRequired,AreaCredits,AreaCreditStillNeeded
0,350335,741,115,2023,Math,4,1.5,2.5
1,1946451,870,80,2022,Health/ PersonalFitness,1,0.5,0.5
2,1987664,880,81,2024,World Language/ FineArts/ CareerTech,3,1.0,2.0
3,1521002,804,143,2024,World Language/ FineArts/ CareerTech,3,1.5,1.5
4,317341,751,74,2021,Social Studies,3,2.5,0.5


In [9]:
def extract_grade_level_from_course(course_name):
    """
    Extracts a numerical grade level (9, 10, 11, 12) from a course name string.
    Uses regular expressions to find common patterns like '9th', 'G9', or roman numerals.
    """
    if not isinstance(course_name, str):
        return 0  # Default for non-string inputs

    # Roman numerals for grades 9-12
    roman_map = {'IX': 9, 'X': 10, 'XI': 11, 'XII': 12, 'I': 9, 'II': 10, 'III': 11}
    for numeral, grade in roman_map.items():
        # Match Roman numerals as whole words to avoid partial matches
        if re.search(r'\b' + numeral + r'\b', course_name, re.IGNORECASE):
            return grade

    # Arabic numerals (e.g., '9th', 'G9', 'Grade 9')
    match = re.search(r'\b(G|Grade\s*)?([9]|10|11|12)\b', course_name, re.IGNORECASE)
    if match:
        return int(match.group(2))

    return 0  # Default if no grade level is found


def assign_difficulty_rank(row):
    """
    Assigns a numerical rank to a course based on its honors description,
    creating a clear hierarchy for recommendations.
    """
    honors_desc = row['HonorsDesc']
    if pd.isna(honors_desc):
        return 1  # Standard

    honors_desc = honors_desc.lower()
    if 'ib' in honors_desc:
        return 4  # International Baccalaureate
    if 'ap' in honors_desc or 'dual' in honors_desc:
        return 3  # Advanced Placement or Dual Enrollment
    if 'honors' in honors_desc or 'hr' in honors_desc:
        return 2  # Honors
    return 1  # Standard/Default


def get_student_profile(student_id, grades_df):
    """
    Creates a profile for a student, including their current grade level
    and average mark, to inform personalized recommendations.
    """
    student_grades = grades_df[grades_df['student_id'] == student_id].copy()
    if student_grades.empty:
        return None

    # Determine current grade level by taking the most recent entry
    student_grades['GradeLevel'] = pd.to_numeric(student_grades['GradeLevel'], errors='coerce')
    current_grade = student_grades.sort_values(by='SchoolYear', ascending=False)['GradeLevel'].iloc[0]

    # Calculate the average mark, excluding missing or zero values
    valid_marks = student_grades[(student_grades['Mark'].notna()) & (student_grades['Mark'] > 0)]
    average_mark = valid_marks['Mark'].mean() if not valid_marks.empty else 75  # Default if no grades

    return {'grade_level': current_grade, 'average_mark': average_mark}


# --- Data Preparation ---

# Enhance the course catalog with grade level and difficulty
courses_active_enhanced = courses_active.copy()
courses_active_enhanced['course_grade_level'] = courses_active_enhanced['coursename'].apply(extract_grade_level_from_course)
courses_active_enhanced['difficulty_rank'] = courses_active_enhanced.apply(assign_difficulty_rank, axis=1)

# Map subject areas to department descriptions
subjectarea_map = {
    "Math": "MATH",
    "Health/ PersonalFitness": "HEALTH EDUCATION",
    "World Language/ FineArts/ CareerTech": "CAREER TECHNICAL AND AGRICULTURAL EDUCATION",
    "Social Studies": "SOCIAL SCIENCES",
    "ELA": "LANGUAGE ARTS",
    "Science": "SCIENCE",
    "Electives": "ELECTIVE COURSES"
}

# --- Recommendation Engine ---

def generate_recommendations(student_id, summary_df, grades_df, courses_df):
    """
    Generates intelligent and personalized course recommendations for a student
    based on their academic profile and credit needs.
    """
    profile = get_student_profile(student_id, grades_df)
    if not profile:
        return pd.DataFrame()

    student_grade = profile['grade_level']
    student_avg_mark = profile['average_mark']

    # Identify credit gaps for the student
    credit_gaps = summary_df[(summary_df['student_id'] == student_id) & (summary_df['AreaCreditStillNeeded'] > 0)]
    if credit_gaps.empty:
        return pd.DataFrame()

    # Determine the minimum difficulty for course recommendations
    min_difficulty = 2 if student_avg_mark >= 90 else 1

    recommendations = []
    for _, gap in credit_gaps.iterrows():
        subject_area = gap['SubjectArea']
        department = subjectarea_map.get(subject_area)
        if not department:
            continue

        # Filter courses by the required department
        possible_courses = courses_df[courses_df['DepartmentDesc'] == department]

        # Get a list of courses the student has already taken
        taken_course_numbers = set(grades_df[grades_df['student_id'] == student_id]['CourseNumber'])

        for _, course in possible_courses.iterrows():
            # --- Filtering Logic ---
            # 1. Exclude courses already taken
            if course['siscourseidentifier'] in taken_course_numbers:
                continue

            # 2. Match grade level: recommend at or above student's level
            course_grade = course['course_grade_level']
            if course_grade != 0 and course_grade < student_grade:
                continue

            # 3. Match difficulty: recommend challenging courses to high achievers
            if course['difficulty_rank'] < min_difficulty:
                continue

            recommendations.append({
                'Student ID': student_id,
                'Current Grade': student_grade,
                'Average Mark': f"{student_avg_mark:.2f}",
                'Needed Subject': subject_area,
                'Credits Needed': gap['AreaCreditStillNeeded'],
                'Recommended Course': course['coursename'],
                'Course Level': course['HonorsDesc'] or 'Standard',
                'Course Grade Target': f"G{course_grade}" if course_grade > 0 else "Any"
            })

    return pd.DataFrame(recommendations)

# --- Example Usage ---

# Select a student to test the recommendation engine
test_student_id = 1988932  # An example student ID

# Generate the improved recommendations
final_recommendations = generate_recommendations(test_student_id, grad_summary_filtered, hs_grades_cleaned, courses_active_enhanced)

# Display the recommendations for the test student
if not final_recommendations.empty:
    print(f"✅ Personalized Recommendations for Student: {test_student_id}")
    display(final_recommendations)
else:
    print(f"ℹ️ No recommendations needed or generated for Student: {test_student_id}")

✅ Personalized Recommendations for Student: 1988932


Unnamed: 0,Student ID,Current Grade,Average Mark,Needed Subject,Credits Needed,Recommended Course,Course Level,Course Grade Target
0,1988932,12,83.33,ELA,0.5,AP Lang/Comp,Advanced Placement,Any
1,1988932,12,83.33,ELA,0.5,TR AP Lang/Comp,,Any
2,1988932,12,83.33,ELA,0.5,AP Lang/Amer Lit,,Any
3,1988932,12,83.33,ELA,0.5,9th Lit/Comp H,Honors,Any
4,1988932,12,83.33,ELA,0.5,9th Lit/Comp H M,Honors,Any
...,...,...,...,...,...,...,...,...
11850,1988932,12,83.33,Math,1.5,College Algebra,,Any
11851,1988932,12,83.33,Math,1.5,History of Math,,Any
11852,1988932,12,83.33,Math,1.5,Algebra: C&C H,,Any
11853,1988932,12,83.33,Math,1.5,Advanced Algebra: C&C H,,Any
