In [54]:
import pandas as pd
import numpy as np
import random
from collections import defaultdict
import math

class Student:
    def __init__(self, student_id, level_of_understanding, preferences, warning_status, cgpa, current_semester):
        self.student_id = student_id
        self.level_of_understanding = level_of_understanding
        self.preferences = preferences
        self.warning_status = warning_status
        self.cgpa = cgpa
        self.current_semester = current_semester

class Course:
    def __init__(self, course_id, name, category, chain_courses, grade, semester_offer, credit_hours, repeat_status):
        self.course_id = course_id
        self.name = name
        self.category = category
        self.chain_courses = chain_courses
        self.grade = grade
        self.semester_offer = semester_offer
        self.credit_hours = credit_hours
        self.repeat_status = repeat_status

class Transcript:
    def __init__(self, student_id, courses_taken):
        self.student_id = student_id
        self.courses_taken = courses_taken  # List of Course objects

def load_data():
    student_df = pd.read_csv('dataset/Students.csv')
    course_df = pd.read_csv('dataset/courses.csv')
    return student_df, course_df

def categorize_students(student):
    if student.cgpa >= 1.85:
        return 'near_2.0'
    elif student.cgpa >= 1.5:
        return 'near_1.5'
    else:
        return 'less_than_1.5'

def classify_student_type(transcript):
    grades = [course.grade for course in transcript.courses_taken]
    if sum(grades) / len(grades) > 2.5:
        return 'good'
    elif sum(grades) / len(grades) > 1.5:
        return 'average'
    else:
        return 'below_average'

def get_courses_taken(student_id, transcripts):
    for transcript in transcripts:
        if transcript.student_id == student_id:
            return transcript.courses_taken
    return []

def knowledge_based_filtering(student, available_courses, transcripts):
    recommendations = []
    taken_courses = get_courses_taken(student.student_id, transcripts)
    taken_course_ids = [course.course_id for course in taken_courses]

    for course in available_courses:
        # if course.semester_offer > student.current_semester + 1:
        #     continue  # Ensure we recommend courses only up to the next semester

        score = 0
        if course.category in student.level_of_understanding:
            score += 10
        if course.name in student.preferences:
            score += 5
        # check if course is taken and if yes then check if its score is less than others so that it can be repeated
        if course.course_id in taken_course_ids:
            for taken_course in taken_courses:
                if taken_course.course_id == course.course_id and taken_course.grade < 2.0 and taken_course.credit_hours > 1:
                    score += 30
        
        # check if course is not taken then its prerequisites should be met and if met then add the score        
        prerequisites_met = all(prerequisite in taken_course_ids for prerequisite in course.chain_courses)
        if prerequisites_met and course.course_id not in taken_course_ids:
            score += 15
            # for taken_course in taken_courses:
            #     if taken_course.course_id in course.chain_courses:
            #         # print(grade_to_score(taken_course.grade), 'grade')
            #         score += taken_course.grade
        
        # Consider historical course performance trends
        # historical_performance_score = calculate_historical_performance(course.course_id)
        # score += historical_performance_score
        
        recommendations.append((course, score))
    
    recommendations.sort(key=lambda x: x[1], reverse=True)
    return [(course,score) for course, score in recommendations]

def collaborative_filtering(student, all_students, transcripts):
    similar_students = find_similar_students(student, all_students, transcripts)
    course_scores = defaultdict(int)
    
    for similar_student, _ in similar_students:
        for course in get_courses_taken(similar_student.student_id, transcripts):
            course_scores[course] += course.grade
    
    sorted_courses = sorted(course_scores.items(), key=lambda x: x[1], reverse=True)
    return [course for course, _ in sorted_courses[:5]]

def find_similar_students(target_student, all_students, transcripts):
    similar_students = []
    
    for student in all_students:
        if student.student_id == target_student.student_id:
            continue

        target_transcript = get_courses_taken(target_student.student_id, transcripts)
        student_transcript = get_courses_taken(student.student_id, transcripts)

        similarity_score = calculate_similarity(target_transcript, student_transcript)
        similar_students.append((student, similarity_score))
    
    similar_students.sort(key=lambda x: x[1], reverse=True)
    return similar_students

def calculate_similarity(transcript1, transcript2):
    def filter_transcript(transcript):
        # Create a dictionary to store the highest repeat status or grade for each course
        course_dict = {}
        for course in transcript:
            if course and (course.course_id not in course_dict or course.repeat_status > course_dict[course.course_id].repeat_status):
                if not math.isnan(course.grade):
                    course_dict[course.course_id] = course
        # convert the dictionary to a list of courses
        course_dict = list(course_dict.values())
        return course_dict

    # Filter both transcripts
    filtered_transcript1 = filter_transcript(transcript1)
    filtered_transcript2 = filter_transcript(transcript2)

    print(len(filtered_transcript1), len(filtered_transcript2 ), 'lens')


    print(filtered_transcript1, filtered_transcript2, 'filtered')
    common_courses = set([course.course_id for course in filtered_transcript1]).intersection(
        [course.course_id for course in filtered_transcript2])
    
    print(len(common_courses), 'common')
    if not common_courses:
        return 0

    grades1 = [course.grade for course in filtered_transcript1 if course.course_id in common_courses]
    grades2 = [course.grade for course in filtered_transcript2 if course.course_id in common_courses]

    print(len(grades1), len(grades2), 'grades')

    mean_grade1 = sum(grades1) / len(grades1)
    mean_grade2 = sum(grades2) / len(grades2)

    numerator = sum((grades1[i] - mean_grade1) * (grades2[i] - mean_grade2) for i in range(len(grades1)))
    denominator1 = sum((grades1[i] - mean_grade1) ** 2 for i in range(len(grades1)))
    denominator2 = sum((grades2[i] - mean_grade2) ** 2 for i in range(len(grades2)))

    if denominator1 == 0 or denominator2 == 0:
        return 0
    
    return numerator / ((denominator1 * denominator2) ** 0.5)

def grade_to_score(grade):
    grade_conversion = {'A+': 4.0, 'A': 4.0, 'A-': 3.67,  'B+': 3.33, 'B': 3.0, 'B-': 2.67, 
                        'C+': 2.33, 'C': 2.0, 'C-': 1.67, 'D+': 1.33,  'D': 1.0, 'F': 0.0}
    return grade_conversion.get(grade, 0.0)


def generate_final_recommendations(student, all_students, transcripts, available_courses):
    # Knowledge-based filtering
    kn_recommendations = knowledge_based_filtering(student, available_courses, transcripts)
    
    # Collaborative filtering
    cf_recommendations = collaborative_filtering(student, all_students, transcripts)
    
    # Combine and filter recommendations
    
    # print kn_recommendations, cf_recommendations, combined_recommendations
    print("KN RECONMMENDATIONS")
    for course, score in kn_recommendations:
        print(course.course_id, course.name, score)

        kn_rec = [course for course, score in kn_recommendations]
        
    combined_recommendations = kn_rec + cf_recommendations
    print("CF RECONMMENDATIONS")
    for course in cf_recommendations:
        print(course.course_id, course.name)

    available_course_ids = {course.course_id for course in available_courses}
    unique_recommendations = {course.course_id: course for course in combined_recommendations if course.course_id in available_course_ids}.values()
    
    # Apply additional filters based on warning status, course load, etc.
    final_recommendations = []
    for course in unique_recommendations:
        if course.semester_offer > student.current_semester + 1:
            continue  # Skip courses offered in later semesters
        if student.warning_status == 2 and course.credit_hours > 3:
            continue  # Skip high credit courses for students with Warning 2
        final_recommendations.append(course)

    return final_recommendations

# Load data
student_df, course_df = load_data()

# Initialize students and transcripts
all_students = []
all_transcripts = []

for _, row in student_df.iterrows():
    student = Student(row['StudentID'], None, [], row['WarningStatus'], row['CGPA'], row['CurrentSemester'])
    courses_taken = []
    student_file = f'dataset/allstudentdata/k{row["StudentID"][0:2]}{row["StudentID"].split("-")[1]}.csv'
    student_data_df = pd.read_csv(student_file)
    for _, st_row in student_data_df.iterrows():
        course_info = course_df[course_df['CourseID'] == st_row['CourseID']]
        chain_courses = []
        
        if not course_info.empty:
            course_info = course_info.iloc[0]
            chain_courses = course_info['Chain'] if not pd.isnull(course_info['Chain']) else []
        course_category = course_info['Category']
        course = Course(st_row['CourseID'], st_row['CourseName'], course_category, chain_courses, st_row['GPA'], course_info['Semester Offer'], course_info['CreditHours'], st_row['Repeat Status'])
        courses_taken.append(course)
    
    transcript = Transcript(row['StudentID'], courses_taken)
    all_transcripts.append(transcript)
    all_students.append(student)

# Example usage
student = all_students[1]
print(student.student_id)
student.level_of_understanding = ['Programming', 'Social', 'Language']
student.preferences = []

available_courses = []
total_courses = len(course_df)
num_random_courses = 10 
random_indices = [16,17,18,19,20,21,22]

for index in random_indices:
    row = course_df.iloc[index]
    chain_courses = []
    
    if not row.empty:
        chain_courses = [row['Chain']] if not pd.isnull(row['Chain']) else []  # Populate chain_courses based on your actual logic
    course = Course(row['CourseID'], row['CourseName'], row['Category'], chain_courses, 0, row['Semester Offer'], row['CreditHours'], 0)
    available_courses.append(course)

final_recommendations = generate_final_recommendations(student, all_students, all_transcripts, available_courses)

print("Available Courses:")
for course in available_courses:
    print(course.course_id, course.name)
print("\n")

final_courses = []
final_coure_ids = []
print("Final Recommendations:")
for course in final_recommendations:
    final_courses.append(course.name)
    final_coure_ids.append(course.course_id)

# reverse the order of the list
final_courses = final_courses[::-1]
final_coure_ids = final_coure_ids[::-1]


for ids, course in zip(final_coure_ids, final_courses):
    print(f"{ids} - {course}")

21k-2345
22 16 lens
[<__main__.Course object at 0x000001CC57FAEE90>, <__main__.Course object at 0x000001CC580746D0>, <__main__.Course object at 0x000001CC589ED090>, <__main__.Course object at 0x000001CC589EF890>, <__main__.Course object at 0x000001CC588E3C10>, <__main__.Course object at 0x000001CC57FA17D0>, <__main__.Course object at 0x000001CC57C83790>, <__main__.Course object at 0x000001CC588E3AD0>, <__main__.Course object at 0x000001CC57BBCDD0>, <__main__.Course object at 0x000001CC57FB11D0>, <__main__.Course object at 0x000001CC5814CE90>, <__main__.Course object at 0x000001CC58AF29D0>, <__main__.Course object at 0x000001CC582F6490>, <__main__.Course object at 0x000001CC582F7110>, <__main__.Course object at 0x000001CC57F92750>, <__main__.Course object at 0x000001CC582F5F10>, <__main__.Course object at 0x000001CC5819BA90>, <__main__.Course object at 0x000001CC57F86750>, <__main__.Course object at 0x000001CC57F87510>, <__main__.Course object at 0x000001CC581998D0>, <__main__.Course ob