In [4]:
import pandas as pd

# --- Sample data -----------------------------------------------------------
# Three small in-memory tables used by the merge/join examples below.

# Students: one row per student, keyed by StudentID.
students_data = {
    "StudentID": [1, 2, 3, 4, 5],
    "Name": ["Alice", "Bob", "Charlie", "David", "Eva"],
    "Department": [
        "Computer Science",
        "Physics",
        "Mathematics",
        "Computer Science",
        "Biology",
    ],
}
df_students = pd.DataFrame.from_dict(students_data)

# Enrollments: one row per (student, course) pairing.
# StudentID 6 has no matching row in the students table, so it only shows up
# in merges that keep rows from the enrollment side.
# NOTE(review): CourseID 101 is paired with two different CourseName values
# ("Data Structures" and "Algorithms") — confirm whether that is intentional.
enrollment_data = {
    "StudentID": [1, 2, 3, 4, 5, 6],
    "CourseID": [101, 102, 103, 101, 104, 105],
    "CourseName": [
        "Data Structures",
        "Quantum Mechanics",
        "Calculus",
        "Algorithms",
        "Genetics",
        "Chemistry",
    ],
}
df_enrollment = pd.DataFrame.from_dict(enrollment_data)

# Faculty: one instructor per CourseID.
faculty_data = {
    "CourseID": [101, 102, 103, 104, 105],
    "FacultyName": [
        "Dr. Smith",
        "Dr. Lee",
        "Dr. Brown",
        "Dr. Taylor",
        "Dr. Wilson",
    ],
}
df_faculty = pd.DataFrame.from_dict(faculty_data)




In [5]:
# Step 4: Merge student data with course enrollment.
# A left merge keeps every row of df_students and attaches matching
# enrollment rows by StudentID.
df_student_courses = df_students.merge(df_enrollment, how="left", on="StudentID")
print("\nStudent + Course Enrollment:\n", df_student_courses)

# Step 5: Merge course enrollment with faculty details.
# Every enrollment row is kept; the instructor is looked up by CourseID.
df_courses_faculty = df_enrollment.merge(df_faculty, how="left", on="CourseID")
print("\nCourse Enrollment + Faculty:\n", df_courses_faculty)

# Step 6: Concatenate new student enrollment records.
# The new rows are stacked under the existing ones; ignore_index=True
# renumbers the resulting index from 0.
new_enrollment = pd.DataFrame(
    {
        "StudentID": [7, 8],
        "CourseID": [101, 103],
        "CourseName": ["Data Structures", "Calculus"],
    }
)
enrollment_frames = [df_enrollment, new_enrollment]
df_enrollment_all = pd.concat(enrollment_frames, ignore_index=True)
print("\nConcatenated Enrollment:\n", df_enrollment_all)

# Step 7: Join student data with course enrollment using join().
# join() aligns on the index, so both frames are re-indexed by StudentID first.
# lsuffix/rsuffix only affect column names present in both frames; with
# StudentID moved into the index, these two frames share none.
df_students_indexed = df_students.set_index("StudentID")
df_enrollment_indexed = df_enrollment.set_index("StudentID")
df_joined = df_students_indexed.join(
    df_enrollment_indexed,
    how="left",
    lsuffix='_student',
    rsuffix='_enroll',
)
print("\nJoined Student & Enrollment:\n", df_joined)

# Step 8: Handle duplicate column names using suffixes.
# suffixes are applied to overlapping non-key columns; apart from the
# StudentID key these two frames have no column names in common.
df_merged_with_suffix = df_students.merge(
    df_enrollment,
    how="left",
    on="StudentID",
    suffixes=('_student', '_enroll'),
)
print("\nMerged with suffixes:\n", df_merged_with_suffix)

# Step 9: Sort the merged dataset based on StudentID.
# A stable sort (mergesort) keeps rows that share a StudentID in their
# original merge order; the default sort kind gives no such guarantee.
df_sorted = df_merged_with_suffix.sort_values(by="StudentID", kind="mergesort")
print("\nSorted by StudentID:\n", df_sorted)

# Step 10: Select only the students whose Department is Computer Science.
df_cs = df_sorted[df_sorted["Department"] == "Computer Science"]
print("\nComputer Science Students:\n", df_cs)

# Step 11: Group by Department and count students.
# Counting is done on df_students (not the merged frame) so each student is
# counted from the students table rather than once per enrollment row.
department_counts = (
    df_students
    .groupby("Department")["StudentID"]
    .count()
    .reset_index(name="StudentCount")
)
print("\nNumber of students per department:\n", department_counts)

# Step 12: Fill missing values for students without course enrollments.
# A left merge that leaves any CourseID missing turns the whole column into
# floats (101 -> 101.0). After filling the gaps with the 0 placeholder, cast
# the column back to int64 so IDs are printed and exported as integers.
df_filled = (
    df_sorted
    .fillna({"CourseID": 0, "CourseName": "Not Enrolled"})
    .astype({"CourseID": "int64"})
)
print("\nFilled missing course data:\n", df_filled)

# Step 13: Export the final cleaned DataFrame to CSV.
# index=False omits the DataFrame index from the written file.
df_filled.to_csv("final_student_course_data.csv", index=False)
print("✅ Final cleaned data exported to final_student_course_data.csv")



Student + Course Enrollment:
    StudentID     Name        Department  CourseID         CourseName
0          1    Alice  Computer Science       101    Data Structures
1          2      Bob           Physics       102  Quantum Mechanics
2          3  Charlie       Mathematics       103           Calculus
3          4    David  Computer Science       101         Algorithms
4          5      Eva           Biology       104           Genetics

Course Enrollment + Faculty:
    StudentID  CourseID         CourseName FacultyName
0          1       101    Data Structures   Dr. Smith
1          2       102  Quantum Mechanics     Dr. Lee
2          3       103           Calculus   Dr. Brown
3          4       101         Algorithms   Dr. Smith
4          5       104           Genetics  Dr. Taylor
5          6       105          Chemistry  Dr. Wilson

Concatenated Enrollment:
    StudentID  CourseID         CourseName
0          1       101    Data Structures
1          2       102  Quantum Mec