In [1]:
# Step 1: Import Library
import pandas as pd

# Step 2: Create Student DataFrame (one row per student)
df_students = pd.DataFrame({
    "StudentID": [1, 2, 3, 4],
    "Name": ["Alice", "Bob", "Charlie", "David"],
    "Department": ["Computer Science", "Physics", "Mathematics", "Computer Science"],
})
print("Student Data:\n", df_students)

# Step 3: Create Course Enrollment DataFrame (one row per student/course pair;
# a student may appear several times, and student 4 has no enrollment here)
df_enrollments = pd.DataFrame({
    "StudentID": [1, 2, 2, 3],
    "CourseID": [101, 102, 103, 101],
    "CourseName": ["Python", "Quantum Mechanics", "Data Science", "Python"],
})
print("\nCourse Enrollments:\n", df_enrollments)

# Step 4: Create Faculty DataFrame (one row per course)
df_faculty = pd.DataFrame({
    "CourseID": [101, 102, 103],
    "FacultyName": ["Dr. Smith", "Dr. Johnson", "Dr. Lee"],
})
print("\nFaculty Details:\n", df_faculty)

# Step 5: Merge Student Data with Course Enrollment.
# how="left" keeps every student; students without an enrollment get NaN in
# the enrollment columns, which also turns CourseID into a float column.
student_courses = pd.merge(df_students, df_enrollments, on="StudentID", how="left")
print("\nStudents with Course Enrollments:\n", student_courses)

# Step 6: Merge Course Enrollment with Faculty Details
course_faculty = pd.merge(df_enrollments, df_faculty, on="CourseID", how="left")
print("\nCourse Enrollments with Faculty Details:\n", course_faculty)

# Step 7: Concatenate New Student Enrollment Records.
# ignore_index=True renumbers the rows 0..n-1 instead of repeating the
# original indexes. The result is a separate frame: the steps below keep
# using the original df_enrollments.
new_enrollments = pd.DataFrame({
    "StudentID": [4, 5],
    "CourseID": [103, 101],
    "CourseName": ["Data Science", "Python"],
})
df_enrollments_updated = pd.concat([df_enrollments, new_enrollments], ignore_index=True)
print("\nUpdated Enrollments:\n", df_enrollments_updated)

# Step 8: Join Student Data with Course Enrollment.
# DataFrame.join aligns on the index, so StudentID is moved into the index of
# both frames first.
df_students_indexed = df_students.set_index("StudentID")
df_enrollments_indexed = df_enrollments.set_index("StudentID")
student_join = df_students_indexed.join(df_enrollments_indexed, how="left")
print("\nStudent Data Joined with Enrollments:\n", student_join)

# Step 9: Merge with explicit suffixes for duplicate column names.
# Suffixes are only applied to non-key columns present in BOTH frames. Here
# the only shared column is the key (StudentID), so no column is renamed; the
# argument shows where the suffixes would be supplied.
merged_with_suffix = pd.merge(
    df_students,
    df_enrollments,
    on="StudentID",
    how="left",
    suffixes=('_Student', '_Enroll'),
)
print("\nMerged Data with Suffixes:\n", merged_with_suffix)

# Step 10: Sort the merged dataset based on StudentID
sorted_data = merged_with_suffix.sort_values(by="StudentID")
print("\nSorted Data by StudentID:\n", sorted_data)

# Step 11: Select only the students in the Computer Science department
cs_students = df_students[df_students["Department"] == "Computer Science"]
print("\nComputer Science Students:\n", cs_students)

# Step 12: Group data by Department and count students
dept_count = df_students.groupby("Department")["StudentID"].count()
print("\nNumber of Students per Department:\n", dept_count)

# Step 13: Fill missing values for students without course enrollments.
# CourseID became float in Step 5 because of the NaN placeholders. Once the
# gaps are filled with the sentinel 0 there is nothing missing any more, so
# cast the column back to integers; otherwise the IDs are exported as 101.0.
student_courses_filled = student_courses.fillna(
    {"CourseID": 0, "CourseName": "Not Enrolled"}
).astype({"CourseID": "int64"})
print("\nStudents with Missing Enrollments Filled:\n", student_courses_filled)

# Step 14: Export final cleaned DataFrame to CSV (index=False omits the row index)
student_courses_filled.to_csv("final_student_data.csv", index=False)
print("\nFinal cleaned data exported to 'final_student_data.csv'")


Student Data:
    StudentID     Name        Department
0          1    Alice  Computer Science
1          2      Bob           Physics
2          3  Charlie       Mathematics
3          4    David  Computer Science

Course Enrollments:
    StudentID  CourseID         CourseName
0          1       101             Python
1          2       102  Quantum Mechanics
2          2       103       Data Science
3          3       101             Python

Faculty Details:
    CourseID  FacultyName
0       101    Dr. Smith
1       102  Dr. Johnson
2       103      Dr. Lee

Students with Course Enrollments:
    StudentID     Name        Department  CourseID         CourseName
0          1    Alice  Computer Science     101.0             Python
1          2      Bob           Physics     102.0  Quantum Mechanics
2          2      Bob           Physics     103.0       Data Science
3          3  Charlie       Mathematics     101.0             Python
4          4    David  Computer Science       NaN    