Step 1 – Import & Load

In [5]:
import pandas as pd

grades = pd.read_csv("student_grades.csv")
attendance = pd.read_csv("attendance.csv")

print("Student Grades Data:")
print(grades.head(), "\n")

print("Attendance Data:")
print(attendance.head(), "\n")


Student Grades Data:
    name  subject  marks
0   Riya     Math     88
1  Aryan  English     75
2  Sneha  Science     92
3   Riya  English     79
4  Aryan  Science     85 

Attendance Data:
    name  attendance_percentage
0   Riya                     92
1  Aryan                     85
2  Sneha                     97 



Step 2 – Clean & Aggregate

In [6]:
# Average marks per student
avg_marks = grades.groupby('name')['marks'].mean().reset_index()
avg_marks.columns = ['name', 'average_marks']

print("Average Marks:")
print(avg_marks)


Average Marks:
    name  average_marks
0  Aryan      82.000000
1   Riya      86.666667
2  Sneha      92.000000


Step 3 – Merge Datasets

In [9]:
merged = pd.merge(avg_marks, attendance, on='name', how='outer')


print("Merged Data:")
print(merged)


Merged Data:
    name  average_marks  attendance_percentage
0  Aryan      82.000000                     85
1   Riya      86.666667                     92
2  Sneha      92.000000                     97


Step 4 – Handle Missing Values

In [8]:
merged['average_marks'] = merged['average_marks'].fillna(merged['average_marks'].mean())
merged['attendance_percentage'] = merged['attendance_percentage'].fillna(merged['attendance_percentage'].mean())


Step 5 – Compute Performance Score

In [None]:
# Calculate performance score as a weighted sum of average_marks and attendance_percentage
merged['performance_score'] = (0.7 * merged['average_marks']) + (0.3 * merged['attendance_percentage'])
print("Final Performance Scores:")  
print(merged[['name', 'performance_score']])0

Final Performance Scores:
    name  performance_score
0  Aryan          82.900000
1   Riya          88.266667
2  Sneha          93.500000


Step 6 – Identify Top Performers

In [13]:
top=merged.nlargest(1,'performance_score')
print("Top 5 Students by Performance Score:")
print(top[['name', 'performance_score']])

Top 5 Students by Performance Score:
    name  performance_score
2  Sneha               93.5


#Step 7 – Save Output

In [14]:
merged.to_csv("merged_cleaned_dataset.csv", index=False)
print("✅ Cleaned merged data saved as merged_cleaned_dataset.csv")


✅ Cleaned merged data saved as merged_cleaned_dataset.csv
