In [6]:
import pandas as pd

data = {
    "Name": ["Alice", "Bob", "Charlie", "David", "Eva"],
    "Math": [78, 45, 88, 60, 35],
    "Science": [85, 50, 92, 58, 40],
    "English": [90, 40, 84, 65, 30],
    "Attendance": [92, 80, 95, 85, 70]
}

df = pd.DataFrame(data)
df.to_csv("students.csv", index=False)

print("students.csv created successfully!")

students.csv created successfully!


In [None]:
import pandas as pd
import numpy as np

# --- 1. CSV Reading and DataFrame Basics ---
# Read the student data from a CSV file into a pandas DataFrame
try:
    df = pd.read_csv('student_data.csv')
    print("Original DataFrame:")
    print(df.head())
    print("-" * 30)
except FileNotFoundError:
    print("Error: student_data.csv not found. Please create the file.")
    exit()

# --- 2. New Derived Columns (Average Marks per Student) ---
# Calculate the average score for each student using pandas .mean(axis=1)
# axis=1 ensures the mean is calculated across columns (subjects) for each row (student)
subject_cols = ['Math', 'Science', 'English']
df['Average_Score'] = df[subject_cols].mean(axis=1) 
print("\nDataFrame with Average Scores:")
print(df[['Name', 'Average_Score']].head())
print("-" * 30)

# --- 3. Pass/Fail Classification (Logic & Conditions) ---
# Define a passing threshold
PASS_THRESHOLD = 60

# Use a Python function and apply it to the DataFrame to classify students
def classify_pass_fail(score):
    """Classifies a score as Pass or Fail based on a threshold."""
    if score >= PASS_THRESHOLD:  # Python logic/condition
        return 'Pass'
    else:
        return 'Fail'

# Create a new column 'Pass_Status' using the classify_pass_fail function and .apply()
df['Pass_Status'] = df['Average_Score'].apply(classify_pass_fail)
print("\nDataFrame with Pass/Fail Status:")
print(df[['Name', 'Average_Score', 'Pass_Status']].head())
print("-" * 30)

# --- 4. NumPy (Average, Max, Min Analysis Examples) ---
# Use NumPy functions for overall analysis
overall_avg = np.mean(df['Average_Score'])
overall_max = np.max(df['Average_Score'])
overall_min = np.min(df['Average_Score'])

print("\nOverall Performance Analysis (using NumPy):")
print(f"Average Score of all students: {overall_avg:.2f}")
print(f"Maximum Average Score: {overall_max:.2f}")
print(f"Minimum Average Score: {overall_min:.2f}")
print("-" * 30)

# --- 5. Filtering (Pass/Fail Students) ---
# Filter DataFrame for only 'Pass' students
passed_students = df[df['Pass_Status'] == 'Pass']
print("\nStudents who Passed:")
print(passed_students[['Name', 'Average_Score']])
print("-" * 30)

# --- 6. Analysis Example: Subject-wise Topper ---
# Find the topper for each subject using pandas groupby and idxmax
for subject in subject_cols:
    topper_index = df[subject].idxmax()
    topper_name = df.loc[topper_index, 'Name']
    topper_score = df.loc[topper_index, subject]
    print(f"Topper in {subject}: {topper_name} (Score: {topper_score})")
print("-" * 30)

# --- 7. Analysis Example: Attendance vs. Marks ---
# Analyze correlation between attendance and average marks to understand their relationship
correlation = df['Attendance_Days'].corr(df['Average_Score'])
print("\nAttendance vs. Marks Analysis:")
print(f"Correlation between Attendance Days and Average Score: {correlation:.2f}")
if correlation > 0.7: # Python condition for strong correlation
    print("Conclusion: There is a strong positive correlation, suggesting higher attendance is linked to higher marks.")
else:
    print("Conclusion: The correlation is weak or moderate, other factors might be more influential.")
print("-" * 30)


Error: student_data.csv not found. Please create the file.

DataFrame with Average Scores:
      Name  Average_Score
0    Alice      84.333333
1      Bob      45.000000
2  Charlie      88.000000
3    David      61.000000
4      Eva      35.000000
------------------------------

DataFrame with Pass/Fail Status:
      Name  Average_Score Pass_Status
0    Alice      84.333333        Pass
1      Bob      45.000000        Fail
2  Charlie      88.000000        Pass
3    David      61.000000        Pass
4      Eva      35.000000        Fail
------------------------------

Overall Performance Analysis (using NumPy):
Average Score of all students: 62.67
Maximum Average Score: 88.00
Minimum Average Score: 35.00
------------------------------

Students who Passed:
      Name  Average_Score
0    Alice      84.333333
2  Charlie      88.000000
3    David      61.000000
------------------------------
Topper in Math: Charlie (Score: 88)
Topper in Science: Charlie (Score: 92)
Topper in English: Alice (

KeyError: 'Attendance_Days'

: 