## 🐼 Pandas Basics 

#### 1. Importing Pandas

In [None]:
# pd is the standard alias for pandas.


import pandas as pd

#### 2. Creating a DataFrame

In [None]:
# Creates a simple table (DataFrame) from a dictionary.


data = {'Name': ['Alice', 'Bob', 'Charlie'],
        'Age': [25, 30, 35]}
df = pd.DataFrame(data)
print(df)


#### 3. Reading a CSV File

In [None]:
# Reads data from a CSV file into a DataFrame.


df = pd.read_csv('datasets/StudentsPerformance.csv')

In [None]:
df.head(n=10)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


#### 4. Displaying the First / Last Rows

In [None]:
# Helps you preview the data.

# print(df.head())     # First 5 rows
df.tail(3)    # Last 3 rows


Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
997,female,group C,high school,free/reduced,completed,59,71,65
998,female,group D,some college,standard,completed,68,78,77
999,female,group D,some college,free/reduced,none,77,86,86


#### 5. Getting Basic Info

In [None]:
# .info() shows column types & missing values; .describe() gives stats.


# df.info()
df.describe()

Unnamed: 0,math score,reading score,writing score
count,1000.0,1000.0,1000.0
mean,66.089,69.169,68.054
std,15.16308,14.600192,15.195657
min,0.0,17.0,10.0
25%,57.0,59.0,57.75
50%,66.0,70.0,69.0
75%,77.0,79.0,79.0
max,100.0,100.0,100.0


In [10]:
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


#### 6. Selecting Columns

In [None]:
# Access specific columns using brackets.


df[['math score','gender']]                  # Single column
# df[['Name', 'Math']]       # Multiple columns


Unnamed: 0,math score,gender
0,72,female
1,69,female
2,90,female
3,47,male
4,76,male
...,...,...
995,88,female
996,62,male
997,59,female
998,68,female


#### 7. Filtering Rows (Condition)

In [None]:


math_marks = df[df['math score'] > 70]
math_marks


Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
2,female,group B,master's degree,standard,none,90,95,93
4,male,group C,some college,standard,none,76,78,75
5,female,group B,associate's degree,standard,none,71,83,78
6,female,group B,some college,standard,completed,88,95,92
...,...,...,...,...,...,...,...,...
984,female,group C,some high school,standard,none,74,75,82
987,male,group E,some high school,standard,completed,81,75,76
990,male,group E,high school,free/reduced,completed,86,81,75
995,female,group E,master's degree,standard,completed,88,99,95


#### 8. Adding a New Column

In [None]:
# Creates a new column based on existing data.

df['Total Marks'] = df['math score'] + df['reading score'] + df['writing score']

In [18]:
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,Total Marks
0,female,group B,bachelor's degree,standard,none,72,72,74,218
1,female,group C,some college,standard,completed,69,90,88,247
2,female,group B,master's degree,standard,none,90,95,93,278
3,male,group A,associate's degree,free/reduced,none,47,57,44,148
4,male,group C,some college,standard,none,76,78,75,229


#### 9. Renaming Columns

In [None]:
# Changes column names.

df.rename(columns={'Name': 'Full Name'}, inplace=True)

In [19]:
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,Total Marks
0,female,group B,bachelor's degree,standard,none,72,72,74,218
1,female,group C,some college,standard,completed,69,90,88,247
2,female,group B,master's degree,standard,none,90,95,93,278
3,male,group A,associate's degree,free/reduced,none,47,57,44,148
4,male,group C,some college,standard,none,76,78,75,229


#### 10. Dropping Columns or Rows

In [None]:


df.drop('lunch', axis=1, inplace=True)   # Drop column
# df.drop(0, axis=0, inplace=True)       # Drop row with index 0

In [21]:
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,test preparation course,math score,reading score,writing score,Total Marks
0,female,group B,bachelor's degree,none,72,72,74,218
1,female,group C,some college,completed,69,90,88,247
2,female,group B,master's degree,none,90,95,93,278
3,male,group A,associate's degree,none,47,57,44,148
4,male,group C,some college,none,76,78,75,229


#### 11. Sorting Data

In [None]:

# Sorts the DataFrame by column values.


df.sort_values(by='English', ascending=False, inplace=True)


#### 12. Resetting Index

In [None]:

# Resets index after dropping or filtering rows.

df.reset_index(drop=True, inplace=True)


#### 13. Checking for Missing Values

In [None]:

# Shows how many missing (NaN) values are in each column.

print(df.isnull().sum())

#### 14. Filling Missing Values

In [None]:

# Fills missing values with 0 or any other value.

df.fillna(0, inplace=True)

#### 15. Group By and Aggregation

In [None]:

# Groups rows by a column and calculates mean.

# Calculate average marks per student
df['Average'] = df[['Math', 'English', 'Science']].mean(axis=1)

# Define Grades
def get_grade(avg):
    if avg >= 90:
        return 'A+'
    elif avg >= 80:
        return 'A'
    elif avg >= 70:
        return 'B'
    elif avg >= 60:
        return 'C'
    else:
        return 'D'


df['Grade'] = df['Average'].apply(get_grade)

# Group by Grade and count how many students fall into each
grade_counts = df.groupby('Grade')['Full Name'].count()
print("\n📊 Grade Distribution:")
print(grade_counts)

In [None]:
# Find top 3 students

top_students = df.sort_values(by='Average', ascending=False).head(3)

print("\n🏆 Top 3 Students:")
print(top_students[['Full Name', 'Average', 'Grade']])

# Step 6: Save the report
df.to_csv('datasets/student_report.csv', index=False)

### Small Pandas Project: Classroom Attendance & Performance Tracker

#### Goal:

- Track student attendance and grades

- Compute attendance percentage

- Identify students who need follow-up

- Export cleaned report


- Step 1: Load the data
    - `pd.read_csv`
- Step 2: Calculate average grade per student
    - `df[[...]].mean(axis=1)`
- Step 4: Identify students who need attention

    Criteria: grade < 70 or attendance < 75%
    - `calculate percentage`
- Step 5: Summary report
    - `print summary`
- Step 6: Save cleaned and analyzed data
    - `df.to_csv()`


In [None]:
import pandas as pd

`Step 1:` Load the data

In [None]:


df = pd.read_csv('datasets/classroom_data.csv')
print("📄 Original Data:")
df


📄 Original Data:


Unnamed: 0,Student,Math,Science,English,Days_Present,Total_Days
0,Alice,85.0,90.0,78.0,42,45
1,Bob,70.0,88.0,,41,45
2,Charlie,95.0,92.0,89.0,45,45
3,David,60.0,,80.0,35,45
4,Eva,88.0,76.0,85.0,40,45
5,Frank,,70.0,75.0,28,45
6,Grace,82.0,85.0,80.0,38,45


`Step 2:` Handle missing grades with column mean

In [None]:
df[['Math', 'Science', 'English']] = df[['Math', 'Science', 'English']].fillna(
    df[['Math', 'Science', 'English']].mean(numeric_only=True)
)


`Step 3:` Calculate average grade per student

In [None]:

df['Average_Grade'] = df[['Math', 'Science', 'English']].mean(axis=1)


In [27]:
df

Unnamed: 0,Student,Math,Science,English,Days_Present,Total_Days,Average_Grade
0,Alice,85.0,90.0,78.0,42,45,84.333333
1,Bob,70.0,88.0,81.166667,41,45,79.722222
2,Charlie,95.0,92.0,89.0,45,45,92.0
3,David,60.0,83.5,80.0,35,45,74.5
4,Eva,88.0,76.0,85.0,40,45,83.0
5,Frank,80.0,70.0,75.0,28,45,75.0
6,Grace,82.0,85.0,80.0,38,45,82.333333


`Step 4:` Calculate attendance percentage

In [None]:

df['Attendance_%'] = (df['Days_Present'] / df['Total_Days']) * 100



In [29]:
df

Unnamed: 0,Student,Math,Science,English,Days_Present,Total_Days,Average_Grade,Attendance_%
0,Alice,85.0,90.0,78.0,42,45,84.333333,93.333333
1,Bob,70.0,88.0,81.166667,41,45,79.722222,91.111111
2,Charlie,95.0,92.0,89.0,45,45,92.0,100.0
3,David,60.0,83.5,80.0,35,45,74.5,77.777778
4,Eva,88.0,76.0,85.0,40,45,83.0,88.888889
5,Frank,80.0,70.0,75.0,28,45,75.0,62.222222
6,Grace,82.0,85.0,80.0,38,45,82.333333,84.444444


`Step 5:` Identify students who need attention

In [None]:

# Criteria: grade < 70 or attendance < 75%
df['Needs_Followup'] = (df['Average_Grade'] < 70) | (df['Attendance_%'] < 75)


In [31]:
df

Unnamed: 0,Student,Math,Science,English,Days_Present,Total_Days,Average_Grade,Attendance_%,Needs_Followup
0,Alice,85.0,90.0,78.0,42,45,84.333333,93.333333,False
1,Bob,70.0,88.0,81.166667,41,45,79.722222,91.111111,False
2,Charlie,95.0,92.0,89.0,45,45,92.0,100.0,False
3,David,60.0,83.5,80.0,35,45,74.5,77.777778,False
4,Eva,88.0,76.0,85.0,40,45,83.0,88.888889,False
5,Frank,80.0,70.0,75.0,28,45,75.0,62.222222,True
6,Grace,82.0,85.0,80.0,38,45,82.333333,84.444444,False


`Step 6:` Summary report

In [None]:

print("\n📊 Students Needing Follow-up:")
print(df[df['Needs_Followup']][['Student', 'Average_Grade', 'Attendance_%']])




📊 Students Needing Follow-up:
  Student  Average_Grade  Attendance_%
5   Frank           75.0     62.222222


`Step 7:` Save cleaned and analyzed data

In [None]:

df.to_csv('class_report_cleaned.csv', index=False)
