In [2]:
import pandas as pd


In [3]:
# Sample data for school students: one key per column, one list entry per student
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank', 'Grace', 'Hannah', 'Ian', 'Jack'],
    'Age': [14, 15, 14, 15, 14, 15, 14, 15, 14, 15],
    'Grade': ['9', '10', '9', '10', '9', '10', '9', '10', '9', '10'],
    'English': [85, 78, 90, 87, 93, 75, 88, 85, 90, 80],
    'Maths': [95, 88, 92, 85, 90, 80, 85, 87, 92, 88],
    'Physics': [90, 85, 88, 92, 85, 89, 86, 90, 88, 84]
}

# Columns that contribute to the total, and the maximum mark per subject.
# Keeping these in one place means the percentage stays correct if a subject is added.
subject_columns = ['English', 'Maths', 'Physics']
max_marks_per_subject = 100

# Convert the dictionary to a DataFrame
df_students = pd.DataFrame(data)

# Total marks for each student: a vectorised row-wise sum, which avoids
# calling a Python lambda once per row via apply()
df_students['Score'] = df_students[subject_columns].sum(axis=1)

# Percentage of the maximum attainable total; reuses Score instead of summing
# the subject columns a second time
df_students['Percentage'] = (
    df_students['Score'] / (len(subject_columns) * max_marks_per_subject) * 100
)

df_students


Unnamed: 0,Name,Age,Grade,English,Maths,Physics,Score,Percentage
0,Alice,14,9,85,95,90,270,90.0
1,Bob,15,10,78,88,85,251,83.666667
2,Charlie,14,9,90,92,88,270,90.0
3,David,15,10,87,85,92,264,88.0
4,Eva,14,9,93,90,85,268,89.333333
5,Frank,15,10,75,80,89,244,81.333333
6,Grace,14,9,88,85,86,259,86.333333
7,Hannah,15,10,85,87,90,262,87.333333
8,Ian,14,9,90,92,88,270,90.0
9,Jack,15,10,80,88,84,252,84.0


## Filtering Rows
 - Filtering rows in a DataFrame can be done using boolean indexing.
 - Example 1: Filter by Age


In [4]:
# Build a boolean mask that is True for every 15-year-old student,
# then use it to keep only the matching rows
is_fifteen = df_students['Age'] == 15
df_15_years_old = df_students[is_fifteen]
df_15_years_old


Unnamed: 0,Name,Age,Grade,English,Maths,Physics,Score,Percentage
1,Bob,15,10,78,88,85,251,83.666667
3,David,15,10,87,85,92,264,88.0
5,Frank,15,10,75,80,89,244,81.333333
7,Hannah,15,10,85,87,90,262,87.333333
9,Jack,15,10,80,88,84,252,84.0


### Example 2: Filter by Percentage

In [5]:
# Mask of students whose percentage is strictly above 85,
# applied to the frame to keep just those rows
scored_above_85 = df_students['Percentage'] > 85
df_high_percentage = df_students[scored_above_85]
df_high_percentage


Unnamed: 0,Name,Age,Grade,English,Maths,Physics,Score,Percentage
0,Alice,14,9,85,95,90,270,90.0
2,Charlie,14,9,90,92,88,270,90.0
3,David,15,10,87,85,92,264,88.0
4,Eva,14,9,93,90,85,268,89.333333
6,Grace,14,9,88,85,86,259,86.333333
7,Hannah,15,10,85,87,90,262,87.333333
8,Ian,14,9,90,92,88,270,90.0


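### Example 3: Filter by Multiple Conditions
 - You can combine multiple conditions using the element-wise logical operators (`&` for AND, `|` for OR). Wrap each condition in parentheses, because `&` and `|` bind more tightly than comparison operators such as `==` and `>`.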

In [6]:
# Two independent conditions, combined element-wise with & (logical AND):
# the student is 15 years old AND scored more than 85%
age_is_15 = df_students['Age'] == 15
pct_over_85 = df_students['Percentage'] > 85
df_15_and_high_percentage = df_students[age_is_15 & pct_over_85]
df_15_and_high_percentage


Unnamed: 0,Name,Age,Grade,English,Maths,Physics,Score,Percentage
3,David,15,10,87,85,92,264,88.0
7,Hannah,15,10,85,87,90,262,87.333333


### Using the query Method
 - The `query` method lets you filter DataFrame rows with a query expression written as a string.

 - Example 4: Query by Age

In [7]:
# Same age filter as before, expressed as a query string
age_expression = 'Age == 15'
df_15_years_old_query = df_students.query(age_expression)
df_15_years_old_query


Unnamed: 0,Name,Age,Grade,English,Maths,Physics,Score,Percentage
1,Bob,15,10,78,88,85,251,83.666667
3,David,15,10,87,85,92,264,88.0
5,Frank,15,10,75,80,89,244,81.333333
7,Hannah,15,10,85,87,90,262,87.333333
9,Jack,15,10,80,88,84,252,84.0


### Example 5: Query by Percentage


In [8]:
# Keep students scoring above 85%, with the condition written as a query string
percentage_expression = 'Percentage > 85'
df_high_percentage_query = df_students.query(percentage_expression)
df_high_percentage_query


Unnamed: 0,Name,Age,Grade,English,Maths,Physics,Score,Percentage
0,Alice,14,9,85,95,90,270,90.0
2,Charlie,14,9,90,92,88,270,90.0
3,David,15,10,87,85,92,264,88.0
4,Eva,14,9,93,90,85,268,89.333333
6,Grace,14,9,88,85,86,259,86.333333
7,Hannah,15,10,85,87,90,262,87.333333
8,Ian,14,9,90,92,88,270,90.0


### Example 6: Query by Multiple Conditions


In [None]:
# Both conditions in a single query string; `and` joins them inside the expression
combined_expression = 'Age == 15 and Percentage > 85'
df_15_and_high_percentage_query = df_students.query(combined_expression)
df_15_and_high_percentage_query


### Filtering Columns
 - To select specific columns, you can use double square brackets (a list of column names) or the `.loc` indexer.

 - Example 7: Select Specific Columns

In [None]:
# Passing a list of labels to [] returns a DataFrame with just those columns
wanted_columns = ['Name', 'Percentage']
df_name_percentage = df_students[wanted_columns]
df_name_percentage


### Example 8: Select Columns Using loc


In [None]:
# With loc, the first slot selects rows (":" keeps all of them)
# and the second slot selects columns by label
loc_columns = ['Name', 'Percentage']
df_name_percentage_loc = df_students.loc[:, loc_columns]
df_name_percentage_loc


### Combining Filtering and Column Selection
 - You can filter rows and select columns in the same expression.

 - Example 9: Filter Rows and Select Columns

In [None]:
# Step 1: keep only the rows where the percentage is above 85
high_scorers = df_students[df_students['Percentage'] > 85]
# Step 2: from those rows, keep only the Name and Percentage columns
df_filtered_columns = high_scorers[['Name', 'Percentage']]
df_filtered_columns


### Example 10: Using loc for Combined Filtering and Selection


In [None]:
# loc takes the row condition and the column labels in a single indexing step
row_condition = df_students['Percentage'] > 85
column_labels = ['Name', 'Percentage']
df_filtered_columns_loc = df_students.loc[row_condition, column_labels]
df_filtered_columns_loc


In [None]:
###

### df.filter() is another pandas method that provides a flexible way to select columns based on their labels (names). Unlike df.query(), which filters rows based on conditions on their values, df.filter() is primarily used for column selection. Here's how you can use df.filter():

### Basic Usage of df.filter()
 - The `df.filter()` method allows you to select columns in a DataFrame based on their labels.

 - Example 1: Select Columns by Exact Name

In [None]:
# items= keeps only the columns whose labels appear in the given list
exact_names = ['Name', 'Age']
selected_columns = df_students.filter(items=exact_names)
selected_columns


### Example 2: Select Columns by Partial Name Matching
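 - Use the `like` parameter to keep columns whose names contain a given substring. The match is a plain, case-sensitive substring test; wildcard characters such as `*` and `?` are not interpreted (use `regex` for pattern matching).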

In [10]:
# Display the full student table again as a reference for the column names
df_students

Unnamed: 0,Name,Age,Grade,English,Maths,Physics,Score,Percentage
0,Alice,14,9,85,95,90,270,90.0
1,Bob,15,10,78,88,85,251,83.666667
2,Charlie,14,9,90,92,88,270,90.0
3,David,15,10,87,85,92,264,88.0
4,Eva,14,9,93,90,85,268,89.333333
5,Frank,15,10,75,80,89,244,81.333333
6,Grace,14,9,88,85,86,259,86.333333
7,Hannah,15,10,85,87,90,262,87.333333
8,Ian,14,9,90,92,88,270,90.0
9,Jack,15,10,80,88,84,252,84.0


In [11]:
# like= keeps every column whose label contains the given substring
name_fragment = 'h'
selected_columns_partial = df_students.filter(like=name_fragment)
selected_columns_partial


Unnamed: 0,English,Maths,Physics
0,85,95,90
1,78,88,85
2,90,92,88
3,87,85,92
4,93,90,85
5,75,80,89
6,88,85,86
7,85,87,90
8,90,92,88
9,80,88,84


### Example 3: Select Columns by Regular Expression
 - You can also use regular expressions to select columns.

In [12]:
# regex= keeps columns whose label matches the pattern;
# here: labels beginning with 'E' or beginning with 'P'
starts_with_e_or_p = '^E|^P'
selected_columns_regex = df_students.filter(regex=starts_with_e_or_p)
selected_columns_regex


Unnamed: 0,English,Physics,Percentage
0,85,90,90.0
1,78,85,83.666667
2,90,88,90.0
3,87,92,88.0
4,93,85,89.333333
5,75,89,81.333333
6,88,86,86.333333
7,85,90,87.333333
8,90,88,90.0
9,80,84,84.0


### Notes on df.filter()
 - axis: By default, `df.filter()` operates on columns (axis=1). With axis=0 it selects rows by their index labels; it never filters on the values in the rows.

 - like vs regex: `filter(like=...)` matches column names containing the specified string, while `filter(regex=...)` matches using a regular expression pattern.

 - Example Combining Filtering and df.filter()

In [None]:
# Row step: keep students older than 14 (boolean indexing)
older_than_14 = df_students[df_students['Age'] > 14]
# Column step: from those rows, keep columns whose label contains 'e'
filtered_data = older_than_14.filter(like='e')
filtered_data
