#Selection

In [2]:
#Column Selection
You can select columns using the column name.
import pandas as pd
# Build a small sample DataFrame from column-oriented data
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'San Francisco', 'Los Angeles'],
)
df = pd.DataFrame(data)

# Indexing the frame with one column label yields that column as a Series
column_label = 'Name'
name_column = df[column_label]
print(name_column)

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object


In [3]:
#Row Selection
You can select rows using the index or conditions.
# Select a single row by its integer position; the result is a Series
# whose index is the column labels
first_row = df.iloc[0]
print(first_row)

# Select multiple rows by index range (the end position 2 is excluded,
# so this returns the rows at positions 0 and 1)
first_two_rows = df.iloc[0:2]
print(first_two_rows)

Name       Alice
Age           25
City    New York
Name: 0, dtype: object
    Name  Age           City
0  Alice   25       New York
1    Bob   30  San Francisco


In [4]:
#condition selection
#Selecting Rows Based on Single Condition
Example DataFrame
import pandas as pd

# Example DataFrame: five people with their age and home city
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Age=[25, 30, 35, 40, 45],
    City=['New York', 'San Francisco', 'Los Angeles', 'Chicago', 'Houston'],
)
df = pd.DataFrame(data)

print(df)

      Name  Age           City
0    Alice   25       New York
1      Bob   30  San Francisco
2  Charlie   35    Los Angeles
3    David   40        Chicago
4      Eve   45        Houston


In [5]:
#Condition on a Single Column
# Build a boolean mask that is True for rows whose 'Age' is greater than 30,
# then index the frame with it to keep only those rows
is_older_than_30 = df['Age'] > 30
age_above_30 = df[is_older_than_30]
print(age_above_30)

      Name  Age         City
2  Charlie   35  Los Angeles
3    David   40      Chicago
4      Eve   45      Houston


In [6]:
#Condition on a Single Column with Specific Value
# Mask the rows whose 'City' equals 'New York' exactly, then keep only those rows
lives_in_new_york = df['City'] == 'New York'
city_new_york = df[lives_in_new_york]
print(city_new_york)

    Name  Age      City
0  Alice   25  New York


In [None]:
#Selecting Rows Based on Multiple Conditions
#Using & for AND Condition
# Build one mask per condition, then AND them element-wise:
# a row is kept only when 'Age' is greater than 30 and 'City' is 'Los Angeles'
older_than_30 = df['Age'] > 30
in_los_angeles = df['City'] == 'Los Angeles'
age_above_30_and_city_la = df[older_than_30 & in_los_angeles]
print(age_above_30_and_city_la)

In [None]:
#Using | for OR Condition
# Build one mask per condition, then OR them element-wise:
# a row is kept when 'Age' is greater than 30 or 'City' is 'New York' (or both)
older_than_30 = df['Age'] > 30
in_new_york = df['City'] == 'New York'
age_above_30_or_city_ny = df[older_than_30 | in_new_york]
print(age_above_30_or_city_ny)

In [None]:
#Using ~ for NOT Condition
# Compute the "greater than 30" mask first, then invert it with ~ so that
# only the rows where 'Age' is NOT greater than 30 remain
older_than_30 = df['Age'] > 30
age_not_above_30 = df[~older_than_30]
print(age_not_above_30)

In [None]:
#Using query() Method
The query method allows for a more readable syntax when dealing with multiple conditions.

#Single Condition with query()
# Express the filter as a query string (column names are referenced directly)
# and let query() return the rows where 'Age' is greater than 30
age_filter = 'Age > 30'
query_age_above_30 = df.query(age_filter)
print(query_age_above_30)

In [None]:
#Multiple Conditions with query()
# Combine two conditions inside a single query string with `and`:
# 'Age' greater than 30 AND 'City' equal to "Los Angeles"
age_and_city_filter = 'Age > 30 and City == "Los Angeles"'
query_age_above_30_and_city_la = df.query(age_and_city_filter)
print(query_age_above_30_and_city_la)

In [None]:
#Selecting Specific Columns After Filtering Rows
# Two-step selection: first keep the rows where 'Age' is greater than 30,
# then narrow the filtered frame down to the 'Name' and 'City' columns
columns_to_keep = ['Name', 'City']
rows_above_30 = df[df['Age'] > 30]
age_above_30_selected_cols = rows_above_30[columns_to_keep]
print(age_above_30_selected_cols)

In [None]:
#Using .loc with Conditions
You can also use .loc to select rows and specific columns simultaneously.
# .loc takes a row selector and a column selector in one call:
# rows where 'Age' is greater than 30, restricted to 'Name' and 'City'
row_mask = df['Age'] > 30
wanted_columns = ['Name', 'City']
loc_selection = df.loc[row_mask, wanted_columns]
print(loc_selection)

In [None]:
#Combining Multiple Conditions
You can combine multiple conditions; wrap each condition in parentheses, because & and | bind more tightly than comparison operators such as > and ==.
# Select rows where 'Age' is strictly between 30 and 40 (both bounds are
# excluded by the > and < comparisons) AND 'City' is NOT 'Chicago'
above_30 = df['Age'] > 30
below_40 = df['Age'] < 40
not_in_chicago = df['City'] != 'Chicago'
combined_conditions = df[above_30 & below_40 & not_in_chicago]
print(combined_conditions)

In [None]:
#Using .isin() for Multiple Values
Use .isin() to filter rows based on multiple values in a column.
# List the accepted values, build a membership mask with isin(), and keep
# the rows whose 'City' is either 'New York' or 'Los Angeles'
cities = ['New York', 'Los Angeles']
city_is_listed = df['City'].isin(cities)
city_selection = df[city_is_listed]
print(city_selection)

In [None]:
#Using str.contains() for String Matching
Use str.contains() for filtering rows based on partial string matches.
# Select rows where 'City' contains the substring 'San'.
# str.contains() interprets its pattern as a regular expression by default;
# regex=False makes it a plain literal substring match, which is what this
# example describes (and stays correct if the search text ever contains
# regex metacharacters such as '.' or '(').
city_contains_san = df[df['City'].str.contains('San', regex=False)]
print(city_contains_san)

In [2]:
import pandas as pd

# Create a sample DataFrame with a name, an age and a gender per person
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 35, 40],
    Gender=['F', 'M', 'M', 'M'],
)
df = pd.DataFrame(data)

# Keep only the rows whose Age is greater than 30
is_over_30 = df['Age'] > 30
filtered_df = df[is_over_30]

print(filtered_df)

      Name  Age Gender
2  Charlie   35      M
3    David   40      M


In [3]:
#Adding a Column:
You can add a new column to a DataFrame by simply assigning data to a new column name.
import pandas as pd

# Create a sample DataFrame with just names and ages
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 35, 40],
)
df = pd.DataFrame(data)

# Add a new column 'Gender' by assigning one value per existing row
genders = ['F', 'M', 'M', 'M']
df['Gender'] = genders

print(df)

      Name  Age Gender
0    Alice   25      F
1      Bob   30      M
2  Charlie   35      M
3    David   40      M


In [4]:
#Deleting a Column:
You can delete a column using the drop() method.
# Delete the 'Age' column; drop() returns a new frame, so rebind df to it
df = df.drop(columns='Age')

print(df)

      Name Gender
0    Alice      F
1      Bob      M
2  Charlie      M
3    David      M


In [5]:
#Updating a Column:
You can update values in an existing column using simple assignment.
# Update the 'Name' column by assigning a replacement value for every row
full_names = ['Alice Smith', 'Bob Johnson', 'Charlie Brown', 'David White']
df['Name'] = full_names

print(df)

            Name Gender
0    Alice Smith      F
1    Bob Johnson      M
2  Charlie Brown      M
3    David White      M


In [6]:
#Setting the Index:
You can set the index of a DataFrame using the set_index() method.
import pandas as pd

# Create a sample DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie', 'David'],
        'Age': [25, 30, 35, 40]}
df = pd.DataFrame(data)

# Set the 'Name' column as the index.
# set_index() returns a new frame; rebinding df (instead of inplace=True)
# keeps the step explicit and allows it to be chained with other methods.
df = df.set_index('Name')

print(df)

         Age
Name        
Alice     25
Bob       30
Charlie   35
David     40


In [7]:
#Removing the Index:
You can reset the index, or remove it entirely, using the reset_index() method; by default the old index is moved back into a regular column.
# Reset the index: the current index is moved back into a regular column and
# a default integer index takes its place.
# reset_index() returns a new frame; rebind df rather than using inplace=True.
df = df.reset_index()

print(df)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40


In [8]:
#Removing the Index Entirely:
To discard the current index entirely (rather than moving it back into a column) and revert to the default integer index, call the reset_index() method with the drop parameter set to True.
# Remove the index entirely: drop=True discards the old index instead of
# inserting it as a column, leaving only the default integer index.
# NOTE: when this runs right after the index was already reset, the frame
# already has the default integer index, so the printed result is unchanged.
# reset_index() returns a new frame; rebind df rather than using inplace=True.
df = df.reset_index(drop=True)

print(df)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40


In [9]:
#Addition (+):
import pandas as pd

# Create a sample DataFrame with two numeric columns
data = dict(
    A=[1, 2, 3, 4],
    B=[5, 6, 7, 8],
)
df = pd.DataFrame(data)

# Add columns 'A' and 'B' element-wise and store the result as a new column
column_sum = df['A'] + df['B']
df['A + B'] = column_sum

print(df)

   A  B  A + B
0  1  5      6
1  2  6      8
2  3  7     10
3  4  8     12


In [10]:
#Subtraction (-):
# Subtract column 'B' from column 'A' element-wise and store the result
# as a new column
column_difference = df['A'] - df['B']
df['A - B'] = column_difference

print(df)

   A  B  A + B  A - B
0  1  5      6     -4
1  2  6      8     -4
2  3  7     10     -4
3  4  8     12     -4


In [11]:
#Multiplication (*):
# Multiply columns 'A' and 'B' element-wise and store the result
# as a new column
column_product = df['A'] * df['B']
df['A * B'] = column_product

print(df)

   A  B  A + B  A - B  A * B
0  1  5      6     -4      5
1  2  6      8     -4     12
2  3  7     10     -4     21
3  4  8     12     -4     32


In [12]:
#Division (/):
# Divide column 'A' by column 'B' element-wise (true division) and store
# the result as a new column
column_quotient = df['A'] / df['B']
df['A / B'] = column_quotient

print(df)

   A  B  A + B  A - B  A * B     A / B
0  1  5      6     -4      5  0.200000
1  2  6      8     -4     12  0.333333
2  3  7     10     -4     21  0.428571
3  4  8     12     -4     32  0.500000
