In [2]:
import pandas as pd
import numpy as np
import seaborn as sns

In [3]:
# Draw a reproducible 100-row sample: a fixed random_state keeps every output
# below stable across "Restart Kernel -> Run All".
# reset_index() (without drop=True) renumbers the rows from 0 and keeps the
# original row labels in a new "index" column.
penguins = (sns.load_dataset("penguins")
               .sample(100, random_state=42)
               .reset_index())
# Synthetic numeric column (normal distribution, mean 42, sd 7), drawn from a
# seeded generator and sized from the frame itself rather than a repeated literal.
penguins.loc[:, "new_col"] = np.random.default_rng(42).normal(loc=42,
                                                              scale=7,
                                                              size=len(penguins))

Ways to filter rows from a DataFrame:
1. by a single categorical value with `==`
2. by multiple categorical values with `isin()`
3. by multiple categorical conditions with `df.query()`
4. by a single numerical condition with `>` or `<`
5. by multiple numerical conditions with `np.select()` (labels each row in a new column rather than dropping rows)
6. by multiple numerical conditions combined with `&`, using `>` or `<`
7. by multiple numerical conditions with `df.query()`
8. by exclusion with `~` and `!=`

Indexing with `.iloc` and `.loc`:
1. `df.iloc[row_position, column_position]` selects by integer position
2. `df.loc[row_label, column_label]` selects by index label and column name

More info: https://stackoverflow.com/questions/17071871/how-do-i-select-rows-from-a-dataframe-based-on-column-values

In [None]:
# by a single categorical value with ==
def run_func(df=None, species="Chinstrap"):
    """Return the rows of a frame whose "species" column equals one value.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame with a "species" column. When omitted, the notebook-level
        ``penguins`` frame is used, so a bare ``run_func()`` still works.
    species : str, default "Chinstrap"
        Value the "species" column must equal for a row to be kept.

    Returns
    -------
    pandas.DataFrame
        The matching rows, keeping their original index labels.
    """
    if df is None:
        # Looked up at call time, so the frame only has to exist when the
        # function is actually called without an argument.
        df = penguins
    is_wanted_species = df["species"] == species
    return df[is_wanted_species]

run_func()

# Equivalent one-liner without the helper function:
# penguins[penguins["species"] == "Chinstrap"]

In [None]:
# by multiple categorical conditions using isin()
# Each key is a column name; each value lists the entries accepted in that column.
conditions = {"species": ["Adelie", "Chinstrap"],
              "island": ["Dream"]}
# The columns to test are taken from the dict keys so the two cannot drift apart:
# isin(dict) marks any tested column that is missing from the dict as all-False,
# which would silently filter out every row.
(penguins[penguins.loc[:, list(conditions)]
                  .isin(conditions)
                  .all(axis=1)])   # axis=1: keep a row only if every tested column matches

In [None]:
# Filter on several categorical columns at once with a df.query() expression;
# column names are written bare and string values are quoted inside it.
(
    penguins
    .query("island == 'Dream' and species == 'Adelie'")
)

In [None]:
# create new column by multiple conditions using np.select()
# np.select() checks the conditions in order and, per row, takes the choice that
# belongs to the first condition that is True; rows matching none get `default`.
conditions = [
    (penguins["bill_length_mm"] < 45) & (penguins["flipper_length_mm"] < 200),
    # `>=` rather than `>`: a bill of exactly 45 mm previously matched neither
    # condition and ended up in the default group.
    (penguins["bill_length_mm"] >= 45) & (penguins["flipper_length_mm"] < 200)
]
choices = ["group1", "group2"]
# Remaining rows -- flipper_length_mm >= 200, or a missing measurement (comparisons
# against NaN are False) -- fall through to "group3".
penguins.loc[:, "grouping"] = np.select(conditions, choices, default="group3")


In [None]:
# Combine several numerical conditions element-wise with `&`: keep the rows
# with a bill shorter than 45 mm and a flipper longer than 200 mm.
penguins.loc[
    penguins["bill_length_mm"].lt(45) & penguins["flipper_length_mm"].gt(200)
]

In [None]:
# by querying columns
# Only the last expression of a cell is rendered, so the first result is passed
# to display() explicitly; otherwise it is computed and thrown away.
display(penguins.query("bill_length_mm > new_col"))          # compare one column against another
penguins.query("bill_length_mm > 50 and bill_depth_mm > 19")  # compare columns against constants

In [None]:
# by exclusion
# Only the last expression of a cell is rendered, so the first result is passed
# to display() explicitly; otherwise it is computed and thrown away.
display(penguins[~penguins["species"].isin(["Adelie", "Chinstrap"])])       # ~ negates the isin() mask
# Note: != also keeps rows whose value is missing, because NaN != "Male" is True.
penguins[(penguins["species"] != "Adelie") & (penguins["sex"] != "Male")]   # !=
