# Grouping


In [96]:
import pandas as pd

In [None]:
# Load the drinks dataset from the local data directory and preview the first rows.
# NOTE(review): by default pandas parses the literal string "NA" as a missing value.
# If the continent column uses "NA" as a code (e.g. for North America), those rows
# would be read as NaN -- confirm against the CSV, and if so consider passing
# keep_default_na=False or a custom na_values list to read_csv.
drinks = pd.read_csv("./data/drinks.csv")
drinks.head()

In [None]:
# Summarize column dtypes and non-null counts to spot columns with missing values.
# (Per the original note: yes, there are some.)
drinks.info()

In [None]:
# Flag every row that has at least one missing value, then show only those rows.
filt = drinks.isnull().any(axis="columns")
drinks.loc[filt]

In [None]:
# Keep existing continent values and substitute a placeholder label wherever
# the value is missing, then re-check the non-null counts.
drinks["continent"] = drinks["continent"].where(
    drinks["continent"].notna(), "UNKNOWN"
)
drinks.info()

In [101]:
# Group the rows by continent; the resulting GroupBy object is reused by the cells below.
drinks_grouped = drinks.groupby("continent")

In [None]:
# Get the number of groups (ngroups is an attribute, so no parentheses)
drinks_grouped.ngroups

In [None]:
# Get the number of rows in each group
drinks_grouped.size()

# Another way to look at group sizes is the aggregate function .count().
# Difference: .count() counts only the non-null values, separately for each column,
# whereas .size() returns the number of rows in each group regardless of whether
# the values are present or missing.

# drinks_grouped.count()

In [None]:
# Get the number of unique beer_servings values for each continent.
# Selecting with a list of columns (double brackets) returns a DataFrame.
drinks_grouped[["beer_servings"]].nunique()

# Selecting a single column by name (single brackets) returns a Series instead:
# drinks_grouped["beer_servings"].nunique()


In [None]:
# Other ways to sample a row from each group:
# drinks_grouped.first()
# drinks_grouped.last()

# Get the second row from each group (nth is zero-based, so 1 selects the second row)
drinks_grouped.nth(1)


In [None]:
# Get the mean of beer_servings for each continent
# (the list selector keeps the result as a one-column DataFrame)
drinks_grouped[["beer_servings"]].mean()

In [None]:
# Fetch the rows belonging to the group keyed "AF" and preview the first few
drinks_grouped.get_group("AF").head()

In [None]:
# Collect the group keys (iterating the groups mapping yields its keys),
# then preview the rows of whichever group comes first.
group_list = list(drinks_grouped.groups)
drinks_grouped.get_group(group_list[0]).head()

In [None]:
# Loop through the groups: iterating a GroupBy yields (key, sub-DataFrame) pairs.
# For each group, print its key and show its first row.
# NOTE(review): `display` is not imported in the visible code; presumably this relies
# on the builtin that IPython/Jupyter injects -- it would be undefined in a plain
# Python script.
for key, group in drinks_grouped:
    print(key)
    display(group.head(1))

In [None]:
# Get the mean of beer_servings and wine_servings for each continent
# (one output column per selected input column)
drinks_grouped[["beer_servings", "wine_servings"]].mean()

In [None]:
# Get the descriptive statistics of wine_servings for each continent
drinks_grouped[["wine_servings"]].describe()

In [None]:
# Compute several summary statistics at once for beer_servings and wine_servings,
# per continent; every listed function is applied to each selected column.
drinks_grouped[["beer_servings", "wine_servings"]].aggregate(
    ["mean", "min", "max", "median"]
)

In [None]:
# Apply a different set of aggregations to each listed column, per continent.
# The mapping goes from column name to the list of functions to run on that column.
per_column_stats = {
    "beer_servings": ["min", "max"],
    "wine_servings": ["mean", "median"],
    "spirit_servings": ["mean", "median"],
    "total_litres_of_pure_alcohol": ["mean", "min"],
    "country": ["count"],
}
drinks_grouped.aggregate(per_column_stats)
