## 9.1 Creating a GroupBy Object from Scratch

In [None]:
import pandas as pd

In [None]:
# Hand-built grocery data: every key becomes a column and every list
# supplies that column's values, one entry per row.
food_data = dict(
    Item=["Banana", "Cucumber", "Orange", "Tomato", "Watermelon"],
    Type=["Fruit", "Vegetable", "Fruit", "Vegetable", "Fruit"],
    Price=[0.99, 1.25, 0.25, 0.33, 3.00],
)

supermarket = pd.DataFrame(food_data)

# Display the resulting five-row DataFrame.
supermarket

In [None]:
# Bucket the supermarket rows by the value in their "Type" column.
groups = supermarket.groupby(by="Type")

# Display the GroupBy object itself (no aggregation has happened yet).
groups

In [None]:
# Extract the rows that belong to the "Fruit" group as a DataFrame.
groups.get_group("Fruit")

In [None]:
# Extract the rows that belong to the "Vegetable" group as a DataFrame.
groups.get_group("Vegetable")

In [None]:
# Average the numeric columns within each Type. numeric_only=True skips the
# string "Item" column, which makes a bare mean() raise TypeError on
# pandas 2.0 and later.
groups.mean(numeric_only=True)

## 9.2 Creating a GroupBy Object from a Dataset

In [None]:
# Load fortune1000.csv (the path is relative to the current working
# directory) into a DataFrame and display it.
fortune = pd.read_csv("fortune1000.csv")
fortune

In [None]:
# Boolean mask: True for every row whose Sector is "Retailing".
in_retailing = fortune["Sector"].eq("Retailing")

# Keep only the masked rows, then preview the first five of them.
retail_companies = fortune.loc[in_retailing]
retail_companies.head()

In [None]:
# Preview the Revenues column of the Retailing subset.
retail_companies["Revenues"].head()

In [None]:
# Average revenue of the Retailing rows only: the manual, one-sector-at-a-time
# calculation.
retail_companies["Revenues"].mean()

In [None]:
# Bucket every row of the dataset by the value in its "Sector" column.
sectors = fortune.groupby(by="Sector")

In [None]:
# Display the GroupBy object itself; nothing has been aggregated yet.
sectors

In [None]:
# Number of groups, i.e. how many distinct sectors were found.
len(sectors)

In [None]:
# Preview the raw Sector column that the grouping is based on.
fortune["Sector"].head()

In [None]:
# Count the distinct Sector values (missing values are excluded by default);
# this is an independent way to arrive at the number of groups.
fortune["Sector"].nunique()

In [None]:
# Number of rows in each sector, returned as a Series indexed by Sector.
sectors.size()

In [None]:
# Per-sector row counts computed directly on the column; by default the
# result is sorted from the most to the least frequent sector.
fortune["Sector"].value_counts()

## 9.3 Attributes and Methods on a GroupBy Object

In [None]:
# Mapping from each Sector label to the index labels of the rows in it.
sectors.groups

In [None]:
# Spot-check one row: the Sector value stored at index label 26.
fortune.loc[26, "Sector"]

In [None]:
# For each sector, the first non-null value found in every column.
sectors.first()

In [None]:
# For each sector, the last non-null value found in every column.
sectors.last()

In [None]:
# Row at position 0 (the first row) of each sector.
# NOTE(review): on pandas 2.0+ nth() behaves as a filter, so the result keeps
# the original row index rather than being indexed by Sector; confirm which
# output shape the surrounding text expects.
sectors.nth(0)

In [None]:
# Row at position 3 (the fourth row) of each sector; sectors with fewer than
# four rows contribute nothing to the result.
sectors.nth(3)

In [None]:
# Preview the rows whose Sector is "Apparel", filtered without a GroupBy.
fortune[fortune["Sector"] == "Apparel"].head()

In [None]:
# First two rows of every sector, combined into one DataFrame that keeps the
# original row index.
sectors.head(2)

In [None]:
# Last three rows of every sector, combined into one DataFrame that keeps the
# original row index.
sectors.tail(3)

In [None]:
# Pull out the "Energy" group as a DataFrame and preview its first five rows.
sectors.get_group("Energy").head()

## 9.4 Aggregate Operations

In [None]:
# Per-sector totals of the numeric columns, first ten sectors.
# numeric_only=True keeps text columns out of the result; on pandas 2.0+ the
# default would include them (string values get concatenated).
sectors.sum(numeric_only=True).head(10)

In [None]:
# Preview the rows that make up the "Aerospace & Defense" group.
sectors.get_group("Aerospace & Defense").head()

In [None]:
# Narrow the "Aerospace & Defense" group to its Revenues column and preview it.
sectors.get_group("Aerospace & Defense").loc[:,"Revenues"].head()

In [None]:
# Total the Revenues of the "Aerospace & Defense" group by hand, as a
# cross-check of the grouped sum for that sector.
sectors.get_group("Aerospace & Defense").loc[:, "Revenues"].sum()

In [None]:
# Per-sector averages of the numeric columns, first five sectors.
# numeric_only=True is required on pandas 2.0+, where mean() raises TypeError
# as soon as a non-numeric column (such as Industry) is present.
sectors.mean(numeric_only=True).head()

In [None]:
# Select a single column from the GroupBy; the result is a SeriesGroupBy
# limited to Revenues.
sectors["Revenues"]

In [None]:
# Total Revenues per sector, first five sectors.
sectors["Revenues"].sum().head()

In [None]:
# Average Employees per sector, first five sectors.
sectors["Employees"].mean().head()

In [None]:
# Highest Profits value within each sector, first five sectors.
sectors["Profits"].max().head()

In [None]:
# Lowest Employees value within each sector, first five sectors.
sectors["Employees"].min().head()

In [None]:
# Column -> aggregation name: each listed column is reduced with its own
# operation inside every sector.
aggregations = dict(Revenues="min", Profits="max", Employees="mean")

# Apply the mixed aggregations and preview the first five sectors.
sectors.aggregate(aggregations).head()

## 9.5 Applying a Custom Operation to All Groups

In [None]:
# The five rows with the highest Profits in the whole dataset, ordered from
# largest to smallest.
fortune.nlargest(n = 5, columns = "Profits")

In [None]:
def get_largest_row(df):
    """Return the row of ``df`` with the highest "Revenues" value.

    Args:
        df: A DataFrame that has a "Revenues" column.

    Returns:
        A DataFrame holding the top-revenue row; when several rows tie for
        the maximum, the first one encountered is kept.
    """
    rows_to_keep = 1
    return df.nlargest(n=rows_to_keep, columns="Revenues")

In [None]:
# Call get_largest_row on each sector's sub-DataFrame and combine the
# per-sector results; preview the first five rows.
# NOTE(review): pandas 2.2+ emits a deprecation warning here because apply()
# also hands the grouping column to the function; confirm the target pandas
# version before relying on this output shape.
sectors.apply(get_largest_row).head()

## 9.6 Grouping by Multiple Columns

In [None]:
# Group on two keys at once: every distinct (Sector, Industry) pair becomes
# its own group.
grouping_columns = ["Sector", "Industry"]
sector_and_industry = fortune.groupby(grouping_columns)

In [None]:
# Number of rows in each (Sector, Industry) combination.
sector_and_industry.size()

In [None]:
# With multiple grouping columns a group is addressed by a tuple holding one
# value per column, in the same order the columns were passed to groupby.
sector_and_industry.get_group(("Business Services", "Education"))

In [None]:
# Totals of the numeric columns for each (Sector, Industry) pair, first five.
# numeric_only=True keeps text columns out of the result; on pandas 2.0+ the
# default would include them (string values get concatenated).
sector_and_industry.sum(numeric_only=True).head()

In [None]:
# Average Revenues for each (Sector, Industry) pair, first five pairs.
sector_and_industry["Revenues"].mean().head(5)

## 9.7 Coding Challenge

In [None]:
# Load cereals.csv (the path is relative to the current working directory)
# into a DataFrame and preview its first five rows.
cereals = pd.read_csv("cereals.csv")
cereals.head()

In [None]:
# Bucket the cereal rows by the value in their "Manufacturer" column.
manufacturers = cereals.groupby(by="Manufacturer")

In [None]:
# Number of groups, i.e. how many distinct manufacturers were found.
len(manufacturers)

In [None]:
# Number of rows (cereals) belonging to each manufacturer.
manufacturers.size()

In [None]:
# Extract the rows that belong to the "Nabisco" group as a DataFrame.
manufacturers.get_group("Nabisco")

In [None]:
# Per-manufacturer averages of the numeric columns. numeric_only=True keeps
# any text columns out of the calculation; on pandas 2.0+ a bare mean()
# raises TypeError when a non-numeric column is present.
manufacturers.mean(numeric_only=True)

In [None]:
# Highest Sugars value within each manufacturer.
manufacturers["Sugars"].max()

In [None]:
# Lowest Fiber value within each manufacturer.
manufacturers["Fiber"].min()

In [None]:
def smallest_sugar_row(df):
    """Return the row of ``df`` with the lowest "Sugars" value.

    Args:
        df: A DataFrame that has a "Sugars" column.

    Returns:
        A DataFrame holding the lowest-sugar row; when several rows tie for
        the minimum, the first one encountered is kept.
    """
    rows_to_keep = 1
    return df.nsmallest(n=rows_to_keep, columns="Sugars")

In [None]:
# Call smallest_sugar_row on each manufacturer's sub-DataFrame and combine the
# per-manufacturer results into one DataFrame.
# NOTE(review): pandas 2.2+ emits a deprecation warning here because apply()
# also hands the grouping column to the function; confirm the target pandas
# version before relying on this output shape.
manufacturers.apply(smallest_sugar_row)

## 9.8 Summary