For a pandas reference, see the [pandas website](https://pandas.pydata.org/) and the book *Python for Data Analysis* by Wes McKinney.

In [1]:
import pandas as pd
import numpy as np

*Question:* How do I filter and aggregate rows in a pandas dataframe?

In [2]:
def get_product_df():
    """Build the small sample product DataFrame used throughout this notebook.

    Returns a four-row frame with the columns Name, Price, Shares and Date.
    Date is kept as an ISO-formatted (YYYY-MM-DD) string, not a datetime.
    """
    column_names = ['Name', 'Price', 'Shares', 'Date']
    rows = [
        ['Dove', 18.2, 30, '2017-05-01'],
        ['Dove', 23.2, 40, '2017-06-01'],
        ['Dove', 21.4, 32, '2017-06-03'],
        ['Spam', 7.2, 20, '2017-06-11'],
    ]
    # Name the columns at construction time rather than relabelling afterwards.
    return pd.DataFrame(rows, columns=column_names)

In [3]:
# Build the unfiltered sample frame; the later cells filter and aggregate it.
df = get_product_df()  # get original dataframe
# Show all four rows so the effect of the filter in the next cell is visible.
print(df)

   Name  Price  Shares        Date
0  Dove   18.2      30  2017-05-01
1  Dove   23.2      40  2017-06-01
2  Dove   21.4      32  2017-06-03
3  Spam    7.2      20  2017-06-11


In [4]:
# Keep only rows dated after 2017-05-31. Date holds ISO-formatted (YYYY-MM-DD)
# strings, so plain string comparison orders them chronologically.
# .copy() makes the filtered result an independent frame; without it, adding a
# column to the selection below can trigger pandas' SettingWithCopyWarning.
df = df[df['Date'] > '2017-05-31'].copy()
# Cost of each row = unit price times number of shares, stored as a new column.
df['Cost'] = df['Price'] * df['Shares']
print(df)

   Name  Price  Shares        Date   Cost
1  Dove   23.2      40  2017-06-01  928.0
2  Dove   21.4      32  2017-06-03  684.8
3  Spam    7.2      20  2017-06-11  144.0


In [5]:
# Group by Name and total Shares and Cost per product.
# The two columns are selected *before* aggregating so that the sum is never
# applied to Price (a per-row price, not meaningful as a total) or to the string
# Date column, whose handling under sum differs between pandas versions.
# The built-in .sum() replaces agg(np.sum), which newer pandas flags as deprecated.
df2 = df.groupby('Name')[['Shares', 'Cost']].sum()
print(df2)

      Shares    Cost
Name                
Dove      72  1612.8
Spam      20   144.0


See the [pandas groupby user guide](https://pandas.pydata.org/docs/user_guide/groupby.html) for more details.