## Python Pandas examples

In [44]:
import pandas as pd

# Keep wide frames on one line instead of wrapping their columns in the printed output
pd.options.display.expand_frame_repr = False

In [45]:
# Load the CSV file into a DataFrame, then display it
df = pd.read_csv(filepath_or_buffer="InputData.csv")
df

Unnamed: 0,Name,Age,Shelf location
0,Eggs,2,A1
1,Bread,6,A4
2,Milk,14,A1
3,Bananas,1,B5
4,Cheese,12,A4
5,Yogurt,12,A4


In [46]:
# Order the rows by shelf location, breaking ties by age.
# The sorted frame is only displayed; df itself is not reassigned.
df.sort_values(by=['Shelf location', 'Age'])

Unnamed: 0,Name,Age,Shelf location
0,Eggs,2,A1
2,Milk,14,A1
1,Bread,6,A4
4,Cheese,12,A4
5,Yogurt,12,A4
3,Bananas,1,B5


In [47]:
# Group the rows of df by their shelf location
gk = df.groupby(by=['Shelf location'])

# gk is a GroupBy object; the aggregation itself is applied to it in a later cell

In [48]:
# For every shelf, compute the largest age among the items stored there
age_by_shelf = gk['Age']
output = age_by_shelf.max()
# Only 'Age' was selected from the groups, so 'Name' does not appear in the result
output

Shelf location
A1    14
A4    12
B5     1
Name: Age, dtype: int64

In [49]:
# `output` is a pandas Series: one maximum age per group, labelled by shelf location
isinstance(output, pd.Series)

True

In [50]:
# Next, find the names of the items whose age equals the per-shelf maximum in `output`.
# DataFrame.isin() with a Series matches on index labels, so df needs the same
# index as `output`, i.e. the shelf location.
# Rebind df to the re-indexed result rather than using inplace=True, so the frame
# object that `gk` was created from is not modified underneath it.
df = df.set_index('Shelf location')
df

Unnamed: 0_level_0,Name,Age
Shelf location,Unnamed: 1_level_1,Unnamed: 2_level_1
A1,Eggs,2
A4,Bread,6
A1,Milk,14
B5,Bananas,1
A4,Cheese,12
A4,Yogurt,12


In [51]:
# Build a boolean mask: a cell is True when it equals the entry of `output`
# found under the same index label (the shelf location)
df2 = df.isin(values=output)
df2

Unnamed: 0_level_0,Name,Age
Shelf location,Unnamed: 1_level_1,Unnamed: 2_level_1
A1,False,False
A4,False,False
A1,False,True
B5,False,True
A4,False,True
A4,False,True


In [52]:
# The mask holds one boolean for every cell of df
df2.shape

(6, 2)

In [53]:
# Keep only the rows flagged in the 'Age' column of the mask, i.e. the oldest
# item(s) on each shelf; ties are all kept
df3 = df[df2['Age']]
df3

Unnamed: 0_level_0,Name,Age
Shelf location,Unnamed: 1_level_1,Unnamed: 2_level_1
A1,Milk,14
B5,Bananas,1
A4,Cheese,12
A4,Yogurt,12
