## Pivot

In [None]:
import pandas as pd
import numpy as np

# Demo frame for the pivot examples: random integers drawn from [0, 5),
# a running counter, unique string labels and a letter column with repeats.
df = pd.DataFrame({
    'rand': np.random.randint(0, 5, size=5),
    'ints': np.arange(5),
    'strgs': ['Hi%d' % i for i in range(5)],
    'let': list('LLDGL'),
})
print(df)

In [None]:
# Every 'ints' value (index) and every 'strgs' value (column) occurs exactly
# once, so pivot() places each 'rand' value on the diagonal and leaves the
# remaining cells as NaN.
dfp = df.pivot(
    values='rand',
    index='ints',
    columns='strgs',
)
print(dfp)

In [None]:
# 'let' contains repeated labels: rows sharing a label end up in the same
# pivot row, each one filling the column of its own 'strgs' value.
dfri = df.pivot(
    values='rand',
    index='let',
    columns='strgs',
)
print(dfri)

In [None]:
# Repeated column labels: rows with equal 'rand' values share one column.
# 'rand' is drawn at random, so two rows with the same 'let' can also receive
# the same 'rand'; pivot() raises ValueError for such a duplicated
# (index, column) pair. Keeping only the first row of every pair lets the
# cell run for any draw and changes nothing when there is no collision.
dfrc = (
    df.drop_duplicates(subset=['let', 'rand'])
      .pivot(index='let', columns='rand', values='strgs')
)
print(dfrc)

In [None]:
# One tuple per row; note that the ('Item0', 'gold') combination occurs twice.
# USD and EU hold strings, not numbers.
df2 = pd.DataFrame(
    [
        ('Item0', 'gold', '1', '1'),
        ('Item0', 'bronze', '2', '2'),
        ('Item0', 'gold', '3', '3'),
        ('Item1', 'silver', '4', '4'),
    ],
    columns=['Item', 'CType', 'USD', 'EU'],
)
print(df2)

In [None]:
# pivot() needs every (index, column) pair to be unique. ('Item0', 'gold')
# occurs twice in df2, so this call raises ValueError. The error is the point
# of the example: catch it and print the message so the notebook still runs
# from top to bottom.
try:
    dfe = df2.pivot(index='Item', columns='CType', values='USD')
except ValueError as err:
    print('pivot failed:', err)
else:
    print(dfe)

## Groupby

In [None]:
# Two label columns plus two columns of standard-normal random numbers.
dfg = pd.DataFrame(
    dict(
        key1=list('aabba'),
        key2=['one', 'two', 'one', 'two', 'one'],
        data1=np.random.randn(5),
        data2=np.random.randn(5),
    )
)
print(dfg)

In [None]:
# groupby() hands back a GroupBy object rather than a table, so printing it
# only shows the object's repr.
print(dfg.groupby(by='key1'))
# The .groups attribute maps every group label to the row labels it contains.
print(dfg.groupby(by='key1').groups)

In [None]:
# Passing a list of columns builds one group per distinct (key1, key2) pair.
print(dfg.groupby(by=['key1', 'key2']).groups)

In [None]:
# Working with the GroupBy object: iterating it yields (label, sub-frame)
# pairs, one per group.
g = dfg.groupby(by='key1')

for name, group in g:
    print(name, group, sep='\n')

# get_group() returns the rows that belong to a single label.
print(g.get_group('a'))

In [None]:
# Group the 'data1' Series by the labels stored in 'key1', list the members
# of every group, then print the mean of each group.
g = dfg['data1'].groupby(by=dfg['key1'])
for name, group in g:
    print(name, group, sep='\n')

print(g.mean())

## Transformation

In [None]:
# 5x5 frame holding 0..24 row by row, labelled by person (rows) and
# letter (columns).
people = pd.DataFrame(
    data=np.arange(25).reshape(5, -1),
    index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'],
    columns=list('abcde'),
)
print(people)

In [None]:
# Aggregating with an "external" list of labels: one label per row of
# `people`, in row order, instead of a column of the frame.
key = ['one', 'one', 'two', 'two', 'one']
print(people.groupby(by=key).mean())

In [None]:
# Same grouping, but transform() returns a frame shaped like `people` in
# which every row holds the mean of its own group.
# The string alias 'mean' replaces np.mean: recent pandas versions emit a
# FutureWarning when a NumPy reducer is passed here, because they will stop
# mapping it onto the built-in GroupBy.mean.
print(people.groupby(key).transform('mean'))

## Apply

In [None]:
# 5x5 frame of random integers drawn from [0, 4), with the same row and
# column labels as `people`.
people2 = pd.DataFrame(
    data=np.random.randint(0, 4, size=25).reshape(5, -1),
    index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'],
    columns=list('abcde'),
)
print(people2)

In [None]:
# Sort by a column (default 'c') and keep the last rows (default two).
def top(df, n=2, column='c'):
    """Return the last `n` rows of `df` after sorting it by `column`.

    The frame is sorted in ascending order, so the rows returned are the
    ones that sort last, and they stay in ascending order.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to pick rows from; it must contain `column`.
    n : int, default 2
        Number of rows to keep. ``n=0`` yields an empty frame.
    column : str, default 'c'
        Name of the column to sort by.

    Returns
    -------
    pandas.DataFrame
        The last `n` rows of the sorted frame.
    """
    # tail(n) instead of [-n:] so that n=0 gives no rows rather than all rows.
    return df.sort_values(by=column).tail(n)

# Show the rows of people2 that top() selects from the whole frame.
print(top(df=people2))

In [None]:
# Call top() once per distinct value of column 'a' and print the combined
# result.
print(people2.groupby(by='a').apply(func=top))

In [None]:
## introducing NaNs
# Draw three random (row, column) positions and translate them into labels.
ijrand = np.random.randint(low=5,size=(3,2))
i = people2.index[ijrand[:,0]]
j = people2.columns[ijrand[:,1]]
# NaN cannot be stored in an integer column, so switch the frame to float
# before assigning.
people2 = people2.astype(float)
# DataFrame.set_value() was removed in pandas 1.0; .loc is the label-based
# setter. It blanks every combination of the drawn row and column labels
# (repeated labels are collapsed first).
people2.loc[i.unique(), j.unique()] = np.nan
print(people2)

In [None]:
# Filling the NaNs with averages per group. The groups come from `key`
# (one label per row, defined in the Transformation section). Applying
# fill_mean to people2 directly would use whole-column means instead of
# group means, so the call goes through groupby().
def fill_mean(g):
    """Return `g` with each NaN replaced by the mean of its column in `g`."""
    return g.fillna(g.mean())

# group_keys=False keeps the original row labels instead of prefixing them
# with the group label.
people3 = people2.groupby(key, group_keys=False).apply(fill_mean)
print(people3)
