In [1]:
import numpy as np
import pandas as pd

In [2]:
# Outer labels: each group name repeated once per inner number.
outside = ['G1'] * 3 + ['G2'] * 3
# Inner labels: the same 1..3 run for each of the two groups.
inside = [1, 2, 3] * 2
# Pair the labels position by position and build the two-level index in one step.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [3]:
# Show the list of outer-level (group) labels.
outside

['G1', 'G1', 'G1', 'G2', 'G2', 'G2']

In [4]:
# Show the list of inner-level (number) labels.
inside

[1, 2, 3, 1, 2, 3]

In [6]:
# Pair each outer label with the inner label at the same position (plain list of 2-tuples).
hier_index = [(group_label, number) for group_label, number in zip(outside, inside)]

In [7]:
# Inspect the zipped (outer, inner) pairs; at this point hier_index is a plain list of tuples.
hier_index

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [8]:
# Promote the list of (outer, inner) tuples to a two-level pandas MultiIndex.
hier_index = pd.MultiIndex.from_tuples(hier_index)

In [9]:
# Display the resulting MultiIndex (its repr lists the two levels and the per-row codes).
hier_index

MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [12]:
# Build a 6x2 frame of standard-normal samples keyed by the two-level index.
# The sample is drawn from a dedicated, seeded RandomState so the frame is
# identical on every Restart & Run All; the unseeded global generator produced
# different values on each run, and the global NumPy random state is left alone.
df = pd.DataFrame(
    np.random.RandomState(101).randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)

In [13]:
# Display the frame: rows are keyed by the two index levels, columns are A and B.
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,-0.814601,-0.529508
G1,2,0.343556,0.758817
G1,3,-2.525756,0.046668
G2,1,0.714228,0.616679
G2,2,-1.722851,1.398777
G2,3,1.226678,-0.035926


In [14]:
# Select column A; the result is a Series that keeps the two-level row index.
df['A']

G1  1   -0.814601
    2    0.343556
    3   -2.525756
G2  1    0.714228
    2   -1.722851
    3    1.226678
Name: A, dtype: float64

In [17]:
# Select every row under outer label 'G1'; the result is indexed by the inner level only.
df.loc['G1']

Unnamed: 0,A,B
1,-0.814601,-0.529508
2,0.343556,0.758817
3,-2.525756,0.046668


In [18]:
# Two-step lookup: take the 'G1' sub-frame, then its row labelled 1 (returned as a Series of A and B).
df.loc['G1'].loc[1]

A   -0.814601
B   -0.529508
Name: 1, dtype: float64

In [19]:
# Check the index level names; both levels are still unnamed (None) here.
df.index.names

FrozenList([None, None])

In [27]:
# Name the two index levels in place so a level can later be referred to by name
# (e.g. level='Número' in xs).
df.index.names = ['Grupo', 'Número']

In [28]:
# Confirm that the level names were applied.
df.index.names

FrozenList(['Grupo', 'Número'])

In [29]:
# Display the frame again; the level names now head the two index columns.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grupo,Número,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-0.814601,-0.529508
G1,2,0.343556,0.758817
G1,3,-2.525756,0.046668
G2,1,0.714228,0.616679
G2,2,-1.722851,1.398777
G2,3,1.226678,-0.035926


In [30]:
# Cross-section on the outer level: all rows whose 'Grupo' label is 'G1'.
df.xs('G1')

Unnamed: 0_level_0,A,B
Número,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-0.814601,-0.529508
2,0.343556,0.758817
3,-2.525756,0.046668


In [31]:
# Cross-section on the inner level: the row numbered 1 from every group.
df.xs(1,level='Número')

Unnamed: 0_level_0,A,B
Grupo,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,-0.814601,-0.529508
G2,0.714228,0.616679


In [32]:
# Toy data with missing values: A has one NaN, B has two, C is complete.
d = dict(
    A=[1, 2, np.nan],
    B=[5, np.nan, np.nan],
    C=[1, 2, 3],
)

In [33]:
# Show the raw dict, including its NaN placeholders.
d

{'A': [1, 2, nan], 'B': [5, nan, nan], 'C': [1, 2, 3]}

In [34]:
# Build a frame from the dict. Note: this rebinds `df`, replacing the MultiIndex frame used above.
df = pd.DataFrame(d)

In [35]:
# Display the frame with its missing values.
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


In [36]:
# Drop every row that contains at least one NaN (only row 0 survives); returns a new frame.
df.dropna()

Unnamed: 0,A,B,C
0,1.0,5.0,1


In [38]:
# Same call as the preceding cell, repeated; the result is unchanged.
df.dropna()

Unnamed: 0,A,B,C
0,1.0,5.0,1


In [42]:
# Keep only rows that have at least 2 non-NaN values (rows 0 and 1 qualify, row 2 does not).
df.dropna(thresh=2)

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2


In [43]:
# Replace every NaN with a placeholder string; returns a new frame and leaves `df` untouched.
df.fillna(value='Fill na')

Unnamed: 0,A,B,C
0,1,5,1
1,2,Fill na,2
2,Fill na,Fill na,3


In [48]:
# Fill the NaNs of *every* column with the mean of column A (1.5), so B's gaps
# also receive A's mean rather than B's own.
df.fillna(value=df['A'].mean())

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,1.5,2
2,1.5,1.5,3


In [47]:
# Fill only column A's NaN with A's own mean; the result is a Series.
df['A'].fillna(value=df['A'].mean())

0    1.0
1    2.0
2    1.5
Name: A, dtype: float64

In [49]:
# `df` itself still contains its NaNs; the fillna/dropna calls above returned new objects.
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


In [50]:
# Forward-fill: propagate the last valid value in each column down into the NaNs below it.
# DataFrame.ffill() is used instead of fillna(method='ffill'), whose `method`
# argument is deprecated in recent pandas releases; the result is the same.
df.ffill()

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,5.0,2
2,2.0,5.0,3


In [51]:
# Sales records in column-oriented form: company, salesperson name, sale amount.
data = dict(
    Empresa=['GOOG', 'GOOG', 'MSFT', 'MSFT', 'FB', 'FB'],
    Nome=['Sam', 'Charlie', 'Amy', 'Vanessa', 'Carl', 'Sarah'],
    Venda=[200, 120, 340, 124, 243, 350],
)

In [52]:
# Show the raw sales dict.
data

{'Empresa': ['GOOG', 'GOOG', 'MSFT', 'MSFT', 'FB', 'FB'],
 'Nome': ['Sam', 'Charlie', 'Amy', 'Vanessa', 'Carl', 'Sarah'],
 'Venda': [200, 120, 340, 124, 243, 350]}

In [55]:
# Build the sales frame: one row per sale, columns Empresa / Nome / Venda.
dfr = pd.DataFrame(data)

In [56]:
# Display the sales frame.
dfr

Unnamed: 0,Empresa,Nome,Venda
0,GOOG,Sam,200
1,GOOG,Charlie,120
2,MSFT,Amy,340
3,MSFT,Vanessa,124
4,FB,Carl,243
5,FB,Sarah,350


In [58]:
# Group the sales rows by company; the aggregations that follow are computed per 'Empresa' value.
group = dfr.groupby('Empresa')

In [59]:
# A bare GroupBy object only shows its repr; nothing tabular appears until an aggregation is applied.
group

<pandas.core.groupby.DataFrameGroupBy object at 0x7ff24a23cf28>

In [60]:
# Total sales per company.
# numeric_only=True restricts the sum to numeric columns. Without it, pandas >= 2.0
# also "sums" the string column Nome by concatenating the names, so the result
# would no longer be the single Venda column shown here.
group.sum(numeric_only=True)

Unnamed: 0_level_0,Venda
Empresa,Unnamed: 1_level_1
FB,593
GOOG,320
MSFT,464


In [64]:
# Average sale per company.
# numeric_only=True is required on pandas >= 2.0, where a bare mean() raises
# TypeError because the string column Nome cannot be averaged.
group.mean(numeric_only=True)

Unnamed: 0_level_0,Venda
Empresa,Unnamed: 1_level_1
FB,296.5
GOOG,160.0
MSFT,232.0


In [65]:
# Per-company summary statistics of Venda: count, mean, std, min, 25%/50%/75% quantiles and max.
group.describe()

Unnamed: 0_level_0,Venda,Venda,Venda,Venda,Venda,Venda,Venda,Venda
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Empresa,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
FB,2.0,296.5,75.660426,243.0,269.75,296.5,323.25,350.0
GOOG,2.0,160.0,56.568542,120.0,140.0,160.0,180.0,200.0
MSFT,2.0,232.0,152.735065,124.0,178.0,232.0,286.0,340.0


In [66]:
# Count the entries per company in each non-grouping column (Nome and Venda).
group.count()

Unnamed: 0_level_0,Nome,Venda
Empresa,Unnamed: 1_level_1,Unnamed: 2_level_1
FB,2,2
GOOG,2,2
MSFT,2,2


In [67]:
# Regroup by salesperson name. Note: this rebinds `group`, replacing the per-company grouping.
group = dfr.groupby('Nome')

In [69]:
# Total sales per salesperson.
# numeric_only=True keeps the result to the numeric Venda column. Without it,
# pandas >= 2.0 also aggregates the string column Empresa by concatenation.
group.sum(numeric_only=True)

Unnamed: 0_level_0,Venda
Nome,Unnamed: 1_level_1
Amy,340
Carl,243
Charlie,120
Sam,200
Sarah,350
Vanessa,124


In [70]:
# Total sales for one salesperson, picked out of the per-name totals by label.
# numeric_only=True keeps the result a numeric Series holding only Venda. Without it,
# pandas >= 2.0 would also carry the string column Empresa into the row.
group.sum(numeric_only=True).loc['Amy']

Venda    340
Name: Amy, dtype: int64