## DataFrame

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Seed NumPy's global random generator so the np.random.randn draws in the
# cells below produce the same numbers on every fresh, top-to-bottom run.
np.random.seed(101)

In [4]:
# Demo frame: 5 rows labelled A-E by 4 columns labelled W-Z, filled with
# standard-normal draws from NumPy's global random generator.
df = pd.DataFrame(
    data=np.random.randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [5]:
# Show the freshly built frame: row labels A-E, columns W-Z.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


### Selection and indexing

In [6]:
# Bracket access with a single column label returns that column as a Series.
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [7]:
# Confirm that a single-column selection is a pandas Series, not a DataFrame.
type(df['W'])

pandas.core.series.Series

In [8]:
# Passing a list of labels selects several columns at once and returns a DataFrame.
df[['W','Z']]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [9]:
# Attribute-style access returns the same Series as df['W']. It only works when
# the column name is a valid Python identifier that does not clash with an
# existing DataFrame attribute or method, so df['W'] is the safer form.
df.W

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [10]:
# Assigning to a label that does not exist yet creates a new column; here it
# holds the row-by-row sum of W and X.
df['new'] = df['W'] + df['X']

In [11]:
# The frame now carries the derived 'new' column on the right.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.334983
B,0.651118,-0.319318,-0.848077,0.605965,0.3318
C,-2.018168,0.740122,0.528813,-0.589001,-1.278046
D,0.188695,-0.758872,-0.933237,0.955057,-0.570177
E,0.190794,1.978757,2.605967,0.683509,2.169552


In [12]:
# drop returns a new frame without the 'new' column (axis=1 targets columns,
# axis=0 would target rows). The result is only displayed, not assigned, so df
# itself still contains 'new' after this cell.
df.drop('new', axis=1)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [13]:
# Remove the helper column for good by rebinding df to the dropped frame
# (this replaces the inplace=True form). errors='ignore' keeps the cell
# re-runnable: once 'new' is gone, running it again is a no-op instead of
# raising KeyError.
df = df.drop(columns='new', errors='ignore')

In [14]:
# 'new' no longer appears: the previous cell removed it from df permanently.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [15]:
# .loc selects by label. A single row label returns that row as a Series whose
# index is the column labels (W, X, Y, Z).
df.loc['A']

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [16]:
# .loc[rows, columns] with two label lists returns the sub-frame at their
# intersection: rows A and B, columns X, Y and Z.
df.loc[['A','B'], ['X','Y','Z']]

Unnamed: 0,X,Y,Z
A,0.628133,0.907969,0.503826
B,-0.319318,-0.848077,0.605965


In [17]:
# .iloc selects by integer position with half-open slices: row positions 1-3
# (labels B, C, D) and every column from position 2 onward (Y, Z).
df.iloc[1:4, 2:]

Unnamed: 0,Y,Z
B,-0.848077,0.605965
C,0.528813,-0.589001
D,-0.933237,0.955057


### Conditional data selection

In [18]:
# Starting point for the boolean-mask examples in this section.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [19]:
# Element-wise comparison: bol is a boolean DataFrame with the same shape and
# labels as df, True wherever the value is positive.
bol = df > 0

In [20]:
# Indexing with a boolean frame keeps the original shape and replaces every
# cell where the mask is False with NaN.
df[bol]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [21]:
# Indexing with a boolean Series filters whole rows: only rows where W is
# positive are kept, so row C drops out.
df[df['W'] > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [22]:
# Filter the rows and pick the column in one .loc call (row mask, column label)
# instead of chaining two [] lookups, which first builds an intermediate
# filtered frame. The result is the same Series: Y for the rows where W > 0.
df.loc[df['W'] > 0, 'Y']

A    0.907969
B   -0.848077
D   -0.933237
E    2.605967
Name: Y, dtype: float64

In [23]:
# Same selection as the previous cell, split into named steps.
# Note: bol is rebound here from the frame-wide mask to a row mask (boolean Series).
bol = df['W'] > 0
# Keep only the rows where the mask is True.
df2 = df[bol]
# Then pick column Y from the filtered frame.
df2['Y']

A    0.907969
B   -0.848077
D   -0.933237
E    2.605967
Name: Y, dtype: float64

In [24]:
# Combine row conditions with & (element-wise AND). Each comparison needs its
# own parentheses because & binds tighter than >, and Python's `and` cannot be
# used on a Series. Only row E satisfies both W > 0 and Y > 1.
df[(df['W'] > 0) & (df['Y'] > 1)]

Unnamed: 0,W,X,Y,Z
E,0.190794,1.978757,2.605967,0.683509


### Reset index

In [25]:
# Frame before the index is reset: rows are still labelled A-E.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [26]:
# Move the A-E labels into an ordinary column named 'index' and replace them
# with a default 0..n-1 integer index. inplace=True mutates df and returns
# None, so the cell shows no output. Re-running it changes df again, so run it
# once per fresh kernel.
df.reset_index(inplace=True)

In [27]:
# Five labels, one per row of df; they are stored in the 'Estado' column a few
# cells below.
col = ['RS', 'RJ', 'SP', 'AM', 'SC']

In [28]:
# Inspect the list of labels before attaching it to the frame.
col

['RS', 'RJ', 'SP', 'AM', 'SC']

In [29]:
# Assign the list as a new column. Values are matched to rows by position, so
# the list length must equal the number of rows in df.
df['Estado'] = col

In [30]:
# df now has the integer index, the old labels in 'index', and the new 'Estado' column.
df

Unnamed: 0,index,W,X,Y,Z,Estado
0,A,2.70685,0.628133,0.907969,0.503826,RS
1,B,0.651118,-0.319318,-0.848077,0.605965,RJ
2,C,-2.018168,0.740122,0.528813,-0.589001,SP
3,D,0.188695,-0.758872,-0.933237,0.955057,AM
4,E,0.190794,1.978757,2.605967,0.683509,SC


In [31]:
# Promote the 'Estado' column to the row index, mutating df in place (the
# previous integer index is discarded). Re-running this cell fails because
# 'Estado' is then no longer a column.
df.set_index('Estado', inplace=True)

In [32]:
# Rows are now labelled by Estado; the original A-E labels remain in the 'index' column.
df

Unnamed: 0_level_0,index,W,X,Y,Z
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RS,A,2.70685,0.628133,0.907969,0.503826
RJ,B,0.651118,-0.319318,-0.848077,0.605965
SP,C,-2.018168,0.740122,0.528813,-0.589001
AM,D,0.188695,-0.758872,-0.933237,0.955057
SC,E,0.190794,1.978757,2.605967,0.683509


### Index and Multi-Index

In [43]:
# Raw ingredients for the two-level index: the outer group label and the inner
# number for each of the six rows. The following cells zip them into tuples and
# convert the result to a MultiIndex step by step, so the index is deliberately
# not built here (doing it in this cell as well duplicated those statements).
outside = ['G1','G1','G1','G2','G2','G2']
inside = [1,2,3,1,2,3]

In [40]:
# Outer-level labels, one per row.
outside

['G1', 'G1', 'G1', 'G2', 'G2', 'G2']

In [47]:
# Inner-level labels, one per row; they repeat 1-3 within each outer group.
inside

[1, 2, 3, 1, 2, 3]

In [48]:
# Pair each outer label with its inner label: one (group, number) tuple per row.
hier_index = list(zip(outside,inside))

In [49]:
# At this point hier_index is still a plain list of tuples.
hier_index

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [50]:
# Convert the list of tuples into a pandas MultiIndex. Note that hier_index is
# rebound from a list to a MultiIndex here.
hier_index = pd.MultiIndex.from_tuples(hier_index)

In [51]:
# The same six pairs, now held as a two-level MultiIndex.
hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [54]:
# New 6x2 frame of standard-normal draws, indexed by the two-level index.
# Note: this rebinds df, replacing the single-index frame used in the earlier
# sections.
df = pd.DataFrame(
    data=np.random.randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)

In [55]:
# Rows are addressed by an (outer group, inner number) pair; columns are A and B.
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,-0.497104,-0.75407
G1,2,-0.943406,0.484752
G1,3,-0.116773,1.901755
G2,1,0.238127,1.996652
G2,2,-0.993263,0.1968
G2,3,-1.136645,0.000366


In [57]:
# Drill down one index level at a time: the first .loc picks the 'G1' group
# (a sub-frame indexed by the inner level), the second picks inner label 1
# from it, returning that row as a Series.
df.loc['G1'].loc[1]

A   -0.497104
B   -0.754070
Name: 1, dtype: float64

In [59]:
# Name the two index levels (outer, inner) so they can be referred to by name,
# as the xs call below does with level='Numero'.
df.index.names = ['Grupo', 'Numero']

In [60]:
# Same data as before; the index levels now display their names, Grupo and Numero.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grupo,Numero,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-0.497104,-0.75407
G1,2,-0.943406,0.484752
G1,3,-0.116773,1.901755
G2,1,0.238127,1.996652
G2,2,-0.993263,0.1968
G2,3,-1.136645,0.000366


In [62]:
# Cross-section: take the rows whose 'Numero' level equals 1 from every
# 'Grupo'. The selected level is dropped, so the result is indexed by Grupo only.
df.xs(1, level = 'Numero')

Unnamed: 0_level_0,A,B
Grupo,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,-0.497104,-0.75407
G2,0.238127,1.996652
