In [2]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [3]:
# Seed NumPy's global random state so the randn() draws used to build the
# frames below are repeatable from run to run.
np.random.seed(101)

In [4]:
# 5x4 frame of standard-normal draws: rows labelled A-E, columns X, Y, Z, W.
row_labels = list('ABCDE')
column_labels = list('XYZW')
df = pd.DataFrame(data=randn(5, 4), index=row_labels, columns=column_labels)

In [5]:
# Display the whole frame (rows A-E, columns X, Y, Z, W).
df

Unnamed: 0,X,Y,Z,W
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [6]:
# Single brackets with one column label select that column.
df['W']

A    0.503826
B    0.605965
C   -0.589001
D    0.955057
E    0.683509
Name: W, dtype: float64

In [7]:
# A single selected column is a pandas Series, not a DataFrame.
type(df['W'])

pandas.core.series.Series

In [8]:
# A list of labels inside the brackets selects several columns at once;
# the result is a DataFrame containing just those columns.
df[['X', 'Y']]

Unnamed: 0,X,Y
A,2.70685,0.628133
B,0.651118,-0.319318
C,-2.018168,0.740122
D,0.188695,-0.758872
E,0.190794,1.978757


In [10]:
# Assigning to a new label adds a column: 'New' is the row-wise sum of W and Z.
df['New'] = df['W'] + df['Z'] 

In [13]:
# Display the frame again; it now carries the extra 'New' column.
df

Unnamed: 0,X,Y,Z,W,New
A,2.70685,0.628133,0.907969,0.503826,1.411795
B,0.651118,-0.319318,-0.848077,0.605965,-0.242112
C,-2.018168,0.740122,0.528813,-0.589001,-0.060187
D,0.188695,-0.758872,-0.933237,0.955057,0.021819
E,0.190794,1.978757,2.605967,0.683509,3.289476


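To drop a column you must pass `axis=1` (or use `columns='New'`); the default `axis=0` looks for a row labelled 'New' and raises a `KeyError`.
`drop` returns a new DataFrame and leaves `df` unchanged, so either pass `inplace=True` or assign the result back (`df = df.drop('New', axis=1)`); otherwise only the displayed result changes and the column is not actually removed.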

In [14]:
# Remove the 'New' column: axis=1 targets columns, and inplace=True modifies
# df itself instead of returning a new frame.
df.drop('New', axis=1, inplace=True)

In [15]:
# Confirm that 'New' is gone and the original four columns remain.
df

Unnamed: 0,X,Y,Z,W
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


Two ways to select a row: by label with `.loc`, or by integer position with `.iloc`.

In [16]:
# Label-based lookup: the row labelled 'C' comes back as a Series indexed by column name.
df.loc['C']

X   -2.018168
Y    0.740122
Z    0.528813
W   -0.589001
Name: C, dtype: float64

In [17]:
# Position-based lookup: position 2 is the third row, i.e. the same row 'C'.
df.iloc[2]

X   -2.018168
Y    0.740122
Z    0.528813
W   -0.589001
Name: C, dtype: float64

Get value of a cell

In [18]:
# Row label plus column label returns a single scalar value.
df.loc['A', 'Z']

0.9079694464765431

In [19]:
# A list of row labels with one column label returns a Series for those rows.
df.loc[['A', 'B'], 'Z']

A    0.907969
B   -0.848077
Name: Z, dtype: float64

## Conditional selection

In [20]:
# Element-wise comparison: yields a boolean frame with the same shape and labels.
df > 0

Unnamed: 0,X,Y,Z,W
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [21]:
# Indexing with a boolean frame keeps values where the mask is True and
# leaves NaN everywhere else; no rows or columns are removed.
df[df > 0]

Unnamed: 0,X,Y,Z,W
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [22]:
# Comparing a single column gives a boolean Series with one flag per row.
df['W'] > 0

A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool

In [26]:
# Filter the rows and pick the column in one .loc call instead of chaining
# df[mask]['W']. The result is the same Series, no intermediate frame is
# built, and the same form can be used safely on the left of an assignment.
df.loc[df['W'] > 0, 'W']

A    0.503826
B    0.605965
D    0.955057
E    0.683509
Name: W, dtype: float64

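Combine conditions with `&` (and `|` for "or"), wrapping each comparison in parentheses. Python's `and`/`or` do not work here: they need a single True/False value, and a boolean Series raises `ValueError: The truth value of a Series is ambiguous`.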

In [25]:
# Build each row mask separately, then AND them element-wise with `&`.
w_is_positive = df['W'] > 0
z_exceeds_one = df['Z'] > 1
df[w_is_positive & z_exceeds_one]

Unnamed: 0,X,Y,Z,W
E,0.190794,1.978757,2.605967,0.683509


In [28]:
# Move the row labels into an ordinary column and replace the index with 0..n-1.
# NOTE(review): inplace=True mutates df, so this cell is not idempotent -
# running it a second time inserts another index column. A plain
# `df.reset_index()` shows the same result without changing df.
df.reset_index(inplace=True)

In [36]:
# NOTE(review): the saved output under this cell is the MultiIndex frame that
# is only built further down (this cell's execution count, 36, is later than
# that cell's 34), so it does not show the reset-index result. Restart the
# kernel and run all cells top to bottom to refresh it.
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,-0.497104,-0.75407
G1,2,-0.943406,0.484752
G1,3,-0.116773,1.901755
G2,1,0.238127,1.996652
G2,2,-0.993263,0.1968
G2,3,-1.136645,0.000366


## Multi index dataframe

In [30]:
# Outer level: group label repeated once per inner number.
outside = ['G1'] * 3 + ['G2'] * 3
# Inner level: numbers 1-3 within each group.
inside = [1, 2, 3] * 2
# Pair the two levels row by row and build the two-level index in one step,
# so hier_index is only ever bound to a MultiIndex.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [31]:
# Echo the outer-level labels (one entry per row of the index).
outside

['G1', 'G1', 'G1', 'G2', 'G2', 'G2']

In [34]:
# 6x2 frame of standard-normal draws, with rows keyed by the two-level hier_index.
df = pd.DataFrame(data=randn(6, 2), index=hier_index, columns=['A', 'B'])

In [35]:
# Display the frame; each row is identified by a (group, number) pair.
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,-0.497104,-0.75407
G1,2,-0.943406,0.484752
G1,3,-0.116773,1.901755
G2,1,0.238127,1.996652
G2,2,-0.993263,0.1968
G2,3,-1.136645,0.000366


In [38]:
# Label the two index levels: the outer level becomes 'Groups', the inner 'Numbers'.
df.index.names = ['Groups', 'Numbers']

In [43]:
# Address the cell with a single .loc call: the tuple picks the row across
# both index levels (outer 'G2', inner 2) and 'B' picks the column. This
# returns the same scalar as chaining .loc['G2'].loc[2]['B'], without creating
# the intermediate sub-frame and row Series.
df.loc[('G2', 2), 'B']

0.19679950499134005