In [1]:
import numpy as np
import pandas as pd

In [2]:
# Seed NumPy's global random generator so the random draws in this notebook are reproducible.
SEED = 101
np.random.seed(SEED)

In [45]:
# Build a 5x4 frame of standard-normal draws: rows labelled A-E, columns W-Z.
df = pd.DataFrame(
    np.random.randn(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns='W X Y Z'.split(),
)
df

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


In [7]:
# Selecting one column label with [] returns that column as a Series.
df['W']

A    0.386030
B    0.681209
C   -1.005187
D   -1.382920
E    0.992573
Name: W, dtype: float64

In [8]:
# Confirm the type: a single selected column is a pandas Series.
type(df['W'])

pandas.core.series.Series

In [10]:
# Passing a list of column labels returns a DataFrame containing just those columns.
df[['W','Z']]

Unnamed: 0,W,Z
A,0.38603,0.230336
B,0.681209,1.939932
C,-1.005187,-0.732845
D,-1.38292,-2.141212
E,0.992573,1.292765


In [11]:
# Confirm the type: selecting with a list of labels yields a DataFrame.
type(df[['W','Z']])

pandas.core.frame.DataFrame

In [46]:
# Derive a new column as the element-wise sum of columns W and Y,
# then display the frame with the added column.
w_plus_y = df['W'] + df['Y']
df['New'] = w_plus_y
df

Unnamed: 0,W,X,Y,Z,New
A,-1.467514,-0.494095,-0.162535,0.485809,-1.630049
B,0.392489,0.221491,-0.855196,1.54199,-0.462707
C,0.666319,-0.538235,-0.568581,1.407338,0.097738
D,0.641806,-0.9051,-0.391157,1.028293,0.250649
E,-1.972605,-0.866885,0.720788,-1.223082,-1.251818


In [13]:
# Drop the 'New' column (columns= is the keyword form of labels with axis=1).
# drop returns a new frame; df itself is left unchanged.
df.drop(columns='New')

Unnamed: 0,W,X,Y,Z
A,0.38603,2.084019,-0.376519,0.230336
B,0.681209,1.035125,-0.03116,1.939932
C,-1.005187,-0.74179,0.187125,-0.732845
D,-1.38292,1.482495,0.961458,-2.141212
E,0.992573,1.192241,-1.04678,1.292765


In [14]:
# Drop the row labelled 'E' (index= is the keyword form of labels with axis=0).
# drop returns a new frame; df itself is left unchanged.
df.drop(index='E')

Unnamed: 0,W,X,Y,Z,New
A,0.38603,2.084019,-0.376519,0.230336,0.009512
B,0.681209,1.035125,-0.03116,1.939932,0.650049
C,-1.005187,-0.74179,0.187125,-0.732845,-0.818062
D,-1.38292,1.482495,0.961458,-2.141212,-0.421462


Selecting rows

In [16]:
# Select a row by its index label: .loc['A'] returns row 'A' as a Series keyed by column name.

df.loc['A']

W      0.386030
X      2.084019
Y     -0.376519
Z      0.230336
New    0.009512
Name: A, dtype: float64

In [17]:
# Select a row by integer position: .iloc[0] is the first row (label 'A' here).

df.iloc[0]

W      0.386030
X      2.084019
Y     -0.376519
Z      0.230336
New    0.009512
Name: A, dtype: float64

In [18]:
# Single-value lookup (not a slice): .loc['D'] returns row 'D' as a Series,
# and ['Y'] then picks the 'Y' entry from it. This is a two-step, chained lookup.

df.loc['D']['Y']

0.9614581560918355

In [19]:
# Look up a single value in one .loc call: row label first, then column label.
df.loc['D','Y']

0.9614581560918355

In [20]:
# Lists of labels on both axes select a sub-frame: rows A and B, columns W and Y.
df.loc[['A','B'],['W','Y']]

Unnamed: 0,W,Y
A,0.38603,-0.376519
B,0.681209,-0.03116


Conditional Selection    

In [21]:
# Element-wise comparison: produces a DataFrame of booleans with the same row and column labels.
df > 0

Unnamed: 0,W,X,Y,Z,New
A,True,True,False,True,True
B,True,True,False,True,True
C,False,False,True,False,False
D,False,True,True,False,False
E,True,True,False,True,False


In [22]:
# Indexing with a boolean DataFrame keeps values where the mask is True and fills the rest with NaN.
df[df > 0]

Unnamed: 0,W,X,Y,Z,New
A,0.38603,2.084019,,0.230336,0.009512
B,0.681209,1.035125,,1.939932,0.650049
C,,,0.187125,,
D,,1.482495,0.961458,,
E,0.992573,1.192241,,1.292765,


In [23]:
# Comparing a single column gives a boolean Series with one entry per row.
df['W'] > 0

A     True
B     True
C    False
D    False
E     True
Name: W, dtype: bool

In [24]:
# Indexing with a boolean Series keeps only the rows where it is True; all columns are retained.
df[df['W'] > 0]

Unnamed: 0,W,X,Y,Z,New
A,0.38603,2.084019,-0.376519,0.230336,0.009512
B,0.681209,1.035125,-0.03116,1.939932,0.650049
E,0.992573,1.192241,-1.04678,1.292765,-0.054206


In [27]:
# Filter rows where W is positive and pick column X in a single .loc call.
# This replaces the chained form df[mask]['X'], which builds an intermediate
# frame first and is unsafe if the same pattern is ever used for assignment.
df.loc[df['W'] > 0, 'X']

A    2.084019
B    1.035125
E    1.192241
Name: X, dtype: float64

In [29]:
# Filter rows where W is positive and pick columns X and Y in a single .loc call.
# This replaces the chained form df[mask][['X','Y']], which builds an intermediate
# frame first and is unsafe if the same pattern is ever used for assignment.
df.loc[df['W'] > 0, ['X','Y']]

Unnamed: 0,X,Y
A,2.084019,-0.376519
B,1.035125,-0.03116
E,1.192241,-1.04678


Multiple Conditions

In [31]:
# Rows where W is positive AND Y is negative. Boolean Series are combined
# element-wise with & (Python's `and` does not work on a Series).
w_positive = df['W'] > 0
y_negative = df['Y'] < 0
df[w_positive & y_negative]

Unnamed: 0,W,X,Y,Z,New
A,0.38603,2.084019,-0.376519,0.230336,0.009512
B,0.681209,1.035125,-0.03116,1.939932,0.650049
E,0.992573,1.192241,-1.04678,1.292765,-0.054206


In [32]:
# Rows where W is positive OR Y is negative. Boolean Series are combined
# element-wise with | (Python's `or` does not work on a Series).
w_positive = df['W'] > 0
y_negative = df['Y'] < 0
df[w_positive | y_negative]

Unnamed: 0,W,X,Y,Z,New
A,0.38603,2.084019,-0.376519,0.230336,0.009512
B,0.681209,1.035125,-0.03116,1.939932,0.650049
E,0.992573,1.192241,-1.04678,1.292765,-0.054206


In [33]:
# Display df again: the selections and drop calls above returned new objects and did not modify it.
df

Unnamed: 0,W,X,Y,Z,New
A,0.38603,2.084019,-0.376519,0.230336,0.009512
B,0.681209,1.035125,-0.03116,1.939932,0.650049
C,-1.005187,-0.74179,0.187125,-0.732845,-0.818062
D,-1.38292,1.482495,0.961458,-2.141212,-0.421462
E,0.992573,1.192241,-1.04678,1.292765,-0.054206


In [34]:
# Move the current index labels into a regular column named 'index' and
# replace the index with default integers 0..n-1. Returns a new frame.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z,New
0,A,0.38603,2.084019,-0.376519,0.230336,0.009512
1,B,0.681209,1.035125,-0.03116,1.939932,0.650049
2,C,-1.005187,-0.74179,0.187125,-0.732845,-0.818062
3,D,-1.38292,1.482495,0.961458,-2.141212,-0.421462
4,E,0.992573,1.192241,-1.04678,1.292765,-0.054206


In [35]:
# Display df to confirm reset_index() did not change it (no inplace=True was used).
df

Unnamed: 0,W,X,Y,Z,New
A,0.38603,2.084019,-0.376519,0.230336,0.009512
B,0.681209,1.035125,-0.03116,1.939932,0.650049
C,-1.005187,-0.74179,0.187125,-0.732845,-0.818062
D,-1.38292,1.482495,0.961458,-2.141212,-0.421462
E,0.992573,1.192241,-1.04678,1.292765,-0.054206


In [37]:
# Five state codes, one per row of df, built by splitting a space-separated string.
state_codes = 'CA NY WY OR CO'
new_index = state_codes.split()
new_index

['CA', 'NY', 'WY', 'OR', 'CO']

In [38]:
# Attach the state codes as a new column. A list is assigned positionally,
# one value per row, so its length must equal the number of rows in df.
df['States'] = new_index

In [47]:
# Display df, which now carries the 'States' column added in the previous cell.
# NOTE(review): the saved output for this cell does not show 'States'; it appears to come
# from an out-of-order run — re-run the notebook top to bottom to refresh it.
df

Unnamed: 0,W,X,Y,Z,New
A,-1.467514,-0.494095,-0.162535,0.485809,-1.630049
B,0.392489,0.221491,-0.855196,1.54199,-0.462707
C,0.666319,-0.538235,-0.568581,1.407338,0.097738
D,0.641806,-0.9051,-0.391157,1.028293,0.250649
E,-1.972605,-0.866885,0.720788,-1.223082,-1.251818


In [44]:
# Promote the 'States' column to be the row index. set_index returns a new
# frame, so rebind df instead of mutating it with inplace=True.
# Note: 'States' stops being a column afterwards, so running this cell a
# second time raises KeyError.
df = df.set_index('States')
df

Unnamed: 0_level_0,W,X,Y,Z,New
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CA,0.38603,2.084019,-0.376519,0.230336,0.009512
NY,0.681209,1.035125,-0.03116,1.939932,0.650049
WY,-1.005187,-0.74179,0.187125,-0.732845,-0.818062
OR,-1.38292,1.482495,0.961458,-2.141212,-0.421462
CO,0.992573,1.192241,-1.04678,1.292765,-0.054206


In [48]:
# Display the final state of df.
# NOTE(review): the saved output still shows the A-E index rather than the 'States' index;
# it appears to come from an out-of-order run — re-run the notebook top to bottom to refresh it.
df

Unnamed: 0,W,X,Y,Z,New
A,-1.467514,-0.494095,-0.162535,0.485809,-1.630049
B,0.392489,0.221491,-0.855196,1.54199,-0.462707
C,0.666319,-0.538235,-0.568581,1.407338,0.097738
D,0.641806,-0.9051,-0.391157,1.028293,0.250649
E,-1.972605,-0.866885,0.720788,-1.223082,-1.251818
