In [49]:
import numpy as np
import pandas as pd
from numpy.random import randn

#numpy is imported for randn (random sample data); pandas provides the DataFrame and Series types used below

In [50]:
#Seed NumPy's global random generator so the randn values generated below are the same on every run
np.random.seed(101)

In [51]:
#Build a 5x4 DataFrame of random samples drawn with randn.
#index supplies the row labels (A-E); columns supplies the column labels (W-Z), and each column is a Series.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [52]:
#Selecting a single column by its label with [] returns that column as a Series

df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [53]:
#Confirm that a single selected column is a pandas Series

type(df['W'])

pandas.core.series.Series

In [54]:
#Confirm that df itself is a pandas DataFrame

type(df)

pandas.core.frame.DataFrame

In [55]:
#Pass a list of column labels (hence the double [[ ]]) to get back a DataFrame instead of a Series

df[['W','Z']]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [56]:
#Create a new column 'new' by assignment; here it is the element-wise sum of columns W and Y

df['new'] = df['W']+df['Y']
df['new']

A    3.614819
B   -0.196959
C   -1.489355
D   -0.744542
E    2.796762
Name: new, dtype: float64

In [57]:
#The 'new' column is now part of df

df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [58]:
#To drop the 'new' column, pass axis=1 (columns); axis=0, which is also the default, drops a row instead
#This call returns a new DataFrame without 'new'; df itself is left unchanged

df.drop('new',axis=1)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [59]:
#As shown, 'new' is still in df: drop returned a modified copy and did not change df
#To make the removal stick, pass inplace=True (or assign the result of drop back to df)

df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [60]:
#inplace=True modifies df itself instead of returning a new DataFrame
#NOTE(review): re-running this cell once 'new' is gone should raise KeyError (drop's default errors='raise') - run the cells top to bottom
df.drop('new',axis=1,inplace=True)

In [61]:
#'new' is no longer in df

df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [63]:
#To drop a row, pass its index label with axis=0; inplace=True again modifies df directly

df.drop('E',axis=0, inplace=True)

In [64]:
#Row E has been removed from df
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [65]:
#A DataFrame is a layer on top of NumPy and follows the same (rows, columns) convention
#After dropping row E and column 'new', the shape is 4 rows by 4 columns

df.shape

(4, 4)

In [66]:
#Rows are selected differently from columns: use the loc / iloc indexers (square brackets, not call parentheses)

#loc selects a row by its index label; the row comes back as a Series

df.loc['A']

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [67]:
#iloc selects a row by its integer position instead of its label
#Positions start at zero, so row C is at position 2

df.iloc[2]

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [69]:
#loc[row_label, column_label] selects by row and column at once
#A single row label (B) with a single column label (Y) returns one scalar value

df.loc['B','Y']

-0.8480769834036315

In [70]:
#Display df again for reference before selecting several rows and columns
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [71]:
#Pass a list of row labels and a list of column labels to loc to get the matching sub-DataFrame

df.loc[['A','C'],['W','Y']]


Unnamed: 0,W,Y
A,2.70685,0.907969
C,-2.018168,0.528813
