# Pandas DataFrames

In [None]:
#DataFrames are built on top of Pandas Series

In [1]:
import numpy as np

In [2]:
import pandas as pd

In [None]:
#Seeding NumPy's random number generator makes the random numbers generated below reproducible on every run

In [15]:
#NOTE(review): the execution counts show this cell ran as In [15] while the frame below was built at In [19];
#any random draws made in between would shift the values, so re-run this cell right before creating df
np.random.seed(101)

In [13]:
from numpy.random import randn

In [19]:
#Build a 5x4 frame of standard-normal draws: rows are labelled A-E, columns W-Z
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [20]:
df

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [None]:
#Thus DataFrame is a bunch of Series that share common indexes

In [None]:
#In the DataFrame above, each column is a Series, and each row (selected by its index label) is also a Series

In [None]:
#Let us fetch a single column W

In [22]:
df['W'] #Bracket indexing with a single column label returns that column as a Series

A    0.302665
B   -0.134841
C    0.807706
D   -0.497104
E   -0.116773
Name: W, dtype: float64

In [None]:
#Let us fetch the columns W and Y
#This returns another DataFrame

In [23]:
#Note the double brackets: the inner list holds the column labels to select
df[['W','Y']]

Unnamed: 0,W,Y
A,0.302665,-1.706086
B,-0.134841,0.166905
C,0.807706,0.638787
D,-0.497104,-0.943406
E,-0.116773,0.238127


In [None]:
#Let us add a new column to df

In [40]:
#Assigning to a label that does not exist yet creates the column; here it holds the element-wise sum of W and Y
df['new']=df['W']+df['Y']

In [41]:
df

Unnamed: 0,W,X,Y,Z,new
A,0.302665,1.693723,-1.706086,-1.159119,-1.40342
B,-0.134841,0.390528,0.166905,0.184502,0.032064
C,0.807706,0.07296,0.638787,0.329646,1.446493
D,-0.497104,-0.75407,-0.943406,0.484752,-1.44051
E,-0.116773,1.901755,0.238127,1.996652,0.121354


In [26]:
#To drop this column

In [36]:
#drop() defaults to axis=0, i.e. it looks for 'new' among the index (row) labels.
#'new' is a column label, so the call raises KeyError; to drop a column, axis=1 must be specified.
#The error is caught and printed so that the notebook still runs top to bottom (Restart & Run All).
try:
    df.drop('new')
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "['new'] not found in axis"

In [42]:
#axis=1 makes drop() look for 'new' among the column labels; the result is a new DataFrame and df itself is not modified
df.drop('new',axis=1)

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [None]:
#Although we called drop on 'new', displaying df shows that the column still exists
#This is because the inplace argument defaults to False, so drop returned a new DataFrame and left df unchanged

In [43]:
df

Unnamed: 0,W,X,Y,Z,new
A,0.302665,1.693723,-1.706086,-1.159119,-1.40342
B,-0.134841,0.390528,0.166905,0.184502,0.032064
C,0.807706,0.07296,0.638787,0.329646,1.446493
D,-0.497104,-0.75407,-0.943406,0.484752,-1.44051
E,-0.116773,1.901755,0.238127,1.996652,0.121354


In [44]:
#Pass inplace=True so that drop() modifies df itself instead of returning a new DataFrame
#Note: this cell is not idempotent - running it a second time raises KeyError because 'new' is already gone
df.drop('new',axis=1,inplace=True)

In [None]:
#Now we can see that the 'new' column has been dropped

In [45]:
df

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [48]:
#To drop the row 'E'
df.drop('E',inplace=True)
#Note that we haven't specified axis in the statement above: rows are axis=0, which is the default

In [49]:
df

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752


In [None]:
#Let us now fetch rows. We can do this in 2 ways, using loc and iloc

In [50]:
#loc selects by label: a single row label returns that row as a Series indexed by the column names
df.loc['A']

W    0.302665
X    1.693723
Y   -1.706086
Z   -1.159119
Name: A, dtype: float64

In [51]:
#A list of row labels returns a DataFrame containing just those rows
df.loc[['A','C']]

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
C,0.807706,0.07296,0.638787,0.329646


In [None]:
#With iloc we pass the integer position of the row (starting at 0) instead of its label

In [52]:
#Position 0 is the first row ('A'), returned as a Series
df.iloc[0]

W    0.302665
X    1.693723
Y   -1.706086
Z   -1.159119
Name: A, dtype: float64

In [53]:
#A list of positions returns a DataFrame; positions 0 and 2 are rows 'A' and 'C'
df.iloc[[0,2]]

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
C,0.807706,0.07296,0.638787,0.329646


In [54]:
df

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752


In [None]:
#Let us say, we want to grab B,C rows and W,Y columns

In [55]:
#loc takes the row labels first and the column labels second; passing a list for each returns a sub-DataFrame
df.loc[['B','C'],['W','Y']]

Unnamed: 0,W,Y
B,-0.134841,0.166905
C,0.807706,0.638787
