In [1]:
import pandas as pd

# Source data: one key per column, each holding that column's values
myDict = {
    "fname": ["Siddhant", "Manish", "Kanai", "Ram"],
    "lname": ["Shah", "Jha", "Murmu", "Jana"],
    "age": [32, 32, 30, 33],
    "score": [9.1, 8.7, 8.9, 9.6],
    "email": ["sid@panda.com", "man@panda.com", "kan@panda.com", "ram@panda.com"],
}

# Build the dataframe: dictionary keys become the column names
df = pd.DataFrame(myDict)
df

Unnamed: 0,fname,lname,age,score,email
0,Siddhant,Shah,32,9.1,sid@panda.com
1,Manish,Jha,32,8.7,man@panda.com
2,Kanai,Murmu,30,8.9,kan@panda.com
3,Ram,Jana,33,9.6,ram@panda.com


In [2]:
# I hope you remember the analogy from our last blog, where we compared a DataFrame to a more powerful
# version of a dictionary. Just as we use a key to access its value in a dictionary, in a DataFrame we use
# the column name (or index) to access all elements associated with that column.

# Dot Notation
# This is a quick shorthand for accessing a column's elements, but it only works if the column name is unique
# and does not clash with the name of any DataFrame attribute or method. It also cannot be used when the
# column name consists of more than one word separated by anything other than an underscore. This notation
# also limits us to fetching one column at a time, as it always returns a Series.
df.fname

0    Siddhant
1      Manish
2       Kanai
3         Ram
Name: fname, dtype: object

In [3]:
# Bracket Notation: a more general and convenient way of accessing the elements of one or more
# columns in a dataframe. With a single column name it returns a Series.
df['fname']

0    Siddhant
1      Manish
2       Kanai
3         Ram
Name: fname, dtype: object

In [4]:
# To fetch the elements of more than one column, pass a list of the column names we need.
df[['fname', 'lname', 'score']]

Unnamed: 0,fname,lname,score
0,Siddhant,Shah,9.1
1,Manish,Jha,8.7
2,Kanai,Murmu,8.9
3,Ram,Jana,9.6


In [5]:
# Accessing Rows: to access the rows of a dataframe we use two indexers, loc[] and iloc[].
# iloc[]: fetches one or more rows by the integer position of the row. Even if the rows
# have a labeled index, iloc[] still takes integer positions only.

# When we fetch just a single row, we get a Series whose index holds the column names and
# whose values are the elements of that particular row.
df.iloc[0]

fname         Siddhant
lname             Shah
age                 32
score              9.1
email    sid@panda.com
Name: 0, dtype: object

In [6]:
# We can fetch multiple rows with iloc[] by passing a list of the integer positions of the rows we want.
# This returns a new dataframe.
df.iloc[[0, 1, 3]]

Unnamed: 0,fname,lname,age,score,email
0,Siddhant,Shah,32,9.1,sid@panda.com
1,Manish,Jha,32,8.7,man@panda.com
3,Ram,Jana,33,9.6,ram@panda.com


In [7]:
# We can also use a slice to select multiple rows (the end position is not included).
df.iloc[0:3]

Unnamed: 0,fname,lname,age,score,email
0,Siddhant,Shah,32,9.1,sid@panda.com
1,Manish,Jha,32,8.7,man@panda.com
2,Kanai,Murmu,30,8.9,kan@panda.com


In [8]:
# We can also give the slice a step: here, every 2nd row from position 0 up to (but not including) 4.
df.iloc[0:4:2]

Unnamed: 0,fname,lname,age,score,email
0,Siddhant,Shah,32,9.1,sid@panda.com
2,Kanai,Murmu,30,8.9,kan@panda.com


In [9]:
# iloc[] takes the column position as its second argument.

# If we pass a single row and a single column, we get the single element at that row and column.
# Here we display the type of that element.
type(df.iloc[0,0])

str

In [10]:
# Just like with rows, we can pass a list of the integer positions of the columns we want.
# This returns a new dataframe.
df.iloc[[0,2], [1, 2, 3]]

Unnamed: 0,lname,age,score
0,Shah,32,9.1
2,Murmu,30,8.9


In [11]:
# Just like with rows, we can also use slicing to select the columns we want by integer position
# (the end position is not included). This returns a new dataframe.
df.iloc[0:2, 1:3]

Unnamed: 0,lname,age
0,Shah,32
1,Jha,32


In [12]:
# loc[]: works like iloc[] with one difference: loc[] selects by index label instead of integer position.
# This dataframe has no custom row labels, so the default integer index (0, 1, 2, ...) serves as the row labels.
df.loc[0, 'fname']

'Siddhant'

In [13]:
# It is important to note that, unlike iloc[], a slice passed to loc[] includes its end label.
# When lists are passed, as here, exactly the listed row and column labels are returned,
# in the order given.
df.loc[[0,1,2,3], ['fname', 'lname', 'score', 'age']]

Unnamed: 0,fname,lname,score,age
0,Siddhant,Shah,9.1,32
1,Manish,Jha,8.7,32
2,Kanai,Murmu,8.9,30
3,Ram,Jana,9.6,33


In [14]:
# Label-based column slice: ':' selects every row, and both endpoints of the column slice
# ('fname' and 'score') are included in the result.
df.loc[:, 'fname':'score']

Unnamed: 0,fname,lname,age,score
0,Siddhant,Shah,32,9.1
1,Manish,Jha,32,8.7
2,Kanai,Murmu,30,8.9
3,Ram,Jana,33,9.6
