## Traditional Way in Python without Pandas

In [40]:
# A single record: one person described by scalar field values.
person = dict(
    first="Ashish",
    last="Rai",
    email="ashishrai@email.com",
)

In [41]:
# The same record in column-oriented form: every key maps to a list of values.
people = dict(
    first=["Ashish"],
    last=["Rai"],
    email=["ashishrai@email.com"],
)

In [42]:
# Column-oriented data for three people: position i of every list belongs to
# the same person.
people = dict(
    first=["Ashish", "Anjali", "Sudha"],
    last=["Rai", "Rai", "Sharma"],
    email=["ashishrai@email.com", "anjalirai@email.com", "sudhasharma@email.com"],
)

In [43]:
people  # displays the whole dictionary

{'first': ['Ashish', 'Anjali', 'Sudha'],
 'last': ['Rai', 'Rai', 'Sharma'],
 'email': ['ashishrai@email.com',
  'anjalirai@email.com',
  'sudhasharma@email.com']}

In [44]:
people['email']  # looks up the 'email' key: the list holding every email address

['ashishrai@email.com', 'anjalirai@email.com', 'sudhasharma@email.com']

## Similar thing using Pandas

In [45]:
import pandas as pd

### Creating DataFrame

In [46]:
# Build a DataFrame from the dict: each key becomes a column label and each
# list supplies that column's values.
df = pd.DataFrame(people)

In [47]:
df  # displays the whole DataFrame

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


### Parsing Through a DataFrame

In [48]:
df['email']  # selects the 'email' column by its label (bracket notation)

0      ashishrai@email.com
1      anjalirai@email.com
2    sudhasharma@email.com
Name: email, dtype: object

In [49]:
df.email  # attribute-style access to the same column; bracket notation is preferred because attribute access fails for column names that are not valid identifiers or that clash with DataFrame attributes/methods

0      ashishrai@email.com
1      anjalirai@email.com
2    sudhasharma@email.com
Name: email, dtype: object

In [50]:
type(df)  # the type of the whole table: DataFrame

pandas.core.frame.DataFrame

In [51]:
type(df['email'])  # the type of a single column of the DataFrame: Series

# A Series can be thought of as a single column of a DataFrame

pandas.core.series.Series

In [52]:
df[['last', 'first']]  # selects the columns named in the list, in the order given (note the double square brackets)

# The result is no longer a Series but a DataFrame, since more than one column is returned

Unnamed: 0,last,first
0,Rai,Ashish
1,Rai,Anjali
2,Sharma,Sudha


In [53]:
df.columns  # the column labels as an Index; dtype='object' is the dtype of the labels themselves, not of the columns' data

Index(['first', 'last', 'email'], dtype='object')

### The iloc attribute

In [54]:
df.iloc[0]  # row at integer position 0 (the first row), with all of its columns

first                 Ashish
last                     Rai
email    ashishrai@email.com
Name: 0, dtype: object

In [55]:
df.iloc[[0, 1]]  # rows at the integer positions given in the list, all columns (note the double square brackets)

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com


In [56]:
df.iloc[[0, 1], 1]  # rows at positions 0 and 1 of the column at position 1 ('last')

0    Rai
1    Rai
Name: last, dtype: object

In [57]:
df.iloc[[0, 1], [1, 2]]  # rows at positions 0 and 1, columns at positions 1 and 2 ('last' and 'email')

Unnamed: 0,last,email
0,Rai,ashishrai@email.com
1,Rai,anjalirai@email.com


### The loc attribute 

In [58]:
df  # the full DataFrame again, as a reference for the .loc examples

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


In [59]:
df.loc[0]  # row whose index label is 0 (with the default index, labels coincide with positions)

first                 Ashish
last                     Rai
email    ashishrai@email.com
Name: 0, dtype: object

In [60]:
df.loc[[0, 1]]  # rows whose index labels are 0 and 1, all columns

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com


In [61]:
df.loc[[0, 1], ['email', 'last']]  # rows labelled 0 and 1; columns selected by name, in the order listed

Unnamed: 0,email,last
0,ashishrai@email.com,Rai
1,anjalirai@email.com,Rai


In [62]:
df  # the DataFrame still uses the default integer index 0, 1, 2


Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


### set_index method 

In [63]:
df.set_index('email')  # returns a new DataFrame that uses the 'email' column as its index; df itself is not modified

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
ashishrai@email.com,Ashish,Rai
anjalirai@email.com,Anjali,Rai
sudhasharma@email.com,Sudha,Sharma


In [64]:
df  # unchanged: still has the default integer index and 'email' as a regular column

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


We can see that the set_index method didn't change the index of the original DataFrame itself; it only returned a new DataFrame. To change the original, we can write df.set_index('email', inplace=True)

In [65]:
# inplace=True modifies df itself instead of returning a new DataFrame.
# NOTE: this cell is not repeatable. Once it has run, 'email' is the index and
# no longer a column, so running the cell a second time fails.
df.set_index('email', inplace=True)
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
ashishrai@email.com,Ashish,Rai
anjalirai@email.com,Anjali,Rai
sudhasharma@email.com,Sudha,Sharma


In [66]:
df.loc['anjalirai@email.com', 'first']  # the 'email' values are now the row labels, so .loc looks rows up by email

'Anjali'

Now we cannot use the previous index values 0, 1, ... in loc, because they have been replaced by the 'email' values; they can still be used as positions in iloc

In [67]:
df.iloc[1, 0]  # integer positions still work: row at position 1, column at position 0 ('first')

'Anjali'

If we want to restore the default integer index, we can use reset_index(), passing inplace=True so that the change actually takes place on the original DataFrame

In [68]:
# Restore the default integer index on df itself (inplace=True); 'email' moves
# back to being a regular column and now comes first.
# NOTE: like the inplace set_index, this mutates df, so the cell is meant to be
# run exactly once after the index was set to 'email'.
df.reset_index(inplace=True)
df

Unnamed: 0,email,first,last
0,ashishrai@email.com,Ashish,Rai
1,anjalirai@email.com,Anjali,Rai
2,sudhasharma@email.com,Sudha,Sharma


## Filtering 

In [69]:
df  # starting point for the filtering examples

Unnamed: 0,email,first,last
0,ashishrai@email.com,Ashish,Rai
1,anjalirai@email.com,Anjali,Rai
2,sudhasharma@email.com,Sudha,Sharma


In [71]:
df[df['last'] == 'Sharma']  # first way: write the boolean condition inline inside the brackets

Unnamed: 0,email,first,last
2,sudhasharma@email.com,Sudha,Sharma


In [75]:
filt = df['last'] == 'Sharma'  # second way: store the boolean condition in a variable 'filt'
df[filt]  # first way to apply 'filt': plain bracket indexing

Unnamed: 0,email,first,last
2,sudhasharma@email.com,Sudha,Sharma


In [76]:
df.loc[filt]  # second way to apply 'filt': through .loc, which gives the same result

Unnamed: 0,email,first,last
2,sudhasharma@email.com,Sudha,Sharma


In [78]:
filt  # 'filt' on its own is a boolean Series: True for the rows where 'last' equals 'Sharma'

0    False
1    False
2     True
Name: last, dtype: bool