# 🐼 Introduction To Pandas

In [1]:
# importing libraries
import pandas as pd

In [2]:
# checking that pandas has been loaded correctly
pd

<module 'pandas' from 'c:\\users\\anish\\appdata\\local\\programs\\python\\python37\\lib\\site-packages\\pandas\\__init__.py'>

## Making Pandas DataFrame from Python Dictionaries

In [3]:
# Column-oriented data: every key becomes a DataFrame column and
# every list holds that column's values, one entry per row.
people = dict(
    first_name=['Alan', 'Tim', 'Grace', 'John'],
    last_name=['Turing', 'Berners-Lee', 'Hopper', 'V. Neuman'],
    birth_place=['UK', 'UK', 'USA', 'Hungary'],
)

In [4]:
people['first_name']

['Alan', 'Tim', 'Grace', 'John']

In [5]:
people['birth_place']

['UK', 'UK', 'USA', 'Hungary']

In [6]:
data = pd.DataFrame(people)
data

Unnamed: 0,first_name,last_name,birth_place
0,Alan,Turing,UK
1,Tim,Berners-Lee,UK
2,Grace,Hopper,USA
3,John,V. Neuman,Hungary


In [15]:
data.iloc[:, [1, 2]]

Unnamed: 0,last_name,birth_place
0,Turing,UK
1,Berners-Lee,UK
2,Hopper,USA
3,V. Neuman,Hungary


In [8]:
data['last_name']

0         Turing
1    Berners-Lee
2         Hopper
3      V. Neuman
Name: last_name, dtype: object

In [9]:
data['first_name']

0     Alan
1      Tim
2    Grace
3     John
Name: first_name, dtype: object

In [10]:
data['birth_place']

0         UK
1         UK
2        USA
3    Hungary
Name: birth_place, dtype: object

In [11]:
# We can view the columns in a Data Frame
data.columns

Index(['first_name', 'last_name', 'birth_place'], dtype='object')

## Accessing Elements

We can access individual cells, rows and columns using the `iloc` property of the DataFrame object.

In [12]:
# Accessing individual cell
data.iloc[0, 0]

'Alan'

In [13]:
data.iloc[0, 1]

'Turing'

In [14]:
# Accessing an entire row
data.iloc[0, :]

first_name       Alan
last_name      Turing
birth_place        UK
Name: 0, dtype: object

In [15]:
# Accessing entire column 
data.iloc[:, 0]

0     Alan
1      Tim
2    Grace
3     John
Name: first_name, dtype: object

In [16]:
data.iloc[:, 2]

0         UK
1         UK
2        USA
3    Hungary
Name: birth_place, dtype: object

In [35]:
# Accessing Multiple Rows
data.iloc[[1, 0], ]

Unnamed: 0,first_name,last_name,birth_place
1,Tim,Berners-Lee,UK
0,Alan,Turing,UK


In [30]:
# Accessing Multiple Columns
data.iloc[:, [0]]

Unnamed: 0,first_name
0,Alan
1,Tim
2,Grace
3,John


In [36]:
data.iloc[:, [1, 2]]

Unnamed: 0,last_name,birth_place
0,Turing,UK
1,Berners-Lee,UK
2,Hopper,USA
3,V. Neuman,Hungary


In [37]:
# Accessing particular rows and columns
data.iloc[[2, 0], [2, 1]]

Unnamed: 0,birth_place,last_name
2,USA,Hopper
0,UK,Turing


## Using `loc` to access columns through names rather than numeric location

In [47]:
# We can also access columns through their unique names (identifiers)
data.loc[[2, 1], ['last_name', 'first_name']]

Unnamed: 0,last_name,first_name
2,Hopper,Grace
1,Berners-Lee,Tim


## Creating Custom Indices in DataFrames

In [48]:
people = {
    'first_name': ['Alan', 'Tim', 'Grace', 'John'],
    'last_name': ['Turing', 'Berners-Lee', 'Hopper', 'V. Neuman'],
    'birth_place': ['UK', 'UK', 'USA', 'Hungary'],
    'email': ['alan_turing@outlook.com', 'time.lee@cern.ch', 'grace.hopper@gmail.com', 'john_n@gmail.com']
}

In [49]:
data = pd.DataFrame(people)
data

Unnamed: 0,first_name,last_name,birth_place,email
0,Alan,Turing,UK,alan_turing@outlook.com
1,Tim,Berners-Lee,UK,time.lee@cern.ch
2,Grace,Hopper,USA,grace.hopper@gmail.com
3,John,V. Neuman,Hungary,john_n@gmail.com


In [50]:
# accessing the email column 
data['email']

0    alan_turing@outlook.com
1           time.lee@cern.ch
2     grace.hopper@gmail.com
3           john_n@gmail.com
Name: email, dtype: object

In [51]:
# accessing rows using the row index number
data.iloc[0, ]

first_name                        Alan
last_name                       Turing
birth_place                         UK
email          alan_turing@outlook.com
Name: 0, dtype: object

In [54]:
# setting the key as email
data.set_index('email', inplace=True)

In [55]:
data

Unnamed: 0_level_0,first_name,last_name,birth_place
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
alan_turing@outlook.com,Alan,Turing,UK
time.lee@cern.ch,Tim,Berners-Lee,UK
grace.hopper@gmail.com,Grace,Hopper,USA
john_n@gmail.com,John,V. Neuman,Hungary


In [56]:
# accessing the index
data.index

Index(['alan_turing@outlook.com', 'time.lee@cern.ch', 'grace.hopper@gmail.com',
       'john_n@gmail.com'],
      dtype='object', name='email')

In [57]:
data.loc['alan_turing@outlook.com']

first_name       Alan
last_name      Turing
birth_place        UK
Name: alan_turing@outlook.com, dtype: object

In [58]:
data.iloc[0,]

first_name       Alan
last_name      Turing
birth_place        UK
Name: alan_turing@outlook.com, dtype: object

In [67]:
# accessing particular cell using loc 
data.loc['alan_turing@outlook.com', 'first_name']

'Alan'

In [69]:
# If we wish to revert back to the original indexes (0 integer based)
data.reset_index(inplace=True)

In [76]:
data.index = pd.RangeIndex(start=10, stop=18, step=2)
data.index

RangeIndex(start=10, stop=18, step=2)

In [77]:
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [78]:
data.iloc[0, :]

email          alan_turing@outlook.com
first_name                        Alan
last_name                       Turing
birth_place                         UK
Name: 10, dtype: object

In [79]:
data.loc[10, :]

email          alan_turing@outlook.com
first_name                        Alan
last_name                       Turing
birth_place                         UK
Name: 10, dtype: object

In [81]:
data['last_name'] == 'Hopper'

10    False
12    False
14     True
16    False
Name: last_name, dtype: bool

In [82]:
data['first_name'] == 'Grace'

10    False
12    False
14     True
16    False
Name: first_name, dtype: bool

In [83]:
# filtering DataFrame
mask = (data['last_name'] == 'Turing') & (data['first_name'] == 'Alan')
mask

10     True
12    False
14    False
16    False
dtype: bool

In [85]:
data[mask]

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK


In [86]:
# .loc accepts the same boolean mask and additionally lets us pick specific columns
data.loc[mask, ['last_name', 'first_name']]

Unnamed: 0,last_name,first_name
10,Turing,Alan


In [87]:
# or operator
mask = (data['first_name'] == 'John') | (data['last_name'] == 'Hopper')
mask

10    False
12    False
14     True
16     True
dtype: bool

In [88]:
data.loc[mask]

Unnamed: 0,email,first_name,last_name,birth_place
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [92]:
# negative of a mask 
mask = ~mask
mask

10     True
12     True
14    False
16    False
dtype: bool

In [93]:
data.loc[mask]

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK


## Updating Data

In [94]:
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [95]:
data.columns

Index(['email', 'first_name', 'last_name', 'birth_place'], dtype='object')

In [96]:
data.columns = ['email_id', 'first', 'last', 'bp']

In [97]:
data

Unnamed: 0,email_id,first,last,bp
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [98]:
data.columns = ['email_id', 'first_name', 'last_name', 'birth_place']

In [99]:
data

Unnamed: 0,email_id,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [100]:
# making all columns Upper Case
data.columns = [column.upper() for column in data.columns]

In [101]:
data

Unnamed: 0,EMAIL_ID,FIRST_NAME,LAST_NAME,BIRTH_PLACE
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [102]:
# replacing underscores with spaces
data.columns = data.columns.str.replace('_', ' ')
data

Unnamed: 0,EMAIL ID,FIRST NAME,LAST NAME,BIRTH PLACE
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [103]:
# replacing all space characters with underscore
data.columns = data.columns.str.replace(' ', '_')
data

Unnamed: 0,EMAIL_ID,FIRST_NAME,LAST_NAME,BIRTH_PLACE
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [104]:
# replacing upper case with lower case
data.columns = [column.lower() for column in data.columns]
data

Unnamed: 0,email_id,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [108]:
data.email_id

10    alan_turing@outlook.com
12           time.lee@cern.ch
14     grace.hopper@gmail.com
16           john_n@gmail.com
Name: email_id, dtype: object

In [109]:
# We can now access columns directly as attributes, using their names
data.first_name

10     Alan
12      Tim
14    Grace
16     John
Name: first_name, dtype: object

In [110]:
data.email_id

10    alan_turing@outlook.com
12           time.lee@cern.ch
14     grace.hopper@gmail.com
16           john_n@gmail.com
Name: email_id, dtype: object

In [111]:
# Renaming a Few Columns
data.rename(columns={'email_id': 'email'}, inplace=True)
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


## Updating Data in Rows

In [112]:
# changing an entire row
data.iloc[1] = ['email', 'john', 'doe', 'bermuda triangle']
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,email,john,doe,bermuda triangle
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [113]:
# changing only a few specific columns
data.loc[12, ['first_name', 'email']] = ['Tim', 'tim@cern.ch']
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,tim@cern.ch,Tim,doe,bermuda triangle
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [114]:
# changing a single cell
data.loc[12, ['last_name']] = 'apple'
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,tim@cern.ch,Tim,apple,bermuda triangle
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [115]:
# using at to look up single cell values
data.at[12, 'last_name'] = 'Berners-Lee'
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,tim@cern.ch,Tim,Berners-Lee,bermuda triangle
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [116]:
# seeking specific value using birth place
uk_filter = data['birth_place'] == 'UK'
uk_filter

10     True
12    False
14    False
16    False
Name: birth_place, dtype: bool

In [117]:
data.loc[uk_filter]

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK


In [118]:
data[uk_filter]

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK


In [119]:
# This will not work: data[uk_filter] produces a separate object, so the
# chained assignment below writes 'Smith' into that temporary result
# (pandas warns about it) and `data` itself is left unchanged.
data[uk_filter]['last_name'] = 'Smith'
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,tim@cern.ch,Tim,Berners-Lee,bermuda triangle
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [120]:
# to actually make a change we need to use .loc
data.loc[uk_filter, 'last_name'] = 'Smith'
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Smith,UK
12,tim@cern.ch,Tim,Berners-Lee,bermuda triangle
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


## String Operations

In [121]:
data['email'] = data['email'].str.upper()
data

Unnamed: 0,email,first_name,last_name,birth_place
10,ALAN_TURING@OUTLOOK.COM,Alan,Smith,UK
12,TIM@CERN.CH,Tim,Berners-Lee,bermuda triangle
14,GRACE.HOPPER@GMAIL.COM,Grace,Hopper,USA
16,JOHN_N@GMAIL.COM,John,V. Neuman,Hungary


In [124]:
# converting to lower case
data['email'] = data['email'].str.lower()
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Smith,UK
12,tim@cern.ch,Tim,Berners-Lee,bermuda triangle
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


### Apply Method

In [68]:
# applying a function to every item in a series
data['email'].apply(len)

10    23
12    11
14    22
16    16
Name: email, dtype: int64

In [125]:
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Smith,UK
12,tim@cern.ch,Tim,Berners-Lee,bermuda triangle
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [126]:
def update_email(email: str) -> str:
    """Return the given e-mail address converted to upper case.

    Args:
        email: The e-mail address to convert.

    Returns:
        A new string with every character of ``email`` upper-cased.
    """
    upper_cased = email.upper()
    return upper_cased

In [127]:
# using external function with apply
data['email'].apply(update_email)

10    ALAN_TURING@OUTLOOK.COM
12                TIM@CERN.CH
14     GRACE.HOPPER@GMAIL.COM
16           JOHN_N@GMAIL.COM
Name: email, dtype: object

In [128]:
# apply() returns a new Series; assign it back to the column to persist the change
data['email'] = data['email'].apply(update_email)
data

Unnamed: 0,email,first_name,last_name,birth_place
10,ALAN_TURING@OUTLOOK.COM,Alan,Smith,UK
12,TIM@CERN.CH,Tim,Berners-Lee,bermuda triangle
14,GRACE.HOPPER@GMAIL.COM,Grace,Hopper,USA
16,JOHN_N@GMAIL.COM,John,V. Neuman,Hungary


In [130]:
# Using a Lambda function for converting back to lowercase
data['email'] = data['email'].apply(lambda x: x.lower())
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Smith,UK
12,tim@cern.ch,Tim,Berners-Lee,bermuda triangle
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [131]:
# calling apply() on the whole DataFrame runs the function once per column (each column is a Series)
data.apply(len)

email          4
first_name     4
last_name      4
birth_place    4
dtype: int64

In [132]:
len(data['first_name'])

4

In [134]:
data.apply(len, axis='columns')

10    4
12    4
14    4
16    4
dtype: int64

In [135]:
# selecting the minimum in every column (Series)
# Strings are compared lexicographically
data.apply(pd.Series.min)

email          alan_turing@outlook.com
first_name                        Alan
last_name                  Berners-Lee
birth_place                    Hungary
dtype: object

In [136]:
# using Lambda
data.apply(lambda x: x.min())

email          alan_turing@outlook.com
first_name                        Alan
last_name                  Berners-Lee
birth_place                    Hungary
dtype: object

In [137]:
sorted(data.birth_place)

['Hungary', 'UK', 'USA', 'bermuda triangle']

### Apply Map
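`applymap` applies a function to every individual element of a DataFrame. It only works with DataFrame objects and not Series objects. (Note: since pandas 2.1 `DataFrame.applymap` is deprecated in favour of `DataFrame.map`.)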

In [138]:
data.applymap(len)

Unnamed: 0,email,first_name,last_name,birth_place
10,23,4,5,2
12,11,3,11,16
14,22,5,6,3
16,16,4,9,7


In [139]:
# converting everything to lowercase
data.applymap(str.lower)

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,alan,smith,uk
12,tim@cern.ch,tim,berners-lee,bermuda triangle
14,grace.hopper@gmail.com,grace,hopper,usa
16,john_n@gmail.com,john,v. neuman,hungary


### Map method

In [140]:
data['first_name']

10     Alan
12      Tim
14    Grace
16     John
Name: first_name, dtype: object

In [141]:
data['first_name'].map({
    'Alan': 'Chris',
    'John': 'Jane'
})

10    Chris
12      NaN
14      NaN
16     Jane
Name: first_name, dtype: object

In [142]:
# replace leaves values without a mapping unchanged, whereas map turns them into NaN
data['first_name'].replace({
    'Alan': 'Chris',
    'John': 'Jane'
})

10    Chris
12      Tim
14    Grace
16     Jane
Name: first_name, dtype: object

In [143]:
# replace returns a new Series; assign it back to the column to update the DataFrame
data['first_name'] = data['first_name'].replace({
    'Alan': 'Chris',
    'John': 'Jane'
})
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Chris,Smith,UK
12,tim@cern.ch,Tim,Berners-Lee,bermuda triangle
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,Jane,V. Neuman,Hungary


## Adding and Removing Columns From The Table

In [144]:
data['first_name'] + ' ' + data['last_name']

10        Chris Smith
12    Tim Berners-Lee
14       Grace Hopper
16     Jane V. Neuman
dtype: object

In [145]:
# creating a full_name column
data['full_name'] = data['first_name'] + ' ' + data['last_name']
data

Unnamed: 0,email,first_name,last_name,birth_place,full_name
10,alan_turing@outlook.com,Chris,Smith,UK,Chris Smith
12,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee
14,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
16,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman


In [146]:
# Removing columns from the dataframe
data.drop(columns=['first_name', 'last_name'])

Unnamed: 0,email,birth_place,full_name
10,alan_turing@outlook.com,UK,Chris Smith
12,tim@cern.ch,bermuda triangle,Tim Berners-Lee
14,grace.hopper@gmail.com,USA,Grace Hopper
16,john_n@gmail.com,Hungary,Jane V. Neuman


In [147]:
data

Unnamed: 0,email,first_name,last_name,birth_place,full_name
10,alan_turing@outlook.com,Chris,Smith,UK,Chris Smith
12,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee
14,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
16,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman


In [149]:
data['full_name'].str.split(expand=True)

Unnamed: 0,0,1,2
10,Chris,Smith,
12,Tim,Berners-Lee,
14,Grace,Hopper,
16,Jane,V.,Neuman


## Adding / Removing Rows of Data

In [150]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat with a one-row DataFrame adds the same row; columns missing
# from the new row are filled with NaN. The result is a new DataFrame,
# `data` itself is not modified.
pd.concat([data, pd.DataFrame([{'first_name': 'Tony'}])], ignore_index=True)

Unnamed: 0,email,first_name,last_name,birth_place,full_name
0,alan_turing@outlook.com,Chris,Smith,UK,Chris Smith
1,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee
2,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
3,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman
4,,Tony,,,


## Recreating Table

In [151]:
people = {
    'first_name': ['Tony', 'Steve'],
    'last_name': ['Stark', 'Rogers'],
    'email': ['tony@avengers.com', 'steve@avengers.com']
}

In [152]:
df = pd.DataFrame(people)
df

Unnamed: 0,first_name,last_name,email
0,Tony,Stark,tony@avengers.com
1,Steve,Rogers,steve@avengers.com


In [153]:
# Stack the rows of `df` below `data` (DataFrame.append was removed in
# pandas 2.0, pd.concat is the replacement). ignore_index=True renumbers
# the rows 0..n-1; columns absent from `df` are filled with NaN.
# This returns a new DataFrame and leaves `data` untouched.
pd.concat([data, df], ignore_index=True)

Unnamed: 0,email,first_name,last_name,birth_place,full_name
0,alan_turing@outlook.com,Chris,Smith,UK,Chris Smith
1,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee
2,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
3,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman
4,tony@avengers.com,Tony,Stark,,
5,steve@avengers.com,Steve,Rogers,,


In [154]:
data

Unnamed: 0,email,first_name,last_name,birth_place,full_name
10,alan_turing@outlook.com,Chris,Smith,UK,Chris Smith
12,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee
14,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
16,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman


In [155]:
# Persist the combined table by re-assigning the result to `data`.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
data = pd.concat([data, df], ignore_index=True)
data

Unnamed: 0,email,first_name,last_name,birth_place,full_name
0,alan_turing@outlook.com,Chris,Smith,UK,Chris Smith
1,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee
2,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
3,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman
4,tony@avengers.com,Tony,Stark,,
5,steve@avengers.com,Steve,Rogers,,


In [156]:
# removing a row
data.index

RangeIndex(start=0, stop=6, step=1)

In [157]:
data.drop(index=5, inplace=True)
data

Unnamed: 0,email,first_name,last_name,birth_place,full_name
0,alan_turing@outlook.com,Chris,Smith,UK,Chris Smith
1,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee
2,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
3,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman
4,tony@avengers.com,Tony,Stark,,


In [158]:
# NOTE(review): the name `filter` shadows Python's built-in filter() for the
# rest of the session; a more specific name (e.g. `jane_filter`) would avoid
# that, but later cells refer to this name, so it is kept here.
# Boolean mask: True for the rows whose first_name is 'Jane'
filter = data['first_name'] == 'Jane'
filter

0    False
1    False
2    False
3     True
4    False
Name: first_name, dtype: bool

In [159]:
data.loc[filter].index

Int64Index([3], dtype='int64')

In [160]:
data.drop(index=data.loc[filter].index)

Unnamed: 0,email,first_name,last_name,birth_place,full_name
0,alan_turing@outlook.com,Chris,Smith,UK,Chris Smith
1,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee
2,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
4,tony@avengers.com,Tony,Stark,,


## Sorting Column in Pandas 

In [162]:
data.sort_values(by='last_name', ascending=False)

Unnamed: 0,email,first_name,last_name,birth_place,full_name
3,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman
4,tony@avengers.com,Tony,Stark,,
0,alan_turing@outlook.com,Chris,Smith,UK,Chris Smith
2,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
1,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee


In [163]:
data

Unnamed: 0,email,first_name,last_name,birth_place,full_name
0,alan_turing@outlook.com,Chris,Smith,UK,Chris Smith
1,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee
2,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
3,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman
4,tony@avengers.com,Tony,Stark,,


In [164]:
data.sort_values(by='last_name', inplace=True)
data

Unnamed: 0,email,first_name,last_name,birth_place,full_name
1,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee
2,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
0,alan_turing@outlook.com,Chris,Smith,UK,Chris Smith
4,tony@avengers.com,Tony,Stark,,
3,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman


In [167]:
data.loc[0, 'last_name'] = 'Stark'
data

Unnamed: 0,email,first_name,last_name,birth_place,full_name
1,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee
2,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
0,alan_turing@outlook.com,Chris,Stark,UK,Chris Smith
4,tony@avengers.com,Tony,Stark,,
3,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman


In [168]:
# sorting using multiple values
data.sort_values(by=['last_name', 'first_name'])

Unnamed: 0,email,first_name,last_name,birth_place,full_name
1,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee
2,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
0,alan_turing@outlook.com,Chris,Stark,UK,Chris Smith
4,tony@avengers.com,Tony,Stark,,
3,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman


In [169]:
data.sort_values(by=['last_name', 'first_name'], ascending=[False, True])

Unnamed: 0,email,first_name,last_name,birth_place,full_name
3,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman
0,alan_turing@outlook.com,Chris,Stark,UK,Chris Smith
4,tony@avengers.com,Tony,Stark,,
2,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
1,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee


In [170]:
# sorting by indexes
data.sort_index()

Unnamed: 0,email,first_name,last_name,birth_place,full_name
0,alan_turing@outlook.com,Chris,Stark,UK,Chris Smith
1,tim@cern.ch,Tim,Berners-Lee,bermuda triangle,Tim Berners-Lee
2,grace.hopper@gmail.com,Grace,Hopper,USA,Grace Hopper
3,john_n@gmail.com,Jane,V. Neuman,Hungary,Jane V. Neuman
4,tony@avengers.com,Tony,Stark,,


In [171]:
# we can even sort series
data['first_name'].sort_values()

0    Chris
2    Grace
3     Jane
1      Tim
4     Tony
Name: first_name, dtype: object