# 🐼 Introduction To Pandas

In [123]:
# importing libraries
import pandas as pd

In [124]:
# checking that pandas has been loaded correctly
pd

<module 'pandas' from 'c:\\users\\anish\\appdata\\local\\programs\\python\\python37\\lib\\site-packages\\pandas\\__init__.py'>

## Making Pandas DataFrame from Python Dictionaries

In [125]:
people = {
    'first_name': ['Alan', 'Tim', 'Grace', 'John'],
    'last_name': ['Turing', 'Berners-Lee', 'Hopper', 'V. Neuman'],
    'birth_place': ['UK', 'UK', 'USA', 'Hungary']
}

In [126]:
people['first_name']

['Alan', 'Tim', 'Grace', 'John']

In [127]:
people['birth_place']

['UK', 'UK', 'USA', 'Hungary']

In [128]:
data = pd.DataFrame(people)
data

Unnamed: 0,first_name,last_name,birth_place
0,Alan,Turing,UK
1,Tim,Berners-Lee,UK
2,Grace,Hopper,USA
3,John,V. Neuman,Hungary


In [129]:
data.iloc[:, [1, 2]]

Unnamed: 0,last_name,birth_place
0,Turing,UK
1,Berners-Lee,UK
2,Hopper,USA
3,V. Neuman,Hungary


In [130]:
data['last_name']

0         Turing
1    Berners-Lee
2         Hopper
3      V. Neuman
Name: last_name, dtype: object

In [131]:
data['first_name']

0     Alan
1      Tim
2    Grace
3     John
Name: first_name, dtype: object

In [132]:
data['birth_place']

0         UK
1         UK
2        USA
3    Hungary
Name: birth_place, dtype: object

In [133]:
# We can view the columns in a Data Frame
data.columns

Index(['first_name', 'last_name', 'birth_place'], dtype='object')

## Accessing Elements

We can access individual cells, rows and columns using the `iloc` property of the DataFrame object.

In [134]:
# Accessing individual cell
data.iloc[0, 0]

'Alan'

In [135]:
data.iloc[0, 1]

'Turing'

In [136]:
# Accessing entire rows
data.iloc[0, :]

first_name       Alan
last_name      Turing
birth_place        UK
Name: 0, dtype: object

In [137]:
# Accessing entire column 
data.iloc[:, 0]

0     Alan
1      Tim
2    Grace
3     John
Name: first_name, dtype: object

In [138]:
data.iloc[:, 2]

0         UK
1         UK
2        USA
3    Hungary
Name: birth_place, dtype: object

In [139]:
type(data)

pandas.core.frame.DataFrame

In [140]:
type(data['first_name'])

pandas.core.series.Series

In [141]:
# Accessing Multiple Rows
data.iloc[[1, 0], ]

Unnamed: 0,first_name,last_name,birth_place
1,Tim,Berners-Lee,UK
0,Alan,Turing,UK


In [142]:
# Accessing Multiple Columns
data.iloc[:, [0]]

Unnamed: 0,first_name
0,Alan
1,Tim
2,Grace
3,John


In [143]:
data.iloc[:, [1, 2]]

Unnamed: 0,last_name,birth_place
0,Turing,UK
1,Berners-Lee,UK
2,Hopper,USA
3,V. Neuman,Hungary


In [144]:
# Accessing particular rows and columns
data.iloc[[2, 0], [2, 1]]

Unnamed: 0,birth_place,last_name
2,USA,Hopper
0,UK,Turing


## Using `loc` to access columns through names rather than numeric location

In [145]:
# We can also access columns through their unique names (identifiers)
data.loc[[2, 1], ['last_name', 'first_name']]

Unnamed: 0,last_name,first_name
2,Hopper,Grace
1,Berners-Lee,Tim


## Creating Custom Indices in DataFrames

In [146]:
people = {
    'first_name': ['Alan', 'Tim', 'Grace', 'John'],
    'last_name': ['Turing', 'Berners-Lee', 'Hopper', 'V. Neuman'],
    'birth_place': ['UK', 'UK', 'USA', 'Hungary'],
    'email': ['alan_turing@outlook.com', 'time.lee@cern.ch', 'grace.hopper@gmail.com', 'john_n@gmail.com']
}

In [147]:
data = pd.DataFrame(people)
data

Unnamed: 0,first_name,last_name,birth_place,email
0,Alan,Turing,UK,alan_turing@outlook.com
1,Tim,Berners-Lee,UK,time.lee@cern.ch
2,Grace,Hopper,USA,grace.hopper@gmail.com
3,John,V. Neuman,Hungary,john_n@gmail.com


In [148]:
# accessing the email column 
data['email']

0    alan_turing@outlook.com
1           time.lee@cern.ch
2     grace.hopper@gmail.com
3           john_n@gmail.com
Name: email, dtype: object

In [149]:
# accessing rows using the row index number
data.iloc[0, ]

first_name                        Alan
last_name                       Turing
birth_place                         UK
email          alan_turing@outlook.com
Name: 0, dtype: object

In [150]:
# setting the email as key
data.set_index('email', inplace=True)

In [151]:
data

Unnamed: 0_level_0,first_name,last_name,birth_place
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
alan_turing@outlook.com,Alan,Turing,UK
time.lee@cern.ch,Tim,Berners-Lee,UK
grace.hopper@gmail.com,Grace,Hopper,USA
john_n@gmail.com,John,V. Neuman,Hungary


In [152]:
# accessing the index
data.index

Index(['alan_turing@outlook.com', 'time.lee@cern.ch', 'grace.hopper@gmail.com',
       'john_n@gmail.com'],
      dtype='object', name='email')

In [153]:
data.loc['alan_turing@outlook.com']

first_name       Alan
last_name      Turing
birth_place        UK
Name: alan_turing@outlook.com, dtype: object

In [154]:
data.iloc[0, ]

first_name       Alan
last_name      Turing
birth_place        UK
Name: alan_turing@outlook.com, dtype: object

In [155]:
# accessing particular cell using loc 
data.loc['alan_turing@outlook.com', 'first_name']

'Alan'

In [156]:
# If we wish to revert back to the original indexes (0 integer based)
data.reset_index(inplace=True)

In [157]:
data

Unnamed: 0,email,first_name,last_name,birth_place
0,alan_turing@outlook.com,Alan,Turing,UK
1,time.lee@cern.ch,Tim,Berners-Lee,UK
2,grace.hopper@gmail.com,Grace,Hopper,USA
3,john_n@gmail.com,John,V. Neuman,Hungary


In [158]:
data.index

RangeIndex(start=0, stop=4, step=1)

In [159]:
data.index = pd.RangeIndex(start=10, stop=18, step=2)
data.index

RangeIndex(start=10, stop=18, step=2)

In [160]:
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [161]:
data.iloc[0, :]

email          alan_turing@outlook.com
first_name                        Alan
last_name                       Turing
birth_place                         UK
Name: 10, dtype: object

In [162]:
data.loc[[10], ]

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK


In [163]:
data['last_name'] == 'Hopper'

10    False
12    False
14     True
16    False
Name: last_name, dtype: bool

In [164]:
data['first_name'] == 'Grace'

10    False
12    False
14     True
16    False
Name: first_name, dtype: bool

In [165]:
# filtering DataFrame
mask = (data['last_name'] == 'Turing') & (data['first_name'] == 'Alan')
mask

10     True
12    False
14    False
16    False
dtype: bool

In [166]:
data[mask]

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK


In [167]:
# .loc accepts the same boolean mask and also lets us choose which columns to return
data.loc[mask, ['last_name', 'birth_place', 'first_name']]

Unnamed: 0,last_name,birth_place,first_name
10,Turing,UK,Alan


In [168]:
# or operator
mask = (data['first_name'] == 'John') | (data['last_name'] == 'Hopper')
mask

10    False
12    False
14     True
16     True
dtype: bool

In [169]:
data.loc[mask]

Unnamed: 0,email,first_name,last_name,birth_place
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [170]:
# negative of a mask 
mask = ~mask
mask

10     True
12     True
14    False
16    False
dtype: bool

In [171]:
data.loc[mask]

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK


## Updating Data

In [172]:
data

Unnamed: 0,email,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [173]:
data.columns

Index(['email', 'first_name', 'last_name', 'birth_place'], dtype='object')

In [174]:
data.columns = ['email_id', 'first', 'last', 'bp']

In [175]:
data

Unnamed: 0,email_id,first,last,bp
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [176]:
data.columns = ['email_id', 'first_name', 'last_name', 'birth_place']

In [177]:
data

Unnamed: 0,email_id,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [178]:
# making all columns Upper Case
data.columns = [column.upper() for column in data.columns]

In [179]:
data

Unnamed: 0,EMAIL_ID,FIRST_NAME,LAST_NAME,BIRTH_PLACE
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [180]:
# replacing underscores with spaces
data.columns = data.columns.str.replace('_', ' ')
data

Unnamed: 0,EMAIL ID,FIRST NAME,LAST NAME,BIRTH PLACE
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [181]:
# replacing all space characters with underscore
data.columns = data.columns.str.replace(' ', '_')
data

Unnamed: 0,EMAIL_ID,FIRST_NAME,LAST_NAME,BIRTH_PLACE
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


In [182]:
# replacing upper case with lower case
data.columns = [column.lower() for column in data.columns]
data

Unnamed: 0,email_id,first_name,last_name,birth_place
10,alan_turing@outlook.com,Alan,Turing,UK
12,time.lee@cern.ch,Tim,Berners-Lee,UK
14,grace.hopper@gmail.com,Grace,Hopper,USA
16,john_n@gmail.com,John,V. Neuman,Hungary


## Re-ordering Columns

In [183]:
columns = data.columns.tolist()
columns

['email_id', 'first_name', 'last_name', 'birth_place']

In [184]:
columns = columns[1:] + columns[0:1]
columns

['first_name', 'last_name', 'birth_place', 'email_id']

In [185]:
data = data[columns]
data

Unnamed: 0,first_name,last_name,birth_place,email_id
10,Alan,Turing,UK,alan_turing@outlook.com
12,Tim,Berners-Lee,UK,time.lee@cern.ch
14,Grace,Hopper,USA,grace.hopper@gmail.com
16,John,V. Neuman,Hungary,john_n@gmail.com


In [186]:
data.email_id

10    alan_turing@outlook.com
12           time.lee@cern.ch
14     grace.hopper@gmail.com
16           john_n@gmail.com
Name: email_id, dtype: object

In [187]:
# We can now access columns directly as attributes using their column names
data.first_name

10     Alan
12      Tim
14    Grace
16     John
Name: first_name, dtype: object

In [188]:
data.email_id

10    alan_turing@outlook.com
12           time.lee@cern.ch
14     grace.hopper@gmail.com
16           john_n@gmail.com
Name: email_id, dtype: object

In [189]:
# Renaming a Few Columns
data.rename(columns={'email_id': 'email'}, inplace=True)
data

Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Turing,UK,alan_turing@outlook.com
12,Tim,Berners-Lee,UK,time.lee@cern.ch
14,Grace,Hopper,USA,grace.hopper@gmail.com
16,John,V. Neuman,Hungary,john_n@gmail.com


## Updating Data in Rows

In [190]:
# changing an entire row
data.iloc[1] = ['john', 'doe', 'bermuda triangle', 'email']
data

Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Turing,UK,alan_turing@outlook.com
12,john,doe,bermuda triangle,email
14,Grace,Hopper,USA,grace.hopper@gmail.com
16,John,V. Neuman,Hungary,john_n@gmail.com


In [191]:
# changing only a few specific columns
data.loc[12, ['first_name', 'email']] = ['Tim', 'tim@cern.ch']
data

Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Turing,UK,alan_turing@outlook.com
12,Tim,doe,bermuda triangle,tim@cern.ch
14,Grace,Hopper,USA,grace.hopper@gmail.com
16,John,V. Neuman,Hungary,john_n@gmail.com


In [192]:
# changing a single cell
data.loc[12, ['last_name']] = 'apple'
data

Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Turing,UK,alan_turing@outlook.com
12,Tim,apple,bermuda triangle,tim@cern.ch
14,Grace,Hopper,USA,grace.hopper@gmail.com
16,John,V. Neuman,Hungary,john_n@gmail.com


In [193]:
# using at to set the value of a single cell
data.at[12, 'last_name'] = 'Berners-Lee'
data

Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Turing,UK,alan_turing@outlook.com
12,Tim,Berners-Lee,bermuda triangle,tim@cern.ch
14,Grace,Hopper,USA,grace.hopper@gmail.com
16,John,V. Neuman,Hungary,john_n@gmail.com


In [194]:
# seeking specific value using birth place
uk_filter = data['birth_place'] == 'UK'
uk_filter

10     True
12    False
14    False
16    False
Name: birth_place, dtype: bool

In [195]:
data.loc[uk_filter]

Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Turing,UK,alan_turing@outlook.com


In [196]:
data[uk_filter]

Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Turing,UK,alan_turing@outlook.com


In [197]:
# This will not work: data[uk_filter] is evaluated first and yields a temporary
# object, so the assignment below lands on that temporary (pandas emits
# SettingWithCopyWarning) and `data` itself is left unchanged.
data[uk_filter]['last_name'] = 'Smith'
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Turing,UK,alan_turing@outlook.com
12,Tim,Berners-Lee,bermuda triangle,tim@cern.ch
14,Grace,Hopper,USA,grace.hopper@gmail.com
16,John,V. Neuman,Hungary,john_n@gmail.com


In [198]:
# to actually make a change we need to use .loc
data.loc[uk_filter, 'last_name'] = 'Smith'
data

Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Smith,UK,alan_turing@outlook.com
12,Tim,Berners-Lee,bermuda triangle,tim@cern.ch
14,Grace,Hopper,USA,grace.hopper@gmail.com
16,John,V. Neuman,Hungary,john_n@gmail.com


## String Operations

In [199]:
data['email'] = data['email'].str.upper()
data

Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Smith,UK,ALAN_TURING@OUTLOOK.COM
12,Tim,Berners-Lee,bermuda triangle,TIM@CERN.CH
14,Grace,Hopper,USA,GRACE.HOPPER@GMAIL.COM
16,John,V. Neuman,Hungary,JOHN_N@GMAIL.COM


In [200]:
# converting to lower case
data['email'] = data['email'].str.lower()
data

Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Smith,UK,alan_turing@outlook.com
12,Tim,Berners-Lee,bermuda triangle,tim@cern.ch
14,Grace,Hopper,USA,grace.hopper@gmail.com
16,John,V. Neuman,Hungary,john_n@gmail.com


### Apply Method

In [201]:
# applying a function to every item in a series
data['email'].apply(len)

10    23
12    11
14    22
16    16
Name: email, dtype: int64

In [202]:
data

Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Smith,UK,alan_turing@outlook.com
12,Tim,Berners-Lee,bermuda triangle,tim@cern.ch
14,Grace,Hopper,USA,grace.hopper@gmail.com
16,John,V. Neuman,Hungary,john_n@gmail.com


In [203]:
def update_email(email: str) -> str:
    """Return the given e-mail address converted to upper case.

    Intended to be passed to ``Series.apply`` so it is called once per value.
    """
    upper_cased = email.upper()
    return upper_cased

In [204]:
# using external function with apply
data['email'].apply(update_email)

10    ALAN_TURING@OUTLOOK.COM
12                TIM@CERN.CH
14     GRACE.HOPPER@GMAIL.COM
16           JOHN_N@GMAIL.COM
Name: email, dtype: object

In [205]:
# to change the value of a series, we can set it
data['email'] = data['email'].apply(update_email)
data

Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Smith,UK,ALAN_TURING@OUTLOOK.COM
12,Tim,Berners-Lee,bermuda triangle,TIM@CERN.CH
14,Grace,Hopper,USA,GRACE.HOPPER@GMAIL.COM
16,John,V. Neuman,Hungary,JOHN_N@GMAIL.COM


In [206]:
# Using a Lambda function for converting back to lowercase
data['email'] = data['email'].apply(lambda z: z.lower())
data

Unnamed: 0,first_name,last_name,birth_place,email
10,Alan,Smith,UK,alan_turing@outlook.com
12,Tim,Berners-Lee,bermuda triangle,tim@cern.ch
14,Grace,Hopper,USA,grace.hopper@gmail.com
16,John,V. Neuman,Hungary,john_n@gmail.com


In [207]:
# applying apply() to the entire data frame
data.apply(len)

first_name     4
last_name      4
birth_place    4
email          4
dtype: int64

In [208]:
len(data['first_name'])

4

In [209]:
data.apply(len, axis='columns')

10    4
12    4
14    4
16    4
dtype: int64

In [210]:
# selecting the minimum in every column (Series)
# for string columns this is the lexicographical minimum
data.apply(pd.Series.min)

first_name                        Alan
last_name                  Berners-Lee
birth_place                    Hungary
email          alan_turing@outlook.com
dtype: object

In [211]:
# using Lambda
data.apply(lambda x: x.min())

first_name                        Alan
last_name                  Berners-Lee
birth_place                    Hungary
email          alan_turing@outlook.com
dtype: object

In [212]:
sorted(data.birth_place)

['Hungary', 'UK', 'USA', 'bermuda triangle']

### Apply Map
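`applymap` applies a function to every individual element of a DataFrame. It is only available on DataFrame objects, not on Series objects. (From pandas 2.1 onwards `DataFrame.applymap` is deprecated in favour of `DataFrame.map`.)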

In [213]:
data.applymap(len)

Unnamed: 0,first_name,last_name,birth_place,email
10,4,5,2,23
12,3,11,16,11
14,5,6,3,22
16,4,9,7,16


In [214]:
# converting everything to lowercase
data.applymap(str.lower)

Unnamed: 0,first_name,last_name,birth_place,email
10,alan,smith,uk,alan_turing@outlook.com
12,tim,berners-lee,bermuda triangle,tim@cern.ch
14,grace,hopper,usa,grace.hopper@gmail.com
16,john,v. neuman,hungary,john_n@gmail.com


### Map method

In [215]:
data['first_name']

10     Alan
12      Tim
14    Grace
16     John
Name: first_name, dtype: object

In [216]:
data['first_name'].map({
    'Alan': 'Chris',
    'John': 'Jane'
})

10    Chris
12      NaN
14      NaN
16     Jane
Name: first_name, dtype: object

In [217]:
# replace leaves values without a match unchanged, whereas map turns them into NaN
data['first_name'].replace({
    'Alan': 'Chris',
    'John': 'Jane'
})

10    Chris
12      Tim
14    Grace
16     Jane
Name: first_name, dtype: object

In [218]:
# to update the data frame we need to assign the resulting series back to the column
data['first_name'] = data['first_name'].replace({
    'Alan': 'Chris',
    'John': 'Jane'
})
data

Unnamed: 0,first_name,last_name,birth_place,email
10,Chris,Smith,UK,alan_turing@outlook.com
12,Tim,Berners-Lee,bermuda triangle,tim@cern.ch
14,Grace,Hopper,USA,grace.hopper@gmail.com
16,Jane,V. Neuman,Hungary,john_n@gmail.com


## Adding and Removing Columns From The Table

In [219]:
data['first_name'] + ' ' + data['last_name']

10        Chris Smith
12    Tim Berners-Lee
14       Grace Hopper
16     Jane V. Neuman
dtype: object

In [220]:
# creating a full_name column
data['full_name'] = data['first_name'] + ' ' + data['last_name']
data

Unnamed: 0,first_name,last_name,birth_place,email,full_name
10,Chris,Smith,UK,alan_turing@outlook.com,Chris Smith
12,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee
14,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
16,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman


In [221]:
# Removing columns from the dataframe
data.drop(columns=['first_name', 'last_name'])

Unnamed: 0,birth_place,email,full_name
10,UK,alan_turing@outlook.com,Chris Smith
12,bermuda triangle,tim@cern.ch,Tim Berners-Lee
14,USA,grace.hopper@gmail.com,Grace Hopper
16,Hungary,john_n@gmail.com,Jane V. Neuman


In [222]:
data

Unnamed: 0,first_name,last_name,birth_place,email,full_name
10,Chris,Smith,UK,alan_turing@outlook.com,Chris Smith
12,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee
14,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
16,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman


In [223]:
data['full_name'].str.split(expand=True)

Unnamed: 0,0,1,2
10,Chris,Smith,
12,Tim,Berners-Lee,
14,Grace,Hopper,
16,Jane,V.,Neuman


## Adding / Removing Rows of Data

In [224]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so wrap the
# new row in a one-row DataFrame and concatenate it instead. Columns missing
# from the new row are filled with NaN; ignore_index=True renumbers rows from 0.
# The result is only displayed here, `data` itself is not modified.
pd.concat([data, pd.DataFrame([{'first_name': 'Tony'}])], ignore_index=True)

Unnamed: 0,first_name,last_name,birth_place,email,full_name
0,Chris,Smith,UK,alan_turing@outlook.com,Chris Smith
1,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee
2,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
3,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman
4,Tony,,,,


## Recreating Table

In [243]:
people = {
    'first_name': ['Tony', 'Steve'],
    'last_name': ['Stark', 'Rogers'],
    'email': ['tony@avengers.com', 'steve@avengers.com']
}

In [244]:
df = pd.DataFrame(people)
df

Unnamed: 0,first_name,last_name,email
0,Tony,Stark,tony@avengers.com
1,Steve,Rogers,steve@avengers.com


In [246]:
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# row-wise, fills columns absent from `df` with NaN and, with ignore_index=True,
# renumbers the rows from 0. The result is only displayed, `data` is unchanged.
pd.concat([data, df], ignore_index=True)

Unnamed: 0,first_name,last_name,birth_place,email,full_name
0,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee
1,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
2,Chris,Stark,UK,alan_turing@outlook.com,Chris Smith
3,Tony,Stark,,tony@avengers.com,
4,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman
5,Tony,Stark,,tony@avengers.com,
6,Steve,Rogers,,steve@avengers.com,


In [247]:
data

Unnamed: 0,first_name,last_name,birth_place,email,full_name
1,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee
2,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
0,Chris,Stark,UK,alan_turing@outlook.com,Chris Smith
4,Tony,Stark,,tony@avengers.com,
3,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman


In [229]:
# DataFrame.append was removed in pandas 2.0; use pd.concat and assign the
# result back so that the new rows are actually kept in `data`.
data = pd.concat([data, df], ignore_index=True)
data

Unnamed: 0,first_name,last_name,birth_place,email,full_name
0,Chris,Smith,UK,alan_turing@outlook.com,Chris Smith
1,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee
2,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
3,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman
4,Tony,Stark,,tony@avengers.com,
5,Steve,Rogers,,steve@avengers.com,


In [230]:
# removing a row
data.index

RangeIndex(start=0, stop=6, step=1)

In [231]:
data.drop(index=5, inplace=True)
data

Unnamed: 0,first_name,last_name,birth_place,email,full_name
0,Chris,Smith,UK,alan_turing@outlook.com,Chris Smith
1,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee
2,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
3,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman
4,Tony,Stark,,tony@avengers.com,


In [232]:
# Boolean mask selecting the rows whose first_name is 'Jane'.
# NOTE(review): the name `filter` shadows Python's built-in filter() for the rest
# of the session; a name such as `jane_mask` would avoid that (the later cells
# that read `filter` would need the same rename).
filter = data['first_name'] == 'Jane'
filter

0    False
1    False
2    False
3     True
4    False
Name: first_name, dtype: bool

In [233]:
data.loc[filter].index

Int64Index([3], dtype='int64')

In [234]:
data.drop(index=data.loc[filter].index)

Unnamed: 0,first_name,last_name,birth_place,email,full_name
0,Chris,Smith,UK,alan_turing@outlook.com,Chris Smith
1,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee
2,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
4,Tony,Stark,,tony@avengers.com,


## Sorting Column in Pandas 

In [235]:
data.sort_values(by='last_name', ascending=False)

Unnamed: 0,first_name,last_name,birth_place,email,full_name
3,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman
4,Tony,Stark,,tony@avengers.com,
0,Chris,Smith,UK,alan_turing@outlook.com,Chris Smith
2,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
1,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee


In [236]:
data

Unnamed: 0,first_name,last_name,birth_place,email,full_name
0,Chris,Smith,UK,alan_turing@outlook.com,Chris Smith
1,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee
2,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
3,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman
4,Tony,Stark,,tony@avengers.com,


In [237]:
data.sort_values(by='last_name', inplace=True)
data

Unnamed: 0,first_name,last_name,birth_place,email,full_name
1,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee
2,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
0,Chris,Smith,UK,alan_turing@outlook.com,Chris Smith
4,Tony,Stark,,tony@avengers.com,
3,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman


In [238]:
data.loc[0, 'last_name'] = 'Stark'
data

Unnamed: 0,first_name,last_name,birth_place,email,full_name
1,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee
2,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
0,Chris,Stark,UK,alan_turing@outlook.com,Chris Smith
4,Tony,Stark,,tony@avengers.com,
3,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman


In [239]:
# sorting using multiple values
data.sort_values(by=['last_name', 'first_name'])

Unnamed: 0,first_name,last_name,birth_place,email,full_name
1,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee
2,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
0,Chris,Stark,UK,alan_turing@outlook.com,Chris Smith
4,Tony,Stark,,tony@avengers.com,
3,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman


In [240]:
data.sort_values(by=['last_name', 'first_name'], ascending=[False, True])

Unnamed: 0,first_name,last_name,birth_place,email,full_name
3,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman
0,Chris,Stark,UK,alan_turing@outlook.com,Chris Smith
4,Tony,Stark,,tony@avengers.com,
2,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
1,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee


In [241]:
# sorting by indexes
data.sort_index()

Unnamed: 0,first_name,last_name,birth_place,email,full_name
0,Chris,Stark,UK,alan_turing@outlook.com,Chris Smith
1,Tim,Berners-Lee,bermuda triangle,tim@cern.ch,Tim Berners-Lee
2,Grace,Hopper,USA,grace.hopper@gmail.com,Grace Hopper
3,Jane,V. Neuman,Hungary,john_n@gmail.com,Jane V. Neuman
4,Tony,Stark,,tony@avengers.com,


In [242]:
# we can even sort series
data['first_name'].sort_values()

0    Chris
2    Grace
3     Jane
1      Tim
4     Tony
Name: first_name, dtype: object