In [1]:
import pandas as pd
import numpy as np

# Read Dataset

In [2]:
df = pd.read_csv('Srt_dta.csv')
df

Unnamed: 0,Name,Breed,Color,Height(cm),Weight(kg),Date of Birth
0,Bella,Labrador,Brown,56,25,2013-07-01
1,Charlie,Poddle,Black,43,23,2016-09-16
2,Lucy,Chow Chow,Brown,46,22,2014-08-25
3,Copper,Schnauzer,Gray,49,17,2011-12-11
4,Max,Labrador,Black,59,29,2017-01-20
5,Stella,Chihuahua,Tan,18,2,2015-04-20
6,Bernle,St. Bernard,White,77,74,2018-02-27


# Subsetting columns
To select a single column, use square brackets [] with the column name of the column of interest.

In [3]:
df['Name']

0      Bella
1    Charlie
2       Lucy
3     Copper
4        Max
5     Stella
6     Bernle
Name: Name, dtype: object

# Subsetting multiple columns

In [5]:
# method 1
df[["Breed","Height(cm)"]]

Unnamed: 0,Breed,Height(cm)
0,Labrador,56
1,Poddle,43
2,Chow Chow,46
3,Schnauzer,49
4,Labrador,59
5,Chihuahua,18
6,St. Bernard,77


In [7]:
# method 2
cols_to_subset = ["Breed","Height(cm)"]
df[cols_to_subset]

Unnamed: 0,Breed,Height(cm)
0,Labrador,56
1,Poddle,43
2,Chow Chow,46
3,Schnauzer,49
4,Labrador,59
5,Chihuahua,18
6,St. Bernard,77


# Subsetting rows
This return boolean value.

In [8]:
df["Height(cm)"] > 50

0     True
1    False
2    False
3    False
4     True
5    False
6     True
Name: Height(cm), dtype: bool

In [9]:
# This return numeric value
df[df["Height(cm)"] > 50]

Unnamed: 0,Name,Breed,Color,Height(cm),Weight(kg),Date of Birth
0,Bella,Labrador,Brown,56,25,2013-07-01
4,Max,Labrador,Black,59,29,2017-01-20
6,Bernle,St. Bernard,White,77,74,2018-02-27


# Subsetting based on text data

In [10]:
df[df["Breed"] == 'Labrador']

Unnamed: 0,Name,Breed,Color,Height(cm),Weight(kg),Date of Birth
0,Bella,Labrador,Brown,56,25,2013-07-01
4,Max,Labrador,Black,59,29,2017-01-20


# Subsetting based on dates

In [11]:
df[df["Breed"] > '2015-01-01']

Unnamed: 0,Name,Breed,Color,Height(cm),Weight(kg),Date of Birth
0,Bella,Labrador,Brown,56,25,2013-07-01
1,Charlie,Poddle,Black,43,23,2016-09-16
2,Lucy,Chow Chow,Brown,46,22,2014-08-25
3,Copper,Schnauzer,Gray,49,17,2011-12-11
4,Max,Labrador,Black,59,29,2017-01-20
5,Stella,Chihuahua,Tan,18,2,2015-04-20
6,Bernle,St. Bernard,White,77,74,2018-02-27


# Subsetting based on multiple conditions

In [15]:
is_lab = df['Breed'] == 'Labrador'
is_black = df['Color'] == 'Black'
df[is_lab & is_black]

Unnamed: 0,Name,Breed,Color,Height(cm),Weight(kg),Date of Birth
4,Max,Labrador,Black,59,29,2017-01-20


# Subsetting using .isin()
Pandas isin() method is used to filter data frames. isin() method helps in selecting rows with having a particular(or Multiple) value in a particular column. Parameters: values: iterable, Series, List, Tuple, DataFrame or dictionary to check in the caller Series/Data Frame.

In [18]:
is_black_or_brown = df['Color'].isin(['Black', 'Brown'])

df[is_black_or_brown]

Unnamed: 0,Name,Breed,Color,Height(cm),Weight(kg),Date of Birth
0,Bella,Labrador,Brown,56,25,2013-07-01
1,Charlie,Poddle,Black,43,23,2016-09-16
2,Lucy,Chow Chow,Brown,46,22,2014-08-25
4,Max,Labrador,Black,59,29,2017-01-20
