# Sorting and subsetting

## Sorting

In [21]:
import pandas as pd

# Load the dog records. parse_dates converts date_of_birth to real datetimes
# at load time, so the date filter further down compares dates instead of
# relying on the lexicographic ordering of strings.
dogs = pd.read_csv("dogs.csv", parse_dates=["date_of_birth"])
dogs.head()

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Bella,Labrador,Brown,56,25,2013-07-01
1,Charlie,Poodle,Black,43,23,2016-09-16
2,Lucy,Chow Chow,Brown,46,22,2014-08-25
3,Cooper,Schanauzer,Gray,49,17,2011-12-11
4,Max,Labrador,Black,59,29,2017-01-20


In [22]:
# Order the rows from lightest to heaviest (ascending is the default).
# sort_values returns a new DataFrame; dogs itself is left unchanged.
dogs.sort_values(by="weight_kg")

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
5,Stella,Chihuahua,Tan,18,2,2015-04-20
3,Cooper,Schanauzer,Gray,49,17,2011-12-11
2,Lucy,Chow Chow,Brown,46,22,2014-08-25
1,Charlie,Poodle,Black,43,23,2016-09-16
0,Bella,Labrador,Brown,56,25,2013-07-01
4,Max,Labrador,Black,59,29,2017-01-20
6,Bernie,St.Bernard,White,77,74,2018-02-27


## Sorting in descending order

In [23]:
# ascending=False reverses the order: heaviest dog first.
dogs.sort_values(by="weight_kg", ascending=False)

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
6,Bernie,St.Bernard,White,77,74,2018-02-27
4,Max,Labrador,Black,59,29,2017-01-20
0,Bella,Labrador,Brown,56,25,2013-07-01
1,Charlie,Poodle,Black,43,23,2016-09-16
2,Lucy,Chow Chow,Brown,46,22,2014-08-25
3,Cooper,Schanauzer,Gray,49,17,2011-12-11
5,Stella,Chihuahua,Tan,18,2,2015-04-20


## Sorting by multiple variables

In [24]:
# Sort on weight_kg first; height_cm only decides the order of rows whose
# weight_kg values are equal.
dogs.sort_values(by=["weight_kg", "height_cm"])

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
5,Stella,Chihuahua,Tan,18,2,2015-04-20
3,Cooper,Schanauzer,Gray,49,17,2011-12-11
2,Lucy,Chow Chow,Brown,46,22,2014-08-25
1,Charlie,Poodle,Black,43,23,2016-09-16
0,Bella,Labrador,Brown,56,25,2013-07-01
4,Max,Labrador,Black,59,29,2017-01-20
6,Bernie,St.Bernard,White,77,74,2018-02-27


## Sorting by multiple variables in different directions

In [25]:
# One ascending flag per sort key: weight_kg ascending, height_cm descending.
# In the rows shown below no two dogs share a weight, so the height
# tie-breaker has no visible effect on the result.
dogs.sort_values(
    by=["weight_kg", "height_cm"],
    ascending=[True, False],
)

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
5,Stella,Chihuahua,Tan,18,2,2015-04-20
3,Cooper,Schanauzer,Gray,49,17,2011-12-11
2,Lucy,Chow Chow,Brown,46,22,2014-08-25
1,Charlie,Poodle,Black,43,23,2016-09-16
0,Bella,Labrador,Brown,56,25,2013-07-01
4,Max,Labrador,Black,59,29,2017-01-20
6,Bernie,St.Bernard,White,77,74,2018-02-27


## Subsetting columns

In [26]:
# Select every row of the single column "name"; the result is a Series.
dogs.loc[:, "name"]

0      Bella
1    Charlie
2       Lucy
3     Cooper
4        Max
5     Stella
6     Bernie
Name: name, dtype: object

## Subsetting multiple columns

In [27]:
# Passing a list of column labels returns a DataFrame with just those columns,
# in the order listed.
dogs.loc[:, ["breed", "height_cm"]]

Unnamed: 0,breed,height_cm
0,Labrador,56
1,Poodle,43
2,Chow Chow,46
3,Schanauzer,49
4,Labrador,59
5,Chihuahua,18
6,St.Bernard,77


In [28]:
# Same selection as the previous cell, with the column labels stored in a
# variable first.
cols_to_subset = ["breed", "height_cm"]
dogs.loc[:, cols_to_subset]

Unnamed: 0,breed,height_cm
0,Labrador,56
1,Poodle,43
2,Chow Chow,46
3,Schanauzer,49
4,Labrador,59
5,Chihuahua,18
6,St.Bernard,77


## Subsetting rows

In [29]:
# Element-wise comparison: yields a boolean Series, True where height_cm
# is greater than 50.
dogs["height_cm"].gt(50)

0     True
1    False
2    False
3    False
4     True
5    False
6     True
Name: height_cm, dtype: bool

In [30]:
# Use the boolean mask as a row selector: only rows where it is True are kept.
dogs.loc[dogs["height_cm"] > 50]

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Bella,Labrador,Brown,56,25,2013-07-01
4,Max,Labrador,Black,59,29,2017-01-20
6,Bernie,St.Bernard,White,77,74,2018-02-27


## Subsetting based on text data

In [31]:
# Keep the rows whose breed is exactly "Labrador" (case-sensitive match).
dogs.loc[dogs["breed"].eq("Labrador")]

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Bella,Labrador,Brown,56,25,2013-07-01
4,Max,Labrador,Black,59,29,2017-01-20


## Subsetting based on dates

In [32]:
# Rows whose date_of_birth is later than 2015-01-01. The YYYY-MM-DD format
# means the comparison orders correctly even when the column holds plain
# strings.
dogs.loc[dogs["date_of_birth"] > "2015-01-01"]

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
1,Charlie,Poodle,Black,43,23,2016-09-16
4,Max,Labrador,Black,59,29,2017-01-20
5,Stella,Chihuahua,Tan,18,2,2015-04-20
6,Bernie,St.Bernard,White,77,74,2018-02-27


## Subsetting based on multiple conditions

In [34]:
# Build one boolean mask per condition, then combine them element-wise with &
# so that only rows satisfying both conditions are kept.
is_lab = dogs["breed"].eq("Labrador")
is_brown = dogs["color"].eq("Brown")
dogs.loc[is_lab & is_brown]

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Bella,Labrador,Brown,56,25,2013-07-01


The same subset can be written in one line of code:

In [35]:
# Both conditions inlined into a single expression. Each comparison is wrapped
# in parentheses because & binds more tightly than ==.
dogs.loc[(dogs["breed"] == "Labrador") & (dogs["color"] == "Brown")]

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Bella,Labrador,Brown,56,25,2013-07-01


## Subsetting using .isin()

In [36]:
# .isin() marks a row True when its color matches any value in the list,
# which replaces chaining several == comparisons together with |.
is_black_or_brown = dogs["color"].isin(["Black", "Brown"])
dogs.loc[is_black_or_brown]

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Bella,Labrador,Brown,56,25,2013-07-01
1,Charlie,Poodle,Black,43,23,2016-09-16
2,Lucy,Chow Chow,Brown,46,22,2014-08-25
4,Max,Labrador,Black,59,29,2017-01-20
