## Slicing and Indexing DataFrames

In [30]:
# importing pandas
import pandas as pd

# importing sales dataset
temperatures = pd.read_csv("../datasets/temperatures.csv")
temperatures.head()

Unnamed: 0.1,Unnamed: 0,date,city,country,avg_temp_c
0,0,2000-01-01,Abidjan,Côte D'Ivoire,27.293
1,1,2000-02-01,Abidjan,Côte D'Ivoire,27.685
2,2,2000-03-01,Abidjan,Côte D'Ivoire,29.061
3,3,2000-04-01,Abidjan,Côte D'Ivoire,28.162
4,4,2000-05-01,Abidjan,Côte D'Ivoire,27.547


In [13]:
temp_cities = temperatures["city"]
temp_cities.drop_duplicates()

0            Abidjan
165      Addis Abeba
330        Ahmadabad
495           Aleppo
660       Alexandria
            ...     
15675          Tokyo
15840        Toronto
16005     Umm Durman
16170          Wuhan
16335           Xian
Name: city, Length: 100, dtype: object

#### Slicing lists

In [14]:
cities = ["Tokyo", "Xian", "Taipei", "Sydney", "Seoul", "Rome", "Paris", "New York"]

In [8]:
cities[2:5]

['Taipei', 'Sydney', 'Seoul']

In [9]:
cities[:3]

['Tokyo', 'Xian', 'Taipei']

In [10]:
cities[:]

['Tokyo', 'Xian', 'Taipei', 'Sydney', 'Seoul', 'Rome', 'Paris', 'New York']

#### Sort the index before you slice

In [24]:
temp_srt = temperatures.set_index(["city", "country"]).sort_index()
temp_srt

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,date,avg_temp_c
city,country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Abidjan,Côte D'Ivoire,0,2000-01-01,27.293
Abidjan,Côte D'Ivoire,1,2000-02-01,27.685
Abidjan,Côte D'Ivoire,2,2000-03-01,29.061
Abidjan,Côte D'Ivoire,3,2000-04-01,28.162
Abidjan,Côte D'Ivoire,4,2000-05-01,27.547
...,...,...,...,...
Xian,China,16495,2013-05-01,18.979
Xian,China,16496,2013-06-01,23.522
Xian,China,16497,2013-07-01,25.251
Xian,China,16498,2013-08-01,24.528


#### Slicing the outer index level

In [17]:
temp_srt.loc["Taipei": "Xian"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,date,avg_temp_c
city,country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Taipei,Taiwan,15015,2000-01-01,18.086
Taipei,Taiwan,15016,2000-02-01,16.874
Taipei,Taiwan,15017,2000-03-01,19.186
Taipei,Taiwan,15018,2000-04-01,21.147
Taipei,Taiwan,15019,2000-05-01,24.224
...,...,...,...,...
Xian,China,16495,2013-05-01,18.979
Xian,China,16496,2013-06-01,23.522
Xian,China,16497,2013-07-01,25.251
Xian,China,16498,2013-08-01,24.528


#### Slicing the inner index levels badly

In [20]:
temp_srt.loc["Taiwan": "China"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,date,avg_temp_c
city,country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


#### Slicing the inner index levels correctly

In [26]:
temp_srt.loc[("Taipei", "Taiwan"): ("Xian", "China")]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,date,avg_temp_c
city,country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Taipei,Taiwan,15015,2000-01-01,18.086
Taipei,Taiwan,15016,2000-02-01,16.874
Taipei,Taiwan,15017,2000-03-01,19.186
Taipei,Taiwan,15018,2000-04-01,21.147
Taipei,Taiwan,15019,2000-05-01,24.224
...,...,...,...,...
Xian,China,16495,2013-05-01,18.979
Xian,China,16496,2013-06-01,23.522
Xian,China,16497,2013-07-01,25.251
Xian,China,16498,2013-08-01,24.528


#### Slicing columns

In [28]:
temp_srt.loc[:, "date":"avg_temp_c"]

Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
city,country,Unnamed: 2_level_1,Unnamed: 3_level_1
Abidjan,Côte D'Ivoire,2000-01-01,27.293
Abidjan,Côte D'Ivoire,2000-02-01,27.685
Abidjan,Côte D'Ivoire,2000-03-01,29.061
Abidjan,Côte D'Ivoire,2000-04-01,28.162
Abidjan,Côte D'Ivoire,2000-05-01,27.547
...,...,...,...
Xian,China,2013-05-01,18.979
Xian,China,2013-06-01,23.522
Xian,China,2013-07-01,25.251
Xian,China,2013-08-01,24.528


#### Slice twice

In [29]:
temp_srt.loc[("Taipei", "Taiwan") : ("Xian", "China"), "date": "avg_temp_c"]

Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
city,country,Unnamed: 2_level_1,Unnamed: 3_level_1
Taipei,Taiwan,2000-01-01,18.086
Taipei,Taiwan,2000-02-01,16.874
Taipei,Taiwan,2000-03-01,19.186
Taipei,Taiwan,2000-04-01,21.147
Taipei,Taiwan,2000-05-01,24.224
...,...,...,...
Xian,China,2013-05-01,18.979
Xian,China,2013-06-01,23.522
Xian,China,2013-07-01,25.251
Xian,China,2013-08-01,24.528


#### temp days

In [31]:
temperatures = temperatures.set_index("date").sort_index()
temperatures

Unnamed: 0_level_0,Unnamed: 0,city,country,avg_temp_c
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-01,0,Abidjan,Côte D'Ivoire,27.293
2000-01-01,8415,Lahore,Pakistan,12.792
2000-01-01,15345,Tangshan,China,-5.406
2000-01-01,5115,Gizeh,Egypt,12.669
2000-01-01,8580,Lakhnau,India,15.152
...,...,...,...,...
2013-09-01,11549,Nanjing,China,
2013-09-01,11714,New Delhi,India,
2013-09-01,11879,New York,United States,17.408
2013-09-01,12209,Peking,China,


#### Slicing by dates

In [27]:
temperatures.loc["2000-01-01": "2013-05-01"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Côte D'Ivoire,Abidjan,0,2000-01-01,27.293
Côte D'Ivoire,Abidjan,1,2000-02-01,27.685
Côte D'Ivoire,Abidjan,2,2000-03-01,29.061
Côte D'Ivoire,Abidjan,3,2000-04-01,28.162
Côte D'Ivoire,Abidjan,4,2000-05-01,27.547
...,...,...,...,...
China,Xian,16495,2013-05-01,18.979
China,Xian,16496,2013-06-01,23.522
China,Xian,16497,2013-07-01,25.251
China,Xian,16498,2013-08-01,24.528


#### Slicing by partial dates

In [34]:
temperatures.loc["2000": "2001"]

Unnamed: 0_level_0,Unnamed: 0,city,country,avg_temp_c
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-01,0,Abidjan,Côte D'Ivoire,27.293
2000-01-01,8415,Lahore,Pakistan,12.792
2000-01-01,15345,Tangshan,China,-5.406
2000-01-01,5115,Gizeh,Egypt,12.669
2000-01-01,8580,Lakhnau,India,15.152
...,...,...,...,...
2000-12-01,8261,Lagos,Nigeria,27.173
2000-12-01,5621,Harbin,China,-19.429
2000-12-01,1001,Baghdad,Iraq,12.108
2000-12-01,5951,Hyderabad,India,21.949


#### Subsetting by row/column number

In [35]:
temperatures.iloc[2:5, 1:4]

Unnamed: 0_level_0,city,country,avg_temp_c
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-01,Tangshan,China,-5.406
2000-01-01,Gizeh,Egypt,12.669
2000-01-01,Lakhnau,India,15.152
