In [1]:
import pandas as pd 

In [3]:
# Download the temperatures CSV into a DataFrame.
# NOTE(review): the preview carries an "Unnamed: 0" column — presumably a row
# index that was saved into the file; confirm before relying on it.
temperatures = pd.read_csv(
    filepath_or_buffer='https://www.dropbox.com/s/6jzwa6ve4fqgole/temperatures.csv?dl=1',
)

# Preview the first five rows
temperatures.head(n=5)

Unnamed: 0.1,Unnamed: 0,date,city,country,avg_temp_c
0,0,2000-01-01,Abidjan,Côte D'Ivoire,27.293
1,1,2000-02-01,Abidjan,Côte D'Ivoire,27.685
2,2,2000-03-01,Abidjan,Côte D'Ivoire,29.061
3,3,2000-04-01,Abidjan,Côte D'Ivoire,28.162
4,4,2000-05-01,Abidjan,Côte D'Ivoire,27.547


# Setting and removing indexes

In [5]:
# Use the city names as row labels; the "city" column moves into the index
# (drop=True is the default) instead of also staying behind as a column.
temperatures_ind = temperatures.set_index(keys="city", drop=True)

# Show the re-indexed frame
temperatures_ind

Unnamed: 0_level_0,Unnamed: 0,date,country,avg_temp_c
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Abidjan,0,2000-01-01,Côte D'Ivoire,27.293
Abidjan,1,2000-02-01,Côte D'Ivoire,27.685
Abidjan,2,2000-03-01,Côte D'Ivoire,29.061
Abidjan,3,2000-04-01,Côte D'Ivoire,28.162
Abidjan,4,2000-05-01,Côte D'Ivoire,27.547
...,...,...,...,...
Xian,16495,2013-05-01,China,18.979
Xian,16496,2013-06-01,China,23.522
Xian,16497,2013-07-01,China,25.251
Xian,16498,2013-08-01,China,24.528


In [6]:
# Undo the indexing: the index comes back as an ordinary column (drop=False is
# the default) and the rows get a fresh 0..n-1 integer index.
# The result is only displayed; temperatures_ind itself is not reassigned.
temperatures_ind.reset_index(drop=False)

Unnamed: 0.1,city,Unnamed: 0,date,country,avg_temp_c
0,Abidjan,0,2000-01-01,Côte D'Ivoire,27.293
1,Abidjan,1,2000-02-01,Côte D'Ivoire,27.685
2,Abidjan,2,2000-03-01,Côte D'Ivoire,29.061
3,Abidjan,3,2000-04-01,Côte D'Ivoire,28.162
4,Abidjan,4,2000-05-01,Côte D'Ivoire,27.547
...,...,...,...,...,...
16495,Xian,16495,2013-05-01,China,18.979
16496,Xian,16496,2013-06-01,China,23.522
16497,Xian,16497,2013-07-01,China,25.251
16498,Xian,16498,2013-08-01,China,24.528


In [7]:
# Throw the current index away instead of turning it back into a column:
# every index level is removed (level=None) and replaced by a fresh integer index.
temperatures_ind.reset_index(level=None, drop=True)

Unnamed: 0.1,Unnamed: 0,date,country,avg_temp_c
0,0,2000-01-01,Côte D'Ivoire,27.293
1,1,2000-02-01,Côte D'Ivoire,27.685
2,2,2000-03-01,Côte D'Ivoire,29.061
3,3,2000-04-01,Côte D'Ivoire,28.162
4,4,2000-05-01,Côte D'Ivoire,27.547
...,...,...,...,...
16495,16495,2013-05-01,China,18.979
16496,16496,2013-06-01,China,23.522
16497,16497,2013-07-01,China,25.251
16498,16498,2013-08-01,China,24.528


**Subsetting with .loc**

In [9]:
# Cities whose rows we want to pull out
cities = ["Moscow", "Saint Petersburg"]

# Boolean mask: True for each row whose "city" value appears in the list
is_wanted_city = temperatures["city"].isin(cities)

# Subset with square brackets, keeping only the rows where the mask is True
temperatures[is_wanted_city]

Unnamed: 0.1,Unnamed: 0,date,city,country,avg_temp_c
10725,10725,2000-01-01,Moscow,Russia,-7.313
10726,10726,2000-02-01,Moscow,Russia,-3.551
10727,10727,2000-03-01,Moscow,Russia,-1.661
10728,10728,2000-04-01,Moscow,Russia,10.096
10729,10729,2000-05-01,Moscow,Russia,10.357
...,...,...,...,...,...
13360,13360,2013-05-01,Saint Petersburg,Russia,12.355
13361,13361,2013-06-01,Saint Petersburg,Russia,17.185
13362,13362,2013-07-01,Saint Petersburg,Russia,17.234
13363,13363,2013-08-01,Saint Petersburg,Russia,17.153


In [10]:
# Label-based lookup: select every row whose index label is in `cities`,
# keeping all columns. Relies on temperatures_ind being indexed by city.
temperatures_ind.loc[cities, :]

Unnamed: 0_level_0,Unnamed: 0,date,country,avg_temp_c
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Moscow,10725,2000-01-01,Russia,-7.313
Moscow,10726,2000-02-01,Russia,-3.551
Moscow,10727,2000-03-01,Russia,-1.661
Moscow,10728,2000-04-01,Russia,10.096
Moscow,10729,2000-05-01,Russia,10.357
...,...,...,...,...
Saint Petersburg,13360,2013-05-01,Russia,12.355
Saint Petersburg,13361,2013-06-01,Russia,17.185
Saint Petersburg,13362,2013-07-01,Russia,17.234
Saint Petersburg,13363,2013-08-01,Russia,17.153


**Setting multi-level indexes**

In [11]:
# Rebuild temperatures_ind with a two-level index: country (outer), city (inner).
# Note that this rebinds the name used for the city-only index in earlier cells.
temperatures_ind = temperatures.set_index(keys=["country", "city"])

# One (country, city) tuple per index entry we want to select
rows_to_keep = [
    ("Brazil", "Rio De Janeiro"),
    ("Pakistan", "Lahore"),
]

# Passing the list of tuples to .loc selects the rows matching each pair
temperatures_ind.loc[rows_to_keep]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Brazil,Rio De Janeiro,12540,2000-01-01,25.974
Brazil,Rio De Janeiro,12541,2000-02-01,26.699
Brazil,Rio De Janeiro,12542,2000-03-01,26.270
Brazil,Rio De Janeiro,12543,2000-04-01,25.750
Brazil,Rio De Janeiro,12544,2000-05-01,24.356
...,...,...,...,...
Pakistan,Lahore,8575,2013-05-01,33.457
Pakistan,Lahore,8576,2013-06-01,34.456
Pakistan,Lahore,8577,2013-07-01,33.279
Pakistan,Lahore,8578,2013-08-01,31.511


**Sorting by index values**

In [12]:
# 1) Sort on the full index with default (ascending) order
sorted_by_full_index = temperatures_ind.sort_index()
print(sorted_by_full_index)

# 2) Sort on the "city" index level
sorted_by_city = temperatures_ind.sort_index(level="city")
print(sorted_by_city)

# 3) Sort on country ascending, then city descending within each country
sorted_country_then_city_desc = temperatures_ind.sort_index(
    level=["country", "city"],
    ascending=[True, False],
)
print(sorted_country_then_city_desc)

                    Unnamed: 0        date  avg_temp_c
country     city                                      
Afghanistan Kabul         7260  2000-01-01       3.326
            Kabul         7261  2000-02-01       3.454
            Kabul         7262  2000-03-01       9.612
            Kabul         7263  2000-04-01      17.925
            Kabul         7264  2000-05-01      24.658
...                        ...         ...         ...
Zimbabwe    Harare        5605  2013-05-01      18.298
            Harare        5606  2013-06-01      17.020
            Harare        5607  2013-07-01      16.299
            Harare        5608  2013-08-01      19.232
            Harare        5609  2013-09-01         NaN

[16500 rows x 3 columns]
                       Unnamed: 0        date  avg_temp_c
country       city                                       
Côte D'Ivoire Abidjan           0  2000-01-01      27.293
              Abidjan           1  2000-02-01      27.685
              Abidjan      