In [45]:
import pandas as pd

In [46]:
# Load the flat table; the first print shows the default RangeIndex that
# read_csv assigns before any index columns are chosen.
cities = pd.read_csv('cities_extended.csv')
print(cities.index)

# Promote the three location columns to a hierarchical row index
# (Country > City > District) and preview its first ten entries, pre-sort.
cities = cities.set_index(keys=['Country', 'City', 'District'])
print(cities.index[:10])

# Sort the index - to increase performance of the label lookups that follow.
cities = cities.sort_index()

RangeIndex(start=0, stop=33, step=1)
MultiIndex([(   'USA',    'New York',    'Manhattan'),
            (   'USA',    'New York',     'Brooklyn'),
            (   'USA',    'New York',       'Queens'),
            (   'USA', 'Los Angeles',     'Downtown'),
            (   'USA', 'Los Angeles',    'Hollywood'),
            (   'USA', 'Los Angeles', 'Santa Monica'),
            (   'USA',     'Chicago',         'Loop'),
            (   'USA',     'Chicago', 'Lincoln Park'),
            ('Canada',     'Toronto',     'Downtown'),
            ('Canada',     'Toronto',   'North York')],
           names=['Country', 'City', 'District'])


In [47]:
# A partial (Country, City) key keeps every row beneath it; the matched
# levels are dropped, leaving District as the only row index level.
print("1. All districts in Tokyo:", cities.loc[('Japan', 'Tokyo')], sep="\n")

1. All districts in Tokyo:
          Population  Temperature  Rainfall  GDP_per_capita  Air_Quality_Index
District                                                                      
Ginza         339135         18.7      1304           60818                 73
Shibuya       252251         23.6      1279           78033                 62
Shinjuku      313986         22.9       878           68660                 70


In [48]:
# Selecting on the outermost level alone returns all USA rows,
# which are then indexed by the remaining (City, District) levels.
print("\n2. All cities in USA:", cities.loc['USA'], sep="\n")


2. All cities in USA:
                          Population  Temperature  Rainfall  GDP_per_capita  \
City        District                                                          
Chicago     Lincoln Park      153556         19.1       520           44502   
            Loop              284654         19.3      1767           61551   
Los Angeles Downtown          503355         21.8       959           49769   
            Hollywood         271932         24.3       974           55658   
            Santa Monica      108431         22.3      1475           30189   
New York    Brooklyn         2254886         19.5      1738           67194   
            Manhattan        1921958         23.0      1794           74732   
            Queens           2041090         15.2      1269           32433   

                          Air_Quality_Index  
City        District                         
Chicago     Lincoln Park                 47  
            Loop                         32  
Lo

In [49]:
# A complete (Country, City, District) key identifies a single row,
# which .loc returns as a Series named after the full key.
print(
    "\n3. Just Manhattan's data:",
    cities.loc[('USA', 'New York', 'Manhattan')],
    sep="\n",
)


3. Just Manhattan's data:
Population           1921958.0
Temperature               23.0
Rainfall                1794.0
GDP_per_capita         74732.0
Air_Quality_Index         50.0
Name: (USA, New York, Manhattan), dtype: float64


#### IndexSlice
- An object to more easily perform multi-index slicing.
- See [documentation](https://pandas.pydata.org/docs/reference/api/pandas.IndexSlice.html#pandas.IndexSlice)

In [50]:
# pd.IndexSlice lets us write one selector per MultiIndex level using
# ordinary slice syntax (":" means "everything at this level").
idx = pd.IndexSlice

print("\n4. All districts in New York and Tokyo:")
# Selectors, in level order: countries, cities, then every district.
# The trailing ", :" names the column axis explicitly so the row selector
# can never be misread as a (rows, columns) pair - pandas recommends always
# specifying all axes when slicing a MultiIndex with .loc.
print(cities.loc[idx[['USA', 'Japan'], ['New York', 'Tokyo'], :], :])


4. All districts in New York and Tokyo:
                            Population  Temperature  Rainfall  GDP_per_capita  \
Country City     District                                                       
USA     New York Brooklyn      2254886         19.5      1738           67194   
                 Manhattan     1921958         23.0      1794           74732   
                 Queens        2041090         15.2      1269           32433   
Japan   Tokyo    Ginza          339135         18.7      1304           60818   
                 Shibuya        252251         23.6      1279           78033   
                 Shinjuku       313986         22.9       878           68660   

                            Air_Quality_Index  
Country City     District                      
USA     New York Brooklyn                  53  
                 Manhattan                 50  
                 Queens                    93  
Japan   Tokyo    Ginza                     73  
                 Shibu

#### DataFrame.xs
- The `.xs()` method (cross-section) selects every row whose label at a given index level matches a key, regardless of the labels in the other levels. By default, the matched level is dropped from the result.
- See documentation [here](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.xs.html#pandas.DataFrame.xs)

In [51]:
# "CBD" is short for "Central Business District". The same district name
# can occur under more than one city, so match on the District level only.
print(
    "\n5. Cross-section: All CBD districts:",
    cities.xs(key='CBD', level='District'),
    sep="\n",
)


5. Cross-section: All CBD districts:
                     Population  Temperature  Rainfall  GDP_per_capita  \
Country   City                                                           
Australia Melbourne      203776         17.7      1806           36776   
          Sydney         295259         20.2      1863           78925   

                     Air_Quality_Index  
Country   City                          
Australia Melbourne                 32  
          Sydney                    91  


In [52]:
# Row selector first (a partial Country/City key), then the columns to keep.
print(
    "\n6. Population and Temperature for all districts in Paris:",
    cities.loc[('France', 'Paris'), ['Population', 'Temperature']],
    sep="\n",
)


6. Population and Temperature for all districts in Paris:
            Population  Temperature
District                           
Le Marais       174276         15.9
Montmartre      201271         15.6
