# Accessing data in a MultiIndex DataFrame

In [5]:
import pandas as pd
from pandas import IndexSlice as idx

In [6]:
# index_col=[0, 1]: the first two CSV columns become the row MultiIndex (City, Date).
# header=[0, 1]: the first two CSV rows become the column MultiIndex
# (Day/Night on the outer level, Weather/Wind/Max Temperature on the inner level).
df = pd.read_csv("data/dataset.csv", index_col=[0, 1], header=[0, 1])

In [7]:
# Sort the row MultiIndex. Label-based slicing on a MultiIndex (used in the
# slicing sections further down) needs the index to be sorted.
df = df.sort_index()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Day,Day,Day,Night,Night,Night
Unnamed: 0_level_1,Unnamed: 1_level_1,Weather,Wind,Max Temperature,Weather,Wind,Max Temperature
City,Date,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Cambridge,2019-07-01,Shower,SW 16 mph,24,Shower,SW 16 mph,17
Cambridge,2019-07-02,Scattered showers,E 20 mph,21,Scattered showers,E 20 mph,16
Cambridge,2019-07-03,Scattered showers,SE 10 mph,22,Scattered showers,SE 10 mph,16
Cambridge,2019-07-04,Shower,S 25 mph,22,Mostly cloudy,S 25 mph,16
London,2019-07-01,Shower,SW 16 mph,28,Shower,SW 16 mph,18
London,2019-07-02,Shower,SW 16 mph,29,Heavy rain,SW 16 mph,17
London,2019-07-03,Scattered showers,SW 16 mph,29,Heavy rain,SW 16 mph,19
London,2019-07-04,Mostly cloudy,SW 16 mph,31,Mostly cloudy,SW 16 mph,23
Oxford,2019-07-01,Shower,SW 13 mph,25,Shower,SW 16 mph,19
Oxford,2019-07-02,Shower,SW 16 mph,26,Shower,SW 16 mph,19


#### View Index Levels

In [8]:
# `.levels` lists the unique labels of each column level. Note that this is not
# the order in which the columns appear; use get_level_values() for that.
df.columns.levels

FrozenList([['Day', 'Night'], ['Max Temperature', 'Weather', 'Wind']])

In [9]:
# Unique labels of each row-index level: cities first, then dates.
df.index.levels

FrozenList([['Cambridge', 'London', 'Oxford'], ['2019-07-01', '2019-07-02', '2019-07-03', '2019-07-04']])

In [10]:
# Outer (level 0) label of every column, one entry per column, in column order.
df.columns.get_level_values(0)

Index(['Day', 'Day', 'Day', 'Night', 'Night', 'Night'], dtype='object')

In [11]:
# Inner (level 1) label of every column, one entry per column, in column order.
df.columns.get_level_values(1)

Index(['Weather', 'Wind', 'Max Temperature', 'Weather', 'Wind',
       'Max Temperature'],
      dtype='object')

## 1. Accessing first level

In [12]:
# "London" selects on the first row level and "Day" on the first column level;
# both selected levels are dropped from the result.
df.loc["London", "Day"]

Unnamed: 0_level_0,Weather,Wind,Max Temperature
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-07-01,Shower,SW 16 mph,28
2019-07-02,Shower,SW 16 mph,29
2019-07-03,Scattered showers,SW 16 mph,29
2019-07-04,Mostly cloudy,SW 16 mph,31


In [13]:
# To get all rows
df.loc[:, "Day"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Weather,Wind,Max Temperature
City,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Cambridge,2019-07-01,Shower,SW 16 mph,24
Cambridge,2019-07-02,Scattered showers,E 20 mph,21
Cambridge,2019-07-03,Scattered showers,SE 10 mph,22
Cambridge,2019-07-04,Shower,S 25 mph,22
London,2019-07-01,Shower,SW 16 mph,28
London,2019-07-02,Shower,SW 16 mph,29
London,2019-07-03,Scattered showers,SW 16 mph,29
London,2019-07-04,Mostly cloudy,SW 16 mph,31
Oxford,2019-07-01,Shower,SW 13 mph,25
Oxford,2019-07-02,Shower,SW 16 mph,26


In [14]:
# To get all columns
df.loc["London", :]

Unnamed: 0_level_0,Day,Day,Day,Night,Night,Night
Unnamed: 0_level_1,Weather,Wind,Max Temperature,Weather,Wind,Max Temperature
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2019-07-01,Shower,SW 16 mph,28,Shower,SW 16 mph,18
2019-07-02,Shower,SW 16 mph,29,Heavy rain,SW 16 mph,17
2019-07-03,Scattered showers,SW 16 mph,29,Heavy rain,SW 16 mph,19
2019-07-04,Mostly cloudy,SW 16 mph,31,Mostly cloudy,SW 16 mph,23


In [15]:
# Two row labels also work: here the pair is matched against the row MultiIndex
# (City, Date), so the result is the single row ("London", "2019-07-02") as a Series.
# This spelling looks the same as the (row, column) form above, so the explicit
# tuple df.loc[("London", "2019-07-02")] is the less ambiguous way to write it.
df.loc["London", "2019-07-02"]

Day    Weather                Shower
       Wind                SW 16 mph
       Max Temperature            29
Night  Weather            Heavy rain
       Wind                SW 16 mph
       Max Temperature            17
Name: (London, 2019-07-02), dtype: object

In [16]:
# Equivalent to df.loc['London', 'Day']
df.loc[("London",), ("Day",)]

Unnamed: 0_level_0,Weather,Wind,Max Temperature
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-07-01,Shower,SW 16 mph,28
2019-07-02,Shower,SW 16 mph,29
2019-07-03,Scattered showers,SW 16 mph,29
2019-07-04,Mostly cloudy,SW 16 mph,31


In [17]:
# Equivalent to df.loc[:, 'Day']
df.loc[:, ("Day",)]

Unnamed: 0_level_0,Unnamed: 1_level_0,Weather,Wind,Max Temperature
City,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Cambridge,2019-07-01,Shower,SW 16 mph,24
Cambridge,2019-07-02,Scattered showers,E 20 mph,21
Cambridge,2019-07-03,Scattered showers,SE 10 mph,22
Cambridge,2019-07-04,Shower,S 25 mph,22
London,2019-07-01,Shower,SW 16 mph,28
London,2019-07-02,Shower,SW 16 mph,29
London,2019-07-03,Scattered showers,SW 16 mph,29
London,2019-07-04,Mostly cloudy,SW 16 mph,31
Oxford,2019-07-01,Shower,SW 13 mph,25
Oxford,2019-07-02,Shower,SW 16 mph,26


In [18]:
# Equivalent to df.loc['London' , :]
df.loc[("London",), :]

Unnamed: 0_level_0,Day,Day,Day,Night,Night,Night
Unnamed: 0_level_1,Weather,Wind,Max Temperature,Weather,Wind,Max Temperature
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2019-07-01,Shower,SW 16 mph,28,Shower,SW 16 mph,18
2019-07-02,Shower,SW 16 mph,29,Heavy rain,SW 16 mph,17
2019-07-03,Scattered showers,SW 16 mph,29,Heavy rain,SW 16 mph,19
2019-07-04,Mostly cloudy,SW 16 mph,31,Mostly cloudy,SW 16 mph,23


In [19]:
# Equivalent to df.loc['London' , '2019-07-02']
df.loc[("London", "2019-07-02")]

Day    Weather                Shower
       Wind                SW 16 mph
       Max Temperature            29
Night  Weather            Heavy rain
       Wind                SW 16 mph
       Max Temperature            17
Name: (London, 2019-07-02), dtype: object

## 2. Multi-level index

In [20]:
# Chained lookup: first narrow down to London / Day, then pick the date from the
# remaining single-level Date index. This takes two separate .loc calls.
df.loc["London", "Day"].loc["2019-07-01"]

Weather               Shower
Wind               SW 16 mph
Max Temperature           28
Name: 2019-07-01, dtype: object

In [21]:
# Use tuple
df.loc[("London", "2019-07-01"), "Day"]

Weather               Shower
Wind               SW 16 mph
Max Temperature           28
Name: (London, 2019-07-01), dtype: object

In [22]:
# Select multiple rows
df.loc[("London", ["2019-07-01", "2019-07-02"]), "Day"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Weather,Wind,Max Temperature
City,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
London,2019-07-01,Shower,SW 16 mph,28
London,2019-07-02,Shower,SW 16 mph,29


In [23]:
# Select multiple columns
df.loc["London", ("Day", ["Weather", "Wind"])]

Unnamed: 0_level_0,Day,Day
Unnamed: 0_level_1,Weather,Wind
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2019-07-01,Shower,SW 16 mph
2019-07-02,Shower,SW 16 mph
2019-07-03,Scattered showers,SW 16 mph
2019-07-04,Mostly cloudy,SW 16 mph


## 3. Selecting a range of data via slice

In [24]:
# Slice on the first index level (both endpoints are included)
df.loc["Cambridge":"Oxford", "Day"]

# The above is shorthand for
# df.loc[
#     ('Cambridge', ):('Oxford', ),
#     'Day'
# ]

Unnamed: 0_level_0,Unnamed: 1_level_0,Weather,Wind,Max Temperature
City,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Cambridge,2019-07-01,Shower,SW 16 mph,24
Cambridge,2019-07-02,Scattered showers,E 20 mph,21
Cambridge,2019-07-03,Scattered showers,SE 10 mph,22
Cambridge,2019-07-04,Shower,S 25 mph,22
London,2019-07-01,Shower,SW 16 mph,28
London,2019-07-02,Shower,SW 16 mph,29
London,2019-07-03,Scattered showers,SW 16 mph,29
London,2019-07-04,Mostly cloudy,SW 16 mph,31
Oxford,2019-07-01,Shower,SW 13 mph,25
Oxford,2019-07-02,Shower,SW 16 mph,26


In [25]:
# This raises a SyntaxError (intentionally): the `start:stop` slice notation is
# only valid directly inside square brackets, not inside a parenthesised tuple.
df.loc[
    ('London', '2019-07-01':'2019-07-03'),
    'Day'
]

SyntaxError: invalid syntax (<ipython-input-25-176180497f92>, line 3)

In [26]:
# Correct way to slice
df.loc[("London", "2019-07-01"):("London", "2019-07-03"), "Day"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Weather,Wind,Max Temperature
City,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
London,2019-07-01,Shower,SW 16 mph,28
London,2019-07-02,Shower,SW 16 mph,29
London,2019-07-03,Scattered showers,SW 16 mph,29


In [27]:
# A slice between two full (City, Date) keys can span cities. Both endpoints are
# included, so this returns every Cambridge row plus London up to 2019-07-02.
df.loc[("Cambridge", "2019-07-01"):("London", "2019-07-02"), "Day"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Weather,Wind,Max Temperature
City,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Cambridge,2019-07-01,Shower,SW 16 mph,24
Cambridge,2019-07-02,Scattered showers,E 20 mph,21
Cambridge,2019-07-03,Scattered showers,SE 10 mph,22
Cambridge,2019-07-04,Shower,S 25 mph,22
London,2019-07-01,Shower,SW 16 mph,28
London,2019-07-02,Shower,SW 16 mph,29


## 4. Selecting all content using `slice(None)`

In [28]:
# This raises a SyntaxError (intentionally): a bare `:` is not allowed inside a
# parenthesised tuple, so it cannot stand for "all cities" here.
df.loc[ 
    ( : , '2019-07-04'),
    'Day'
]

SyntaxError: invalid syntax (<ipython-input-28-e8f48c36d5e3>, line 3)

In [29]:
# This raises a SyntaxError (intentionally): a bare `:` is not allowed inside a
# parenthesised tuple, so it cannot stand for "all dates" here.
df.loc[ 
    ('London', :),
    'Day'
]

SyntaxError: invalid syntax (<ipython-input-29-ca28e8185688>, line 3)

In [30]:
# The correct way to select all content
df.loc[("London", slice(None)), "Day"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Weather,Wind,Max Temperature
City,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
London,2019-07-01,Shower,SW 16 mph,28
London,2019-07-02,Shower,SW 16 mph,29
London,2019-07-03,Scattered showers,SW 16 mph,29
London,2019-07-04,Mostly cloudy,SW 16 mph,31


In [31]:
# The correct way to select all content
df.loc[(slice(None), "2019-07-04"), "Day"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Weather,Wind,Max Temperature
City,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Cambridge,2019-07-04,Shower,S 25 mph,22
London,2019-07-04,Mostly cloudy,SW 16 mph,31
Oxford,2019-07-04,Shower,SW 14 mph,25


## 5. Using cross-section `xs()`

In [32]:
# The xs() method of DataFrame additionally takes a level argument
# to make selecting data at a particular level of a MultiIndex easier.
df.xs("2019-07-04", level="Date")

Unnamed: 0_level_0,Day,Day,Day,Night,Night,Night
Unnamed: 0_level_1,Weather,Wind,Max Temperature,Weather,Wind,Max Temperature
City,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Cambridge,Shower,S 25 mph,22,Mostly cloudy,S 25 mph,16
London,Mostly cloudy,SW 16 mph,31,Mostly cloudy,SW 16 mph,23
Oxford,Shower,SW 14 mph,25,Shower,SW 16 mph,21


In [33]:
# multiple keys
df.xs(("London", "2019-07-04"), level=["City", "Date"])

Unnamed: 0_level_0,Unnamed: 1_level_0,Day,Day,Day,Night,Night,Night
Unnamed: 0_level_1,Unnamed: 1_level_1,Weather,Wind,Max Temperature,Weather,Wind,Max Temperature
City,Date,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
London,2019-07-04,Mostly cloudy,SW 16 mph,31,Mostly cloudy,SW 16 mph,23


In [34]:
# You can also select on the columns with xs, by providing the axis argument.
df.xs("Wind", level=1, axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Day,Night
City,Date,Unnamed: 2_level_1,Unnamed: 3_level_1
Cambridge,2019-07-01,SW 16 mph,SW 16 mph
Cambridge,2019-07-02,E 20 mph,E 20 mph
Cambridge,2019-07-03,SE 10 mph,SE 10 mph
Cambridge,2019-07-04,S 25 mph,S 25 mph
London,2019-07-01,SW 16 mph,SW 16 mph
London,2019-07-02,SW 16 mph,SW 16 mph
London,2019-07-03,SW 16 mph,SW 16 mph
London,2019-07-04,SW 16 mph,SW 16 mph
Oxford,2019-07-01,SW 13 mph,SW 16 mph
Oxford,2019-07-02,SW 16 mph,SW 16 mph


In [35]:
# drop_level=False keeps the selected "Wind" level in the column index
# instead of dropping it from the result.
df.xs("Wind", level=1, axis=1, drop_level=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Day,Night
Unnamed: 0_level_1,Unnamed: 1_level_1,Wind,Wind
City,Date,Unnamed: 2_level_2,Unnamed: 3_level_2
Cambridge,2019-07-01,SW 16 mph,SW 16 mph
Cambridge,2019-07-02,E 20 mph,E 20 mph
Cambridge,2019-07-03,SE 10 mph,SE 10 mph
Cambridge,2019-07-04,S 25 mph,S 25 mph
London,2019-07-01,SW 16 mph,SW 16 mph
London,2019-07-02,SW 16 mph,SW 16 mph
London,2019-07-03,SW 16 mph,SW 16 mph
London,2019-07-04,SW 16 mph,SW 16 mph
Oxford,2019-07-01,SW 13 mph,SW 16 mph
Oxford,2019-07-02,SW 16 mph,SW 16 mph


## 6. Use `IndexSlice`

In [36]:
# NOTE: `idx` is already imported in the notebook's first code cell;
# this repeated import is harmless but redundant.
from pandas import IndexSlice as idx

In [37]:
df.loc[idx[:, "2019-07-04"], "Day"]

# Equivalent to
# df.loc[
#     (slice(None) , '2019-07-04'),
#     'Day'
# ]

Unnamed: 0_level_0,Unnamed: 1_level_0,Weather,Wind,Max Temperature
City,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Cambridge,2019-07-04,Shower,S 25 mph,22
London,2019-07-04,Mostly cloudy,SW 16 mph,31
Oxford,2019-07-04,Shower,SW 14 mph,25


In [38]:
# Build the row and column selectors separately, then combine them in one .loc call.
# rows: every city on 2019-07-01; cols: two of the sub-columns under "Day".
rows = idx[:, "2019-07-01"]
cols = idx["Day", ["Max Temperature", "Weather"]]

df.loc[rows, cols]

Unnamed: 0_level_0,Unnamed: 1_level_0,Day,Day
Unnamed: 0_level_1,Unnamed: 1_level_1,Max Temperature,Weather
City,Date,Unnamed: 2_level_2,Unnamed: 3_level_2
Cambridge,2019-07-01,24,Shower
London,2019-07-01,28,Shower
Oxford,2019-07-01,25,Shower


In [39]:
# Use xs() with IndexSlice for selecting a range of data.
# The entries of the key are given in the same order as `level`:
# the date range first (for "Date"), then the city (for "City").
rows = (idx["2019-07-02":"2019-07-04"], "London")

df.xs(rows, level=["Date", "City"])

Unnamed: 0_level_0,Unnamed: 1_level_0,Day,Day,Day,Night,Night,Night
Unnamed: 0_level_1,Unnamed: 1_level_1,Weather,Wind,Max Temperature,Weather,Wind,Max Temperature
City,Date,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
London,2019-07-02,Shower,SW 16 mph,29,Heavy rain,SW 16 mph,17
London,2019-07-03,Scattered showers,SW 16 mph,29,Heavy rain,SW 16 mph,19
London,2019-07-04,Mostly cloudy,SW 16 mph,31,Mostly cloudy,SW 16 mph,23
