## Advanced Indexing and Slicing

In [1]:
import pandas as pd
import numpy as np
arrays = [
    ['Q1', 'Q1', 'Q2', 'Q2', 'Q3', 'Q3', 'Q4', 'Q4'],
    ['Store A', 'Store B', 'Store A', 'Store B', 'Store A', 'Store B', 'Store A', 'Store B']
]
index = pd.MultiIndex.from_arrays(arrays, names=('Quarter','Store'))
index

MultiIndex([('Q1', 'Store A'),
            ('Q1', 'Store B'),
            ('Q2', 'Store A'),
            ('Q2', 'Store B'),
            ('Q3', 'Store A'),
            ('Q3', 'Store B'),
            ('Q4', 'Store A'),
            ('Q4', 'Store B')],
           names=['Quarter', 'Store'])

In [2]:
df_multi = pd.DataFrame( 
np.random.randint(5000, 10000, size=(8, 4)), 
index=index, 
columns=['Product A', 'Product B', 'Product C', 'Product D'] 
)
df = pd.DataFrame(df_multi)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Product A,Product B,Product C,Product D
Quarter,Store,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Q1,Store A,7701,9726,5420,5038
Q1,Store B,5214,9540,6204,9708
Q2,Store A,6084,7837,8731,6904
Q2,Store B,5168,8909,5374,6676
Q3,Store A,5878,8565,5705,8231
Q3,Store B,8471,7509,7910,6217
Q4,Store A,9050,6949,6385,5176
Q4,Store B,6502,6548,6010,7643


In [3]:
# 61. From df_multi, select all data for 'Store A' across all quarters. 
df_multi.xs('Store A', level='Store')

Unnamed: 0_level_0,Product A,Product B,Product C,Product D
Quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Q1,7701,9726,5420,5038
Q2,6084,7837,8731,6904
Q3,5878,8565,5705,8231
Q4,9050,6949,6385,5176


In [4]:
# 62. Select the data for 'Product B' and 'Product D' in 'Q2'. 
df_multi.loc['Q2', ['Product B','Product D']]

Unnamed: 0_level_0,Product B,Product D
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
Store A,7837,6904
Store B,8909,6676


In [5]:
# 63. Use the .xs() method to select all data for 'Q3'. 
df_multi.xs('Q3', level='Quarter')

Unnamed: 0_level_0,Product A,Product B,Product C,Product D
Store,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Store A,5878,8565,5705,8231
Store B,8471,7509,7910,6217


In [6]:
# 64. Select all rows where sales for 'Product A' are greater than 8000. 
df_multi[df_multi['Product A'] > 8000]

Unnamed: 0_level_0,Unnamed: 1_level_0,Product A,Product B,Product C,Product D
Quarter,Store,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Q3,Store B,8471,7509,7910,6217
Q4,Store A,9050,6949,6385,5176


In [7]:
# 65. Select the data for the first store ('Store A') in the first quarter ('Q1'). 
df_multi.loc[('Q1','Store A')]

Product A    7701
Product B    9726
Product C    5420
Product D    5038
Name: (Q1, Store A), dtype: int32

In [8]:
# 66. Swap the levels of the MultiIndex (i.e., make 'Store' the first level). 
df_multi.swaplevel()

Unnamed: 0_level_0,Unnamed: 1_level_0,Product A,Product B,Product C,Product D
Store,Quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Store A,Q1,7701,9726,5420,5038
Store B,Q1,5214,9540,6204,9708
Store A,Q2,6084,7837,8731,6904
Store B,Q2,5168,8909,5374,6676
Store A,Q3,5878,8565,5705,8231
Store B,Q3,8471,7509,7910,6217
Store A,Q4,9050,6949,6385,5176
Store B,Q4,6502,6548,6010,7643


In [11]:
# 67. Sort the DataFrame df_multi based on the 'Store' level of the index. 
df_multi.sort_index(level='Store')

Unnamed: 0_level_0,Unnamed: 1_level_0,Product A,Product B,Product C,Product D
Quarter,Store,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Q1,Store A,7701,9726,5420,5038
Q2,Store A,6084,7837,8731,6904
Q3,Store A,5878,8565,5705,8231
Q4,Store A,9050,6949,6385,5176
Q1,Store B,5214,9540,6204,9708
Q2,Store B,5168,8909,5374,6676
Q3,Store B,8471,7509,7910,6217
Q4,Store B,6502,6548,6010,7643


In [10]:
# 68. Select rows for both 'Q1' and 'Q4' using a list of labels in .loc. 
df_multi.loc[['Q1','Q4']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Product A,Product B,Product C,Product D
Quarter,Store,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Q1,Store A,7701,9726,5420,5038
Q1,Store B,5214,9540,6204,9708
Q4,Store A,9050,6949,6385,5176
Q4,Store B,6502,6548,6010,7643


In [12]:
# 69. Calculate the total sales for each product only for 'Store B'. 
df_multi.xs('Store B', level='Store').sum()

Product A    25355
Product B    32506
Product C    25498
Product D    30244
dtype: int64

In [13]:
# 70. Select the last row for each quarter from df_multi. 
df_multi.groupby(level='Quarter').tail(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Product A,Product B,Product C,Product D
Quarter,Store,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Q1,Store B,5214,9540,6204,9708
Q2,Store B,5168,8909,5374,6676
Q3,Store B,8471,7509,7910,6217
Q4,Store B,6502,6548,6010,7643
