# 第2章　表の変形と結合・分割

## 2.1 DataFrameの部分選択

In [1]:
import pandas as pd
import numpy as np

# Demo frame used throughout this section: the integers 0..19 laid out
# row by row in a 5x4 grid, with letter row labels and named columns.
df = pd.DataFrame(
    np.arange(20).reshape(5, -1),   # -1 lets NumPy infer the 4 columns
    columns=['Alpha', 'Bravo', 'Charlie', 'Delta'],
    index=['a', 'b', 'c', 'd', 'e'],
)

df

Unnamed: 0,Alpha,Bravo,Charlie,Delta
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15
e,16,17,18,19


In [2]:
# Read a single scalar by row label and column label.
df.at['d', 'Bravo']

13

In [3]:
# Read a single scalar by integer row/column position.
df.iat[3, 1]    # same as df.at['d', 'Bravo']

13

In [4]:
# A scalar row label selects that row as a Series (named after the label).
df.loc['d', :]  # same as df.loc['d']

Alpha      12
Bravo      13
Charlie    14
Delta      15
Name: d, dtype: int64

In [5]:
# Wrapping the label in a list keeps the result a one-row DataFrame.
df.loc[['d'], :]    # same as df.loc[['d']]

Unnamed: 0,Alpha,Bravo,Charlie,Delta
d,12,13,14,15


In [6]:
# A scalar column label selects that column as a Series.
df.loc[:, 'Bravo']  # same as df['Bravo']

a     1
b     5
c     9
d    13
e    17
Name: Bravo, dtype: int64

In [7]:
# A one-element list of column labels yields a single-column DataFrame.
df.loc[:, ['Bravo']]

Unnamed: 0,Bravo
a,1
b,5
c,9
d,13
e,17


In [8]:
# Select several rows by passing a list of row labels.
df.loc[['a', 'c', 'e'], :]  # same as df.loc[['a', 'c', 'e']]

Unnamed: 0,Alpha,Bravo,Charlie,Delta
a,0,1,2,3
c,8,9,10,11
e,16,17,18,19


In [9]:
# Select several columns by passing a list of column labels.
df.loc[:, ['Bravo', 'Delta']]   # same as df[['Bravo', 'Delta']]

Unnamed: 0,Bravo,Delta
a,1,3
b,5,7
c,9,11
d,13,15
e,17,19


In [10]:
# Select rows and columns at once with two label lists.
df.loc[['a', 'c', 'e'], ['Bravo', 'Delta']]

Unnamed: 0,Bravo,Delta
a,1,3
c,9,11
e,17,19


In [11]:
# Positional counterpart of the label-based selection: rows 0, 2, 4 and
# columns 1, 3 are the same cells as rows a, c, e and Bravo, Delta.
df.iloc[[0, 2, 4], [1, 3]]

Unnamed: 0,Bravo,Delta
a,1,3
c,9,11
e,17,19


In [12]:
# Positional slices exclude the stop position, so 1:4 covers positions 1..3.
# df.loc['b':'d', 'Bravo':'Delta'] gives the same result, but iloc is easier to read here
df.iloc[1:4, 1:4] 

Unnamed: 0,Bravo,Charlie,Delta
b,5,6,7
c,9,10,11
d,13,14,15


In [13]:
# NumPy integer arrays can be used as positional indexers as well.
row_indices = np.array([0, 2, 3])
col_indices = np.arange(1, 4)   # columns at positions 1, 2, 3

df.iloc[row_indices, col_indices]

Unnamed: 0,Bravo,Charlie,Delta
a,1,2,3
c,9,10,11
d,13,14,15


In [14]:
# Benchmark input: N random (row, column) positions that are valid for df.
# NOTE(review): the generator is not seeded, so the positions differ per run.
N = 100000
row_indices = np.random.randint(0, df.shape[0], N)  # upper bound is exclusive
col_indices = np.random.randint(0, df.shape[1], N)

In [15]:
%%timeit

# Baseline: read one scalar per iteration through .iloc.
for row_index, col_index in zip(row_indices, col_indices):
    df.iloc[row_index, col_index]

1.68 s ± 53.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
%%timeit

# Same loop, but reading each scalar through .iat for comparison with .iloc.
for row_index, col_index in zip(row_indices, col_indices):
    df.iat[row_index, col_index]

1.2 s ± 14.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
# Keep only the listed columns (filter works on column labels by default here).
df.filter(['Alpha', 'Charlie'])

Unnamed: 0,Alpha,Charlie
a,0,2
b,4,6
c,8,10
d,12,14
e,16,18


In [18]:
# Keep the columns whose name contains the substring 'r' (Bravo, Charlie).
df.filter(like='r')

Unnamed: 0,Bravo,Charlie
a,1,2
b,5,6
c,9,10
d,13,14
e,17,18


In [19]:
# Keep the columns whose name matches the regular expression, i.e. contains
# an 'l' followed somewhere later by an 'a' (Alpha, Delta).
df.filter(regex='.*l.*a')

Unnamed: 0,Alpha,Delta
a,0,3
b,4,7
c,8,11
d,12,15
e,16,19


In [20]:
# With axis=0 the labels are matched against the row index instead of the columns.
df.filter(['a', 'e'], axis=0)   # df.filter(['a', 'e'], axis='index') is equivalent

Unnamed: 0,Alpha,Bravo,Charlie,Delta
a,0,1,2,3
e,16,17,18,19


In [21]:
# Boolean list with one entry per row; True marks the rows to keep.
row_mask = [True, False, False, True, True]

df.loc[row_mask]    # same as df.iloc[row_mask] or df[row_mask]

Unnamed: 0,Alpha,Bravo,Charlie,Delta
a,0,1,2,3
d,12,13,14,15
e,16,17,18,19


In [22]:
# Boolean list with one entry per column; True marks the columns to keep.
col_mask = [False, True, False, True]

df.loc[row_mask, col_mask]  # same as df.iloc[row_mask, col_mask]

Unnamed: 0,Bravo,Delta
a,1,3
d,13,15
e,17,19


In [23]:
# Comparing a column with a scalar yields a boolean Series indexed by row label.
row_mask = df.loc[:, 'Alpha'] > 6   # df['Alpha'] > 6 is equivalent

row_mask

a    False
b    False
c     True
d     True
e     True
Name: Alpha, dtype: bool

In [24]:
# Testing one row yields a boolean Series indexed by column label:
# True where the value in row 'd' is even.
col_mask = df.loc['d', :] % 2 == 0  # df.loc['d'] % 2 == 0 is equivalent

col_mask

Alpha       True
Bravo      False
Charlie     True
Delta      False
Name: d, dtype: bool

In [25]:
# Apply the row mask and the column mask together.
df.loc[row_mask, col_mask]

Unnamed: 0,Alpha,Charlie
c,8,10
d,12,14
e,16,18


In [26]:
# ~ inverts a boolean Series, selecting exactly the rows/columns excluded before.
df.loc[~row_mask, ~col_mask]

Unnamed: 0,Bravo,Delta
a,1,3
b,5,7


In [27]:
# Boolean mask that is True for rows c and e
row_mask2 = df.loc[:, 'Alpha'].isin([8, 16])    # df['Alpha'].isin([8, 16]) is equivalent

# Boolean mask that is True for the Delta column
col_mask2 = df.loc['b', :] == 7             # df.loc['b'] == 7 is equivalent

# Combine masks element-wise: & requires both to be True, | requires either.
df.loc[row_mask & row_mask2, col_mask | col_mask2]

Unnamed: 0,Alpha,Charlie,Delta
c,8,10,11
e,16,18,19


In [28]:
# Element-wise membership test: True where the cell value is one of the listed values.
df.isin([0, 5, 10, 15])

Unnamed: 0,Alpha,Bravo,Charlie,Delta
a,True,False,False,False
b,False,True,False,False
c,False,False,True,False
d,False,False,False,True
e,False,False,False,False


In [29]:
# Keep the rows whose Alpha value is one of the listed values.
row_mask = df['Alpha'].isin([0, 8, 16])

df.loc[row_mask]

Unnamed: 0,Alpha,Bravo,Charlie,Delta
a,0,1,2,3
c,8,9,10,11
e,16,17,18,19


In [30]:
# Row filter written as an expression string that refers to columns by name.
df.query('Alpha > 6')

Unnamed: 0,Alpha,Bravo,Charlie,Delta
c,8,9,10,11
d,12,13,14,15
e,16,17,18,19


In [31]:
%%timeit

# Three row filters written with boolean masks, timed for comparison with query().
df.loc[df.loc[:, 'Alpha'] > 6]
df.loc[df.loc[:, 'Bravo'] == 9]
df.loc[df.loc[:, 'Charlie'] < 12]

566 µs ± 6.54 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [32]:
%%timeit

# The same three row filters written with query(), timed for comparison with boolean masks.
df.query('Alpha > 6')
df.query('Bravo == 9')
df.query('Charlie < 12')

1.93 ms ± 34 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
