# Where

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import pandas as pd
import numpy as np
import xarray as xr

def show_component(cmp):
    """Render ``cmp`` as text with each line on its own tab-indented row.

    The value is formatted with ``'{}'.format``. The returned text starts
    with a newline, and every line of the rendering is prefixed by a tab.

    Args:
        cmp: Any object that can be formatted with ``str.format``.

    Returns:
        The newline-prefixed, tab-indented text.
    """
    rendered = '{}'.format(cmp)
    # Prefix every line (including the first) with "newline + tab".
    return ''.join('\n\t' + line for line in rendered.split('\n'))

def show_df(df):
    """Display ``df`` as an HTML table shifted 55px from the left margin.

    Args:
        df: Object exposing ``to_html()`` (a pandas DataFrame in this notebook).
    """
    # Imported at call time, so IPython is only needed when this function runs.
    from IPython.display import HTML, display

    table_markup = df.to_html()
    wrapped = '<div style="margin-left:55px">{}</div>'.format(table_markup)
    display(HTML(wrapped))

## 条件表达式

### 在 Series 上使用 where

In [None]:
# Integer Series 0..4 used by the Series examples in the following cells.
s = pd.Series(data=range(5))
print('* when series "s" is: {}'.format(show_component(cmp=s)))

#### 条件表达式

In [None]:
# Element-wise "greater than 2" comparison; the result is a boolean Series.
mask = s.gt(2)
print('* mask "s > 2" is: {}'.format(show_component(cmp=mask)))

#### 利用条件表达式过滤数据（保留符合条件的结果）

In [None]:
# where() keeps the entries whose mask is True and replaces the others with NaN.
r = s.where(cond=mask)
print('* "s.where(mask)" is: {}'.format(show_component(cmp=r)))

# Remove the NaN placeholders so only the matching entries remain.
r = r.dropna()
print('\n after "dropna", "s.where(mask)" is: {}'.format(show_component(cmp=r)))

#### 利用条件表达式过滤数据（删除符合条件的结果）

In [None]:
# mask() is the inverse of where(): entries whose mask is True are replaced
# with NaN, and the others are kept.
r = s.mask(mask)
# The label now closes the quote around the expression, matching the other cells.
print('* "s.mask(mask)" is: {}'.format(show_component(r)))

# Remove the NaN placeholders so only the non-matching entries remain.
r = r.dropna()
print('\n after "dropna", "s.mask(mask)" is: {}'.format(show_component(r)))

### 使用默认值填充

In [None]:
# Odd entries are replaced by a constant placeholder.
r = s.where(s % 2 == 0, other='?')
print('* "s.where(s % 2 == 0, \'?\')" is: {}'.format(show_component(cmp=r)))

# Odd entries are replaced by the corresponding entry of another Series (-s).
r = s.where(s % 2 == 0, other=-s)
print('\n* "s.where(s % 2 == 0, -s)" is: {}'.format(show_component(cmp=r)))

# Same idea, with the replacement values taken from s * 100.
r = s.where(s % 2 == 0, other=s * 100)
print('\n* "s.where(s % 2 == 0, s * 100)" is: {}'.format(show_component(cmp=r)))

### 在 DataFrame 上使用 where

In [None]:
# Number of rows in the demo frame; every column below is sized from this value.
rows = 6

df = pd.DataFrame({
    # Random floats in [1, 10).
    'A': np.random.rand(rows) * (10 - 1) + 1,
    # Consecutive calendar days starting at 2019-01-01.
    'B': pd.date_range('20190101', periods=rows),
    # 0.1, 0.2, ... as float32, labelled 'a', 'b', ...
    # Built from an integer range so the length always equals `rows`; a
    # float-step arange with a fixed stop only matched rows == 6 and has a
    # length that is sensitive to floating-point rounding.
    'C': pd.Series(np.arange(1, rows + 1) * 0.1, index=[chr(0x61 + n) for n in range(rows)], dtype='float32'),
    # Constant int32 column.
    'D': np.array([3] * rows, dtype='int32'),
    # Alternating 'test'/'train' labels; np.resize cycles the pair to exactly
    # `rows` items, so an odd `rows` works too.
    'E': pd.Categorical(np.resize(['test', 'train'], rows)),
    # Scalar value repeated for every row.
    'F': 'foo'
})

print('* when data frame “df” is:')
show_df(df)

#### 整体条件处理

In [None]:
# Work on the numeric columns only.
df_ = df[['A', 'C', 'D']]
print('* when data frame “df” is:')
show_df(df_)

# Keep the cells that are >= 3; every other cell is replaced by the default fill.
df_ = df_.where(df_ >= 3)
print('\n  "df.where(df >= 3)" is:')
show_df(df_)

# Replace the missing cells using None as the fill value.
df_ = df_.where(pd.notna(df_), other=None)
print('\n  "df.where(pd.notna(df), None)" is:')
show_df(df_)

# Replace the missing cells using -1 as the fill value.
df_ = df_.where(pd.notnull(df_), other=-1)
print('\n  "df.where(pd.notnull(df), -1)" is:')
show_df(df_)

#### 按列条件过滤

- 注意：组合多个条件表达式时，可以用 `&`（and）或 `|`（or）连接，且每个子表达式都必须用 `()` 括起来（因为 `&`、`|` 的优先级高于比较运算符），例如：

    ```python
    condition = (df_['B'] >= '2019-01-03') & (df_['C'] > 0.4)
    ```

In [None]:
# Start from an independent copy so the original frame is left untouched.
df_ = df.copy(deep=True)
print('* when data frame “df” is:')
show_df(df_)

# Condition built from a single column (a boolean Series).
df_ = df_.where(df_['A'] >= 3)
print('\n  "df_.where(df_[\'A\'] >= 3)" is:')
show_df(df_)

# Replace the missing cells using None as the fill value.
df_ = df_.where(pd.notna(df_), other=None)
print('\n  "df.where(pd.notna(df), None)" is:')
show_df(df_)

# Same not-null condition, this time without an explicit fill value.
df_ = df_.where(pd.notnull(df_))
print('\n  "df.where(pd.notnull(df))" is:')
show_df(df_)

# Drop every row that still contains a missing value.
df_ = df_.dropna()
print('\n  "df.dropna()" is:')
show_df(df_)

# Two column conditions combined with "&"; each side is parenthesised.
df_ = df_.where((df_['B'] >= '2019-01-03') & (df_['C'] > 0.4))
print('\n  "df.where((df[\'B\'] >= \'2019-01-03\') & (df[\'C\'] > 0.4))" is:')
show_df(df_.dropna())