# Boolean Selection 

This lesson covers:

* Boolean selection
* `where`

Begin by loading the data in momentum.csv.


In [1]:
# Setup: Load the momentum data

import numpy as np
import pandas as pd

momentum = pd.read_csv("data/momentum.csv", index_col="date", parse_dates=True)

print(momentum.head())

            mom_01  mom_02  mom_03  mom_04  mom_05  mom_06  mom_07  mom_08  \
date                                                                         
2016-01-04    0.67   -0.03   -0.93   -1.11   -1.47   -1.66   -1.40   -2.08   
2016-01-05   -0.36    0.20   -0.37    0.28    0.16    0.18   -0.22    0.25   
2016-01-06   -4.97   -2.33   -2.60   -1.16   -1.70   -1.45   -1.15   -1.46   
2016-01-07   -4.91   -1.91   -3.03   -1.87   -2.31   -2.30   -2.70   -2.31   
2016-01-08   -0.40   -1.26   -0.98   -1.26   -1.13   -1.02   -0.96   -1.42   

            mom_09  mom_10  
date                        
2016-01-04   -1.71   -2.67  
2016-01-05    0.29    0.13  
2016-01-06   -1.14   -0.45  
2016-01-07   -2.36   -2.66  
2016-01-08   -0.94   -1.32  


## Problem: Selecting rows with boolean conditions

Select the rows in `momentum` where all returns on a day are negative.

In [2]:
momentum.loc[(momentum < 0).all(axis=1)]

Unnamed: 0_level_0,mom_01,mom_02,mom_03,mom_04,mom_05,mom_06,mom_07,mom_08,mom_09,mom_10
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-01-06,-4.97,-2.33,-2.60,-1.16,-1.70,-1.45,-1.15,-1.46,-1.14,-0.45
2016-01-07,-4.91,-1.91,-3.03,-1.87,-2.31,-2.30,-2.70,-2.31,-2.36,-2.66
2016-01-08,-0.40,-1.26,-0.98,-1.26,-1.13,-1.02,-0.96,-1.42,-0.94,-1.32
2016-01-13,-4.88,-3.06,-2.70,-2.00,-2.34,-2.18,-2.44,-2.77,-2.65,-3.71
2016-01-15,-3.85,-2.87,-1.92,-2.09,-1.38,-2.46,-1.88,-2.23,-2.28,-2.40
...,...,...,...,...,...,...,...,...,...,...
2017-10-25,-0.77,-0.72,-0.33,-0.83,-0.28,-0.52,-0.23,-0.48,-0.35,-1.52
2017-10-30,-0.01,-0.72,-0.50,-0.91,-0.73,-0.27,-0.42,-0.04,-0.41,-0.40
2017-11-15,-0.20,-0.64,-0.65,-0.13,-0.37,-0.41,-0.75,-0.37,-0.77,-0.59
2017-12-14,-1.20,-0.99,-0.78,-0.26,-0.38,-0.41,-0.76,-0.32,-0.60,-0.17


## Problem: Selecting rows

Select the rows in `momentum` where 50% or more of the returns on a day are negative.

In [3]:
momentum.loc[(momentum < 0).mean(axis=1) >= 0.5]

Unnamed: 0_level_0,mom_01,mom_02,mom_03,mom_04,mom_05,mom_06,mom_07,mom_08,mom_09,mom_10
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-01-04,0.67,-0.03,-0.93,-1.11,-1.47,-1.66,-1.40,-2.08,-1.71,-2.67
2016-01-06,-4.97,-2.33,-2.60,-1.16,-1.70,-1.45,-1.15,-1.46,-1.14,-0.45
2016-01-07,-4.91,-1.91,-3.03,-1.87,-2.31,-2.30,-2.70,-2.31,-2.36,-2.66
2016-01-08,-0.40,-1.26,-0.98,-1.26,-1.13,-1.02,-0.96,-1.42,-0.94,-1.32
2016-01-11,-5.14,-0.81,-1.33,-0.72,-0.43,0.14,0.23,0.28,0.29,0.33
...,...,...,...,...,...,...,...,...,...,...
2017-12-19,-0.61,0.19,-0.41,-0.30,-0.17,-0.31,-0.41,-0.18,-0.15,-0.56
2017-12-22,0.24,0.13,-0.07,-0.01,-0.08,-0.10,-0.14,-0.16,-0.05,-0.01
2017-12-26,0.89,0.70,0.23,-0.03,0.10,-0.06,-0.14,-0.07,0.14,-0.65
2017-12-27,-0.58,-0.55,-0.27,-0.19,-0.14,0.18,0.18,0.10,0.22,0.36


## Problem: Selecting columns

Select the columns in `momentum` what have the smallest and second smallest average returns.

In [4]:
mean = momentum.mean()
mean = mean.sort_values()
mean

mom_09    0.042167
mom_07    0.051789
mom_10    0.060954
mom_08    0.062604
mom_05    0.069920
mom_04    0.079145
mom_06    0.080775
mom_03    0.089801
mom_02    0.094811
mom_01    0.101909
dtype: float64

In [5]:
momentum.loc[:, momentum.mean() < mean.iloc[2]]

Unnamed: 0_level_0,mom_07,mom_09
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-04,-1.40,-1.71
2016-01-05,-0.22,0.29
2016-01-06,-1.15,-1.14
2016-01-07,-2.70,-2.36
2016-01-08,-0.96,-0.94
...,...,...
2017-12-22,-0.14,-0.05
2017-12-26,-0.14,0.14
2017-12-27,0.18,0.22
2017-12-28,0.30,0.28


In [6]:
momentum[mean.index[:2]]

Unnamed: 0_level_0,mom_09,mom_07
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-04,-1.71,-1.40
2016-01-05,0.29,-0.22
2016-01-06,-1.14,-1.15
2016-01-07,-2.36,-2.70
2016-01-08,-0.94,-0.96
...,...,...
2017-12-22,-0.05,-0.14
2017-12-26,0.14,-0.14
2017-12-27,0.22,0.18
2017-12-28,0.28,0.30


## Problem: Selecting rows and columns

Select the returns for the column with the single most negative return
on days where all of the returns are negative. 

In [7]:
lowest = (momentum == momentum.min().min()).any()
lowest

mom_01     True
mom_02    False
mom_03    False
mom_04    False
mom_05    False
mom_06    False
mom_07    False
mom_08    False
mom_09    False
mom_10    False
dtype: bool

In [8]:
momentum.loc[(momentum < 0).all(axis=1), lowest]

Unnamed: 0_level_0,mom_01
date,Unnamed: 1_level_1
2016-01-06,-4.97
2016-01-07,-4.91
2016-01-08,-0.40
2016-01-13,-4.88
2016-01-15,-3.85
...,...
2017-10-25,-0.77
2017-10-30,-0.01
2017-11-15,-0.20
2017-12-14,-1.20


## Problem: Selecting Elements using Logical Statements
For portfolio 1 and portfolio 10 compute the correlation when both 
returns are negative and when both are positive.


In [9]:
mom_01 = momentum.mom_01
mom_10 = momentum.mom_10

mom_01_neg = mom_01 < 0
mom_10_neg = mom_10 < 0
both_neg = mom_01_neg & mom_10_neg

momentum.loc[both_neg, ["mom_01", "mom_10"]].corr()

Unnamed: 0,mom_01,mom_10
mom_01,1.0,0.368096
mom_10,0.368096,1.0


In [10]:
sub = momentum[["mom_01", "mom_10"]]
sub.loc[(sub > 0).all(axis=1)].corr()

Unnamed: 0,mom_01,mom_10
mom_01,1.0,0.165254
mom_10,0.165254,1.0


In [11]:
# Setup: Reproducible random numbers

rs = np.random.RandomState(19991231)
x = rs.randint(1, 11, size=(10, 3))
x

array([[ 5,  8, 10],
       [10,  5,  9],
       [ 6,  9, 10],
       [ 8,  5,  9],
       [ 2,  2,  4],
       [ 5,  6,  7],
       [10,  7,  2],
       [ 4,  3,  5],
       [ 6,  3,  9],
       [10,  1,  9]], dtype=int32)

## Problem: Select the columns of x that means >= $E[x]$

In [12]:
x[:, x.mean(axis=0) >= 5.5]

array([[ 5, 10],
       [10,  9],
       [ 6, 10],
       [ 8,  9],
       [ 2,  4],
       [ 5,  7],
       [10,  2],
       [ 4,  5],
       [ 6,  9],
       [10,  9]], dtype=int32)

## Problem: Select the rows of x that means >= $E[x]$

In [13]:
x[x.mean(axis=1) >= 5.5]

array([[ 5,  8, 10],
       [10,  5,  9],
       [ 6,  9, 10],
       [ 8,  5,  9],
       [ 5,  6,  7],
       [10,  7,  2],
       [ 6,  3,  9],
       [10,  1,  9]], dtype=int32)

## Problem: Select the rows and column of x where both have means < $E[x]$

In [14]:
rows = x.mean(axis=1) >= 5.5
cols = x.mean(axis=0) >= 5.5

x[np.ix_(rows, cols)]

array([[ 5, 10],
       [10,  9],
       [ 6, 10],
       [ 8,  9],
       [ 5,  7],
       [10,  2],
       [ 6,  9],
       [10,  9]], dtype=int32)

## Problem: Using `where`
Use `where` to select the index of the elements in portfolio 5 that are
negative. Next, use the `where` command in its two output form to determine
which elements of the portfolio return matrix are less than -2%.

In [15]:
mom_05 = momentum.mom_05
np.where(mom_05 < 0)

(array([  0,   2,   3,   4,   5,   7,   9,  10,  11,  14,  16,  19,  20,
         23,  24,  25,  26,  27,  31,  32,  34,  38,  44,  48,  49,  54,
         55,  56,  57,  60,  62,  63,  65,  70,  71,  75,  77,  79,  80,
         81,  83,  84,  87,  89,  90,  91,  93,  97, 100, 105, 109, 110,
        111, 112, 113, 114, 117, 118, 120, 121, 126, 135, 136, 138, 140,
        142, 143, 144, 145, 146, 148, 150, 152, 154, 156, 160, 162, 164,
        167, 168, 170, 173, 175, 176, 178, 180, 183, 184, 187, 189, 190,
        193, 195, 197, 199, 202, 205, 206, 207, 209, 210, 211, 212, 213,
        217, 223, 228, 230, 231, 232, 238, 240, 242, 245, 246, 247, 249,
        251, 254, 256, 261, 263, 265, 268, 270, 271, 275, 276, 277, 283,
        287, 290, 292, 293, 294, 295, 300, 302, 303, 304, 305, 307, 308,
        309, 314, 316, 318, 320, 321, 322, 324, 325, 327, 331, 332, 335,
        338, 339, 341, 342, 344, 345, 353, 357, 358, 360, 365, 368, 373,
        375, 379, 381, 382, 391, 393, 395, 398, 399

In [16]:
loc = np.where(mom_05 < 0)
mom_05.iloc[loc]

date
2016-01-04   -1.47
2016-01-06   -1.70
2016-01-07   -2.31
2016-01-08   -1.13
2016-01-11   -0.43
              ... 
2017-12-14   -0.38
2017-12-19   -0.17
2017-12-22   -0.08
2017-12-27   -0.14
2017-12-29   -0.45
Name: mom_05, Length: 219, dtype: float64

In [17]:
loc = np.where(momentum < 0)
# Two arrays, one for rows and one for columns where momentum < 0
print(loc)

(array([  0,   0,   0, ..., 502, 502, 502], shape=(2247,)), array([1, 2, 3, ..., 7, 8, 9], shape=(2247,)))


## Exercises

### Exercise: Select the Most Volatile Portfolio

Select the column in momentum that has the highest standard deviation.

In [18]:
max_std = momentum.std().max()
momentum.loc[:, momentum.std() == max_std].head()

Unnamed: 0_level_0,mom_01
date,Unnamed: 1_level_1
2016-01-04,0.67
2016-01-05,-0.36
2016-01-06,-4.97
2016-01-07,-4.91
2016-01-08,-0.4


### Exercise: Select the High Kurtosis Portfolios

Select the columns that have kurtoses above the median kurtosis.

In [19]:
kurtosis = momentum.kurt()
sel = kurtosis > kurtosis.median()
momentum.loc[:, sel].head()

Unnamed: 0_level_0,mom_01,mom_03,mom_04,mom_05,mom_06
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-04,0.67,-0.93,-1.11,-1.47,-1.66
2016-01-05,-0.36,-0.37,0.28,0.16,0.18
2016-01-06,-4.97,-2.6,-1.16,-1.7,-1.45
2016-01-07,-4.91,-3.03,-1.87,-2.31,-2.3
2016-01-08,-0.4,-0.98,-1.26,-1.13,-1.02


### Exercise: Select 

Select the rows where all of the returns in the row are less than the 25%
quantile for their portfolio.

**Note**: Comparisons between `DataFrame`s and `Series` works like mathematical
operations (`+`, `-`, etc.).

In [20]:
quantiles = momentum.quantile(0.25)
less_than_quantile = momentum < quantiles
less_than_quantile.head()

Unnamed: 0_level_0,mom_01,mom_02,mom_03,mom_04,mom_05,mom_06,mom_07,mom_08,mom_09,mom_10
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-01-04,False,False,True,True,True,True,True,True,True,True
2016-01-05,False,False,True,False,False,False,False,False,False,False
2016-01-06,True,True,True,True,True,True,True,True,True,True
2016-01-07,True,True,True,True,True,True,True,True,True,True
2016-01-08,False,True,True,True,True,True,True,True,True,True


In [21]:
sel = less_than_quantile.all(axis=1)
momentum.loc[sel]

Unnamed: 0_level_0,mom_01,mom_02,mom_03,mom_04,mom_05,mom_06,mom_07,mom_08,mom_09,mom_10
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-01-06,-4.97,-2.33,-2.6,-1.16,-1.7,-1.45,-1.15,-1.46,-1.14,-0.45
2016-01-07,-4.91,-1.91,-3.03,-1.87,-2.31,-2.3,-2.7,-2.31,-2.36,-2.66
2016-01-13,-4.88,-3.06,-2.7,-2.0,-2.34,-2.18,-2.44,-2.77,-2.65,-3.71
2016-01-15,-3.85,-2.87,-1.92,-2.09,-1.38,-2.46,-1.88,-2.23,-2.28,-2.4
2016-01-25,-5.97,-3.67,-2.34,-2.16,-1.85,-1.71,-1.33,-1.44,-1.62,-1.12
2016-01-27,-0.68,-0.87,-0.47,-0.61,-1.9,-0.97,-0.53,-0.63,-1.16,-2.02
2016-02-02,-4.57,-3.03,-2.67,-2.83,-2.09,-1.25,-1.86,-1.98,-1.91,-1.7
2016-02-05,-2.49,-2.4,-1.52,-1.77,-1.77,-0.93,-1.47,-1.62,-2.04,-3.91
2016-02-08,-5.37,-3.92,-1.59,-1.08,-0.52,-1.26,-1.85,-1.0,-1.84,-2.01
2016-02-23,-4.09,-2.74,-1.65,-1.52,-1.7,-1.4,-1.12,-0.92,-0.42,-1.01
