In [18]:
import collections # only need this to keep the fake data's column order fixed
import itertools # only need this to generate some fake data

import pandas as pd

# Record the pandas version this notebook was run with; the single-argument
# call form prints the same text on Python 2 and Python 3.
print('Pandas version: {}'.format(pd.__version__))

Pandas version: 0.15.2


# Assigning with boolean indexing

The task is to change the contents of a dataframe entry depending on the values of the other entries within the same row. The steps to take are:

1. Create a random dataframe.
2. Create a boolean index Series that is True where the conditions hold and False otherwise.
3. Use `loc[rows, columns]`, where rows is the boolean index Series and columns names the particular columns we want to manipulate.

## 1. Create a random dataframe.

In [17]:
def expand_grid(data_dict):
    """Return a DataFrame holding every combination of the given values.

    Parameters
    ----------
    data_dict : dict
        Maps each column name to the iterable of values that column may take.

    Returns
    -------
    pandas.DataFrame
        One row per element of the Cartesian product of the value iterables,
        with columns named and ordered after the keys of ``data_dict``.
    """
    column_names = data_dict.keys()
    # itertools.product walks the value lists in the same order as the keys,
    # so each tuple lines up with column_names.
    combinations = itertools.product(*data_dict.values())
    return pd.DataFrame.from_records(combinations, columns=column_names)

# An OrderedDict pins the column order (and therefore the row order produced
# by itertools.product) to sex, weight, height, which is the layout the
# recorded outputs in this notebook show. With a plain dict literal that order
# depends on the Python version's dict ordering.
df = expand_grid(
       collections.OrderedDict([
           ('sex', ['Male', 'Female', 'Unisex']),
           ('weight', [100, 140, 180]),
           ('height', [60, 70])]))
print('df before')
df

df before


Unnamed: 0,sex,weight,height
0,Male,100,60
1,Male,100,70
2,Male,140,60
3,Male,140,70
4,Male,180,60
5,Male,180,70
6,Female,100,60
7,Female,100,70
8,Female,140,60
9,Female,140,70


## 2. Create a boolean index Series that is True where the conditions hold and False otherwise.

In [13]:
# Every row starts at 0 in the new column.
df['new column'] = 0
# Row mask: True only where both comparisons hold for that row.
inds = (
    (df['sex'] == 'Male')
    & (df['height'] == 70)
)
inds

0     False
1      True
2     False
3      True
4     False
5      True
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14    False
15    False
16    False
17    False
dtype: bool

## 3. Use `loc[rows, columns]`, where rows is the boolean index Series and columns names the particular columns we want to manipulate.

In [14]:
# Copy 'height' into 'new column' only on the rows selected by the boolean
# mask; all other rows keep their existing value.
df.loc[inds, 'new column'] = df.loc[inds, 'height']
# Single-argument call form prints the same text on Python 2 and Python 3.
print('df after')
df

df after


Unnamed: 0,sex,weight,height,new column
0,Male,100,60,0
1,Male,100,70,70
2,Male,140,60,0
3,Male,140,70,70
4,Male,180,60,0
5,Male,180,70,70
6,Female,100,60,0
7,Female,100,70,0
8,Female,140,60,0
9,Female,140,70,0


### Side note
We can also use the `in` operator to test whether an entry is one of the values in a list. The following statements are equivalent:
```
(df['column'] == 'val1') | (df['column'] == 'val2') | (df['column'] == 'val3')
df['column'].map(lambda x: x in ['val1', 'val2', 'val3'])
```
Using the example data above:

In [19]:
# Element-wise membership tests with `in`, one per column, combined with &
# so a row is selected only when both columns match.
# (Series.isin([...]) is the built-in pandas spelling of the same test.)
inds = (
    df['weight'].map(lambda weight: weight in [100, 180])
    & df['sex'].map(lambda sex: sex in ['Male', 'Female'])
)
inds

0      True
1      True
2     False
3     False
4      True
5      True
6      True
7      True
8     False
9     False
10     True
11     True
12    False
13    False
14    False
15    False
16    False
17    False
dtype: bool