# Introduction to Logicals in Python

In [4]:
import math
import numpy as np
import pandas as pd

We will use the following data from a randomized experiment.  Let's load it in and come back to it shortly.

In [54]:
# Read the experiment data from a CSV file (the path is relative to the current working directory)
resume = pd.read_csv('../data/resume.csv')
resume.shape  # (number of rows, number of columns)

(4870, 4)

In [55]:
# Preview the first few rows (head() shows five by default)
resume.head()

Unnamed: 0,firstname,sex,race,call
0,Allison,female,white,0
1,Kristen,female,white,0
2,Lakisha,female,black,0
3,Latonya,female,black,0
4,Carrie,female,white,0


In [56]:
# Python's logical values True and False have type bool
type(True)

bool

In [57]:
int(True)  # converting True to an integer gives 1

1

In [58]:
int(False)  # converting False to an integer gives 0

0

In [59]:
# A Boolean vector: in arithmetic each True counts as 1 and each False as 0
x = pd.Series([True, False, True])
share_true = x.mean()  # proportion of True values
share_true.round(2)

0.67

In [60]:
x.sum() # number of True values (each True is counted as 1)

2

In [61]:
# & is logical AND: the result is True only when both sides are True.
# For single bool values Python's `and` keyword gives the same answer; & is shown
# here because it is the operator that also works element-wise on pandas Series.
False & True

False

In [62]:
True & True  # both sides are True, so AND gives True

True

In [63]:
True | False  # | is logical OR: True when at least one side is True

True

In [64]:
False | False  # neither side is True, so OR gives False

False

In [65]:
True & False & True  # a chain of ANDs is False as soon as any operand is False

False

In [15]:
# The parenthesized part (True | False) is True; True & False then gives False
(True | False) & False

False

In [16]:
# The parenthesized part (False & False) is False; True | False then gives True
True | (False & False)

True

In [17]:
# Element-wise logical operations on two Boolean Series
TF1 = pd.Series([True, False, False])
TF2 = pd.Series([True, False, True])
TF1 | TF2  # OR, applied element by element

0     True
1    False
2     True
dtype: bool

In [18]:
TF1 & TF2  # element-wise AND: True only where both Series are True

0     True
1    False
2    False
dtype: bool

In [19]:
4 > 3  # comparison operators return a bool

True

In [20]:
"Hello" == "hello" # string comparison is case-sensitive, so these two are not equal

False

In [21]:
"Hello" != "hello"  # != is the negation of ==

True

In [23]:
# x is rebound here to a numeric Series (it held Boolean values earlier in the notebook)
x = pd.Series([3, 2, 1, -2, -1])
x >= 2  # comparing a Series with a scalar is element-wise and returns a Boolean Series

0     True
1     True
2    False
3    False
4    False
dtype: bool

In [24]:
x != 1  # True for every element that differs from 1

0     True
1     True
2    False
3     True
4     True
dtype: bool

In [25]:
# logical conjunction of two vectors with boolean values
# (each comparison needs its own parentheses because & binds more tightly than > and <=)
(x > 0) & (x <= 2)

0    False
1     True
2     True
3    False
4    False
dtype: bool

In [26]:
# logical disjunction of two vectors with boolean values
# (each comparison needs its own parentheses because | binds more tightly than > and <=)
(x > 2) | (x <= -1)

0     True
1    False
2    False
3     True
4     True
dtype: bool

In [27]:
# Boolean Series marking the elements of x that lie in the interval (0, 2]
x_int = (x > 0) & (x <= 2) # logical vector 
x_int

0    False
1     True
2     True
3    False
4    False
dtype: bool

In [28]:
x_int.mean() # proportion of True values (the mean of a Boolean Series)

0.4

In [29]:
x_int.sum() # number of True values (each True is counted as 1)

2

Going back to the resume experiment!

In [66]:
# race recorded for the first five rows (explicit positional slice)
resume['race'].iloc[:5]

0    white
1    white
2    black
3    black
4    white
Name: race, dtype: object

In [67]:
# element-wise comparison: which of the first five rows have race equal to 'black'?
first_five_race = resume['race'].iloc[:5]
first_five_race == 'black'

0    False
1    False
2     True
3     True
4    False
Name: race, dtype: bool

In [68]:
resume.shape # dimensions of the original data frame: (rows, columns)

(4870, 4)

In [69]:
# split the data by race: build a Boolean mask per group, then keep the matching rows
is_black = resume['race'] == 'black'
is_white = resume['race'] == 'white'
resumeB = resume[is_black].copy()  # .copy() gives an independent data frame
resumeW = resume[is_white].copy()
resumeB.shape # this subset has fewer rows than the original data frame

(2435, 4)

In [70]:
resumeB['call'].mean() # callback rate among rows with race == 'black' (mean of the call column)

0.06447638603696099

In [71]:
resumeW['call'].mean() # callback rate among rows with race == 'white' (mean of the call column)

0.09650924024640657

In [72]:
# average the call column within each first name to get a callback rate per name
calls_by_name = resume.groupby('firstname')['call']
callback_name = calls_by_name.mean()

# ascending sort puts the lowest rates first; show the ten lowest
callback_name.sort_values(ascending=True).head(10)

firstname
Aisha       0.022222
Rasheed     0.029851
Keisha      0.038251
Tremayne    0.043478
Kareem      0.046875
Darnell     0.047619
Tyrone      0.053333
Hakim       0.054545
Tamika      0.054688
Lakisha     0.055000
Name: call, dtype: float64

#### Your turn!

Please answer the following questions:

1. How does randomization help the scientific question here?
2. What effect does race have on callback rates?
3. Does that effect vary by gender (the `sex` column in the data)?