In [15]:
import numpy as np
# Comma-separated voting records; later cells read column 0 as the party
# label and the remaining columns as one vote per issue.
file_name = "./data/house-votes-84.data"
# `np.str` was only an alias for the builtin `str`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly.
data = np.genfromtxt(file_name, dtype=str,delimiter=',')

# Build two (2, n_issues) matrices from the raw records:
#   count_matrix[p, i] -- number of 'y' votes cast by party p on issue i
#   total_votes[p, i]  -- number of recorded votes (anything but '?') by party p on issue i
# Row 0 is Republican, row 1 is Democrat.
n_representatives = data.shape[0]
n_issues = data.shape[1]-1

party = data[:, 0]
ballots = data[:, 1:]
is_republican = party == 'republican'
is_democrat = party == 'democrat'

# Head counts per party; rows carrying any other label are ignored throughout.
n_reps = int(np.count_nonzero(is_republican))
n_dems = int(np.count_nonzero(is_democrat))

count_matrix = np.zeros((2,n_issues))
total_votes = np.zeros((2,n_issues))
for party_row, members in enumerate((is_republican, is_democrat)):
    # Column-wise tallies over the representatives of one party.
    count_matrix[party_row,:] = np.count_nonzero(ballots[members] == 'y', axis=0)
    total_votes[party_row,:] = np.count_nonzero(ballots[members] != '?', axis=0)

print("Republican Yes votes")
print(count_matrix[0,:])
print("Republican total votes")
print(total_votes[0,:])
print("Democrat Yes votes")
print(count_matrix[1,:])
# This header used to read "Total votes" although the row is Democrat-only.
print("Democrat total votes")
print(total_votes[1,:])

Republican Yes votes
[ 31.  75.  22. 163. 157. 149.  39.  24.  19.  92.  21. 135. 136. 158.
  14.  96.]
Republican total votes
[165. 148. 164. 165. 165. 166. 162. 157. 165. 165. 159. 155. 158. 161.
 156. 146.]
Democrat Yes votes
[156. 120. 231.  14.  55. 123. 200. 218. 188. 124. 129.  36.  73.  90.
 160. 173.]
Total votes
[258. 239. 260. 259. 255. 258. 259. 263. 248. 263. 255. 249. 252. 257.
 251. 185.]


In [28]:
def empirical_probability(issue):
    """Return the share of recorded votes on `issue` that were 'yes', both parties pooled.

    Reads the module-level `count_matrix` (yes votes) and `total_votes`
    (recorded votes), each indexed as [party_row, issue].
    """
    yes_votes = count_matrix[0,issue] + count_matrix[1,issue]
    recorded_votes = total_votes[0,issue] + total_votes[1,issue]
    return yes_votes / recorded_votes
    
def empirical_probability_intersection(issue_1,issue_2):
    """Tabulate how all representatives voted jointly on two issues.

    Parameters
    ----------
    issue_1, issue_2 : int
        Zero-based issue indices (vote columns start at column 1 of `data`).

    Returns
    -------
    p : ndarray, shape (2, 2)
        `votes` divided by the number of representatives whose vote is not
        '?' on both issues.
    votes : ndarray, shape (2, 2)
        Joint counts; the first axis is the vote on `issue_1`, the second the
        vote on `issue_2`, with index 0 meaning 'y' and index 1 meaning 'n'.
    """
    first = data[:n_representatives, 1+issue_1]
    second = data[:n_representatives, 1+issue_2]

    votes = np.zeros((2,2))
    for row, answer_1 in enumerate(('y', 'n')):
        for col, answer_2 in enumerate(('y', 'n')):
            votes[row, col] = np.count_nonzero((first == answer_1) & (second == answer_2))

    # Denominator: representatives with a recorded vote on both issues.
    total_aux = np.count_nonzero((first != '?') & (second != '?'))
    p = votes/total_aux
    return p, votes

# Compare the joint 'yes' frequency on two issues with the product of their
# marginal frequencies.
issue_1, issue_2 = 2, 14  # adoption of the budget resolution, duty free exports

p_1, p_2 = empirical_probability(issue_1), empirical_probability(issue_2)
p_12, votes_12 = empirical_probability_intersection(issue_1,issue_2)

# One print call, one item per output line.
print(
    "P(Yes on Budget) = {:0.3f}".format( p_1 ),
    "P(Yes on Duty-free exports) = {:0.3f}".format( p_2 ),
    'Counts for Budget / Duty-free exports',
    votes_12,
    "P(Yes on Budget and on Duty-free exports) = {:0.3f}".format( p_12[0,0] ),
    "P(Yes on Budget) P(Yes on Duty-free exports) = {:0.3f}".format( p_1 * p_2 ),
    "P(Yes on Budget | Yes on Duty-free exports) = {:0.3f}".format( p_12[0,0] / p_2 ),
    sep="\n",
)

P(Yes on Budget) = 0.597
P(Yes on Duty-free exports) = 0.428
Counts for Budget / Duty-free exports
[[151.  88.]
 [ 21. 140.]]
P(Yes on Budget and on Duty-free exports) = 0.378
P(Yes on Budget) P(Yes on Duty-free exports) = 0.255
P(Yes on Budget | Yes on Duty-free exports) = 0.883


In [30]:
# Same comparison for a second pair of issues.
issue_3, issue_4 = 6, 9  # anti satellite test ban, immigration

p_3, p_4 = empirical_probability(issue_3), empirical_probability(issue_4)
p_34, votes_34 = empirical_probability_intersection(issue_3,issue_4)

# One print call, one item per output line.
print(
    "P(Yes on Satellite ban) = {:0.3f}".format( p_3 ),
    "P(Yes on Immigration) = {:0.3f}".format( p_4 ),
    'Counts for Satellite ban / Immigration',
    votes_34,
    "P(Yes on Satellite ban and on Immigration) = {:0.3f}".format( p_34[0,0] ),
    "P(Yes on Satellite ban) P(Yes on Immigration) = {:0.3f}".format( p_3 * p_4 ),
    "P(Yes on Satellite ban | Yes on Immigration) = {:0.3f}".format( p_34[0,0] / p_4 ),
    sep="\n",
)

P(Yes on Satellite ban) = 0.568
P(Yes on Immigration) = 0.505
Counts for Satellite ban / Immigration
[[124. 113.]
 [ 89.  93.]]
P(Yes on Satellite ban and on Immigration) = 0.296
P(Yes on Satellite ban) P(Yes on Immigration) = 0.287
P(Yes on Satellite ban | Yes on Immigration) = 0.586


In [38]:
def empirical_conditional_probability(issue,affiliation):
    """Return the share of one party's recorded votes on `issue` that were 'yes'.

    Parameters
    ----------
    issue : int
        Zero-based issue index into `count_matrix` / `total_votes`.
    affiliation : str
        Either "republican" or "democrat".

    Raises
    ------
    ValueError
        If `affiliation` is neither label. Previously every value other than
        "republican" was silently treated as Democrat, so a typo or different
        capitalisation returned the wrong party's figure.
    """
    party_rows = {"republican": 0, "democrat": 1}
    if affiliation not in party_rows:
        raise ValueError(
            "affiliation must be 'republican' or 'democrat', got {!r}".format(affiliation)
        )
    aff = party_rows[affiliation]
    return count_matrix[aff,issue] / total_votes[aff,issue]
    
def empirical_conditional_probability_intersection(issue_1,issue_2,affiliation):
    """Tabulate how the members of one party voted jointly on two issues.

    Parameters
    ----------
    issue_1, issue_2 : int
        Zero-based issue indices (vote columns start at column 1 of `data`).
    affiliation : str
        Party label compared against column 0 of `data`.

    Returns
    -------
    p : ndarray, shape (2, 2)
        `votes` divided by the number of party members whose vote is not '?'
        on both issues.
    votes : ndarray, shape (2, 2)
        Joint counts; the first axis is the vote on `issue_1`, the second the
        vote on `issue_2`, with index 0 meaning 'y' and index 1 meaning 'n'.
    """
    members = data[:n_representatives, 0] == affiliation
    first = data[:n_representatives, 1+issue_1][members]
    second = data[:n_representatives, 1+issue_2][members]

    votes = np.zeros((2,2))
    for row, answer_1 in enumerate(('y', 'n')):
        for col, answer_2 in enumerate(('y', 'n')):
            votes[row, col] = np.count_nonzero((first == answer_1) & (second == answer_2))

    # Denominator: party members with a recorded vote on both issues.
    total_aux = np.count_nonzero((first != '?') & (second != '?'))
    p = votes/total_aux
    return p, votes

# Repeat the satellite-ban / immigration comparison among Republicans only.
p_3_rep, p_4_rep = (
    empirical_conditional_probability(issue_3,"republican"),
    empirical_conditional_probability(issue_4,"republican"),
)
p_34_rep, votes_34_rep = empirical_conditional_probability_intersection(issue_3,issue_4,"republican")

# One print call, one item per output line.
print(
    "P(Yes on Satellite ban | Republican) = {:0.3f}".format( p_3_rep ),
    "P(Yes on Immigration | Republican) = {:0.3f}".format( p_4_rep ),
    'Counts for Satellite ban / Immigration among Republicans',
    votes_34_rep,
    "P(Yes on Satellite ban and on Immigration | Republican) = {:0.3f}".format( p_34_rep[0,0] ),
    "P(Yes on Satellite ban | Republican) P(Yes on Immigration | Republican) = {:0.3f}".format( p_3_rep * p_4_rep ),
    "P(Yes on Satellite ban | Republican, Yes on Immigration) = {:0.3f}".format( p_34_rep[0,0] / p_4_rep ),
    sep="\n",
)

# Repeat the satellite-ban / immigration comparison among Democrats only.
p_3_dem, p_4_dem = (
    empirical_conditional_probability(issue_3,"democrat"),
    empirical_conditional_probability(issue_4,"democrat"),
)
p_34_dem, votes_34_dem = empirical_conditional_probability_intersection(issue_3,issue_4,"democrat")

# One print call, one item per output line.
print(
    "P(Yes on Satellite ban | Democrat) = {:0.3f}".format( p_3_dem ),
    "P(Yes on Immigration| Democrat) = {:0.3f}".format( p_4_dem ),
    'Counts for Satellite ban / Immigration among Democrats',
    votes_34_dem,
    "P(Yes on Satellite ban and on Immigration | Democrat) = {:0.3f}".format( p_34_dem[0,0] ),
    "P(Yes on Satellite ban | Democrat) P(Yes on Immigration | Democrat) = {:0.3f}".format( p_3_dem * p_4_dem ),
    "P(Yes on Satellite ban | Democrat, Yes on Immigration) = {:0.3f}".format( p_34_dem[0,0] / p_4_dem ),
    sep="\n",
)

P(Yes on Satellite ban | Republican) = 0.241
P(Yes on Immigration | Republican) = 0.558
Counts for Satellite ban / Immigration among Republicans
[[25. 14.]
 [66. 57.]]
P(Yes on Satellite ban and on Immigration | Republican) = 0.154
P(Yes on Satellite ban | Republican) P(Yes on Immigration | Republican) = 0.134
P(Yes on Satellite ban | Republican, Yes on Immigration) = 0.277
P(Yes on Satellite ban | Republican) = 0.772
P(Yes on Immigration| Democrat) = 0.471
Counts for Satellite ban / Immigration among Democrats
[[99. 99.]
 [23. 36.]]
P(Yes on Satellite ban and on Immigration | Democrat) = 0.385
P(Yes on Satellite ban | Democrat) P(Yes on Immigration | Democrat) = 0.364
P(Yes on Satellite ban | Democrat, Yes on Immigration) = 0.817


In [36]:
# Zero-based indices of the two issues under study. The original notes list
# 2 and 14 as the alternative pair and remark that 6 and 9 are independent.
i1, i2 = 6, 9

# Pooled (both parties) frequency of a 'yes' on each issue.
yes_i1 = count_matrix[:, i1].sum()
yes_i2 = count_matrix[:, i2].sum()
p_i1 = yes_i1 / total_votes[:, i1].sum()
p_i2 = yes_i2 / total_votes[:, i2].sum()
print('P(Vote {} = Yes) = {}'.format(i1, p_i1))
print('P(Vote {} = Yes) = {}'.format(i2, p_i2))

print('Vote {} = Yes: {}'.format(i1, yes_i1))
print('Vote {} = Yes: {}'.format(i2, yes_i2))


def _joint_vote_counts(issue_a, issue_b, affiliation=None):
    """Count the four yes/no combinations on two issues, optionally within one party.

    Returns the tuple (yes_yes, yes_no, no_yes, no_no, total) of plain ints,
    where `total` is the number of representatives whose vote is not '?' on
    both issues. When `affiliation` is given, only rows whose column 0 equals
    it are counted.
    """
    votes_a = data[:n_representatives, 1+issue_a]
    votes_b = data[:n_representatives, 1+issue_b]
    if affiliation is not None:
        members = data[:n_representatives, 0] == affiliation
        votes_a, votes_b = votes_a[members], votes_b[members]
    combos = [
        int(np.count_nonzero((votes_a == answer_a) & (votes_b == answer_b)))
        for answer_a in ('y', 'n')
        for answer_b in ('y', 'n')
    ]
    total = int(np.count_nonzero((votes_a != '?') & (votes_b != '?')))
    return tuple(combos) + (total,)


def _print_joint_counts(issue_a, issue_b, yes_yes, yes_no, no_yes, no_no, total):
    """Print the four joint counts and their denominator, one per line."""
    print('Vote {} = Yes and Vote {} = Yes: {}'.format(issue_a, issue_b, yes_yes))
    print('Vote {} = Yes and Vote {} = No: {}'.format(issue_a, issue_b, yes_no))
    print('Vote {} = No and Vote {} = Yes: {}'.format(issue_a, issue_b, no_yes))
    print('Vote {} = No and Vote {} = No: {}'.format(issue_a, issue_b, no_no))
    print('Total votes: {}'.format(total))


# --- All representatives -----------------------------------------------------
count_yes_yes, count_yes_no, count_no_yes, count_no_no, total_aux = _joint_vote_counts(i1, i2)
_print_joint_counts(i1, i2, count_yes_yes, count_yes_no, count_no_yes, count_no_no, total_aux)

print('P(Vote ' + str(i1) +' = Yes and Vote ' + str(i2) +' = Yes) = ' + str( count_yes_yes / total_aux ))
print('P(Vote ' + str(i1) +' = Yes) P(Vote ' + str(i2) +' = Yes) = ' + str( p_i1 * p_i2 ))

# --- Republicans (row 0 of count_matrix / total_votes) ------------------------
p_i1_r = count_matrix[0,i1] / total_votes[0,i1]
p_i2_r = count_matrix[0,i2] / total_votes[0,i2]

print('P(Vote ' + str(i1) +' = Yes | Rep ) = ' + str( p_i1_r ))
print('P(Vote ' + str(i2) +' = Yes | Rep) = ' + str( p_i2_r ))

count_yes_yes, count_yes_no, count_no_yes, count_no_no, total_aux = _joint_vote_counts(i1, i2, 'republican')

print('Republicans')
_print_joint_counts(i1, i2, count_yes_yes, count_yes_no, count_no_yes, count_no_no, total_aux)

print('P(Vote ' + str(i1) +' = Yes and Vote ' + str(i2) +' = Yes | Rep) = ' + str( count_yes_yes / total_aux ))
# Both factors are conditional on the party; the label used to omit the
# condition on the first factor.
print('P(Vote ' + str(i1) +' = Yes | Rep) P(Vote ' + str(i2) +' = Yes | Rep) = ' + str( p_i1_r * p_i2_r ))

# --- Democrats (row 1 of count_matrix / total_votes) --------------------------
p_i1_d = count_matrix[1,i1] / total_votes[1,i1]
p_i2_d = count_matrix[1,i2] / total_votes[1,i2]

print('P(Vote ' + str(i1) +' = Yes | Dem ) = ' + str( p_i1_d ))
print('P(Vote ' + str(i2) +' = Yes | Dem) = ' + str( p_i2_d ))

count_yes_yes, count_yes_no, count_no_yes, count_no_no, total_aux = _joint_vote_counts(i1, i2, 'democrat')

print('Democrats')
_print_joint_counts(i1, i2, count_yes_yes, count_yes_no, count_no_yes, count_no_no, total_aux)

print('P(Vote ' + str(i1) +' = Yes and Vote ' + str(i2) +' = Yes | Dem) = ' + str( count_yes_yes / total_aux ))
# Same label correction as for the Republican product.
print('P(Vote ' + str(i1) +' = Yes | Dem) P(Vote ' + str(i2) +' = Yes | Dem) = ' + str( p_i1_d * p_i2_d ))

P(Vote 6 = Yes) = 0.5676959619952494
P(Vote 9 = Yes) = 0.5046728971962616
Vote 6 = Yes: 239.0
Vote 9 = Yes: 216.0
Vote 6 = Yes and Vote 9 = Yes: 124
Vote 6 = Yes and Vote 9 = No: 113
Vote 6 = No and Vote 9 = Yes: 89
Vote 6 = No and Vote 9 = No: 93
Total votes: 419
P(Vote 6 = Yes and Vote 9 = Yes) = 0.29594272076372313
P(Vote 6 = Yes) P(Vote 9 = Yes) = 0.2865007658667614
P(Vote 6 = Yes | Rep ) = 0.24074074074074073
P(Vote 9 = Yes | Rep) = 0.5575757575757576
Republicans
Vote 6 = Yes and Vote 9 = Yes: 25
Vote 6 = Yes and Vote 9 = No: 14
Vote 6 = No and Vote 9 = Yes: 66
Vote 6 = No and Vote 9 = No: 57
Total votes: 162
P(Vote 6 = Yes and Vote 9 = Yes | Rep) = 0.15432098765432098
P(Vote 6 = Yes) P(Vote 9 = Yes | Rep) = 0.13423120089786755
P(Vote 6 = Yes | Dem ) = 0.7722007722007722
P(Vote 9 = Yes | Dem) = 0.4714828897338403
Democrats
Vote 6 = Yes and Vote 9 = Yes: 99
Vote 6 = Yes and Vote 9 = No: 99
Vote 6 = No and Vote 9 = Yes: 23
Vote 6 = No and Vote 9 = No: 36
Total votes: 257
P(Vote 6 = 