# Ejercicio 3

## Read Data

In [1]:
import pandas as pd
# Load the dataset from a path relative to the notebook's working directory.
# The displayed frame has columns admit, gre, gpa, rank plus an "Unnamed: 0"
# column (presumably a saved row index in the CSV -- TODO confirm).
data = pd.read_csv('datasets/binary.csv')
# Bare last expression so the notebook renders the frame.
data

Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.00,1
3,1,640,3.19,4
4,0,520,2.93,4
...,...,...,...,...
395,0,620,4.00,2
396,0,560,3.04,3
397,0,460,2.63,2
398,0,700,3.65,2


## Map Data to Categories

In [2]:
# Cut-offs used to turn the two continuous features into binary categories:
# a value at or above the threshold maps to 1, anything else to 0.
GPA_THRESHOLD = 3
GRE_THRESHOLD = 500

# Vectorised comparisons (one boolean per row) instead of a row-wise
# ``DataFrame.apply`` with a Python lambda.
gpa_is_high = data['gpa'] >= GPA_THRESHOLD
gre_is_high = data['gre'] >= GRE_THRESHOLD

# Number of rows expected to become 1 in each column.
print(int(gpa_is_high.sum()))
print(int(gre_is_high.sum()))

# NOTE: the raw columns are overwritten in place, so this cell is NOT
# idempotent. Re-running it without reloading ``data`` would compare the 0/1
# values against the thresholds and set both columns to 0 everywhere.
data['gpa'] = gpa_is_high.astype('int64')
data['gre'] = gre_is_high.astype('int64')

# Checking map was successful: these counts must match the two printed above.
print(len(data[data['gpa'] == 1]))
print(len(data[data['gre'] == 1]))
data

333
322
333
322


Unnamed: 0,admit,gre,gpa,rank
0,0,0,1,3
1,1,1,1,3
2,1,1,1,1
3,1,1,1,4
4,0,1,0,4
...,...,...,...,...
395,0,1,1,2
396,0,1,1,3
397,0,0,0,2
398,0,1,1,2


## Define probability functions

In [3]:
def p_R(r):
    """Estimate P(R=r) from the global ``data`` frame with Laplace smoothing.

    Args:
        r: value of the ``rank`` column to estimate the probability for.

    Returns:
        ``(count(rank == r) + 1) / (number of rows + 4)``. The ``+ 4``
        presumably reflects four possible rank values (the displayed data
        shows ranks 1-4) -- verify against the dataset.
    """
    total_rows = len(data['rank'])
    # Summing the boolean mask counts the rows whose rank equals r.
    rows_with_rank = int((data['rank'] == r).sum())
    return (rows_with_rank + 1) / (total_rows + 4)

def p_GRE_given_R(gre, r):
    """Estimate P(GRE=gre | R=r) from the global ``data`` frame with Laplace smoothing.

    Args:
        gre: value of the ``gre`` column (0 or 1 after the category mapping).
        r: value of the ``rank`` column being conditioned on.

    Returns:
        ``(count(gre, r) + 1) / (count(r) + 2)``; the ``+ 2`` matches the two
        categories of the binarised ``gre`` column.
    """
    has_rank = data['rank'] == r
    rank_count = int(has_rank.sum())
    # Rows that have both the requested rank and the requested GRE category.
    joint_count = int((has_rank & (data['gre'] == gre)).sum())
    return (joint_count + 1) / (rank_count + 2)

def p_GPA_given_R(gpa, r):
    """Estimate P(GPA=gpa | R=r) from the global ``data`` frame with Laplace smoothing.

    Args:
        gpa: value of the ``gpa`` column (0 or 1 after the category mapping).
        r: value of the ``rank`` column being conditioned on.

    Returns:
        ``(count(gpa, r) + 1) / (count(r) + 2)``; the ``+ 2`` matches the two
        categories of the binarised ``gpa`` column.
    """
    has_rank = data['rank'] == r
    rank_count = int(has_rank.sum())
    # Rows that have both the requested rank and the requested GPA category.
    joint_count = int((has_rank & (data['gpa'] == gpa)).sum())
    return (joint_count + 1) / (rank_count + 2)

def p_A_given_GPA_and_GRE_and_R(a, gpa, gre, r):
    """Estimate P(A=a | GPA=gpa, GRE=gre, R=r) from the global ``data`` frame.

    Uses Laplace smoothing with one pseudo-count per value of ``admit``.

    Args:
        a: value of the ``admit`` column (0 or 1).
        gpa: value of the binarised ``gpa`` column.
        gre: value of the binarised ``gre`` column.
        r: value of the ``rank`` column.

    Returns:
        ``(count(a, gpa, gre, r) + 1) / (count(gpa, gre, r) + 2)``.
    """
    # Rows matching the conditioning evidence (GPA, GRE and rank).
    evidence = (data['gpa'] == gpa) & (data['gre'] == gre) & (data['rank'] == r)
    evidence_count = int(evidence.sum())
    # Of those rows, the ones with the requested admission outcome.
    outcome_count = int(((data['admit'] == a) & evidence).sum())
    return (outcome_count + 1) / (evidence_count + 2)

## Calculate Probabilities

### a) $P(A=0|R=1)$

$$
P(A=0|R=1) = \frac{P(A=0, R=1)}{P(R=1)}
\\
= \frac{\sum\limits_{gre \in \{0,1\}}\sum\limits_{gpa \in \{0,1\}}P(A=0, GPA=gpa, GRE=gre, R=1)}{P(R=1)}
$$


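Each term of the sum is factorized as the product of the conditional probabilities estimated above, for example:

$$
\begin{aligned}
P(A=0, GPA=0, GRE=0, R=1) ={} & P(A=0 \mid GPA=0, GRE=0, R=1) \\
& \cdot P(GRE=0 \mid R=1) \\
& \cdot P(GPA=0 \mid R=1) \\
& \cdot P(R=1)
\end{aligned}
$$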

### b) $P(A=1|R=2, GRE=0, GPA=1)$

$$
P(A=1|R=2, GRE=0, GPA=1) = \frac{P(A=1, R=2, GRE=0, GPA=1)}{P(R=2, GRE=0, GPA=1)}
\\
\\
= \frac{P(A=1, R=2, GRE=0, GPA=1)}{\sum\limits_{a \in \{0,1\}}P(A=a, R=2, GRE=0, GPA=1)}
$$

In [4]:
def joint_probability(a, gpa, gre, r):
    """Return P(A=a, GPA=gpa, GRE=gre, R=r) as a product of four estimates.

    The joint is computed as
    P(A | GPA, GRE, R) * P(GRE | R) * P(GPA | R) * P(R).

    Args:
        a: admission outcome (0 or 1).
        gpa: binarised GPA category.
        gre: binarised GRE category.
        r: rank value.
    """
    admit_term = p_A_given_GPA_and_GRE_and_R(a, gpa, gre, r)
    gre_term = p_GRE_given_R(gre, r)
    gpa_term = p_GPA_given_R(gpa, r)
    rank_prior = p_R(r)
    # Multiply left to right in this fixed order so the float result is stable.
    return admit_term * gre_term * gpa_term * rank_prior

def ex_a():
    """Print P(A=0 | R=1).

    The numerator P(A=0, R=1) is obtained by summing the joint probability
    over every combination of the binary GRE and GPA categories; it is then
    divided by P(R=1).
    """
    gre_gpa_pairs = [(gre, gpa) for gre in (0, 1) for gpa in (0, 1)]
    # Accumulate explicitly (rather than shadowing or calling builtin sum).
    numerator = 0
    for gre, gpa in gre_gpa_pairs:
        numerator += joint_probability(0, gpa, gre, 1)
    rank_prior = p_R(1)
    print(f"P(A=0|R=1) = {numerator/rank_prior}")
def ex_b():
    """Print P(A=1 | R=2, GRE=0, GPA=1) and its normalising constant.

    The conditional is the joint probability with A=1 divided by the sum of
    the joints over both admission outcomes for the same evidence.
    """
    joint_rejected = joint_probability(0, 1, 0, 2)
    joint_admitted = joint_probability(1, 1, 0, 2)
    # Same addition order as before (A=0 term first) to keep the float stable.
    evidence_total = joint_rejected + joint_admitted
    print(f"P(A=1|R=2, GRE=0, GPA=1) = {joint_admitted/evidence_total}")
    print(f"SUM {evidence_total}")
# Run both exercises; each one prints its result to the cell output.
ex_a()
ex_b()

P(A=0|R=1) = 0.4357351103706788
P(A=1|R=2, GRE=0, GPA=1) = 0.2173913043478261
SUM0.05872836418589955
