# INF-396 Introducción a la Ciencia de Datos
## Tarea 1 Redes Bayesianas
### Ignas Pileckas, estudiante de intercambio

#### Pregunta 1 (50%): Programando una red bayesiana

In [1]:
# Las variables del modelo son: 
#    Si el paciente visitó Asia (A ∈ {0,1}),
#    Fumador (F ∈ {0,1}), Tuberculosis (T ∈ {0, 1}),
#    Cancer de pulmón (C ∈ {0, 1}), Bronquitis (B ∈ {0, 1}), 
#    Si el paciente tiene Tuberculosis o Cancer (E ∈ {0,1}), 
#    Resultados rayos X (X ∈ {0, 1}), 
#    Disnea (D ∈ {0, 1}).

In [2]:
# P(A) = 0.01
# P(F) = 0.5
# P(T|A) = 0.05, P(T|no A) = 0.01
# P(C|F) = 0.1, P(C| no F) = 0.01
# P(B|F) = 0.6, P(B| no F) = 0.3
# P(E|C,T)=1,P(E|C,noT)=1,P(E|noC,T)=1,P(E|noC,noT)=0
# P(X|E) = 0.98, P(X| no E) = 0.05
# P(D|E,B) = 0.9, P(D|E, no B) = 0.7, P(D|no E, B) = 0.8, P(D|no E, no B) = 0.1

In [21]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD

# Joint distribution factorised according to the network structure:
# P(A,F,T,C,B,E,X,D) =
#     P(A) * P(F) * P(T|A) * P(C|F) * P(B|F) * P(E|C,T) * P(X|E) * P(D|E,B)

# Directed edges (parent, child) of the network.
model = BayesianModel([
    ('A', 'T'),
    ('F', 'C'), ('F', 'B'),
    ('T', 'E'), ('C', 'E'),
    ('E', 'X'), ('E', 'D'),
    ('B', 'D'),
])

# State labels are declared once per variable so that a variable carries the
# same labels in its own CPD and in every CPD where it appears as evidence.
# For every variable, index 0 is the "yes" state and index 1 the "no" state.
state_labels = {
    'A': ['Visitado asia', 'No ha visitado asia'],
    'F': ['Fumador', 'No fume'],
    'T': ['Tuberculosis', 'No tuberculosis'],
    'C': ['Cancer de pulmón', 'No cancer de pulmón'],
    'B': ['Bronquitis', 'No bronquitis'],
    'E': ['Tuberculosis o Cancer', 'Ni tuberculosis, ni cancer'],
    'X': ['Resultados rayos X', 'No hay resultados rayos X'],
    'D': ['Disnea', 'No disnea'],
}


def _state_names(*variables):
    """Return the ``state_names`` mapping restricted to the given variables."""
    return {name: state_labels[name] for name in variables}


# In every table below, rows follow the variable's states and columns follow
# the evidence states in the order given by `evidence`, with the last evidence
# variable changing fastest (pgmpy TabularCPD convention).

# P(A): root node, so the table is a single column with one row per state.
cpd_a = TabularCPD(variable='A', variable_card=2,
                   values=[[0.01],
                           [0.99]],
                   state_names=_state_names('A'))

# P(F): root node.
cpd_f = TabularCPD(variable='F', variable_card=2,
                   values=[[0.5],
                           [0.5]],
                   state_names=_state_names('F'))

# P(T|A); columns: A, no A.
cpd_t = TabularCPD(variable='T', variable_card=2,
                   values=[[0.05, 0.01],
                           [0.95, 0.99]],
                   evidence=['A'],
                   evidence_card=[2],
                   state_names=_state_names('T', 'A'))

# P(C|F); columns: F, no F.
cpd_c = TabularCPD(variable='C', variable_card=2,
                   values=[[0.1, 0.01],
                           [0.9, 0.99]],
                   evidence=['F'],
                   evidence_card=[2],
                   state_names=_state_names('C', 'F'))

# P(B|F); columns: F, no F.
cpd_b = TabularCPD(variable='B', variable_card=2,
                   values=[[0.6, 0.3],
                           [0.4, 0.7]],
                   evidence=['F'],
                   evidence_card=[2],
                   state_names=_state_names('B', 'F'))

# P(E|C,T): E is binary and is the deterministic OR of C and T, so it must
# have cardinality 2 to agree with how X and D condition on it.
# Columns: (C, T), (C, no T), (no C, T), (no C, no T).
cpd_e = TabularCPD(variable='E', variable_card=2,
                   values=[[1.0, 1.0, 1.0, 0.0],
                           [0.0, 0.0, 0.0, 1.0]],
                   evidence=['C', 'T'],
                   evidence_card=[2, 2],
                   state_names=_state_names('E', 'C', 'T'))

# P(X|E); columns: E, no E.
cpd_x = TabularCPD(variable='X', variable_card=2,
                   values=[[0.98, 0.05],
                           [0.02, 0.95]],
                   evidence=['E'],
                   evidence_card=[2],
                   state_names=_state_names('X', 'E'))

# P(D|E,B); columns: (E, B), (E, no B), (no E, B), (no E, no B).
cpd_d = TabularCPD(variable='D', variable_card=2,
                   values=[[0.9, 0.7, 0.8, 0.1],
                           [0.1, 0.3, 0.2, 0.9]],
                   evidence=['E', 'B'],
                   evidence_card=[2, 2],
                   state_names=_state_names('D', 'E', 'B'))

model.add_cpds(cpd_a, cpd_f, cpd_t, cpd_c, cpd_b, cpd_e, cpd_x, cpd_d)

# Validate the CPDs against the graph structure; as the last expression of
# the cell, its result is displayed by the notebook.
model.check_model()



+------------------------+------------+------------+
| F                      | F(Fumador) | F(No fume) |
+------------------------+------------+------------+
| C(Cancer de pulmón)    | 0.1        | 0.01       |
+------------------------+------------+------------+
| C(No cancer de pulmón) | 0.9        | 0.99       |
+------------------------+------------+------------+


### Consultas

In [25]:
# The probability of lung cancer is given by the conditional table P(C|F).
# Assuming the patient is a smoker, read the "Fumador" column:

cpd_cancer = model.get_cpds('C')
print(cpd_cancer)

# P(C|F) = 0.1



+------------------------+------------+------------+
| F                      | F(Fumador) | F(No fume) |
+------------------------+------------+------------+
| C(Cancer de pulmón)    | 0.1        | 0.01       |
+------------------------+------------+------------+
| C(No cancer de pulmón) | 0.9        | 0.99       |
+------------------------+------------+------------+


In [34]:
# Local independence assertions of C in the network; as the last expression
# of the cell, the result is displayed by the notebook.
c_independencies = model.local_independencies('C')
c_independencies




(C _|_ B, A, T | F)