In [12]:
import csv
import itertools
import sys

PROBS = {

    # Unconditional probabilities for having gene
    "gene": {
        2: 0.01,
        1: 0.03,
        0: 0.96
    },

    "trait": {

        # Probability of trait given two copies of gene
        2: {
            True: 0.65,
            False: 0.35
        },

        # Probability of trait given one copy of gene
        1: {
            True: 0.56,
            False: 0.44
        },

        # Probability of trait given no gene
        0: {
            True: 0.01,
            False: 0.99
        }
    },

    # Mutation probability
    "mutation": 0.01
}


def load_data(filename):
    """
    Load gene and trait data from a file into a dictionary.
    File assumed to be a CSV containing fields name, mother, father, trait.
    mother, father must both be blank, or both be valid names in the CSV.
    trait should be 0 or 1 if trait is known, blank otherwise.
    """
    data = dict()
    with open(filename) as f:
        reader = csv.DictReader(f)
        for row in reader:
            name = row["name"]
            data[name] = {
                "name": name,
                "mother": row["mother"] or None,
                "father": row["father"] or None,
                "trait": (True if row["trait"] == "1" else
                          False if row["trait"] == "0" else None)
            }
    return data


def powerset(s):
    """
    Return a list of all possible subsets of set s.
    """
    s = list(s)
    return [
        set(s) for s in itertools.chain.from_iterable(
            itertools.combinations(s, r) for r in range(len(s) + 1)
        )
    ]

In [13]:
people = load_data("data/family0.csv")
names = set(people)
powernames = powerset(names)

In [27]:
tester = [people,{"Harry"},{"James"},{"James"}]

In [33]:
def joint_probability(people, one_gene, two_genes, have_trait):
    """
    Compute and return a joint probability.

    The probability returned should be the probability that
        * everyone in set `one_gene` has one copy of the gene, and
        * everyone in set `two_genes` has two copies of the gene, and
        * everyone not in `one_gene` or `two_gene` does not have the gene, and
        * everyone in set `have_trait` has the trait, and
        * everyone not in set` have_trait` does not have the trait.
    """
    #track genes and traits
    genes = dict()
    traits = dict()
    for person in people:
        if person in one_gene:
            genes[person] = 1
        elif person in two_genes:
            genes[person] = 2
        else:
            genes[person] = 0
        
        if person in have_trait:
            traits[person] = True
        else:
            traits[person] = False

    #probabilities
    probability = 1
    for person in people:
        #no parents
        if people[person]['mother'] == people[person]['father']:
            probability *= PROBS["gene"][genes[person]]*PROBS["trait"][genes[person]][traits[person]]
        #parents
        else:
            probability *= heritage(people,person, genes, genes[person]) * PROBS["trait"][genes[person]][traits[person]]
    return probability

#gene probability based on parents
def heritage(people, person, genes, goal):
    father = people[person]['father']
    mother = people[person]['mother']
    pass_prob = {father : float(1), mother: float(1) }
    for parent in pass_prob:
        if genes[parent] == 1:
            pass_prob[parent] = 0.5
        elif genes[parent] == 2:
            pass_prob[parent] = 1 - PROBS['mutation']
        else:
            pass_prob[parent] = PROBS['mutation']
    if goal == 1:
        return pass_prob[father]*(1 - pass_prob[mother]) + pass_prob[mother]*(1 - pass_prob[father])
    elif goal == 2:
        return pass_prob[father]*pass_prob[mother]
    else:
        return (1 - pass_prob[father])*(1 - pass_prob[mother])



In [137]:
probabilities = {
        person: {
            "gene": {
                2: 0,
                1: 0,
                0: 0
            },
            "trait": {
                True: 0,
                False: 0
            }
        }
        for person in people
    }

In [53]:
def update(probabilities, one_gene, two_genes, have_trait, p):
    """
    Add to `probabilities` a new joint probability `p`.
    Each person should have their "gene" and "trait" distributions updated.
    Which value for each distribution is updated depends on whether
    the person is in `have_gene` and `have_trait`, respectively.
    """
    #track genes and traits
    genes = dict()
    traits = dict()
    for person in probabilities:
        if person in one_gene:
            genes[person] = 1
        elif person in two_genes:
            genes[person] = 2
        else:
            genes[person] = 0
        
        if person in have_trait:
            traits[person] = True
        else:
            traits[person] = False
        
        #add probability p
        probabilities[person]['gene'][genes[person]] += p
        probabilities[person]['trait'][traits[person]] += p
    return

In [138]:
def normalize(probabilities):
    """
    Update `probabilities` such that each probability distribution
    is normalized (i.e., sums to 1, with relative proportions the same).
    """
    for person in probabilities:
        gene = probabilities[person]['gene']
        trait = probabilities[person]['trait']
        sum_gene = sum(gene.values())
        sum_trait = sum(trait.values())
        gene = {k: v/sum_gene for k, v in gene.items()} if sum_gene != 0 else {k:1/3 for k in gene}
        probabilities[person]['gene'] = gene
        trait = {k: v/sum_trait for k, v in trait.items()} if sum_trait != 0 else {k:1/3 for k in trait}
        probabilities[person]['trait'] = trait

In [107]:
tester2 = [probabilities, {"Harry"}, {"James"}, {"James"}, 0.0026643247488]

In [108]:
update(*tester2)

In [140]:
normalize(probabilities)

In [141]:
probabilities

{'Harry': {'gene': {2: 0.3333333333333333,
   1: 0.3333333333333333,
   0: 0.3333333333333333},
  'trait': {True: 0.3333333333333333, False: 0.3333333333333333}},
 'James': {'gene': {2: 0.3333333333333333,
   1: 0.3333333333333333,
   0: 0.3333333333333333},
  'trait': {True: 0.3333333333333333, False: 0.3333333333333333}},
 'Lily': {'gene': {2: 0.3333333333333333,
   1: 0.3333333333333333,
   0: 0.3333333333333333},
  'trait': {True: 0.3333333333333333, False: 0.3333333333333333}}}

In [93]:
probabilities['Harry']['gene']

1

In [69]:
sum(new_test.values())

1.0

In [72]:
probabilities.values()

dict_values([{'gene': {2: 0, 1: 0, 0: 0}, 'trait': {True: 0, False: 0}}, {'gene': {2: 0, 1: 0, 0: 0}, 'trait': {True: 0, False: 0}}, {'gene': {2: 0, 1: 0, 0: 0}, 'trait': {True: 0, False: 0}}])

In [81]:
for person in probabilities:
    for gene in probabilities[person]['gene']:
        print(gene)

2
1
0
2
1
0
2
1
0
