In [7]:
import csv
import itertools
import sys

PROBS = {

    # Unconditional probabilities for having gene
    "gene": {
        2: 0.01,
        1: 0.03,
        0: 0.96
    },

    "trait": {

        # Probability of trait given two copies of gene
        2: {
            True: 0.65,
            False: 0.35
        },

        # Probability of trait given one copy of gene
        1: {
            True: 0.56,
            False: 0.44
        },

        # Probability of trait given no gene
        0: {
            True: 0.01,
            False: 0.99
        }
    },

    # Mutation probability
    "mutation": 0.01
}



In [8]:


def load_data(filename):
    """
    Load gene and trait data from a file into a dictionary.
    File assumed to be a CSV containing fields name, mother, father, trait.
    mother, father must both be blank, or both be valid names in the CSV.
    trait should be 0 or 1 if trait is known, blank otherwise.
    """
    data = dict()
    with open(filename) as f:
        reader = csv.DictReader(f)
        for row in reader:
            name = row["name"]
            data[name] = {
                "name": name,
                "mother": row["mother"] or None,
                "father": row["father"] or None,
                "trait": (True if row["trait"] == "1" else
                          False if row["trait"] == "0" else None)
            }
    return data


def powerset(s):
    """
    Return a list of all possible subsets of set s.
    """
    s = list(s)
    return [
        set(s) for s in itertools.chain.from_iterable(
            itertools.combinations(s, r) for r in range(len(s) + 1)
        )
    ]


In [28]:
def joint_probability(people, one_gene, two_genes, have_trait):
    """
    Compute and return a joint probability.

    The probability returned should be the probability that
        * everyone in set `one_gene` has one copy of the gene, and
        * everyone in set `two_genes` has two copies of the gene, and
        * everyone not in `one_gene` or `two_gene` does not have the gene, and
        * everyone in set `have_trait` has the trait, and
        * everyone not in set` have_trait` does not have the trait.
    """
    #nogenes = people - one_gene -two_genes
    #notraits = people - have_traits
    jp = 1
    for person in people:
        print(person in one_gene)
        if person in one_gene:
            if people[person]['mother'] is None: #if no parents
                if person in have_trait:
                    p = PROBS["gene"][1]*PROBS["trait"][1][True]
                    print(PROBS["gene"][1],PROBS["trait"][1][True],"no paretns probs")
                else:
                    p = PROBS["gene"][1]*PROBS["trait"][1][False]

            else: #if parents
                if people[person]['mother'] in one_gene:
                    pma= 0.5
                elif people[person]['mother'] in two_genes:
                    pma = 1-PROBS["mutation"]
                else:
                    pma =PROBS["mutation"]
                
                if people[person]['father'] in one_gene:
                    pfa = 0.5
                elif people[person]['father'] in two_genes:
                    pfa = 1-PROBS["mutation"]
                else:
                    pfa = PROBS["mutation"]
                
                p = pfa*(1-pma) + (1-pfa)*pma
                print(p,"parents probs")

                if person in have_trait:
                    p = p*PROBS["trait"][1][True]
                    print(PROBS["gene"][1],PROBS["trait"][1][True],"probs w parents")
                else:
                    p = p*PROBS["trait"][1][False]
                
            jp = jp*p
            print(jp,"jp one gene")
        elif person in two_genes:
            if people[person]['mother'] is None: #if no parents
                if person in have_trait:
                    p = PROBS["gene"][2]*PROBS["trait"][2][True]
                    print(PROBS["gene"][2],PROBS["trait"][2][True],"no paretns probs,2genes")
                else:
                    p = PROBS["gene"][2]*PROBS["trait"][2][False]

            else: #if parents
                if people[person]['mother'] in one_gene:
                    pma= 0.5
                elif people[person]['mother'] in two_genes:
                    pma = 1-PROBS["mutation"]
                else:
                    pma =PROBS["mutation"]
                
                if people[person]['father'] in one_gene:
                    pfa = 0.5
                elif people[person]['father'] in two_genes:
                    pfa = 1-PROBS["mutation"]
                else:
                    pfa = PROBS["mutation"]
                
                p = pfa*pma
                print(p,"parents probs,2")

                if person in have_trait:
                    p = p*PROBS["trait"][2][True]
                    print(PROBS["gene"][2],PROBS["trait"][2][True],"probs w parents,2")
                else:
                    p = p*PROBS["trait"][2][False]
                
            jp = jp*p
            print(jp,"jp two gene")
                
        else:
            if people[person]['mother'] is None: #if no parents
                if person in have_trait:
                    p = PROBS["gene"][0]*PROBS["trait"][0][True]
                    print(PROBS["gene"][0],PROBS["trait"][0][True],"no paretns probs,0genes")
                else:
                    p = PROBS["gene"][0]*PROBS["trait"][0][False]

            else: #if parents
                if people[person]['mother'] in one_gene:
                    pma= 0.5
                elif people[person]['mother'] in two_genes:
                    pma = 1-PROBS["mutation"]
                else:
                    pma =PROBS["mutation"]
                
                if people[person]['father'] in one_gene:
                    pfa = 0.5
                elif people[person]['father'] in two_genes:
                    pfa = 1-PROBS["mutation"]
                else:
                    pfa = PROBS["mutation"]
                
                p = (1-pfa)*(1-pma)
                print(p,"parents probs,0")

                if person in have_trait:
                    p = p*PROBS["trait"][0][True]
                    print(PROBS["gene"][0],PROBS["trait"][0][True],"probs w parents,0")
                else:
                    p = p*PROBS["trait"][0][False]
                
            jp = jp*p
            print(jp,"jp 0 gene")

    print(jp)
    return jp
    raise NotImplementedError

In [6]:
dictabc = {'Harry': {'name': 'Harry', 'mother': 'Lily', 'father': 'James', 'trait': None}, 'James': {'name': 'James', 'mother': None, 'father': None, 'trait': True}, 'Lily': {'name': 'Lily', 'mother': None, 'father': None, 'trait': False}}
for abc in dictabc:
    print(abc)
    print(dictabc[abc]['mother'])

Harry
Lily
James
None
Lily
None


In [30]:
joint_probability(dictabc, {"Harry"}, {"James"}, {"James"})

True
0.9802 parents probs
0.431288 jp one gene
False
0.01 0.65 no paretns probs,2genes
0.002803372 jp two gene
False
0.0026643247488 jp 0 gene
0.0026643247488


0.0026643247488

In [None]:

def update(probabilities, one_gene, two_genes, have_trait, p):
    """
    Add to `probabilities` a new joint probability `p`.
    Each person should have their "gene" and "trait" distributions updated.
    Which value for each distribution is updated depends on whether
    the person is in `have_gene` and `have_trait`, respectively.
    """
    #raise NotImplementedError


def normalize(probabilities):
    """
    Update `probabilities` such that each probability distribution
    is normalized (i.e., sums to 1, with relative proportions the same).
    """
    #raise NotImplementedError
    