In [1]:
import glob
import os
import sys
import numba

import numpy as np
import pandas as pd
import scipy.signal

import bebi103
import bokeh
from bokeh.palettes import Dark2_5 as palette
import itertools
from bokeh.models import Legend, LegendItem
bokeh.io.output_notebook()

Features requiring DataShader will not work and you will get exceptions.
  Features requiring DataShader will not work and you will get exceptions.""")


**From these data, compute the heritability of beak depth in the two species, with confidence intervals. How do they differ, and what consequences might this have for introgressive hybridization?**

We first look at the species scandens.

We load in the data and look at it. We are given some useful information about the columns:

- mid_parent is the average beak depth of a male and female pair in millimeters.

- mid_offspring is the average beak depth of their offspring in millimeters.

In [8]:
# Load the data into DataFrame df
df_scand = pd.read_csv('../data/scandens_beak_depth_heredity.csv', comment = "#")

df_scand.head()

Unnamed: 0,mid_parent,mid_offspring
0,8.3318,8.419
1,8.4035,9.2468
2,8.5317,8.1532
3,8.7202,8.0089
4,8.7089,8.2215


In [9]:
cov_scand = np.cov(df_scand["mid_parent"], df_scand["mid_offspring"])
print("The covariance between the average offspring and average parents is {}".format(cov_scand[0][1]))

The covariance between the average offspring and average parents is 0.05696765903577816


In [10]:
var_scand = np.var(df_scand["mid_parent"])
print("The variance among all average parents is {}".format(var_scand))

The variance among all average parents is 0.10305548484082841


In [11]:
heritability_scand = cov_scand[0][1] / var_scand
print("Finally, we conclude that the heritability of scandens is {}".format(heritability_scand))

Finally, we conclude that the heritability of scandens is 0.5527862890923858


Now, we want to compute the confidence interval of the data. We write functions to bootstrap samples. Then, we calculate the heritability of each sample.

In [12]:
def draw_bs_sample(data):
    """
    Draw a bootstrap sample from a 1D data set.
    """
    return np.random.choice(data, size=len(data))

def draw_bs_reps_her(parent_array, off_array, size=1):
    """
    Draw boostrap replicates of the mean from 1D data set.
    """
    out = np.empty(size)
    for i in range(size):
        index = draw_bs_sample(np.arange(len(parent_array)))
        parent = parent_array[index]
        off = off_array[index]
        cov = np.cov(parent, off)[0][1]
        var = np.var(parent)
        her = cov / var
        out[i] = her
    return out

In [13]:
bs_her_scand = draw_bs_reps_her(df_scand["mid_parent"].values, df_scand["mid_offspring"].values, size=20000)

Now, we can actually compute the 95% confidence intervals.

In [14]:
# 95% confidence intervals
scand_her_conf_int = np.percentile(bs_her_scand, [2.5, 97.5])
scand_her_conf_int

array([0.35359931, 0.75532636])

We now consider the species fortis.

We load in the data and look at it. We are given some useful information about the columns:
- Mid-offspr represents the average beak depth in millimeters of the offspring of two parents.
- Male BD and Female BD represent the parental beak depths in millimeters.

In [15]:
df_fort = pd.read_csv('../data/fortis_beak_depth_heredity.csv', comment = '#')
df_fort.head()

Unnamed: 0,Mid-offspr,Male BD,Female BD
0,10.7,10.9,9.3
1,9.78,10.7,8.4
2,9.48,10.7,8.1
3,9.6,10.7,9.8
4,10.27,9.85,10.4


The first step to computing heritability is to computethe average value of that trait among the offspring of those parents, so we store that value in Mid-parent.

In [16]:
# compute average value of trait for two parents
df_fort["Mid-parent"] = (df_fort["Male BD"] + df_fort["Female BD"]) / 2

In [17]:
df_fort.head()

Unnamed: 0,Mid-offspr,Male BD,Female BD,Mid-parent
0,10.7,10.9,9.3,10.1
1,9.78,10.7,8.4,9.55
2,9.48,10.7,8.1,9.4
3,9.6,10.7,9.8,10.25
4,10.27,9.85,10.4,10.125


Now we will compute the covariance between the average offspring and average parents and the variance among all average parents.

In [18]:
cov_fort = np.cov(df_fort["Mid-offspr"], df_fort["Mid-parent"])
print("The covariance between the average offspring and average parents is {}".format(cov_fort[0][1]))

The covariance between the average offspring and average parents is 0.345044283187193


In [19]:
var_fort = np.var(df_fort["Mid-parent"])
print("The variance among all average parents is {}".format(var_fort))

The variance among all average parents is 0.47614656678528955


In [20]:
heritability_fort = cov_fort[0][1] / var_fort
print("Finally, we conclude that the heritability of fortis is {}".format(heritability_fort))

Finally, we conclude that the heritability of fortis is 0.7246598153941641


We repeat the same process as we went through for Scand to find the confidence interval of Fortis. We first bootstrap using our predefined functions.

In [21]:
bs_her_fort = draw_bs_reps_her(df_fort["Mid-parent"].values, df_fort["Mid-offspr"].values, size=20000)

And then we can calculate the confidence interval.

In [22]:
# 95% confidence intervals
fort_her_conf_int = np.percentile(bs_her_fort, [2.5, 97.5])
fort_her_conf_int

array([0.64838329, 0.80023367])

We now look at how they differ, and what consequences might this have for introgressive hybridization. 

Scandens 95% confidence interval: [0.3524225 , 0.75885995]

Fortis 95% confidence interval: [0.64786953, 0.80049313]

We observe that the Fortis heritability is significantly higher and has a much narrower range of potential values. This means that as introgressive hybridization occurs, the beak geometries of G. scandens will start to resemble Fortis beak geometries much more closely.