In [None]:
"""
Plot heatmaps of haplotype co-occurrence and rate of inheritance.
This requires a TSV where each row is an individual.
There should be at least six columns, two being the haplotypes of the individual, two for the sire, and two for the dam.
"""

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Placeholder location of the input TSV; point this at the real file before
# running the example calls, which pass it as the first argument.
path = "/path/to/file.tsv"

def _italicize_gene(gene: str) -> str:
    """Return ``gene`` as matplotlib mathtext with every hyphen-separated part italicized.

    Hyphens themselves stay outside the ``$...$`` spans so they are not italicized.
    """
    # Raw string: "\i" is not a valid Python escape sequence, so a plain
    # literal triggers an invalid-escape warning on current interpreters.
    return "-".join(r"$\it{" + part + "}$" for part in gene.split("-"))


def _symmetric_pair_counts(haplotypes, first_col: str, second_col: str, labels):
    """Count unordered haplotype pairs from two columns as a square labels-by-labels matrix.

    Pairs involving a value that is not in ``labels`` are discarded; label
    pairs that never occur get a count of 0.
    """
    full_index = pd.MultiIndex.from_product([labels, labels], names=[first_col, second_col])
    ordered_counts = (
        haplotypes.groupby([first_col, second_col])
        .size()
        .reindex(full_index, fill_value=0)  # keep only known labels, zero-fill unseen pairs
        .unstack()
    )

    # (A, B) and (B, A) are the same unordered pair, so add the transpose.
    # That doubles the diagonal (A, A), hence the divisor of 2 there.
    # NOTE(review): the divisor is applied positionally; this relies on rows
    # and columns coming out of unstack() in the same label order.
    divisor = np.ones(ordered_counts.shape)
    np.fill_diagonal(divisor, 2)
    return (ordered_counts + ordered_counts.transpose()) / divisor


def haplotype_cooccurence(path, gene: str, indiv_hap1: str, indiv_hap2: str, sire_hap1: str, sire_hap2: str, dam_hap1: str, dam_hap2: str, calc_rate: bool = False, min_parent_count: int = 20) -> None:
    """Plot an annotated heatmap of haplotype co-occurrence for one gene.

    Parameters
    ----------
    path
        Tab-separated file readable by ``pandas.read_table``.
    gene
        Gene name used in the title and axis labels; each hyphen-separated
        part is rendered in italics.
    indiv_hap1, indiv_hap2
        Column names holding the two haplotypes of each individual.
    sire_hap1, sire_hap2, dam_hap1, dam_hap2
        Column names holding the parental haplotypes. They are only read
        when ``calc_rate`` is true.
    calc_rate
        If false (default), plot raw counts of each unordered haplotype pair.
        If true, divide those counts by the summed counts of the four
        sire-by-dam column pairings and plot ``(2 * sqrt(ratio) - 1) * 100``
        on a fixed -100..100 colour scale.
    min_parent_count
        Only used when ``calc_rate`` is true: cells whose parental count is
        below this value are left blank, to avoid inferring rates from
        under-sampled pairings.

    Raises
    ------
    ValueError
        If no complete rows remain after dropping rows with missing values.
    """
    stylized_gene = _italicize_gene(gene)

    # Read the file once, and request parental columns only when they are used,
    # so count-only plots also work on files without sire/dam columns.
    columns = [indiv_hap1, indiv_hap2]
    if calc_rate:
        columns += [sire_hap1, sire_hap2, dam_hap1, dam_hap2]
    haplotypes = pd.read_table(path, usecols=columns).dropna()
    # Remove parentheses, which represent the inferred haplotypes.
    haplotypes = haplotypes.replace(r"[()]", "", regex=True)

    combined = pd.concat([haplotypes[indiv_hap1], haplotypes[indiv_hap2]]).unique()
    if len(combined) == 0:
        raise ValueError(f"No complete haplotype rows found in {path!r} for columns {columns}")

    haplotype_matrix = _symmetric_pair_counts(haplotypes, indiv_hap1, indiv_hap2, combined)

    if not calc_rate:
        # Blank out pairs that were never observed so they are not annotated as 0.
        haplotype_matrix = haplotype_matrix.replace(0, np.nan)
        title_text = "Counts of Co-occurence of"
        fmt = "g"
        cmap = sns.cubehelix_palette(as_cmap=True)
        vmax = None
        vmin = None
    else:
        # Normalize by the pairings available from the parents: every
        # sire haplotype column combined with every dam haplotype column.
        parent_matrix_all = sum(
            _symmetric_pair_counts(haplotypes, sire_col, dam_col, combined)
            for sire_col in (sire_hap1, sire_hap2)
            for dam_col in (dam_hap1, dam_hap2)
        )
        # Blank under-sampled parental pairings instead of reporting a rate for them.
        parent_matrix_all = parent_matrix_all.where(parent_matrix_all >= min_parent_count)

        # Divide the individual matrix by the total possible parental combinations.
        haplotype_matrix = haplotype_matrix / parent_matrix_all
        haplotype_matrix = (2 * np.sqrt(haplotype_matrix) - 1) * 100
        title_text = "Rate of Inheritance of"
        fmt = ".0f"
        cmap = "twilight_shifted"
        vmax = 100
        vmin = -100

    # Scale the figure with the number of distinct haplotypes so cells stay legible.
    _, ax = plt.subplots(figsize=(3 / 6 * len(combined), 2 / 6 * len(combined)))
    # Draw on the axes created above rather than relying on the implicit current axes.
    sns.heatmap(haplotype_matrix, annot=True, fmt=fmt, linewidth=0.02, cmap=cmap, vmax=vmax, vmin=vmin, ax=ax)
    ax.set_title(f'{title_text} {stylized_gene} Haplotypes')
    ax.set(xlabel=f"{stylized_gene} Haplotypes", ylabel=f"{stylized_gene} Haplotypes")

In [None]:
## Examples
# Rate-of-inheritance heatmap for Mamu-B (parental columns are used).
haplotype_cooccurence(
    path,
    gene="Mamu-B",
    indiv_hap1="2022 Mamu-B  1",
    indiv_hap2="2022 Mamu-B  2",
    sire_hap1="2022 Mamu-B  1 Sire",
    sire_hap2="2022 Mamu-B  2 Sire",
    dam_hap1="2022 Mamu-B  1 Dam",
    dam_hap2="2022 Mamu-B  2 Dam",
    calc_rate=True,
)
# Raw co-occurrence counts for Mamu-DQA.
haplotype_cooccurence(
    path,
    gene="Mamu-DQA",
    indiv_hap1="Mamu-DQA Haplotype 1",
    indiv_hap2="Mamu-DQA Haplotype 2",
    sire_hap1="Mamu-DQA Haplotype 1 Sire",
    sire_hap2="Mamu-DQA Haplotype 2 Sire",
    dam_hap1="Mamu-DQA Haplotype 1 Dam",
    dam_hap2="Mamu-DQA Haplotype 2 Dam",
    calc_rate=False,
)