In [5]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Set chromosome. This single value drives both the input file names and the
# contig-length lookup further down.
# NOTE(review): `chr` shadows the Python builtin of the same name; the name is
# kept so that any other cell referring to it keeps working.
chr = 10

# Define path to files below
home = '/users/username'
roh_file = f"{home}/variant-analysis/results/rhesus/relatedness/roh/all_samples.SNP.chr{chr}.roh_poisson.pickle"
froh_file = f"{home}/variant-analysis/results/rhesus/relatedness/roh/all_samples.SNP.chr{chr}.froh_poisson.pickle"
samples_list = f"{home}/variant-analysis/resources/rhesus/samples/all_samples_without_GBS.list"
contig_length = f"{home}/variant-analysis/results/rhesus/relatedness/roh/contig_lengths.tsv"

# NOTE: unpickling can execute arbitrary code; only load pickles from a trusted source.
roh_df = pd.read_pickle(roh_file)
froh_df = pd.read_pickle(froh_file)
sns.set_theme()

# Create list of sample names (one per line; blank lines are skipped so they
# do not turn into empty sample names)
with open(samples_list, "r") as f:
    sample_names = [line.strip() for line in f if line.strip()]

# Create contig lists.
# The contig column is read as text: if every contig name happened to be
# numeric, pandas would otherwise infer an integer column and the comparison
# against the chromosome name below would never match.
contigs = pd.read_table(contig_length, names=["chr", "length"], dtype={"chr": str})

# Look up the length of the selected chromosome (previously hard-coded to "10",
# which silently went out of sync whenever `chr` was changed).
chrom_rows = contigs.loc[contigs["chr"] == str(chr), "length"]
if len(chrom_rows) != 1:
    raise ValueError(
        f"Expected exactly one entry for contig '{chr}' in {contig_length}, "
        f"found {len(chrom_rows)}."
    )
chrom_length = chrom_rows.item()

In [None]:
# Plot ROHs by groups along with a second plot containing counts
# Modify groups for samples accordingly
sample_groups = {
    "founders": ["WGS1", "WGS2"],
    "descendants": ["WGS3", "WGS4", "WGS5"],
}

# Bar color per group. Groups are drawn in the order listed here, and the
# first group drawn ends up at the bottom of the y axis. Every key must also
# exist in `sample_groups`; a group without an entry here is not drawn.
group_colors = {
    "descendants": "orange",
    "founders": "teal",
}

num_of_bins = 60

all_samples = [sample for samples in sample_groups.values() for sample in samples]

# ROH rows belonging to the selected samples; computed once and reused for
# both the "has any ROH" check and the histogram.
subset_df = roh_df[roh_df["sample"].isin(all_samples)]
present = set(subset_df["sample"])
samples_with_roh = [sample for sample in all_samples if sample in present]

# Figure
# Top panel height scales with the number of plotted samples. It is kept at
# one row minimum so the height ratio never becomes zero when no sample has
# a ROH.
height = max(len(samples_with_roh), 1) * 0.22
fig, ax = plt.subplots(2, sharex=True, figsize=(8, height + 2), height_ratios=[height, 2])
fig.suptitle("Runs of homozygosity")

# Subplot 1
def barh_group(samples, color, label=None):
    """Draw group of same colored samples onto horizontal bar plot.

    Parameters
    ----------
    samples : iterable of str
        Sample names to draw; each is matched against ``roh_df["sample"]``.
    color : color
        Matplotlib color used for every bar of the group.
    label : str, optional
        Legend label for the group. It is attached only to the first sample
        that is actually drawn, so the group appears once in the legend.

    Samples without any row in ``roh_df`` are skipped and reported on stdout.
    """
    for sample in samples:
        subset = roh_df[roh_df["sample"] == sample]
        if subset.empty:
            print("Sample " + sample + " has no ROHs.")
            continue
        ax[0].barh(
            subset["sample"],
            subset["length"],
            left=subset["start"],
            color=color,
            label=label,
        )
        label = None  # only one legend entry per group

# Draw groups on horizontal bar plot by color.
for group, color in group_colors.items():
    barh_group(sample_groups[group], color, label=group)

# ax[0].invert_yaxis()  # Uncomment to make samples be inserted from top
ax[0].set_ylabel("Sample")
ax[0].set_xlim(1, chrom_length)
if samples_with_roh:
    # Legend sits outside the axes so it never hides any ROH bar.
    ax[0].legend(title="Group", loc="upper left", bbox_to_anchor=(1.01, 1.0))

# Subplot 2: number of ROHs starting in each bin along the chromosome
bins = np.linspace(0, chrom_length, num=num_of_bins + 1)
ax[1].hist(subset_df["start"], bins=bins)
# The x axis is shared, so it is labelled once on the bottom panel.
ax[1].set_xlabel("Position along chromosome")
ax[1].set_ylabel("Counts")
ax[1].set_xlim(1, chrom_length)

# Lay out the figure last: tight_layout only accounts for the labels, ticks
# and legend that exist at the time it is called.
fig.tight_layout()

#plt.gca().invert_yaxis()  # Makes samples be inserted from top

# # Create counts for bins
# for bin_start, bin_end in zip(bins[:-1], bins[1:]):
#     for sample in descendants:
#         sample_roh = roh_df[roh_df["sample"] == sample]
#         for row in sample_roh["roh"]:
#             pass

In [None]:
# Mean of the `froh` column of the loaded FROH table
froh_df["froh"].mean()