In [None]:
import allel
import altair as alt
import pandas as pd
import polars as pl

# For when chromosomes are in one file (currently for SVs)

# Per-site Weir & Cockerham Fst table covering every chromosome in a single file.
file = "/master/abagwell/variant-analysis/results/rhesus/structural_variants/SVs/merged/U42_WGS_WES.genotyped.pass.weir.fst"
table = pl.read_csv(file, has_header=True, separator="\t", infer_schema_length=10000).filter(
    # Drop rows whose Fst is the literal string "-nan" before the numeric cast below.
    pl.col("WEIR_AND_COCKERHAM_FST") != "-nan"
).with_columns(
    # Force string chromosome labels, as the per-chromosome loaders do. If schema
    # inference typed all-numeric labels as integers, the join below would fail on
    # mismatched key dtypes (assumes `allel` yields CHROM as strings -- TODO confirm).
    pl.col("CHROM").cast(str),
    pl.col("WEIR_AND_COCKERHAM_FST").cast(pl.Float32),
    pl.col("POS").cast(pl.Int32),  # To match datatype of `allel` VCF
    # "CHROM:POS" label used as the x field and tooltip in the plotting cell.
    pl.concat_str([
        pl.col("CHROM"),
        pl.lit(":"),
        pl.col("POS"),
    ]).alias("locus")
)

# Pull only the fields needed to annotate each Fst site with its SV type.
vcf = "/master/abagwell/variant-analysis/results/rhesus/structural_variants/SVs/merged/U42_WGS_WES.genotyped.pass.vcf.gz"
variants = pl.from_pandas(
    pd.DataFrame(
        allel.read_vcf(vcf, ['variants/CHROM', 'variants/POS', 'variants/SVTYPE'])
    )
)

# Inner join: keeps only Fst sites that have a record at the same CHROM/POS in the VCF.
data = table.join(variants, how="inner", left_on=["CHROM", "POS"], right_on=["variants/CHROM", "variants/POS"])

In [None]:
import allel
import altair as alt
import pandas as pd
import polars as pl

# For when chromosomes are in multiple files (currently for SNPs)

# Chromosome labels "1".."20" plus "X", in the order the tables are concatenated.
# To load a single chromosome while debugging, use e.g. `chromosomes = ["18"]`.
chromosomes = [str(number) for number in range(1, 21)] + ["X"]

tables = []
# The loop variable is `chrom`, not `chr`, so that the builtin `chr()` is not
# rebound in the kernel namespace for every later cell.
for chrom in chromosomes:
    file = f"/master/abagwell/variant-analysis/results/rhesus/relatedness/fst/SNPRC_WGS_WES.SNP.chr{chrom}.weir.fst"
    # NOTE(review): the single-file SV cell drops "-nan" Fst rows before casting;
    # confirm whether these per-chromosome files can contain them as well.
    table = pl.read_csv(file, has_header=True, separator="\t", infer_schema_length=10000).with_columns(
        # Numeric chromosome labels may be inferred as integers; unify on strings
        # so the per-chromosome tables (including "X") share one schema for concat.
        pl.col("CHROM").cast(str),
        pl.col("WEIR_AND_COCKERHAM_FST").cast(pl.Float32),
        pl.col("POS").cast(pl.Int32),  # To match datatype of `allel` VCF
        # "CHROM:POS" label used as the x field and tooltip in the plotting cell.
        pl.concat_str([
            pl.col("CHROM"),
            pl.lit(":"),
            pl.col("POS"),
        ]).alias("locus")
    )
    tables.append(table)

# Stack the per-chromosome tables into one frame, preserving chromosome order.
data = pl.concat(tables)

In [None]:
# Keep only loci whose per-site Fst is at least 0.12 before plotting.
fst_at_or_above_cutoff = pl.col("WEIR_AND_COCKERHAM_FST") >= 0.12
filtered_data = data.filter(fst_at_or_above_cutoff)

In [None]:
# Lift Altair's default row cap so the whole filtered table can be embedded in the chart.
alt.data_transformers.disable_max_rows()

# Chromosome labels in the intended legend order ("1".."20", then "X").
chrom_order = [str(number) for number in range(1, 21)] + ["X"]

alt.Chart(filtered_data.to_arrow().to_pandas()).mark_circle().encode(
    # `sort=None` keeps loci in data order. The previous list of chromosome labels
    # could never match a "CHROM:POS" value, so it only preserved data order as a
    # side effect of how unmatched values are handled.
    alt.X("locus", title="SNPs by Locus", axis=alt.Axis(labels=False, tickSize=0), sort=None),
    # The axis starts at the 0.12 cut-off used to build `filtered_data`.
    alt.Y("WEIR_AND_COCKERHAM_FST", title="Fst", scale=alt.Scale(domain=[0.12, 0.41])),
    # Name every chromosome so the legend order does not depend on row order.
    color=alt.Color("CHROM", title="Chrom", sort=chrom_order),
    tooltip = [
        alt.Tooltip("locus", title="Locus"),
        #alt.Tooltip("WEIR_AND_COCKERHAM_FST", title="Fst"),
    ]
).properties(
    title = "Fst between RPL and non-RPL Rhesus Macaques",
    width = 300,
)#.save("/master/abagwell/figures/fst/fst_SNPS_RPL.html")

In [None]:
import allel
import altair as alt
import pandas as pd
import polars as pl

## For windowed Fst with chromosomes in different files

# Chromosome labels "1".."20" plus "X", in the order the tables are concatenated.
chromosomes = [str(number) for number in range(1, 21)] + ["X"]

tables = []
# The loop variable is `chrom`, not `chr`, so that the builtin `chr()` is not
# rebound in the kernel namespace for every later cell.
for chrom in chromosomes:
    file = f"/master/abagwell/variant-analysis/results/rhesus/relatedness/fst/SNPRC_WGS_WES.SNP.chr{chrom}.windowed.weir.fst"
    table = pl.read_csv(file, has_header=True, separator="\t", infer_schema_length=10000).with_columns(
        # Numeric chromosome labels may be inferred as integers; unify on strings
        # so the per-chromosome tables (including "X") share one schema for concat.
        pl.col("CHROM").cast(str),
        pl.col("WEIGHTED_FST").cast(pl.Float32),
        # "CHROM:BIN_START-BIN_END" label used as the x field and tooltip when plotting.
        pl.concat_str([
            pl.col("CHROM"),
            pl.lit(":"),
            pl.col("BIN_START"),
            pl.lit("-"),
            pl.col("BIN_END"),
        ]).alias("interval")
    )
    tables.append(table)

# NOTE: this rebinds `data`, which the per-site loading cells also assign; reload the
# per-site table before re-running cells that expect its columns.
data = pl.concat(tables).filter(
    pl.col("WEIGHTED_FST") >= 0.03,  # Removes values for when there are too many to render
)

In [None]:
# Lift Altair's default row cap so the whole windowed table can be embedded in the chart.
alt.data_transformers.disable_max_rows()

# Chromosome labels in the intended legend order ("1".."20", then "X").
chrom_order = [str(number) for number in range(1, 21)] + ["X"]

alt.Chart(data.to_arrow().to_pandas()).mark_circle().encode(
    # `sort=None` keeps windows in data order. The previous list of chromosome labels
    # could never match a "CHROM:START-END" value, so it only preserved data order as
    # a side effect of how unmatched values are handled.
    alt.X("interval", title="100kb Intervals", axis=alt.Axis(labels=False, tickSize=0), sort=None),
    alt.Y("WEIGHTED_FST", title="Weighted Fst", scale=alt.Scale(domain=[0.03, 0.30])),  # Change domain depending on data
    # Name every chromosome so the legend order does not depend on row order.
    color=alt.Color("CHROM", title="Chrom", sort=chrom_order),
    tooltip = [
        alt.Tooltip("interval", title="Interval"),
        alt.Tooltip("WEIGHTED_FST", title="Weighted Fst"),
        alt.Tooltip("N_VARIANTS", title="# of SNPs")
    ]
).properties(
    title = "Fst between RPL and non-RPL Rhesus Macaques",
    width = 300,
)#.save("/master/abagwell/figures/fst/fst_windowed_SNPs_RPL_fewer.html")