# Imports

In [1]:
import numpy as np
import pandas as pd
import altair as alt
from vega_datasets import data
# Turn off Altair's max-rows check so the full melted table built below can be
# passed to the chart.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

# Functions

In [2]:
def create_structure_plot(melt_df, region):
    """Build an interactive STRUCTURE-style stacked bar chart for one region.

    Every individual (``ID``) is drawn as one stacked bar whose segments are
    the ``loading`` values of the clusters ``K``. Individuals are faceted into
    columns by their ``clst`` label, with the facets ordered by the median
    latitude (then longitude) of each ``clst``. A drop-down bound to ``fitk``
    selects which fit is shown; it starts at ``fitk == 2``.

    Parameters
    ----------
    melt_df : pandas.DataFrame
        Long-format table with at least the columns ``Region``, ``clst``,
        ``Latitude``, ``Longitude``, ``fitk``, ``ID``, ``K`` and ``loading``.
    region : str
        Value of the ``Region`` column whose rows are plotted.

    Returns
    -------
    alt.Chart
        The faceted, filterable bar chart.

    Raises
    ------
    ValueError
        If ``melt_df`` has no rows for ``region``.
    """
    df = melt_df[melt_df["Region"] == region]
    if df.empty:
        raise ValueError("no rows with Region == {!r}".format(region))

    # Facet order: populations sorted by their median coordinates. Only the two
    # coordinate columns are aggregated so that non-numeric columns in the
    # table cannot break (or slow down) the median.
    pops = (df.groupby("clst")[["Latitude", "Longitude"]]
              .median()
              .sort_values(["Latitude", "Longitude"])
              .index.tolist())

    # Offer every fitted K from 2 up to and including the largest one present.
    # int() converts the numpy scalar so the options are plain Python ints.
    Kmax = int(df["fitk"].max())
    select_box = alt.binding_select(options=list(range(2, Kmax + 1)))
    selection = alt.selection_single(name="k",
                                     fields=['fitk'],
                                     bind=select_box,
                                     init={'fitk': 2})

    p = alt.Chart(df).mark_bar().encode(
            x=alt.X("ID", axis=None),
            order=alt.Order("K", sort="ascending"),
            y=alt.Y("loading"),
            color="K:N",
            column=alt.Column("clst:N", title=None, spacing=5, sort=pops,
                              header=alt.Header(labelAngle=90,
                                                labelPadding=10,
                                                labelAlign="left",
                                                labelOrient="bottom"))
    ).add_selection(
        selection
    ).transform_filter(
        # Keep only the rows of the fit currently chosen in the drop-down.
        selection
    ).properties(
        width=15,
        height=60
    ).resolve_scale(
        # Give each facet its own x scale rather than one shared across facets.
        x="independent",
    )

    return p

# Data

Paths to the input files used below: the ADMIXTURE output (`.Q`), the sample list (`.fam`), and the sample metadata table (`.meta`).

In [3]:
# Number of clusters used to fill in the ADMIXTURE file name below.
K = 9

# Sample list and per-sample metadata.
fam_path = "../data/datasets/hoa_global/HumanOriginsPublic2068_auto_maf05_geno005_mind02.fam"
meta_path = "../data/meta/HumanOriginsPublic2068.meta"

# ADMIXTURE output file for the K chosen above.
admixture_path = str.format(
    "../output/admixture/hoa_global/HumanOriginsPublic2068_auto_maf05_geno005_mind02.K{}r1.Q",
    K,
)

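Prepare the data: for each K, load the ADMIXTURE loadings, join them with the sample metadata, and melt the result into one long-format table.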

In [4]:
# Largest number of clusters (K) for which an ADMIXTURE fit is loaded.
Kmax = 10

# The sample IDs and the metadata table do not depend on K, so they are read
# once here rather than on every pass through the loop.
iids = pd.read_table(fam_path, header=None, sep=" ").iloc[:, 1].tolist()
meta_df = pd.read_table(meta_path)

# One long-format frame per K; concatenated once after the loop instead of
# growing a DataFrame inside it.
melt_dfs = []
for k in range(2, Kmax + 1):
    admixture_path = "../output/admixture/hoa_global/HumanOriginsPublic2068_auto_maf05_geno005_mind02.K{}r1.Q".format(k)

    # read: one column per cluster, labelled 1..k, plus the sample ID
    l_df = pd.DataFrame(np.loadtxt(admixture_path))
    l_df.columns = list(range(1, k + 1))
    l_df["ID"] = iids

    # join: merge on the columns the two tables share (presumably "ID" --
    # confirm against the metadata file), then drop samples that lack a
    # region or a population label
    df = l_df.merge(meta_df)
    df = df.dropna(subset=["Region", "Simple.Population.ID"])
    # assign() returns a new frame, so no column is set on a filtered slice
    df = df.assign(clst=df["Simple.Population.ID"], fitk=k)

    # melt: the first k columns are the cluster loadings, the rest identify
    # the sample
    cols = df.columns.tolist()
    melt_df_k = df.melt(id_vars=cols[k:],
                        value_vars=cols[:k],
                        var_name="K",
                        value_name="loading")
    melt_dfs.append(melt_df_k)

melt_df = pd.concat(melt_dfs)

# Visualization

In [6]:
# Build the chart for one region, then wrap it in a vertical concatenation.
east_asia_chart = create_structure_plot(melt_df, "EastAsia")
p = alt.vconcat(east_asia_chart)

In [7]:
# Bare expression as the last line of the cell: displays the chart as the cell output.
p