In [1]:
import pyft
import pandas as pd
import altair as alt
# Switch Altair to its registered "vegafusion" data transformer for every chart built below
# (presumably so the large per-fiber tables can be plotted — TODO confirm the motivation).
alt.data_transformers.enable("vegafusion")

DataTransformerRegistry.enable('vegafusion')

In [2]:
# Load the CTCF footprint table shipped with the test data. `long=True` is forwarded to
# pyft.read_footprint_table (presumably selecting a one-row-per-fiber layout — confirm in pyft).
df = pyft.read_footprint_table("../../tests/data/ctcf-footprints.bed.gz", long=True)
# Preview the first two rows.
df.head(2)

Unnamed: 0,chrom,start,end,strand,n_spanning_fibers,n_spanning_msps,n_overlapping_nucs,module:0-8,module:8-16,module:16-23,module:23-29,module:29-35,fire_qual,fiber_name,n_modules,has_spanning_msp
0,chr11,5204946,5204981,+,181,92,85,False,False,False,False,False,247,m64076_211222_124721/148505307/ccs,5,True
0,chr11,5204946,5204981,+,181,92,85,False,False,False,False,False,-1,m64076_211222_124721/51053256/ccs,5,False


In [3]:
# Load the same footprint table through pyft's read-and-center helper.
dfm = pyft.read_and_center_footprint_table("../../tests/data/ctcf-footprints.bed.gz")
# Only the last expression of a cell is displayed, so the former bare `dfm.head(2)` line
# produced no output and has been removed; the column dtypes are what this cell shows.
dfm.dtypes

chrom                     object
strand                    object
fire_qual                  int64
has_spanning_msp            bool
footprinted                 bool
centered_start             int64
centered_end               int64
region                    object
centering_position         int64
centered_position_type    object
query_name                object
dtype: object

In [4]:
# Load a centered table with pyft.read_center_table.
# NOTE(review): unlike the other inputs this path is outside tests/data ("../../tmp.bed.gz");
# confirm the file is available to anyone re-running the notebook.
center = pyft.read_center_table("../../tmp.bed.gz")
# Only the last expression of a cell is displayed, so the former bare `center.head(2)` line
# produced no output and has been removed; the column dtypes are what this cell shows.
center.dtypes

chrom                     object
centering_position         int64
strand                    object
subset_sequence           object
reference_start            int64
reference_end              int64
query_name                object
RG                        object
HP                        object
centered_query_start       int64
centered_query_end         int64
query_length               int64
centered_position_type    object
centered_start             int64
centered_end               int64
centered_qual              int64
dtype: object

In [5]:
# Stack the rows of `center` on top of the rows of `dfm` and renumber the rows from 0.
# Columns that exist in only one of the two tables end up as missing values in the other's rows.
both_dfs = pd.concat([center, dfm], ignore_index=True)
# Preview the first two rows of the combined table.
both_dfs.head(2)

Unnamed: 0,chrom,centering_position,strand,subset_sequence,reference_start,reference_end,query_name,RG,HP,centered_query_start,centered_query_end,query_length,centered_position_type,centered_start,centered_end,centered_qual,fire_qual,has_spanning_msp,footprinted,region
0,chr11,5204946,+,N,5184260.0,5205600.0,m64076_211222_124721/148505307/ccs,7be8b5ca/2--2,UNK,-20686.0,654.0,21333.0,m6a,-399,-398,253.0,,,,
1,chr11,5204946,+,N,5184260.0,5205600.0,m64076_211222_124721/148505307/ccs,7be8b5ca/2--2,UNK,-20686.0,654.0,21333.0,m6a,-396,-395,251.0,,,,


In [6]:

def make_chart(dfm):
    """Build an interactive Altair chart of centered per-fiber features.

    Every remaining row of ``dfm`` is drawn as a rectangle from ``centered_start``
    to ``centered_end`` on the row of its ``query_name`` and colored by its
    ``centered_position_type``. A dropdown selects which region
    (``"<chrom>:<centering_position> <strand>"``) is shown, and two sliders are
    bound to params named ``width`` and ``height``.

    Parameters
    ----------
    dfm : pandas.DataFrame
        Must contain the columns ``chrom``, ``centering_position``,
        ``centered_start``, ``centered_end``, ``centered_position_type``,
        ``query_name`` and ``strand``; all other columns are ignored. Rows with
        a missing value in any of these columns, and rows whose type is
        ``5mC``, are dropped. The input frame itself is not modified.

    Returns
    -------
    The chart object produced by the Altair call chain (``alt.Chart(...)`` with
    encodings, filter, properties and params applied).

    Raises
    ------
    ValueError
        If no rows are left to plot after filtering.
    """
    key_cols = ["query_name", "centering_position", "strand"]
    plot_cols = ['chrom', 'centering_position', 'centered_start', 'centered_end',
                 'centered_position_type', 'query_name', 'strand']
    # Feature types whose per-fiber counts decide the row order (most first).
    sort_types = ["footprinted", "msp", "m6a"]

    # Selecting a column list already yields a new frame, so no up-front copy is needed.
    features = (
        dfm[plot_cols]
        .dropna()
        .reset_index(drop=True)
        .infer_objects()
        .query("centered_position_type != '5mC'")
    )
    if features.empty:
        raise ValueError(
            "make_chart: no rows left to plot after dropping missing values and 5mC rows"
        )

    # One row per (fiber, centering position, strand), one count column per feature type.
    counts = (
        features.groupby(key_cols + ["centered_position_type"])
        .size()
        .unstack("centered_position_type", fill_value=0)
    )
    # A type that never occurs in the data has no column after unstacking; add it as
    # all-zero so the sort below cannot fail with a KeyError.
    for feature_type in sort_types:
        if feature_type not in counts.columns:
            counts[feature_type] = 0
    counts = counts.rename_axis(columns=None).reset_index()

    # Attach the counts to every feature row and order the rows. The y encoding uses
    # sort=None, so this row order is what determines the order of fibers in the chart.
    features = features.merge(counts, on=key_cols, how="left").sort_values(
        ["chrom", "centering_position", "strand"] + sort_types,
        ascending=[True, True, True, False, False, False],
        ignore_index=True,
    )

    features["region"] = (
        features["chrom"]
        + ":"
        + features["centering_position"].astype(str)
        + " "
        + features["strand"]
    )

    # set the colors
    domain = ["5mC", "m6a", "nuc", "msp", "footprinted", "not-footprinted"]
    range_ = ["brown", "purple", "lightgray", "pink", "green", "lightgray"]
    opacity = dict(zip(domain, [1.0, 1.0, 0.25, 0.25, 0.1, 0.1]))

    # add opacity column to the dataframe (types outside `domain` get a missing opacity)
    features = features.assign(opacity=features["centered_position_type"].map(opacity))

    # Regions in sorted row order; the first one is the dropdown's initial value.
    # Taking it from this list (rather than by index label 0) keeps the default in
    # step with the first option offered.
    regions = features["region"].unique().tolist()

    input_dropdown = alt.binding_select(
        # One option per region; there is no "show all" entry.
        options=regions,
        name='Region: '
    )

    selection = alt.selection_point(
        fields=['region'],
        bind=input_dropdown,
        value=regions[0],
    )

    bind_range_w = alt.binding_range(min=200, max=1600, name='Chart width: ')
    param_width = alt.param('width', bind=bind_range_w)
    bind_range_h = alt.binding_range(min=200, max=1600, name='Chart height: ')
    param_height = alt.param('height', bind=bind_range_h)

    chart = alt.Chart(features).mark_rect().encode(
        x='centered_start:Q',
        x2='centered_end:Q',
        color=alt.Color('centered_position_type:O').scale(domain=domain, range=range_),
        y=alt.Y('query_name:O', sort=None),
        # NOTE(review): as a quantitative encoding these values go through an opacity
        # scale rather than being used literally; if the literal 1.0/0.25/0.1 values are
        # intended, the scale would need to be disabled — confirm the desired look.
        opacity=alt.Opacity('opacity:Q'),
    ).transform_filter(
        selection
    ).properties(
        width=800,
        height=800
    ).add_params(
        selection,
        param_width,
        param_height
    ).interactive()
    return chart
    
    
# Save the interactive chart as standalone HTML. The file is written relative to the
# notebook's working directory instead of a user-specific absolute path, so this cell
# runs unchanged on any machine.
make_chart(both_dfs).save('chart.html')