In [None]:
import numpy as np
import pandas as pd 
import altair as alt
import re 
import ntpath 

from commons import data_processing
from commons.DataProcessors import msfragger

# Turn off Altair's row-count limit on chart data for the whole notebook;
# the charts below are built directly from pandas frames.
alt.data_transformers.disable_max_rows()

In [None]:
# Root directory that is searched for MSFragger PSM tables.
PSM_ROOT = r'D:\MSFragger\20221109'

files = data_processing.get_files(PSM_ROOT, exts=['psm.tsv'])
# Keep only paths containing "F<digit>". The pattern is a raw string because
# '\d' inside a plain literal is an invalid escape sequence.
files = [f for f in files if re.search(r'F\d', f)]
if not files:
    # Without this guard the cell would fail later with an opaque
    # "'NoneType' object has no attribute 'data'".
    raise FileNotFoundError(
        f"No psm.tsv files with an 'F<digit>' path component found under {PSM_ROOT}"
    )

msf = None
for file in files:
    # One processor per file so that each table can be tagged before merging.
    m = msfragger.msf_processor([file])

    # Layout read from the path: <...>/<prefix>_<column>_.../<fraction>/<file>
    # ntpath understands both '\\' and '/' separators, unlike str.split('\\').
    dirname = ntpath.dirname(file)
    fraction = ntpath.basename(dirname)
    column = ntpath.basename(ntpath.dirname(dirname)).split('_')[1]
    m.add_special_column('fraction', fraction)
    m.add_special_column('column', column)

    if msf is None:
        msf = m
    else:
        # The return value is not used, so join_processors is assumed to
        # extend `msf` in place -- TODO confirm against commons.DataProcessors.
        msf.join_processors(m)

df = msf.data
msf

In [None]:
# Rows without a modification have no modified_peptide; fall back to the plain
# peptide sequence so every row has a usable grouping key.
#
# A boolean mask is used instead of looking rows up again by index label:
# if the joined tables carry repeated index labels, a label lookup would also
# hit rows that already have a modified_peptide (or fail to align on assignment).
no_mods = df["modified_peptide"].isna()
# .to_numpy() makes the assignment positional, so no index alignment is needed.
df.loc[no_mods, "modified_peptide"] = df.loc[no_mods, "peptide"].to_numpy()
df

In [None]:
# Average the numeric PSM columns per (column, fraction, peptide form).
# numeric_only=True is required on pandas >= 2.0, where mean() no longer drops
# non-numeric columns silently and raises TypeError on string data instead.
avg = df.groupby(
    ['column', 'fraction', 'modified_peptide', 'peptide']
).mean(numeric_only=True)

# retention / 60 is stored as minutes (assumes `retention` is in seconds -- TODO confirm).
avg['retention_min'] = avg['retention'] / 60

In [None]:
# Ridgeline layout: every facet row is `step` pixels tall and a density curve
# may spill `step * overlap` pixels into the row above it. Both the chart
# height and the y-range derive from these two values so they cannot drift
# apart (40 px rows with a 5 px overshoot, as before).
step = 40
overlap = 0.125

alt.Chart(avg.reset_index(), height=step).mark_area(
    opacity=0.8,
    stroke='lightgray',
    strokeWidth=0.5
).transform_density(
    # Kernel density of retention time, estimated separately per facet cell.
    'retention_min',
    as_=['retention_min', 'density'],
    groupby=['fraction', 'column']
).encode(
    x=alt.X('retention_min:Q', title='Retention Time (min)',
        axis=alt.Axis(grid=False)),
    # Inverted pixel range: the baseline sits at the bottom of the row and the
    # peak is allowed to reach slightly above the top edge.
    y=alt.Y('density:Q', axis=None,
        scale=alt.Scale(range=[step, -step * overlap])),
    color='column:N'
).facet(
    row=alt.Row('fraction:N', title=None,
        header=alt.Header(labelAngle=0)),
    column=alt.Column('column:N', title=None)
).configure_facet(
    spacing=0
).configure_view(
    stroke=None
).properties(
    bounds='flush'
)

In [None]:
def _peptide_count_bars(frame, title):
    """Bar chart of distinct modified peptides per column, one panel per fraction.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must provide the `column`, `fraction` and `modified_peptide` fields.
    title : str
        Chart title, used to tell concatenated charts apart.

    Returns
    -------
    alt.Chart
        Bar chart using a `column` facet encoding on `fraction`.
    """
    return alt.Chart(frame, title=title).mark_bar().encode(
        # The colour legend already names the column, so x labels are hidden.
        x=alt.X('column:N', title='',
            axis=alt.Axis(labels=False, ticks=False)),
        y=alt.Y('distinct(modified_peptide)', title='Unique Peptides'),
        color=alt.Color('column:N'),
        column=alt.Column('fraction:N')
    )


d = avg.reset_index()

# Every peptide counted for each column it was identified on.
total = _peptide_count_bars(d, 'All peptides per column')

# NOTE(review): drop_duplicates keeps the FIRST row of each
# (modified_peptide, fraction) pair, so a peptide seen on several columns is
# credited only to whichever column comes first in `d`. If the intent is
# "peptides found on exactly one column", keep=False is needed -- confirm.
unique = _peptide_count_bars(
    d.drop_duplicates(['modified_peptide', 'fraction']),
    'Peptides counted once per fraction'
)

total | unique

In [None]:
from modlamp.descriptors import PeptideDescriptor

def pour(seq):
    """Return the global 'gravy' descriptor value of a single peptide sequence.

    Builds a modlamp ``PeptideDescriptor`` on the 'gravy' scale, runs its
    global calculation and returns element ``[0][0]`` of the resulting
    ``descriptor`` array (presumably the GRAVY hydropathy score -- confirm
    against the modlamp documentation).

    Parameters
    ----------
    seq : str
        Peptide sequence passed straight to ``PeptideDescriptor``.
    """
    gravy_descriptor = PeptideDescriptor(seq, 'gravy')
    gravy_descriptor.calculate_global()
    # One sequence in, one global value out: take the single entry.
    first_row = gravy_descriptor.descriptor[0]
    return first_row[0]

# Score each distinct sequence once and broadcast the result to all of its
# PSM rows, instead of rebuilding a descriptor for every row. Rows with a
# missing peptide get NaN rather than being passed to pour().
gravy_by_peptide = {seq: pour(seq) for seq in df['peptide'].dropna().unique()}
df['gravy'] = df['peptide'].map(gravy_by_peptide)

# Re-aggregate per (column, peptide form) -- `fraction` is not a grouping key
# here, so this replaces the per-fraction `avg` computed earlier.
# numeric_only=True is required on pandas >= 2.0, where mean() raises on
# non-numeric columns instead of dropping them.
avg = df.groupby(['column', 'modified_peptide', 'peptide']).mean(numeric_only=True)
avg['retention_min'] = avg['retention'] / 60

In [None]:
d = avg.reset_index()

# Shared encoding: retention time in bins of width 5 on x, mean gravy of the
# peptides in each bin on y, one colour per column.
binned_retention = alt.X('retention_min:Q', bin=alt.Bin(step=5))
mean_gravy = alt.Y('mean(gravy):Q')
base = alt.Chart(d).encode(
    x=binned_retention,
    y=mean_gravy,
    color='column:N'
)

# Smoothed trend line layered with its confidence-interval band.
trend_line = base.mark_line(interpolate='basis')
ci_band = base.mark_errorband(extent='ci', interpolate='basis')
trend_line + ci_band

In [None]:
# Keep one row per modified peptide (the first one encountered) before
# comparing length distributions between columns.
d = avg.reset_index().drop_duplicates('modified_peptide')

alt.Chart(d).mark_boxplot().encode(
    x=alt.X('column:N'),
    y=alt.Y('peptide_length:Q')
)

In [None]:
import altair as alt
from vega_datasets import data

# NOTE(review): this cell plots the vega_datasets Seattle weather sample and
# does not touch the MSFragger data; it looks like a ridgeline reference
# example kept for comparison -- confirm whether it belongs in this notebook.
source = data.seattle_weather.url

# Row height in pixels, and how far (as a fraction of a row) a ridge may
# spill into the row above.
step = 20
overlap = 1

# Stage 1: per-month histogram of daily maximum temperature.
monthly_histogram = (
    alt.Chart(source, height=step)
    .transform_timeunit(Month='month(date)')
    .transform_joinaggregate(mean_temp='mean(temp_max)', groupby=['Month'])
    .transform_bin(as_=['bin_max', 'bin_min'], field='temp_max')
    .transform_aggregate(
        value='count()',
        groupby=['Month', 'mean_temp', 'bin_min', 'bin_max']
    )
    # Fill bins with no observations with 0 so every ridge spans the same x range.
    .transform_impute(
        impute='value', groupby=['Month', 'mean_temp'], key='bin_min', value=0
    )
)

# Stage 2: one filled area per month, coloured by that month's mean temperature.
ridge = monthly_histogram.mark_area(
    interpolate='monotone',
    fillOpacity=0.8,
    stroke='lightgray',
    strokeWidth=0.5
).encode(
    x=alt.X('bin_min:Q', bin='binned', title='Maximum Daily Temperature (C)'),
    y=alt.Y(
        'value:Q',
        scale=alt.Scale(range=[step, -step * overlap]),
        axis=None
    ),
    fill=alt.Fill(
        'mean_temp:Q',
        legend=None,
        scale=alt.Scale(domain=[30, 5], scheme='redyellowblue')
    )
)

# Stage 3: stack the months as flush facet rows and style the figure.
month_rows = alt.Row(
    'Month:T',
    title=None,
    header=alt.Header(labelAngle=0, labelAlign='right', format='%B')
)
(
    ridge.facet(row=month_rows)
    .properties(title='Seattle Weather', bounds='flush')
    .configure_facet(spacing=0)
    .configure_view(stroke=None)
    .configure_title(anchor='end')
)