In [12]:
import ntpath
import re
from pathlib import Path

import altair as alt
import pandas as pd
from matplotlib import pyplot as plt
from scipy import ndimage

from commons import data_processing
from commons import my_mzml

The following cells require mzXML versions of the .raw files provided in the PRIDE repository. Please convert the data with ProteoWizard before proceeding.

In [17]:
# Folder holding the mzXML files converted with ProteoWizard. This is a
# machine-specific location: point it at your own copy of the converted data.
MZXML_DIR = r'F:\Research\MS Data\QE\20220524\mzxml'

# Collect the mzXML file paths; the count is shown as the cell output.
xmls = data_processing.get_files(MZXML_DIR)
len(xmls)

21

In [23]:
mass1 = 849.70 # SGP with 1 sialic acid
mass2 = 956.06 # SGP with 2 sialic acid

masses = [mass1, mass2]

# Build one long-format table of smoothed extracted-ion traces: one trace per
# (file, mass) pair, files in the order of `xmls`, masses in the order above.
#
# The loop walks the files directly. It used to iterate over the axes of a
# matplotlib grid (`axs.flat`), but the `plt.subplots` call that created `axs`
# is no longer part of this cell, so that loop raised NameError on a fresh
# kernel and was tied to a fixed 21 x 2 grid instead of to `len(xmls)`.
traces = []

for file in xmls:
    # File stems look like 20220524_GD_SGP_005_ugul_30C_run1, i.e.
    # ..._<concentration code>_<unit>_<temperature>_<run>.
    base = ntpath.basename(file)
    base, _ = ntpath.splitext(base)

    spl = base.split('_')
    temp, run = spl[-2:]
    # '005' encodes 0.05; any other code is treated as 0.2.
    conc = 0.05 if spl[-4] == '005' else 0.2

    for mass in masses:
        # One progress line per extracted trace.
        print(base)

        # A fresh reader is opened for every extraction; whether a single
        # mzXML object can safely serve several extractions is not visible
        # from this notebook, so it is not shared between masses.
        m = my_mzml.mzXML(file)
        x1, y1 = m.ms1_extract(mass, tolerance=5)
        # Gaussian smoothing of the trace with sigma = 2 samples.
        y1 = ndimage.gaussian_filter(y1, 2)

        # Scalars are broadcast along the time axis of the trace.
        df = pd.DataFrame({
            'concentration':conc,
            'temperature':temp,
            'run':run,
            'mass':mass,
            'time':x1,
            'abundance':y1
        })
        traces.append(df)

# Concatenate once at the end (growing a frame inside the loop is quadratic);
# ignore_index gives the same fresh 0..n-1 index as reset_index(drop=True).
if traces:
    data = pd.concat(traces, ignore_index=True)
else:
    # No input files: keep the expected columns so later cells still run.
    data = pd.DataFrame(
        columns=['concentration', 'temperature', 'run', 'mass', 'time', 'abundance']
    )


20220524_GD_SGP_005_ugul_30C_run1
20220524_GD_SGP_005_ugul_30C_run1
20220524_GD_SGP_005_ugul_30C_run2
20220524_GD_SGP_005_ugul_30C_run2
20220524_GD_SGP_005_ugul_30C_run3
20220524_GD_SGP_005_ugul_30C_run3
20220524_GD_SGP_005_ugul_45C_run1
20220524_GD_SGP_005_ugul_45C_run1
20220524_GD_SGP_005_ugul_45C_run2-2
20220524_GD_SGP_005_ugul_45C_run2-2
20220524_GD_SGP_005_ugul_45C_run2
20220524_GD_SGP_005_ugul_45C_run2
20220524_GD_SGP_005_ugul_45C_run3-2
20220524_GD_SGP_005_ugul_45C_run3-2
20220524_GD_SGP_005_ugul_45C_run3
20220524_GD_SGP_005_ugul_45C_run3
20220524_GD_SGP_005_ugul_60C_run1
20220524_GD_SGP_005_ugul_60C_run1
20220524_GD_SGP_005_ugul_60C_run2
20220524_GD_SGP_005_ugul_60C_run2
20220524_GD_SGP_005_ugul_60C_run3
20220524_GD_SGP_005_ugul_60C_run3
20220524_GD_SGP_02_ugul_30C_run1
20220524_GD_SGP_02_ugul_30C_run1
20220524_GD_SGP_02_ugul_30C_run2
20220524_GD_SGP_02_ugul_30C_run2
20220524_GD_SGP_02_ugul_30C_run3
20220524_GD_SGP_02_ugul_30C_run3
20220524_GD_SGP_02_ugul_45C_run1-2
20220524_GD

In [66]:
# Keep only the part of every trace whose time value lies in [12, 22].
small_data = data[(data.time>=12) & (data.time<=22)]

# Fixed colour per temperature so all panels share one palette.
my_colors = alt.Color(
    "temperature:N",
    scale=alt.Scale(
        domain=["30C", "45C", "60C"],
        range=["#6E6581", "#B0B2BB", "#6B8A97"]
    ),
)


def _bold_axis(**options):
    """Return an axis with the bold 14 pt tick labels used in these figures."""
    return alt.Axis(labelFontSize=14, labelFontWeight='bold', **options)


def _sgp_chart(frame, concentration, mass):
    """Return the un-marked base chart for one concentration / mass pair.

    The chart carries only data, encodings and size; the caller adds the
    marks (area, line) and the faceting.

    Parameters
    ----------
    frame : pandas.DataFrame
        Trace table with `concentration`, `mass`, `time`, `abundance` and
        `temperature` columns.
    concentration, mass : float
        Values to select. They are matched with `==`, so pass the same
        literals that were used when the table was built.
    """
    subset = frame[(frame.concentration==concentration) & (frame.mass==mass)]
    return alt.Chart(subset).encode(
        x=alt.X('time:Q', title='Time', axis=_bold_axis(labelFlush=False)),
        y=alt.Y('abundance:Q', title='Relative Abundance',
                axis=_bold_axis(format='.2e')),
        color=my_colors
    ).properties(
        width=200,
        height=100
    )


# Both charts use the 0.05 concentration runs; they differ only in the mass.
data_left = _sgp_chart(small_data, 0.05, 849.70)
data_right = _sgp_chart(small_data, 0.05, 956.06)

In [48]:
# Show the time-windowed trace table (the notebook display elides the middle rows).
small_data

Unnamed: 0,concentration,temperature,run,mass,time,abundance
1811,0.05,30C,run1,849.70,12.006350,0.0
1812,0.05,30C,run1,849.70,12.012233,0.0
1813,0.05,30C,run1,849.70,12.016700,0.0
1814,0.05,30C,run1,849.70,12.026050,0.0
1815,0.05,30C,run1,849.70,12.031983,0.0
...,...,...,...,...,...,...
200860,0.20,60C,run3,956.06,21.860167,0.0
200861,0.20,60C,run3,956.06,21.894333,0.0
200862,0.20,60C,run3,956.06,21.925667,0.0
200863,0.20,60C,run3,956.06,21.959333,0.0


In [68]:
# Write one SVG per base chart: a translucent area with a line on top,
# faceted into a grid with one row per run and one column per temperature.
for chart, out_path in (
    (data_left, './figures/sgp_light.svg'),
    (data_right, './figures/sgp_heavy.svg'),
):
    # Create the output folder first; saving into a missing directory fails.
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    (chart.mark_area(opacity=0.3) + chart.mark_line()).facet(
        row='run',
        column='temperature'
    ).save(out_path)