In [22]:
# Render matplotlib figures inline in the notebook.
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import plotly.express as px
# zipfile is used below to unpack the ionbot.twbx archives.
import zipfile
# itables is installed on the fly before it is imported.
# NOTE(review): `%pip install -q itables` would make sure the install targets the
# kernel's own environment; consider switching and pinning a version.
!pip -q install itables
from itables import init_notebook_mode
# presumably makes every DataFrame output render as an interactive itables table
# — confirm against the itables documentation
init_notebook_mode(all_interactive=True)
import itables.options as opt
# Display options for the itables tables.
# NOTE(review): maxBytes = 0 is assumed to disable itables' size-based
# downsampling of large tables — confirm for the installed itables version.
opt.maxBytes = 0
# Class names applied to the rendered tables.
opt.classes = ["display", "nowrap","compact","hover"]
opt.showIndex = False
opt.style = "max-width:6000px"
# Limit the displayed width of long cell values in pandas output.
pd.set_option('display.max_colwidth', 20)

<IPython.core.display.Javascript object>

This notebook analyses one or more samples at the peptide and modification level.

For each sample, the `ionbot.twbx` result file needs to be put into a separate folder, with the folder name corresponding to a unique sample name.  

In this case we analyse the ionbot search results of three samples, with one folder created for each sample:

```
PXD000561
├── PXD000561_closed
│   └── ionbot.twbx
├── PXD000561_full
│   └── ionbot.twbx
├── PXD000561_nocorr_nort
│   └── ionbot.twbx
```

We can specify this as follows:

In [23]:
# Folder that holds one sub-folder per sample.
experiment = "PXD000561"
# Sample names; each one is also the name of a sub-folder of `experiment`.
samples = [
    "PXD000561_closed",
    "PXD000561_full",
    "PXD000561_nocorr_nort",
]

In [None]:
! wget http://genesis.ugent.be/uvpublicdata/workshop-ml-proteomics/PXD000561.zip
! unzip PXD000561.zip

Next we unpack the result files:

In [24]:
# Unpack the members under "Data/" of every sample's ionbot.twbx archive
# into that sample's folder.
for sample in samples:
    folder = "%s/%s/"%(experiment,sample)
    # The context manager closes the archive's file handle as soon as the
    # members have been extracted, instead of leaving it open.
    with zipfile.ZipFile("%s/ionbot.twbx"%folder) as archive:
        for file in archive.namelist():
            if file.startswith('Data/'):
                archive.extract(file, folder)

Next we parse the result files and create one Pandas DataFrame that contains all the results:

In [25]:
# Build one DataFrame (`ionbot`) holding the filtered matches of all samples.
tmp = []
for sample in samples:
    print(sample)
    folder = "%s/%s/Data/ionbot_result/"%(experiment,sample)
    # First-ranked and lower-ranked matches come from separate files; tag every
    # row with the file it came from before stacking the two tables.
    ionbot_first = pd.read_csv("%s/ionbot.first.csv"%folder)
    ionbot_first["rank"] = "first"
    ionbot_lower = pd.read_csv("%s/ionbot.lower.csv"%folder)
    ionbot_lower["rank"] = "lower"
    ionbot = pd.concat([ionbot_first,ionbot_lower])
    # Keep rows with database == "T" (presumably target hits — confirm against
    # the ionbot documentation) and a q-value of at most 0.01.
    # The explicit copy makes the filtered table an independent frame, so the
    # column assignments below act on it directly.
    ionbot = ionbot[(ionbot["database"]=="T")&(ionbot["q-value"]<=0.01)].copy()
    # Assign the filled column back rather than calling fillna(inplace=True) on
    # the selected column: the chained in-place form warns on recent pandas and
    # leaves the frame unchanged under Copy-on-Write.
    ionbot["modifications"] = ionbot["modifications"].fillna("-")
    # Add the per-match feature columns; the left join keeps every filtered match.
    ionbot_features = pd.read_csv("%s/ionbot.features.csv"%folder)
    ionbot = ionbot.merge(ionbot_features,on="ionbot_match_id",how="left")
    ionbot["sample"] = sample
    tmp.append(ionbot)
ionbot = pd.concat(tmp)

PXD000561_closed
PXD000561_full
PXD000561_nocorr_nort


In [26]:
# List the columns available in the combined result table.
ionbot.columns

Index(['ionbot_match_id', 'spectrum_title', 'scan', 'spectrum_file',
       'precursor_mass', 'peptide_mass', 'observed_retention_time', 'charge',
       'database_peptide', 'matched_peptide', 'modifications',
       'modifications_delta', 'best_tag_rank', 'corrected_retention_time',
       'unexpected_modification', 'database', 'psm_score', 'q-value', 'PEP',
       'proteins', 'rank', 'by-count', 'all-count', 'by-explained',
       'all-explained', 'by-intensity-pattern-correlation',
       'predicted_retention_time', 'rt-pred-error', 'sample'],
      dtype='object')

In [27]:
# Count the rows (matches) of the combined result table for each sample.
tmp = ionbot["sample"].value_counts().reset_index(level=0)
tmp.columns = ["sample","#ids"]
# The title states what is counted; the per-sample bar charts in this notebook
# all share the same axes and would otherwise be indistinguishable.
fig = px.bar(tmp, y='sample', x='#ids', orientation='h',
             title="Identified matches per sample")
# Order the bars by their count.
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.show()

In [28]:
# Keep one row per (sample, matched_peptide, modifications) combination, then
# count the remaining rows for each sample.
tmp = ionbot.drop_duplicates(["sample","matched_peptide","modifications"],keep="first")
tmp = tmp["sample"].value_counts().reset_index(level=0)
tmp.columns = ["sample","#ids"]
# The title states what is counted; the per-sample bar charts in this notebook
# all share the same axes and would otherwise be indistinguishable.
fig = px.bar(tmp, y='sample', x='#ids', orientation='h',
             title="Unique peptide + modification combinations per sample")
# Order the bars by their count.
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.show()

In [29]:
# Keep one row per (sample, matched_peptide) combination, ignoring the
# modifications, then count the remaining rows for each sample.
tmp = ionbot.drop_duplicates(["sample","matched_peptide"],keep="first")
tmp = tmp["sample"].value_counts().reset_index(level=0)
tmp.columns = ["sample","#ids"]
# The title states what is counted; the per-sample bar charts in this notebook
# all share the same axes and would otherwise be indistinguishable.
fig = px.bar(tmp, y='sample', x='#ids', orientation='h',
             title="Unique matched peptides per sample")
# Order the bars by their count.
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.show()

In [30]:
# Map every sample name to the list of distinct `matched_peptide` values
# found among that sample's rows.
p = {
    name: list(ionbot.loc[ionbot["sample"] == name, "matched_peptide"].unique())
    for name in samples
}

In [31]:
# Pairwise overlap matrix: entry (a, b) is the number of peptides present in
# both sample a and sample b; the diagonal holds each sample's own number of
# distinct peptides.
# Every peptide list is converted to a set once up front, instead of being
# rebuilt on each pass through the inner loop.
peptide_sets = {name: set(peptides) for name, peptides in p.items()}
d = [
    [len(peptide_sets[sample1] & peptide_sets[sample2]) for sample2 in p]
    for sample1 in p
]

# Rows and columns are both labelled with the sample names, in the order of `p`.
intersect = pd.DataFrame(d,columns=list(p),index=list(p))

In [32]:
# Display the overlap matrix with a blue colour gradient applied to the cells.
intersect.style.background_gradient(cmap='Blues')

Unnamed: 0,PXD000561_closed,PXD000561_full,PXD000561_nocorr_nort
PXD000561_closed,64630,59578,54835
PXD000561_full,59578,75915,65157
PXD000561_nocorr_nort,54835,65157,68366


In [33]:
# Load the protein assignments of every sample and join them with the matches
# collected in `ionbot`.
tmp = []
for sample in samples:
    folder = "%s/%s/Data/ionbot_result/"%(experiment,sample)
    proteins = pd.read_csv("%s/ionbot.coeluting.proteins.csv"%folder)
    # Build the filtered table in one non-mutating chain. Modifying a filtered
    # slice in place (drop_duplicates(inplace=True), then a column assignment)
    # can raise SettingWithCopyWarning.
    # Steps: keep rows that are not flagged as shared peptides, keep the first
    # row per match id, then tag every row with the sample name.
    proteins = (
        proteins[proteins["is_shared_peptide"]==False]
        .drop_duplicates("ionbot_match_id")
        .assign(sample=sample)
    )
    tmp.append(proteins)
proteins = pd.concat(tmp)
# Inner join: only protein rows whose (sample, ionbot_match_id) also occurs in
# `ionbot` are kept, and they gain the columns of `ionbot`.
proteins = proteins.merge(ionbot,on=["sample","ionbot_match_id"],how="inner")

In [34]:
# Preview the first 200 rows of the merged protein table.
proteins.head(200)



ionbot_match_id,is_shared_peptide,protein_group,protein_group_q-value,protein_group_PEP,protein,position_in_protein,uniprot_id,protein_length,protein_description,PEP,proteins,rank,by-count,all-count,by-explained,all-explained,by-intensity-pattern-correlation,predicted_retention_time,rt-pred-error
Loading... (need help?),,,,,,,,,,,,,,,,,,,


In [35]:
# Count the rows of the merged protein table (matches on non-shared peptides)
# for each sample.
tmp = proteins["sample"].value_counts().reset_index(level=0)
tmp.columns = ["sample","#ids"]
# The title states what is counted; the per-sample bar charts in this notebook
# all share the same axes and would otherwise be indistinguishable.
fig = px.bar(tmp, y='sample', x='#ids', orientation='h',
             title="Matches on non-shared peptides per sample")
# Order the bars by their count.
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.show()

In [36]:
# Keep one row per (sample, matched_peptide, modifications) combination of the
# merged protein table, then count the remaining rows for each sample.
tmp = proteins.drop_duplicates(["sample","matched_peptide","modifications"],keep="first")
tmp = tmp["sample"].value_counts().reset_index(level=0)
tmp.columns = ["sample","#ids"]
# The title states what is counted; the per-sample bar charts in this notebook
# all share the same axes and would otherwise be indistinguishable.
fig = px.bar(tmp, y='sample', x='#ids', orientation='h',
             title="Unique non-shared peptide + modification combinations per sample")
# Order the bars by their count.
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.show()

In [37]:
# Keep one row per (sample, matched_peptide) combination of the merged protein
# table, ignoring the modifications, then count the rows for each sample.
tmp = proteins.drop_duplicates(["sample","matched_peptide"],keep="first")
tmp = tmp["sample"].value_counts().reset_index(level=0)
tmp.columns = ["sample","#ids"]
# The title states what is counted; the per-sample bar charts in this notebook
# all share the same axes and would otherwise be indistinguishable.
fig = px.bar(tmp, y='sample', x='#ids', orientation='h',
             title="Unique non-shared peptides per sample")
# Order the bars by their count.
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.show()