In [None]:
import os

import pandas as pd
from markdown import markdown

In [None]:
# Path of the suspect library table; later cells derive the output file
# names from this same path.
filename = '../../data/suspect_library_freq.csv'
# Load the whole table into memory; every later cell works on `suspects`.
suspects = pd.read_csv(filepath_or_buffer=filename)

In [None]:
# Characters 2..13 of each suspect path are taken as the dataset identifier.
# NOTE(review): presumably a 12-character dataset accession sitting at a
# fixed offset in the path -- confirm against the input data.
suspect_paths = suspects['SuspectPath']
suspects['dataset'] = suspect_paths.str[2:14]

# usi1: built from the dataset, the file name of the suspect path and the
# suspect scan number.
peak_files = suspect_paths.apply(os.path.basename)
suspect_scans = suspects['SuspectScanNr'].astype(str)
suspects['usi1'] = ('mzspec:' + suspects['dataset'] + ':' + peak_files
                    + ':scan:' + suspect_scans)

# usi2: built from the library identifier only.
suspects['usi2'] = 'mzspec:GNPSLIBRARY:' + suspects['LibraryID']
# TODO: New style USI.
# suspects['usi2'] = ('mzdraft:GNPS:GNPS-LIBRARY:accession:' +
#                     suspects['LibraryID'])

# usi3: built from a fixed dataset prefix, a per-dataset MGF file name and
# the cluster scan number.
cluster_scans = suspects['ClusterScanNr'].astype(str)
suspects['usi3'] = ('mzspec:MSV000084314:' + suspects['dataset']
                    + '.mgf:scan:' + cluster_scans)

In [None]:
# One explanation string per suspect row. The 'AtomicDifference' and
# 'Rationale' cells each hold '|'-separated entries; entries are paired
# positionally and rendered as "<difference> (<rationale>)", joined with
# ' / '. Missing cells are replaced by 'unknown' before splitting.
explanations = []
for diff_field, rationale_field in zip(
        suspects['AtomicDifference'].fillna('unknown'),
        suspects['Rationale'].fillna('unknown')):
    # zip() stops at the shorter of the two lists, so unpaired trailing
    # entries are dropped.
    paired_entries = zip(diff_field.split('|'), rationale_field.split('|'))
    explanations.append(
        ' / '.join(f'{diff} ({reason})' for diff, reason in paired_entries))

# Markdown table: a header row and a separator row first, then one row per
# suspect appended at the end of this cell.
table_header = 'Suspect | Mirror Library | Mirror Dataset Cluster | Image'
table_separator = '--- | --- | --- | ---'
output_list = [table_header, table_separator]

# First table column: an HTML bullet list describing the suspect. All pieces
# are concatenated element-wise as pandas Series, so a missing value in any
# contributing column makes the whole entry for that row missing.
delta_mz_text = suspects['DeltaMZ'].map('{:=+9.3f}'.format)
group_delta_mz_text = suspects['GroupDeltaMZ'].map('{:+.2f}'.format)
suspect_item = ('<ul><li><b>Suspect:</b> ' + suspects['CompoundName']
                + ' [' + suspects['Adduct'] + '] ' + delta_mz_text
                + ' [' + group_delta_mz_text + ']</li>')
library_item = ('<li><b>Library:</b> [' + suspects['LibraryID']
                + '](https://gnps.ucsd.edu/ProteoSAFe/gnpslibraryspectrum.jsp?'
                  'SpectrumID='
                + suspects['LibraryID'] + ')</li>')
# `explanations` is a plain list: it has to be added to a Series (left
# operand) so that pandas performs the element-wise concatenation.
suspects_str = (suspect_item + library_item
                + '<li><b>Putative explanation:</b> ' + explanations
                + '</li></ul>')

# Remaining columns: links to the spectrum viewer, all restricted to the
# same m/z window.
mz_window = '&mz_min=50&mz_max=500'
mirror_prefix = 'https://metabolomics-usi.ucsd.edu/svg/mirror?usi1='
# Suspect spectrum mirrored against usi2.
mirror_urls = (mirror_prefix + suspects['usi1']
               + '&usi2=' + suspects['usi2'] + mz_window)
# Suspect spectrum mirrored against usi3.
mirror_alt_urls = (mirror_prefix + suspects['usi1']
                   + '&usi2=' + suspects['usi3'] + mz_window)
# Suspect spectrum on its own.
spectrum_urls = ('https://metabolomics-usi.ucsd.edu/svg/?usi='
                 + suspects['usi1'] + mz_window)

# One table row per suspect: description, two embedded images and a link.
output_list.extend(
    f'{description} | ![]({mirror}) | ![]({mirror_alt}) | [View USI]({single})'
    for description, mirror, mirror_alt, single in zip(
        suspects_str, mirror_urls, mirror_alt_urls, spectrum_urls))

In [None]:
# Write the table next to the input CSV twice: once as raw Markdown and once
# rendered to HTML (the 'tables' extension is needed for pipe tables).
#
# The output names are derived by swapping only the file extension.
# A plain `str.replace('.csv', ...)` would also rewrite '.csv' occurring
# elsewhere in the path, and would return the input path unchanged (and thus
# overwrite the source file) if the input did not contain '.csv' at all.
output_base = os.path.splitext(filename)[0]
# Join once and reuse the same text for both outputs.
table_markdown = '\n'.join(output_list)
# Explicit UTF-8 so the result does not depend on the platform's default
# encoding when the table contains non-ASCII characters.
with open(f'{output_base}.md', 'w', encoding='utf-8') as f_out:
    f_out.write(table_markdown)
with open(f'{output_base}.html', 'w', encoding='utf-8') as f_out:
    f_out.write(markdown(table_markdown, extensions=['tables']))