In [1]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem

# Helper functions for conversions of SMILES to XYZ and list handling

In [2]:
def smi2conf(smiles):
    '''Convert SMILES to rdkit.Mol with 3D coordinates'''
    mol = Chem.MolFromSmiles(smiles)
    if mol is not None:
        mol = Chem.AddHs(mol)
        AllChem.EmbedMolecule(mol)
        AllChem.MMFFOptimizeMolecule(mol, maxIters=200)
        return mol
    else:
        return None
    
def get_XYZ_from_SMILES(smiles):
    '''Get Atom X Y Z coordinates as string of lines for each atom in molecule'''
    mol = smi2conf(smiles)
    xyzMol = Chem.rdmolfiles.MolToXYZBlock(mol)
    
    lines = ""
    count = 0
    for line in xyzMol.splitlines():
        if count < 1:
            count = count + 1
            continue
        if line != '':
            lines = lines+(' '.join(line.split()))+'\n'
    return lines

def get_side_effect_binary(side_effects, side_effect_list):
    text = ""
    drug_side_effects = get_list_from_cs_string(side_effects)
    for possible_side_effect in side_effect_list:
            if possible_side_effect in drug_side_effects:
                text = text + "1 "
            else: text = text + "0 "
    return text[:-1]

def get_list_from_cs_string(cs_string):
    string_list = []
    texts = cs_string.split (",")
    for text in texts:
        string_list.append(text.strip().lower())
    return string_list

# Create pKa data set

In [3]:
data_file = './../data/pKaInWater.txt'
df = pd.read_csv(data_file, sep='\t', header=(0))
df_data = df[['Smiles', 'pKa']]

In [4]:
df_data

Unnamed: 0,Smiles,pKa
0,c1nnnn1-c1ccccc1,-3.41
1,c1nnn[nH]1,4.84
2,c1ncn[nH]1,2.50
3,c1nc2ncncc2[nH]1,8.92
4,c1nc2ncncc2[nH]1,2.45
...,...,...
7908,Brc(cc1)ccc1-c1nnn[nH]1,3.73
7909,Brc(cc1)ccc1-c1ncc(-c2ccccc2)o1,0.40
7910,Brc(cc1)ccc1-c1cnc(-c2ccccc2)o1,0.65
7911,Brc(cc1)cc2c1NCC2,3.70


### Create dataset from data
Need to create test.txt (10%), train.txt (80%), val.txt (10%) in the dataset folder for model use

The file format should be<br>
name<br>
atom x y z<br>
val1, val2, etc.<br>


In [26]:
file_text = ""
temp_text = ""
for index, row in df_data.iterrows():
    try:
        temp_text = ""
        smiles = row['Smiles']
        val1 = row['pKa']
        temp_text = "index_"+ str(index) + '\n'
        temp_text = temp_text + get_XYZ_from_SMILES(smiles)  
        temp_text = temp_text + str(val1) + '\n\n'
        file_text = file_text + temp_text
    except:
        temp_text = ""

file_text = ""
temp_text = ""
for index, row in df_data.iterrows():
    try:
        temp_text = ""
        smiles = row['Smiles']
        val1 = row['pKa']
        temp_text = smiles + '\n'
        temp_text = temp_text + get_XYZ_from_SMILES(smiles)  
        temp_text = temp_text + str(val1) + '\n\n'
        file_text = file_text + temp_text
    except:
        temp_text = ""

In [6]:
file_text

'c1nnnn1-c1ccccc1\nC 2.568690 0.538905 -0.430234\nN 3.720179 -0.092572 -0.437775\nN 3.430826 -1.394045 -0.125918\nN 2.130283 -1.559886 0.069183\nN 1.565139 -0.344305 -0.119357\nC 0.173186 -0.120531 0.004295\nC -0.671386 -1.191475 0.335478\nC -2.046825 -0.986812 0.461149\nC -2.585988 0.281693 0.258167\nC -1.752320 1.349323 -0.070967\nC -0.374105 1.154458 -0.199062\nH 2.420713 1.586594 -0.634428\nH -0.278385 -2.193768 0.499920\nH -2.697536 -1.819567 0.717923\nH -3.657617 0.438086 0.356726\nH -2.179004 2.337542 -0.228286\nH 0.234150 2.016360 -0.456814\n-3.41\n\nc1nnn[nH]1\nC -0.375454 0.707059 -0.113467\nN 0.884985 1.042173 -0.258430\nN 1.602848 -0.117081 -0.078880\nN 0.796722 -1.140580 0.170926\nN -0.438017 -0.623030 0.148722\nH -1.235427 1.354484 -0.184641\nH -1.235657 -1.223025 0.315770\n4.84\n\nc1ncn[nH]1\nC -0.736310 -0.686174 0.099327\nN 0.494983 -1.138862 0.131110\nC 1.240527 -0.004840 -0.023784\nN 0.538041 1.124184 -0.149603\nN -0.720236 0.662353 -0.067753\nH -1.643994 -1.265301 0

In [9]:
with open('./../dataset/pKa/train.txt', 'w') as f:
    f.write(file_text)

smiles = 'c1nnnn1-c1ccccc1'

# Create drug dataset

In [3]:
data_file = './../data/ApprovedDrugs2015.txt'
df = pd.read_csv(data_file, sep='\t', header=(0))

In [4]:
df

Unnamed: 0,CAS number,chemical name,name IUPAC,merck index ref,literature ref,patent date,Launched date,FDA approved,code ATC,solubility,...,Structure [idcode],Synonyms,T max,protein binding rate,bioavailability,half life time,absorption rate,distribution volume,Smiles,Unnamed: 33
0,134-58-7,Azaguanine-8,"5-Amino-3H-[1,2,3]triazolo[4,5-d]pyrimidin-7-ol","MI, 12, 928",Cancer Res. 1975 Oct 35(10):2872-8,08.09.1964,,,Not yet attributed,DMSO,...,dmMh@DkaePRYYe[iUjefh@@ !Bg}HS_}k|_FozMwwp...,,,,,,,,NC(NC1=O)=Nc2c1nn[nH]2,
1,57-67-0,Sulfaguanidine,4-amino-N-[amino(imino)methyl]benzenesulfonamide,"MI, 13, 8993",Contact Dermatitis. 2002 Mar 46(3):186-7,23.05.1950,,,A07AB03,DMSO,...,dg\d`LF[a@BLddJbbQvfmPLA@@@ !BZn@oLcvOpKemh_...,,,,,,,,NC(NS(c(cc1)ccc1N)(=O)=O)=N,
2,31430-18-9,Nocodazole,Methyl (5-(2-thienylcarbonyl)-1H-benz-imidazol...,,Mol Cancer Ther. 2011 Oct 10(10):1886-96,10.06.1983,,,Not yet attributed,DMSO 10mg/mL,...,fj}qb@C^BqNPAFRJIRqISIYSDijsoUJpACTuD@@ !BYi|...,,,,,,,,COC(Nc1nc(cc(cc2)C(c3cccs3)=O)c2[nH]1)=O,
3,992-21-2,Lymecycline,(+)-N-(5-amino-5-carboxypentylaminomethyl)-4-d...,"MI, 13, 5648",Eur J Dermatol. 2003 Mar-Apr 13(2):130-5,24.02.1982,1992,,J01AA04,,...,emWRE@@EkhUPAC@djmcjV\bbbbbbbvfd|VbRbaJTTQRxtT...,,3 - 4,,,8 - 10,,,C[C@@]([C@H](C[C@H]([C@H](C(O)=C(C(NCNC(CCCCN)...,
4,23930-37-2,Alfadolone acetate,"[2-[(3R,5S,8S,9S,10S,13S,14S,17S)-3-hydroxy-10...","MI, 13, 231",Anesthesiology. 1979 Apr 50(4):350-2,09.09.1987,Phase I,,Not yet attributed,,...,fcAP@@UzM^QQQJQQQQ[SV[YQbm^BVNVjjjjjjZijhRDQT...,,,,,,,,C[C@](C1)([C@@H](CC2)[C@H](CC[C@@H](C3)[C@]4(C...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1580,540737-29-9,Tofacitinib citrate (CP-690550 citrate),,,,,,Yes,,,...,ebZVA@@OFnchTPDBIEMCKLbfdrTTCTTRRTLRaTRUPwtJNK...,,,,,,,,C[C@H](CCN(C1)C(CC#N)=O)[C@H]1N(C)c1ncnc2c1cc[...,
1581,104987-11-3,FK-506 (Tacrolimus),,,,,,Yes,,,...,eisxC@@D@FMCGOImbWTv|bbbbbtbbvftLTRbbRVaffVRbb...,"Fujimycin, Prograf",,,,,,,C[C@H](C[C@@H]1OC)[C@@](C(C(N(CCCC2)[C@@H]2C(O...,
1582,137071-32-0,Pimecrolimus,,,,,,Yes,,,...,ea}xMH@FbKDbcgl^QRzpHcHhhdbeELhhhYeDhhcMLmHimE...,"Elidel, SDZ-ASM-981",,,,,,,CC[C@H](/C=C(/C)\C[C@H](C)C[C@@H]([C@H]([C@H](...,
1583,110078-46-1,Plerixafor,,,,,,Yes,,,...,ebVQ@@@@NCJfmglTjUUeUWU~UUUUrKJiGFjj`hJJjjjjjb...,,,,,,,,C(c1ccc(CN2CCNCCCNCCNCCC2)cc1)N1CCNCCCNCCNCCC1,


In [5]:
df_data = df[['Smiles', 'chemical name', 'FDA approved', 'blood brain barrier', 'absorption tissue', 'side effect name', 'therapeutic class name', 'therapeutic effect name']]
df_data

Unnamed: 0,Smiles,chemical name,FDA approved,blood brain barrier,absorption tissue,side effect name,therapeutic class name,therapeutic effect name
0,NC(NC1=O)=Nc2c1nn[nH]2,Azaguanine-8,,,,,Oncology,Antineoplastic
1,NC(NS(c(cc1)ccc1N)(=O)=O)=N,Sulfaguanidine,,,,"Diarrhoea, Hepatic enzyme levels elevated, Hyp...","Infectiology, Metabolism",Antibacterial
2,COC(Nc1nc(cc(cc2)C(c3cccs3)=O)c2[nH]1)=O,Nocodazole,,,,"Diarrhoea, Skin dryness",Oncology,Antineoplastic
3,C[C@@]([C@H](C[C@H]([C@H](C(O)=C(C(NCNC(CCCCN)...,Lymecycline,,,,"Blurred vision, Diarrhoea, Headache, Hypersens...",Metabolism,Antibacterial
4,C[C@](C1)([C@@H](CC2)[C@H](CC[C@@H](C3)[C@]4(C...,Alfadolone acetate,,,,,Central Nervous System,Anesthetic
...,...,...,...,...,...,...,...,...
1580,C[C@H](CCN(C1)C(CC#N)=O)[C@H]1N(C)c1ncnc2c1cc[...,Tofacitinib citrate (CP-690550 citrate),Yes,,,,Cancer,
1581,C[C@H](C[C@@H]1OC)[C@@](C(C(N(CCCC2)[C@@H]2C(O...,FK-506 (Tacrolimus),Yes,,,,Cancer,
1582,CC[C@H](/C=C(/C)\C[C@H](C)C[C@@H]([C@H]([C@H](...,Pimecrolimus,Yes,,,,Cancer,
1583,C(c1ccc(CN2CCNCCCNCCNCCC2)cc1)N1CCNCCCNCCNCCC1,Plerixafor,Yes,,,,Immunology,


In [6]:
side_effects = df_data['side effect name'].unique()

In [7]:
side_effect_list = []
for side_effect_cs in side_effects:
    if not pd.isna(side_effect_cs):
        cs_list = side_effect_cs.split (",")
        for side_effect in cs_list:
            side_effect_list.append(side_effect.strip().lower())
side_effect_list

['diarrhoea',
 'hepatic enzyme levels elevated',
 'hypersensitivity reactions',
 'nausea',
 'nephrotoxicity',
 'photosensitization',
 'diarrhoea',
 'skin dryness',
 'blurred vision',
 'diarrhoea',
 'headache',
 'hypersensitivity reactions',
 'intracranial hypertension',
 'nausea',
 'photosensitization',
 'arrhythmia',
 'bradycardia',
 'dyspnea',
 'hypoxia',
 'seizure',
 'vomiting',
 'depression',
 'dizziness',
 'drowsiness',
 'epigastric misery',
 'haematuria',
 'headache',
 'hypersensitivity reactions',
 'insomnia',
 'photosensitization',
 'tinnitus',
 'constipation',
 'diarrhoea',
 'dizziness',
 'drowsiness',
 'epigastric misery',
 'headache',
 'nausea',
 'rash',
 'sweat',
 'hypersensitivity reactions',
 'diarrhoea',
 'epigastric misery',
 'hypersensitivity reactions',
 'nausea',
 'rash',
 'confusion',
 'fatigue',
 'nausea',
 'orthostatic hypotension',
 'pruritus',
 'sweat',
 'diarrhoea',
 'hypersensitivity reactions',
 'nausea',
 'purpura',
 'rash',
 'anorexia',
 'diarrhoea',
 'dizz

In [8]:
# we want the unique values
side_effect_list = set(side_effect_list)
side_effect_list

{'acne',
 'addiction',
 'aggressiveness',
 'agranulocytosis',
 'amenorrhoea',
 'anaphylactic shock',
 'anemia',
 'anorexia',
 'antabuse effect',
 'anticholinergic effects',
 'anxiety',
 'arrhythmia',
 'arthralgia',
 'asthenia',
 'asthma',
 'ataxia',
 'atropinic effects',
 'bitterness',
 'bleeding',
 'blockade of platelet aggregation',
 'blurred vision',
 'bone marrow suppression',
 'bone weakness',
 'bradycardia',
 'breast pain',
 'breast tenderness',
 'bronchospasm',
 'burning',
 'carcinogenic',
 'cardiac toxicity',
 'cheese effect',
 'chills',
 'cholinergic syndrome',
 'color blindness',
 'confusion',
 'constipation',
 'cough',
 'cystitis',
 'depression',
 'diabetes',
 'diarrhoea',
 'discoloration',
 'dizziness',
 'drowsiness',
 'dryness',
 'dyschromatopsia',
 'dyskinesia',
 'dyspepsia',
 'dysphonia',
 'dyspnea',
 'eczema',
 'electrocardiogram disturbances',
 'eosinophilia',
 'epigastric misery',
 'erectile dysfunction',
 'erythema',
 'esophageal ulcer',
 'estrogenic',
 'euphoria',
 

In [9]:
# merge the two dataframes together so we have the side effects for each molecule
df_side_effect_data = df_data
for side_effect in side_effect_list:
    df_side_effect_data[side_effect] = 0
df_side_effect_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_side_effect_data[side_effect] = 0
  df_side_effect_data[side_effect] = 0


Unnamed: 0,Smiles,chemical name,FDA approved,blood brain barrier,absorption tissue,side effect name,therapeutic class name,therapeutic effect name,taste alteration,erectile dysfunction,...,neutropenia,anemia,gingivitis,hypocalcemia,parkinsonian syndrome,metabolic acidosis,cheese effect,mouth dryness,leucopenia,hot flushes
0,NC(NC1=O)=Nc2c1nn[nH]2,Azaguanine-8,,,,,Oncology,Antineoplastic,0,0,...,0,0,0,0,0,0,0,0,0,0
1,NC(NS(c(cc1)ccc1N)(=O)=O)=N,Sulfaguanidine,,,,"Diarrhoea, Hepatic enzyme levels elevated, Hyp...","Infectiology, Metabolism",Antibacterial,0,0,...,0,0,0,0,0,0,0,0,0,0
2,COC(Nc1nc(cc(cc2)C(c3cccs3)=O)c2[nH]1)=O,Nocodazole,,,,"Diarrhoea, Skin dryness",Oncology,Antineoplastic,0,0,...,0,0,0,0,0,0,0,0,0,0
3,C[C@@]([C@H](C[C@H]([C@H](C(O)=C(C(NCNC(CCCCN)...,Lymecycline,,,,"Blurred vision, Diarrhoea, Headache, Hypersens...",Metabolism,Antibacterial,0,0,...,0,0,0,0,0,0,0,0,0,0
4,C[C@](C1)([C@@H](CC2)[C@H](CC[C@@H](C3)[C@]4(C...,Alfadolone acetate,,,,,Central Nervous System,Anesthetic,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1580,C[C@H](CCN(C1)C(CC#N)=O)[C@H]1N(C)c1ncnc2c1cc[...,Tofacitinib citrate (CP-690550 citrate),Yes,,,,Cancer,,0,0,...,0,0,0,0,0,0,0,0,0,0
1581,C[C@H](C[C@@H]1OC)[C@@](C(C(N(CCCC2)[C@@H]2C(O...,FK-506 (Tacrolimus),Yes,,,,Cancer,,0,0,...,0,0,0,0,0,0,0,0,0,0
1582,CC[C@H](/C=C(/C)\C[C@H](C)C[C@@H]([C@H]([C@H](...,Pimecrolimus,Yes,,,,Cancer,,0,0,...,0,0,0,0,0,0,0,0,0,0
1583,C(c1ccc(CN2CCNCCCNCCNCCC2)cc1)N1CCNCCCNCCNCCC1,Plerixafor,Yes,,,,Immunology,,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
therapeutic_class = df_data['therapeutic class name'].unique()

In [11]:
therapeutic_class_list = []
for therapeutic_class_cs in therapeutic_class:
    if not pd.isna(therapeutic_class_cs):
        cs_list = therapeutic_class_cs.split (",")
        for side_effect in cs_list:
            therapeutic_class_list.append(side_effect.strip().lower())

therapeutic_class_list = set(therapeutic_class_list)
therapeutic_class_list

{'allergology',
 'cancer',
 'cardiovascular',
 'cardiovascular disease',
 'central nervous system',
 'dermatology',
 'diagnostic',
 'digestive system disease',
 'endocrinology',
 'free base',
 'gastroenterology',
 'hematology',
 'immunology',
 'infectiology',
 'infection',
 'inflammation',
 'metabolic disease',
 'metabolism',
 'neurological disease',
 'neuromuscular',
 'oncology',
 'ophthalmology',
 'respiratory',
 'respiratory disease',
 'vermifuge'}

In [12]:
therapeutic_effects = df_data['therapeutic effect name'].unique()

In [13]:
therapeutic_effects_list = []
for therapeutic_effects_cs in therapeutic_effects:
    if not pd.isna(therapeutic_effects_cs):
        cs_list = therapeutic_effects_cs.split (",")
        for side_effect in cs_list:
            therapeutic_effects_list.append(side_effect.strip().lower())

therapeutic_effects_list = set(therapeutic_effects_list)
therapeutic_effects_list

{'abortifacient',
 'alcohol addiction treatment',
 'anabolic',
 'analeptic',
 'analgesic',
 'anesthetic',
 'anorectic',
 'antabuse effect',
 'anti-alopecia',
 'anti-alzheimer',
 'anti-anorectic',
 'anti-fatigue',
 'anti-haemorrhoids',
 'anti-inflammatory',
 'anti-ischemic',
 'anti-oxidant',
 'antiallergic',
 'antiamebic',
 'antianemic',
 'antianginal',
 'antiarrhythmic',
 'antiarthritic',
 'antiasthmatic',
 'antibacterial',
 'anticoagulant',
 'anticoccidial',
 'anticonvulsant',
 'antidepressant',
 'antidiabetic',
 'antidiarrheal',
 'antidiuretic',
 'antiemetic',
 'antiepileptic',
 'antifibrinolytic',
 'antifungal',
 'antiglaucoma',
 'antigonadotropin',
 'antigout',
 'antihelmintic',
 'antihistaminic',
 'antihypertensive',
 'antihyperthyroid',
 'antihypotensive',
 'antihypothyroid',
 'antileishmanial',
 'antilipemic',
 'antimalarial',
 'antimigraine',
 'antineoplastic',
 'antiosteoporetic',
 'antiparasitic',
 'antiparkinsonian',
 'antiplatelet',
 'antiprolactin',
 'antiprotozoal',
 'ant

## Create Side Effects Dataset

In [17]:
df_side_effect_data = df_side_effect_data.fillna(0)
file_text = ""
temp_text = ""
error_text = ""
for index, row in df_side_effect_data.iterrows():
    try:
        temp_text = ""
        smiles = row['Smiles']
        if not pd.isna(row['side effect name']):
            side_effect_cs = str(row['side effect name'])
            cs_list = side_effect_cs.split (",")
            for side_effect in cs_list:
                df_side_effect_data.at[index, side_effect.strip().lower()] = 1
        temp_text = "index_" + str(index) + '\n'
        temp_text = temp_text + get_XYZ_from_SMILES(smiles)  
        temp_text = temp_text + get_side_effect_binary(side_effect_cs, side_effect_list) + '\n\n'
        file_text = file_text + temp_text
    except:
        error_text = error_text + smiles + ", "

[15:33:19] Cannot write molecules with no conformers to XYZ block
[15:33:37] Cannot write molecules with no conformers to XYZ block
[15:33:55] Cannot write molecules with no conformers to XYZ block
[15:34:01] UFFTYPER: Unrecognized atom type: Au6 (7)
[15:34:32] Cannot write molecules with no conformers to XYZ block
[15:34:33] UFFTYPER: Unrecognized atom type: Ca (0)
[15:35:05] UFFTYPER: Unrecognized charge state for atom: 9
[15:35:06] UFFTYPER: Unrecognized atom type: Co5 (65)
[15:35:59] UFFTYPER: Unrecognized atom type: Ca (0)
[15:35:59] UFFTYPER: Unrecognized atom type: Se2+2 (9)
[15:35:59] UFFTYPER: Unrecognized atom type: Se2+2 (9)
[15:36:00] SMILES Parse Error: syntax error while parsing: CC(C(CC1)N(C)C)OC1O[C@H]([C@H](C)C[C@H](CC=O)[C@@H]([C@H]([C@@H](C1)O[R])OC)OC(C(C2N(C)C)O)OC(C)C2OC(CC2(C)O)OC(C)C2O)/C=C/C=C/C[C@@H](C)OC1=O
[15:36:00] SMILES Parse Error: Failed parsing SMILES 'CC(C(CC1)N(C)C)OC1O[C@H]([C@H](C)C[C@H](CC=O)[C@@H]([C@H]([C@@H](C1)O[R])OC)OC(C(C2N(C)C)O)OC(C)C2OC

In [18]:
with open('./../dataset/drug_side_effects/train.txt', 'w') as f:
    f.write(file_text)

In [19]:
error_text

'C=CC[N@+](CC1)(C/C(/[C@H](C2)/C(/[C@H]34)=C/N([C@@H]5[C@@]6(CC7)[C@H](C8)[N@+]7(CC=C)C7)c9c6cccc9)=C\\CO)[C@@H]2[C@@]14c(cccc1)c1N3/C=C5/[C@@H]8/C7=C/CO.[Cl-].[Cl-], C[C@H](CC[C@H]1[C@H]2C)[C@H](CC3)[C@]11OO[C@@]3(C)O[C@H]1OC2=O, CN([C@@H](CC1)C2)[C@@H]1CC2OC(C(CO)c1ccccc1)=O.CN([C@@H](CC1)C2)[C@@H]1CC2OC(C(CO)c1ccccc1)=O.OS(O)(=O)=O.O, CC(C(CC1)N(C)C)OC1O[C@H]([C@H](C)C[C@H](CC=O)[C@@H]([C@H]([C@@H](C1)O[R])OC)OC(C(C2N(C)C)O)OC(C)C2OC(CC2(C)O)OC(C)C2O)/C=C/C=C/C[C@@H](C)OC1=O, CC(C)(C)CC(C)(C)c(cc1)cc(Cc2cc(C(C)(C)CC(C)(C)C)cc(Cc(cc(C(C)(C)CC(C)(C)C)cc3)c3OCOCCOCCO)c2OCOCCOCCO)c1OCOCCOCCO, C[C@H]([C@H]([C@H](C)NC([C@H](C(c1c[nH]cn1)O[C@H]([C@@H]([C@@H]1O)O[C@H]([C@H]([C@H]2OC(N)=O)O)O[C@H](CO)[C@H]2O)O[C@H](CO)[C@@H]1O)NC(c1nc([C@H](CC(N)=O)NC[C@H](C(N)=O)N)nc(N)c1C)=O)=O)O)C(N[C@H]([C@@H](C)O)C(NCCc1nc(-c2nc(C(NCCC[S+](C)C)=O)cs2)cs1)=O)=O.[O-]S(O)(=O)=O, CCCCCCCCCC(N[C@@H](Cc1c[nH]c2c1cccc2)C(N[C@H](CC(N)=O)C(N[C@@H](CC(O)=O)C(N[C@@H]([C@@H](C)OC([C@H](CC(c(cccc1)c1N)=O)NC([C@H]([C