In [1]:
import numpy as np
import snorkel
from snorkel.labeling import labeling_function
from snorkel.labeling import LabelingFunction
from snorkel.labeling import PandasLFApplier
from snorkel.labeling import LFAnalysis
import re
from snorkel.labeling.model import MajorityLabelVoter
import json
import pandas as pd
import label_improve

In [None]:
@labeling_function()
def lf_amino_acid(x):
    return 0 if 'amino acid' in x.sentence.lower() else ABSTAIN
#1
@labeling_function()
def lf_replace(x):
    return 0 if 'replace' in x.sentence.lower() else ABSTAIN
#2 TODO: 0.1988
@labeling_function()
def lf_mutant(x):
    return 0 if 'mutant' in x.sentence.lower() or 'mutat' in x.sentence.lower() else ABSTAIN
#3
## Regulator
@labeling_function()
def lf_bind(x):
    return 1 if 'bind' in x.sentence.lower() else ABSTAIN
#4
@labeling_function()
def lf_interact(x):
    return 1 if 'interact' in x.sentence.lower() else ABSTAIN
#5
@labeling_function()
def lf_affinity(x):
    return 1 if 'affinit' in x.sentence.lower() else ABSTAIN
#6 TODO: 0.3578
## Upregulator
# Activator
@labeling_function()
def lf_activate(x):
    return 2 if 'activat' in x.sentence.lower() else ABSTAIN
#7
@labeling_function()
def lf_increase(x):
    return 2 if 'increas' in x.sentence.lower() else ABSTAIN
#8 TODO: 
@labeling_function()
def lf_induce(x):
    return 2 if 'induc' in x.sentence.lower() else ABSTAIN
#9 TODO: 
@labeling_function()
def lf_stimulate(x):
    return 2 if 'stimulat' in x.sentence.lower() else ABSTAIN
#10
@labeling_function()
def lf_upregulate(x):
    return 2 if 'upregulat' in x.sentence.lower() else ABSTAIN
#11
## Downregulator
@labeling_function()
def lf_downregulate(x):
    return 3 if 'downregulat' in x.sentence.lower() or 'down-regulat' in x.sentence.lower() else ABSTAIN
#12
@labeling_function()
def lf_reduce(x):
    return 3 if 'reduc' in x.sentence.lower() else ABSTAIN
#13
@labeling_function()
def lf_inhibit(x):
    return 3 if 'inhibit' in x.sentence.lower() else ABSTAIN
#14
@labeling_function()
def lf_decrease(x):
    return 3 if 'decreas' in x.sentence.lower() else ABSTAIN
#15
## Agonist
@labeling_function()
def lf_agonist(x):
    return 4 if ' agoni' in x.sentence.lower() or "\tagoni" in x.sentence.lower() else ABSTAIN

#16
## Antagonist
@labeling_function()
def lf_antagonist(x):
    return 5 if 'antagon' in x.sentence.lower() else ABSTAIN

#17
## Modulator
# TODO: Delete this LF, or change this to modulator ??
@labeling_function()
def lf_modulate(x):
    return 6 if 'modulat' in x.sentence.lower() else ABSTAIN

#18
@labeling_function()
def lf_allosteric(x):
    return 6 if 'allosteric' in x.sentence.lower() else ABSTAIN
#19
## Cofactor
@labeling_function()
def lf_cofactor(x):
    return 7 if 'cofactor' in x.sentence.lower() else ABSTAIN
#20
## Substrate/Product
@labeling_function()
def lf_substrate(x):
    return 8 if 'substrate' in x.sentence.lower() else ABSTAIN
#21
@labeling_function()
def lf_transport(x):
    return 8 if 'transport' in x.sentence.lower() else ABSTAIN
#22
@labeling_function()
def lf_catalyze(x):
    return 8 if 'catalyz' in x.sentence.lower() or 'catalys' in x.sentence.lower() else ABSTAIN
#23
@labeling_function()
def lf_product(x):
    return 8 if "produc" in x.sentence.lower() else ABSTAIN
#24
@labeling_function()
def lf_convert(x):
    return 8 if "conver" in x.sentence.lower() else ABSTAIN
#25
## NOT
@labeling_function()
def lf_not(x):
    return 9 if 'not' in x.sentence.lower() else ABSTAIN

# 26 replace the 17 (18)
@labeling_function()
def lf_combined_modulator(x):
    sentence_lower = x.text.lower()

    specific_terms = ['allosteric modulator', 'positive modulator', 'negative modulator', 'non-competitive modulator', 'positive allosteric modulator']
    if any(term in sentence_lower for term in specific_terms):
        return 6

    modulating_terms = ['modulat', 'allosteric', 'potentiate', 'regulate']
    for term in modulating_terms:
        if term in sentence_lower:
            term_index = sentence_lower.index(term)
            if x.entity1_index == -1 or x.entity2_index == -1:
                return ABSTAIN
            if abs(term_index - x.entity1_index) < 20 or abs(term_index - x.entity2_index) < 20:
                return 6
    # check the first indcidences of modulator and positive
    if x.entity1 in sentence_lower and x.entity2 in sentence_lower:
        entity1_index = sentence_lower.index(x.entity1.lower())
        entity2_index = sentence_lower.index(x.entity2.lower())
        between_entities = sentence_lower[min(entity1_index, entity2_index):max(entity1_index, entity2_index)]
        if 'modulate' in between_entities or 'regulate' in between_entities:
            return 6

    if 'positive' in sentence_lower and 'modulator' in sentence_lower:
        pos_indices = [i for i, word in enumerate(sentence_lower.split()) if word == 'positive']
        mod_indices = [i for i, word in enumerate(sentence_lower.split()) if 'modulator' in word]
        min_distance = min(abs(p - m) for p in pos_indices for m in mod_indices)
        if min_distance <= 3:
            return 6

    return ABSTAIN


In [None]:
"While SAR within the HTS series was very shallow and unable to be optimized, grafting the phenethyl ether linkage onto the ML129/ML172 cores led to the first sub-micromolar M5 PAM, ML326 (VU0467903), (human and rat M5 EC50s of 409nM and 500nM, respectively) with excellent mAChR selectivity (M1-M4 EC50s >30\u03bcM) and a robust 20-fold leftward shift of the ACh CRC."

In [None]:
lf_upregulate
DBDCT up-regulated the expression of Bax, down-regulated the expression of Bcl-2, and significantly increased the ratio of Bax/Bcl-2.

In [2]:
len([-1, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1])

len([-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  3 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1])

SyntaxError: invalid syntax (3951801930.py, line 2)

In [None]:
# chemprot functions:

ABSTAIN = -1
### Keyword based labeling functions ###

## Part of
#0
@labeling_function()
def lf_amino_acid(x):
    return 0 if 'amino acid' in x.text.lower() else ABSTAIN
#1
@labeling_function()
def lf_replace(x):
    return 0 if 'replace' in x.text.lower() else ABSTAIN
#2 TODO: 0.1988
@labeling_function()
def lf_mutant(x):
    return 0 if 'mutant' in x.text.lower() or 'mutat' in x.text.lower() else ABSTAIN
#3
## Regulator
@labeling_function()
def lf_bind(x):
    return 1 if 'bind' in x.text.lower() else ABSTAIN
#4
@labeling_function()
def lf_interact(x):
    return 1 if 'interact' in x.text.lower() else ABSTAIN
#5
@labeling_function()
def lf_affinity(x):
    return 1 if 'affinit' in x.text.lower() else ABSTAIN
#6 TODO: 0.3578
## Upregulator
# Activator
@labeling_function()
def lf_activate(x):
    return 2 if 'activat' in x.text.lower() else ABSTAIN
#7
@labeling_function()
def lf_increase(x):
    return 2 if 'increas' in x.text.lower() else ABSTAIN
#8 TODO: 
@labeling_function()
def lf_induce(x):
    return 2 if 'induc' in x.text.lower() else ABSTAIN
#9 TODO: 
@labeling_function()
def lf_stimulate(x):
    return 2 if 'stimulat' in x.text.lower() else ABSTAIN
#10
(z1+z2)/2 <= 2- z3-z4
@labeling_function()
@labeling_function()
def lf_upregulate(x):
    if ('upregulat' in x.text.lower() or 'up-regulat' in x.text.lower()) and ('downregulat' in x.text.lower() or 'down-regulat' in x.text.lower()):
        if x.entity1_index == -1 or x.entity2_index == -1:
            return ABSTAIN
        # if up regulate is between the two entities
        if isinstance(x.entity1_index, int) and isinstance(x.entity2_index, int):
            if x.entity1_index < x.entity2_index:
                if x.text[x.entity1_index:x.entity2_index].count('upregulat') > 0 or x.text[x.entity1_index:x.entity2_index].count('up-regulat') > 0:
                    return 2
            else:
                if x.text[x.entity2_index:x.entity1_index].count('upregulat') > 0 or x.text[x.entity2_index:x.entity1_index].count('up-regulat') > 0:
                    return 2
        return ABSTAIN
    else:
        return 2 if 'upregulat' in x.text.lower() or 'up-regulat' in x.text.lower() else ABSTAIN
#11
## Downregulator
@labeling_function()
def lf_downregulate(x):
    if('downregulat' in x.text.lower() or 'down-regulat' in x.text.lower()) and ('upregulat' in x.text.lower() or 'up-regulat' in x.text.lower()):
        if x.entity1_index == -1 or x.entity2_index == -1:
            return ABSTAIN
        # if up regulate is between the two entities
        if isinstance(x.entity1_index, int) and isinstance(x.entity2_index, int):
            if x.entity1_index < x.entity2_index:
                if x.text[x.entity1_index:x.entity2_index].count('downregulat') > 0 or x.text[x.entity1_index:x.entity2_index].count('down-regulat') > 0:
                    return 3
            else:
                if x.text[x.entity2_index:x.entity1_index].count('downregulat') > 0 or x.text[x.entity2_index:x.entity1_index].count('down-regulat') > 0:
                    return 3
        return ABSTAIN
    return 3 if 'downregulat' in x.text.lower() or 'down-regulat' in x.text.lower() else ABSTAIN
#12
@labeling_function()
def lf_reduce(x):
    return 3 if 'reduc' in x.text.lower() else ABSTAIN
#13
@labeling_function()
def lf_inhibit(x):
    return 3 if 'inhibit' in x.text.lower() else ABSTAIN
#14
@labeling_function()
def lf_decrease(x):
    return 3 if 'decreas' in x.text.lower() else ABSTAIN
#15
## Agonist
@labeling_function()
def lf_agonist(x):
    return 4 if ' agoni' in x.text.lower() or "\tagoni" in x.text.lower() else ABSTAIN

#16
## Antagonist
@labeling_function()
def lf_antagonist(x):
    return 5 if 'antagon' in x.text.lower() else ABSTAIN

#17
## Modulator
# TODO: Delete this LF, or change this to modulator ??
@labeling_function()
def lf_modulate(x):
    return 6 if 'modulat' in x.text.lower() else ABSTAIN

#18
@labeling_function()
def lf_allosteric(x):
    return 6 if 'allosteric' in x.text.lower() else ABSTAIN
#19
## Cofactor
@labeling_function()
def lf_cofactor(x):
    return 7 if 'cofactor' in x.text.lower() else ABSTAIN
#20
## Substrate/Product
@labeling_function()
def lf_substrate(x):
    return 8 if 'substrate' in x.text.lower() else ABSTAIN
#21
@labeling_function()
def lf_transport(x):
    return 8 if 'transport' in x.text.lower() else ABSTAIN
#22
@labeling_function()
def lf_catalyze(x):
    return 8 if 'catalyz' in x.text.lower() or 'catalys' in x.text.lower() else ABSTAIN
#23
@labeling_function()
def lf_product(x):
    return 8 if "produc" in x.text.lower() else ABSTAIN
#24
@labeling_function()
def lf_convert(x):
    return 8 if "conver" in x.text.lower() else ABSTAIN
#25
## NOT
@labeling_function()
def lf_not(x):
    return 9 if 'not' in x.text.lower() else ABSTAIN

# 26 replace the 17 (18)
@labeling_function()
def lf_combined_modulator(x):
    sentence_lower = x.text.lower()

    specific_terms = ['allosteric modulator', 'positive modulator', 'negative modulator', 'non-competitive modulator', 'positive allosteric modulator']
    if any(term in sentence_lower for term in specific_terms):
        return 6

    modulating_terms = ['modulat', 'allosteric', 'potentiate']
    for term in modulating_terms:
        if term in sentence_lower:
            term_index = sentence_lower.index(term)
            if x.entity1_index == -1 or x.entity2_index == -1:
                return ABSTAIN
            if abs(term_index - x.entity1_index) < 20 or abs(term_index - x.entity2_index) < 20:
                return 6
    # check the first indcidences of modulator and positive
    if x.entity1 in sentence_lower and x.entity2 in sentence_lower:
        entity1_index = sentence_lower.index(x.entity1.lower())
        entity2_index = sentence_lower.index(x.entity2.lower())
        between_entities = sentence_lower[min(entity1_index, entity2_index):max(entity1_index, entity2_index)]
        if 'modulate' in between_entities:
            return 6

    if 'positive' in sentence_lower and 'modulator' in sentence_lower:
        pos_indices = [i for i, word in enumerate(sentence_lower.split()) if word == 'positive']
        mod_indices = [i for i, word in enumerate(sentence_lower.split()) if 'modulator' in word]
        if len(pos_indices) == 0 or len(mod_indices) == 0:
            return ABSTAIN
        min_distance = min(abs(p - m) for p in pos_indices for m in mod_indices)
        if min_distance <= 3:
            return 6

    return ABSTAIN

lfs = [lf_amino_acid, lf_replace, lf_mutant, lf_bind, lf_interact, lf_affinity, lf_activate, lf_increase, lf_induce, lf_stimulate, lf_upregulate, lf_downregulate, lf_reduce, lf_inhibit, lf_decrease, lf_agonist, lf_antagonist, lf_combined_modulator, lf_allosteric, lf_cofactor, lf_substrate, lf_transport, lf_catalyze, lf_product, lf_convert, lf_not]


In [None]:

def lf_mutant(x):
    words = "The refolding kinetics of guanidine-denatured disulfide-intact bovine pancreatic ribonuclease A (RNase A) and its proline-42-to-alanine mutant (Pro42Ala) have been studied by monitoring tyrosine burial and 2'-cytidine monophosphate (2'CMP) inhibitor binding.".lower().split()
    
    if 'mutant' in words or 'mutat' in words:
        # if mutant is between the two entities
        if x.entity1_index == -1 or x.entity2_index == -1:
            return ABSTAIN
        if isinstance(x.entity1_index, int) and isinstance(x.entity2_index, int):
            if x.entity1_index < x.entity2_index:
                if 'mutant' in words[x.entity1_index:x.entity2_index] or 'mutat' in words[x.entity1_index:x.entity2_index]:
                    return 0
            else:
                if 'mutant' in words[x.entity2_index:x.entity1_index] or 'mutat' in words[x.entity2_index:x.entity1_index]:
                    return 0
        # if mutant is close to either of the entities
        if abs(x.entity1_index - words.index('mutant')) < 4 or abs(x.entity2_index - words.index('mutant')) < 4:
            return 0
        # if mutat is close to either of the entities
        if abs(x.entity1_index - words.index('mutat')) < 4 or abs(x.entity2_index - words.index('mutat')) < 4:
            return 0
    return ABSTAIN

In [None]:
# chemprot functions:

ABSTAIN = -1
### Keyword based labeling functions ###

## Part of
#0
@labeling_function()
def lf_amino_acid(x):
    return 0 if 'amino acid' in x.text.lower() else ABSTAIN
#1
@labeling_function()
def lf_replace(x):
    return 0 if 'replace' in x.text.lower() else ABSTAIN
#2 TODO: 0.1988
@labeling_function()
def lf_mutant(x):
    def find_word_index(words, target):
        for i, word in enumerate(words):
            if target in word:
                return i
        return -1

    words = x.text.lower().split()
    if any('mutant' in word or 'mutat' in word for word in words):
        # if mutant is between the two entities
        if x.entity1_index == -1 or x.entity2_index == -1:
            return ABSTAIN
        if isinstance(x.entity1_index, int) and isinstance(x.entity2_index, int):
            if x.entity1_index < x.entity2_index:
                if any('mutant' in word or 'mutat' in word for word in words[x.entity1_index:x.entity2_index]):
                    return 0
            else:
                if any('mutant' in word or 'mutat' in word for word in words[x.entity2_index:x.entity1_index]):
                    return 0
        # if mutant is close to either of the entities
        mutant_index = find_word_index(words, 'mutant')
        mutat_index = find_word_index(words, 'mutat')
        if (mutant_index != -1 and (abs(x.entity1_index - mutant_index) < 4 or abs(x.entity2_index - mutant_index) < 4)) or \
           (mutat_index != -1 and (abs(x.entity1_index - mutat_index) < 4 or abs(x.entity2_index - mutat_index) < 4)):
            return 0
    return ABSTAIN

#3
## Regulator
@labeling_function()
def lf_bind(x):
    return 1 if 'bind' in x.text.lower() else ABSTAIN
#4
@labeling_function()
def lf_interact(x):
    return 1 if 'interact' in x.text.lower() else ABSTAIN
#5
@labeling_function()
def lf_affinity(x):
    return 1 if 'affinit' in x.text.lower() else ABSTAIN
#6 TODO: 0.3578
## Upregulator
# Activator
@labeling_function()
def lf_activate(x):
    return 2 if 'activat' in x.text.lower() else ABSTAIN
#7
@labeling_function()
def lf_increase(x):
    return 2 if 'increas' in x.text.lower() else ABSTAIN
#8 TODO: 
@labeling_function()
def lf_induce(x):
    return 2 if 'induc' in x.text.lower() else ABSTAIN
#9 TODO: 
@labeling_function()
def lf_stimulate(x):
    return 2 if 'stimulat' in x.text.lower() else ABSTAIN
#10
@labeling_function()
def lf_upregulate(x):
    if ('upregulat' in x.text.lower() or 'up-regulat' in x.text.lower()) and ('downregulat' in x.text.lower() or 'down-regulat' in x.text.lower()):
        if x.entity1_index == -1 or x.entity2_index == -1:
            return ABSTAIN
        # if up regulate is between the two entities
        if isinstance(x.entity1_index, int) and isinstance(x.entity2_index, int):
            if x.entity1_index < x.entity2_index:
                if x.text[x.entity1_index:x.entity2_index].count('upregulat') > 0 or x.text[x.entity1_index:x.entity2_index].count('up-regulat') > 0:
                    return 2
            else:
                if x.text[x.entity2_index:x.entity1_index].count('upregulat') > 0 or x.text[x.entity2_index:x.entity1_index].count('up-regulat') > 0:
                    return 2
        return ABSTAIN
    else:
        return 2 if 'upregulat' in x.text.lower() or 'up-regulat' in x.text.lower() else ABSTAIN
#11
## Downregulator
@labeling_function()
def lf_downregulate(x):
    if('downregulat' in x.text.lower() or 'down-regulat' in x.text.lower()) and ('upregulat' in x.text.lower() or 'up-regulat' in x.text.lower()):
        if x.entity1_index == -1 or x.entity2_index == -1:
            return ABSTAIN
        # if up regulate is between the two entities
        if isinstance(x.entity1_index, int) and isinstance(x.entity2_index, int):
            if x.entity1_index < x.entity2_index:
                if x.text[x.entity1_index:x.entity2_index].count('downregulat') > 0 or x.text[x.entity1_index:x.entity2_index].count('down-regulat') > 0:
                    return 3
            else:
                if x.text[x.entity2_index:x.entity1_index].count('downregulat') > 0 or x.text[x.entity2_index:x.entity1_index].count('down-regulat') > 0:
                    return 3
        return ABSTAIN
    return 3 if 'downregulat' in x.text.lower() or 'down-regulat' in x.text.lower() else ABSTAIN
#12
@labeling_function()
def lf_reduce(x):
    return 3 if 'reduc' in x.text.lower() else ABSTAIN
#13
@labeling_function()
def lf_inhibit(x):
    return 3 if 'inhibit' in x.text.lower() else ABSTAIN
#14
@labeling_function()
def lf_decrease(x):
    return 3 if 'decreas' in x.text.lower() else ABSTAIN
#15
## Agonist
@labeling_function()
def lf_agonist(x):
    return 4 if ' agoni' in x.text.lower() or "\tagoni" in x.text.lower() else ABSTAIN

#16
## Antagonist
@labeling_function()
def lf_antagonist(x):
    return 5 if 'antagon' in x.text.lower() else ABSTAIN

#17
## Modulator
# TODO: Delete this LF, or change this to modulator ??
@labeling_function()
def lf_modulate(x):
    return 6 if 'modulat' in x.text.lower() else ABSTAIN

#18
@labeling_function()
def lf_allosteric(x):
    return 6 if 'allosteric' in x.text.lower() else ABSTAIN
#19
## Cofactor
@labeling_function()
def lf_cofactor(x):
    return 7 if 'cofactor' in x.text.lower() else ABSTAIN
#20
## Substrate/Product
@labeling_function()
def lf_substrate(x):
    return 8 if 'substrate' in x.text.lower() else ABSTAIN
#21
@labeling_function()
def lf_transport(x):
    return 8 if 'transport' in x.text.lower() else ABSTAIN
#22
@labeling_function()
def lf_catalyze(x):
    return 8 if 'catalyz' in x.text.lower() or 'catalys' in x.text.lower() else ABSTAIN
#23
@labeling_function()
def lf_product(x):
    return 8 if "produc" in x.text.lower() else ABSTAIN
#24
@labeling_function()
def lf_convert(x):
    return 8 if "conver" in x.text.lower() else ABSTAIN
#25
## NOT
@labeling_function()
def lf_not(x):
    return 9 if 'not' in x.text.lower() else ABSTAIN

# 26 replace the 17 (18)
@labeling_function()
def lf_combined_modulator(x):
    sentence_lower = x.text.lower()

    specific_terms = ['allosteric modulator', 'positive modulator', 'negative modulator', 'non-competitive modulator', 'positive allosteric modulator']
    if any(term in sentence_lower for term in specific_terms):
        return 6

    modulating_terms = ['modulat', 'allosteric', 'potentiate']
    for term in modulating_terms:
        if term in sentence_lower:
            term_index = sentence_lower.index(term)
            if x.entity1_index == -1 or x.entity2_index == -1:
                return ABSTAIN
            if abs(term_index - x.entity1_index) < 20 or abs(term_index - x.entity2_index) < 20:
                return 6
    # check the first indcidences of modulator and positive
    if x.entity1 in sentence_lower and x.entity2 in sentence_lower:
        entity1_index = sentence_lower.index(x.entity1.lower())
        entity2_index = sentence_lower.index(x.entity2.lower())
        between_entities = sentence_lower[min(entity1_index, entity2_index):max(entity1_index, entity2_index)]
        if 'modulate' in between_entities:
            return 6

    if 'positive' in sentence_lower and 'modulator' in sentence_lower:
        pos_indices = [i for i, word in enumerate(sentence_lower.split()) if word == 'positive']
        mod_indices = [i for i, word in enumerate(sentence_lower.split()) if 'modulator' in word]
        if len(pos_indices) == 0 or len(mod_indices) == 0:
            return ABSTAIN
        min_distance = min(abs(p - m) for p in pos_indices for m in mod_indices)
        if min_distance <= 3:
            return 6

    return ABSTAIN

lfs = [lf_amino_acid, lf_replace, lf_mutant, lf_bind, lf_interact, lf_affinity, lf_activate, lf_increase, lf_induce, lf_stimulate, lf_upregulate, lf_downregulate, lf_reduce, lf_inhibit, lf_decrease, lf_agonist, lf_antagonist, lf_combined_modulator, lf_allosteric, lf_cofactor, lf_substrate, lf_transport, lf_catalyze, lf_product, lf_convert, lf_not]