In [1]:
from pymongo import MongoClient

# MongoDB connection settings for the local server used by this notebook.
MONGO_URI = "mongodb://localhost:27017/"
MONGO_DATABASE_NAME = "MedGraph"

# Build the client and take a handle to the project database.
client = MongoClient(MONGO_URI)
database = client[MONGO_DATABASE_NAME]

# Base PubMed URL: https://pubmed.ncbi.nlm.nih.gov/ + paper_id

In [None]:
import requests

# Query the BERN2 online deployment through its HTTP API (POST JSON, receive JSON)
def query_plain(text, url="http://bern2.korea.ac.kr/plain", timeout=60):
    """POST ``text`` to a BERN2 endpoint and return the decoded JSON reply.

    Args:
        text: Plain text to annotate; sent as the ``text`` field of a JSON body.
        url: Endpoint that receives the POST request.
        timeout: Seconds to wait for the server before giving up. Pass ``None``
            to wait indefinitely (the behaviour before this parameter existed).

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.exceptions.Timeout: The server did not answer within ``timeout``.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx status.
    """
    # Without a timeout, requests waits forever, which stalls a "Run All"
    # whenever the remote deployment is unreachable.
    response = requests.post(url, json={'text': text}, timeout=timeout)
    # Surface the HTTP status explicitly instead of failing later with a
    # JSON decode error when the error body is not JSON.
    response.raise_for_status()
    return response.json()

if __name__ == '__main__':
    # Smoke test: send one structured abstract (BACKGROUND / RESULTS /
    # CONCLUSION sections) to the service and show the raw reply.
    # Adjacent literals are concatenated by the parser into a single string.
    text = (
        "BACKGROUND\n"
        "Lactoferricin (LFcin) is a strong cationic peptide released from the N-terminus of lactoferrin by gastric pepsin digestion. "
        "LFcin has some important properties, including high antimicrobial activity. "
        "To date, lactoferricins have been isolated and characterised from various animal species, but not from camel. "
        "The aim of this study was to characterise and express recombinant camel lactoferricin (LFcinC) in Pichia pastoris and investigate its antimicrobial activity."
        "\n\n\nRESULTS\n"
        "After methanol induction, LFcinC was expressed and secreted into a culture broth medium and the results determined by concentrated supernatant culture medium showed high antimicrobial activity against the following microorganisms: "
        "Escherichia coli PTCC 1330 (ATCC 8739), Staphylococcus aureus PTCC 1112 (ATCC 6538), Pseudomonas aeruginosa PTCC 1074 (ATCC 9027), Bacillus subtilis PTCC 1023 (ATCC 6633), and Candida albicans PTCC 5027 (ATCC 10231). "
        "Thermal stability was clarified with antibacterial activity against Escherichia coli PTCC 1330 (ATCC 8739)."
        "\n\n\nCONCLUSION\n"
        "Results confirmed that camel lactoferricin had suitable antimicrobial activity and its production by Pichia pastoris can be used for recombinant production."
    )
    annotations = query_plain(text)
    print(annotations)

In [None]:
# GLiNER custom build (https://github.com/urchade/GLiNER)

from gliner import GLiNER

# Pretrained checkpoint to load.
GLINER_MODEL_NAME = "urchade/gliner_large_bio-v0.1"
model = GLiNER.from_pretrained(GLINER_MODEL_NAME)

# Entity types handed to model.predict_entities(); the order is kept as is.
labels = [
    "antibiotic", "clinical_event", "diagnostic_procedure", "drug",
    "disease_disorder", "drug_dosage", "treatment_duration", "drug_strength",
    "drug_frequency", "severity", "lab_value", "sign_symptom",
    "gene", "species", "tissue", "treatment",
    "age", "sex", "height", "weight",
    "date", "protein", "cell", "plant",
    "volume", "subject", "body_part", "anatomical_system",
    "chemical_system", "enzyme", "biological_process", "molecular_function",
    "mutation", "immunologic_marker", "organism", "compound",
    "virus", "bacteria", "parasite", "clinical_trial",
    "surgical_procedure", "molecule", "protein_family", "environmental_factor",
    "diet", "symptom_complex", "study_design", "fungus",
    "cell_type", "cell_line", "device", "side_effect",
    "condition_outcome", "clinical_guideline", "medical_abbreviation", "medical_term",
    "peptide", "activity",
]

# Sample abstract used as model input. The value starts and ends with a
# newline, exactly as the original triple-quoted literal did.
# NOTE(review): there is no space between "activity." and "After" below; it is
# reproduced unchanged here — confirm whether the missing space is intended.
text = (
    "\n"
    "Lactoferricin (LFcin) is a strong cationic peptide released from the N-terminus of lactoferrin by gastric pepsin digestion. "
    "LFcin has some important properties, including high antimicrobial activity. "
    "To date, lactoferricins have been isolated and characterised from various animal species, but not from camel. "
    "The aim of this study was to characterise and express recombinant camel lactoferricin (LFcinC) in Pichia pastoris and investigate its antimicrobial activity."
    "After methanol induction, LFcinC was expressed and secreted into a culture broth medium and the results determined by concentrated supernatant culture medium showed high antimicrobial activity against the following microorganisms: "
    "Escherichia coli PTCC 1330 (ATCC 8739), Staphylococcus aureus PTCC 1112 (ATCC 6538), Pseudomonas aeruginosa PTCC 1074 (ATCC 9027), Bacillus subtilis PTCC 1023 (ATCC 6633), and Candida albicans PTCC 5027 (ATCC 10231). "
    "Thermal stability was clarified with antibacterial activity against Escherichia coli PTCC 1330 (ATCC 8739). "
    "Results confirmed that camel lactoferricin had suitable antimicrobial activity and its production by Pichia pastoris can be used for recombinant production."
    "\n"
)

# Run entity prediction over the abstract for the requested label set.
entities = model.predict_entities(text, labels)

# Each prediction is read as a dict with "text", "label" and "score" keys;
# print one line per prediction.
for entity in entities:
    span, label, score = entity["text"], entity["label"], entity["score"]
    print(span, "=>", label, " | score: ", score)

In [None]:
# Collapse repeated predictions into one entry per (text, label) pair:
#   (text, label) -> (number of occurrences, highest score seen)
entities_dict = {}
for entity in entities:
    key = (entity["text"], entity["label"])
    # Coerce every score to float, not only the first one seen for a key, so
    # the stored maximum always has the same type.
    score = float(entity["score"])
    # A key seen for the first time starts at count 0 with its own score as
    # the running maximum.
    count, best_score = entities_dict.get(key, (0, score))
    entities_dict[key] = (count + 1, max(best_score, score))

# One summary line per distinct (text, label) pair, in first-seen order.
for (entity_text, entity_label), (count, max_score) in entities_dict.items():
    print(entity_text, "=>", entity_label, " | max_score: ", max_score, " | count: ", count)
