# MeSH terms search

In [1]:
import requests

In [2]:
def perform_mesh_query(label, match: str="contains", year: str="2023", limit: int=10, timeout: float=30.0):
    """Look up MeSH descriptors by label through the NLM lookup endpoint.

    Args:
        label: Text to search for in descriptor labels.
        match: Matching mode sent as the ``match`` query parameter.
        year: MeSH release sent as the ``year`` query parameter.
        limit: Value sent as the ``limit`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The decoded JSON body on success (in the runs recorded in this notebook,
        a list of ``{'resource': ..., 'label': ...}`` dicts), or ``None`` when
        the request fails or the body is not valid JSON.
    """
    base_url = "https://id.nlm.nih.gov/mesh/lookup/descriptor"
    params = {
        "label": label,
        "match": match,
        "year": year,
        "limit": limit
    }
    headers = {
        "accept": "application/json"
    }

    try:
        response = requests.get(base_url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    # ValueError is caught as well because a JSON decode failure is not a
    # RequestException subclass on every requests version.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error occurred: {e}")
        return None

In [3]:
# Example lookup: 2023 MeSH descriptors whose label contains "gaba".
label, match, year, limit = "gaba", "contains", "2023", 20

result = perform_mesh_query(label=label, match=match, year=year, limit=limit)
# A None result (failed request) or an empty list both print nothing.
for r in result or []:
    print(r)

{'resource': 'http://id.nlm.nih.gov/mesh/2023/D018682', 'label': 'GABA Agents'}
{'resource': 'http://id.nlm.nih.gov/mesh/2023/D018755', 'label': 'GABA Agonists'}
{'resource': 'http://id.nlm.nih.gov/mesh/2023/D018756', 'label': 'GABA Antagonists'}
{'resource': 'http://id.nlm.nih.gov/mesh/2023/D018757', 'label': 'GABA Modulators'}
{'resource': 'http://id.nlm.nih.gov/mesh/2023/D050485', 'label': 'GABA Plasma Membrane Transport Proteins'}
{'resource': 'http://id.nlm.nih.gov/mesh/2023/D058805', 'label': 'GABA Uptake Inhibitors'}
{'resource': 'http://id.nlm.nih.gov/mesh/2023/D058785', 'label': 'GABA-A Receptor Agonists'}
{'resource': 'http://id.nlm.nih.gov/mesh/2023/D058787', 'label': 'GABA-A Receptor Antagonists'}
{'resource': 'http://id.nlm.nih.gov/mesh/2023/D058786', 'label': 'GABA-B Receptor Agonists'}
{'resource': 'http://id.nlm.nih.gov/mesh/2023/D058788', 'label': 'GABA-B Receptor Antagonists'}
{'resource': 'http://id.nlm.nih.gov/mesh/2023/D059330', 'label': 'GABAergic Neurons'}
{'reso

# Load text from PDF

In [4]:
from langchain.document_loaders import PyPDFLoader

In [None]:
# NOTE(review): absolute, machine-specific path; point it at a local copy of the paper.
file_path = "/media/luiz/storage2/taufferconsulting/client_catalystneuro/project_llms/papers/science.1125572.pdf"
loader = PyPDFLoader(file_path)
# Drop the first chunk returned by the loader (presumably front matter; confirm).
# Slicing instead of `del pages[0]` also tolerates an empty result.
pages = loader.load_and_split()[1:]
full_text = " ".join(p.page_content for p in pages)

# Keep everything from the FIRST occurrence of the title phrase onward.
# `split(marker)[1]` would stop at a second occurrence of the phrase and
# raise IndexError when the phrase is missing.
split_text = "Conjunctive Representation of"
title_start = full_text.find(split_text)
if title_start >= 0:
    full_text = full_text[title_start:]
else:
    print(f"Marker {split_text!r} not found; keeping the full extracted text")
full_text

In [None]:
# Load the supporting-material PDF and flatten it into a single string.
file_path = "/media/luiz/storage2/taufferconsulting/client_catalystneuro/project_llms/papers/sargolini_som.pdf"
loader = PyPDFLoader(file_path)
pages = loader.load_and_split()
# Discard the first chunk returned by the loader before joining the rest.
pages.pop(0)
supporting_material = " ".join(chunk.page_content for chunk in pages)
supporting_material

# Extract terms from text

In [6]:
import openai
from langchain.chat_models import ChatOpenAI
from langchain.chains import create_extraction_chain

In [30]:
def keywords_extraction(user_input: str, model: str = "gpt-3.5-turbo"):
    """Extract neuroscience terms from free text with a LangChain extraction chain.

    Args:
        user_input: Text to mine for terms.
        model: Model name handed to ``ChatOpenAI``.

    Returns:
        Whatever ``chain.run`` returns. In the runs recorded in this notebook
        that is a one-element list holding a dict whose ``"neuroscience_terms"``
        value is a comma-separated string.
    """
    chat_model = ChatOpenAI(model=model, temperature=0)

    # The property description doubles as the prompt. It is assembled line by
    # line so the trailing spaces on the first three lines stay visible.
    description_lines = [
        "extensive list of neuroscience and biological scientific experiment terms present in the text, Medical Subject Headings, neuroscience ontology. ",
        "These can include but are not limited to: ",
        "species - Biological species taxonomies, ",
        "approaches - Experimental approaches in neuroscience, such as electrophysiology, calcium imaging, etc.",
        "measurement techniques - Measurement techniques, such as patch clamp, two-photon imaging, spike sorting, etc.",
        "variables measured - Variables measured, such as membrane potential, spike rate, position, etc.",
        "anatomy - Anatomical regions, such as hippocampus, cortex, etc.",
        "disease - Disease models, such as Alzheimer's, Parkinson's, etc.",
        "cell types - Cell types, such as pyramidal, interneuron, etc.",
        "drugs - Drugs, such as ketamine, nitrous oxide, etc.",
    ]

    # One free-text property carries every category. A disabled earlier layout
    # declared species, approaches, measurement_techniques, variables_measured,
    # anatomy, disease, cell_types and drugs as separate string properties.
    extraction_schema = {
        "properties": {
            "neuroscience_terms": {
                "type": "string",
                "description": "\n".join(description_lines),
            },
        },
        "required": [],
    }

    extraction_chain = create_extraction_chain(extraction_schema, chat_model)
    return extraction_chain.run(user_input)

In [31]:
# Run term extraction on the main paper text. The "-16k" model variant is
# requested here, presumably for its larger context window (confirm).
# `r` is read by the MeSH lookup cell that follows.
r = keywords_extraction(full_text, model="gpt-3.5-turbo-16k")
r

[{'neuroscience_terms': 'Conjunctive Representation, Position, Direction, Velocity, Entorhinal Cortex, Grid cells, Medial entorhinal cortex, Principal cell layer, Head-direction cells, Conjunctive grid /C2head-direction cells'}]

In [35]:
# The extraction result is a list whose first item carries the terms as one
# comma-separated string; tolerate an empty result or a missing key.
raw_terms = (r[0].get("neuroscience_terms") or "") if r else ""
# Split on bare commas and strip whitespace so "a,b" and "a, b" parse alike,
# drop empty entries, and de-duplicate (keeping first-seen order) so each term
# triggers a single MeSH request.
terms_list = list(dict.fromkeys(t.strip() for t in raw_terms.split(",") if t.strip()))
terms_dict = {t: perform_mesh_query(label=t) for t in terms_list}

# Print each extracted term followed by its MeSH matches.
for k, v in terms_dict.items():
    print(k)
    print(v)
    print()

Conjunctive Representation
[]

Position
[{'resource': 'http://id.nlm.nih.gov/mesh/2023/D065635', 'label': 'Benign Paroxysmal Positional Vertigo'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D055012', 'label': 'Chromosomal Position Effects'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D045584', 'label': 'Chromosome Positioning'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D056446', 'label': 'Knee-Chest Position'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D056888', 'label': 'Patient Positioning'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D056510', 'label': 'Position-Specific Scoring Matrices'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D016684', 'label': 'Prone Position'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D000077708', 'label': 'Sitting Position'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D000078783', 'label': 'Standing Position'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D016683', 'label': 'Supine Position'}]

Direction
[]

Velocity
[{'resour

In [38]:
# Run term extraction on the supporting-material text. This rebinds `r`, so the
# result of the earlier main-paper extraction is no longer available under
# that name.
r = keywords_extraction(supporting_material, model="gpt-3.5-turbo-16k")
r

[{'neuroscience_terms': 'Sargolini, rats, Long Evans rats, Plexiglass cages, light/dark schedule, food deprivation schedule, surgery, electrode preparation, microdrive, tetrodes, platinum-iridium wire, electrode impedances, dorsocaudal medial entorhinal cortex, transverse sinus, saggital plane, microdrive, skull, dental cement, stainless steel screws, recording procedures, behavioural testing, training, Axona recording station, AC-coupled unity-gain operational amplifiers, electrodes, signals, spikes, EEG signals, video camera, light-emitting diodes, enclosures, cue card, chocolate crumbs, trials, water, spike sorting, cluster-cutting software, clustering, projections, parameter space, autocorrelation, crosscorrelation functions, place fields, position estimates, firing rate, smoothing algorithm, spatial smoothing, spatial correlation, spatial information rate, autocorrelation of rate maps, grid structure, gridness, grid spacing, grid orientation, spatial periodicity, head-direction ce

In [39]:
# The extraction result is a list whose first item carries the terms as one
# comma-separated string; tolerate an empty result or a missing key.
raw_terms = (r[0].get("neuroscience_terms") or "") if r else ""
# Split on bare commas and strip whitespace so "a,b" and "a, b" parse alike,
# drop empty entries, and de-duplicate (keeping first-seen order): the recorded
# extraction output lists "microdrive" twice, which previously meant two
# identical MeSH requests.
terms_list = list(dict.fromkeys(t.strip() for t in raw_terms.split(",") if t.strip()))
terms_dict = {t: perform_mesh_query(label=t) for t in terms_list}

# Print each extracted term followed by its MeSH matches.
for k, v in terms_dict.items():
    print(k)
    print(v)
    print()

Sargolini
[]

rats
[{'resource': 'http://id.nlm.nih.gov/mesh/2023/D019577', 'label': 'Mole Rats'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D051381', 'label': 'Rats'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D011910', 'label': 'Rats, Brattleboro'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D011911', 'label': 'Rats, Gunn'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D054772', 'label': 'Rats, Hairless'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D011912', 'label': 'Rats, Inbred ACI'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D011913', 'label': 'Rats, Inbred BB'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D011914', 'label': 'Rats, Inbred BN'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D011915', 'label': 'Rats, Inbred BUF'}, {'resource': 'http://id.nlm.nih.gov/mesh/2023/D020303', 'label': 'Rats, Inbred Dahl'}]

Long Evans rats
[]

Plexiglass cages
[]

light/dark schedule
[]

food deprivation schedule
[]

surgery
[{'resource': 'http://id.nlm.nih.gov/mesh/2