## Import libraries

In [1]:
import itertools
import os
import pandas as pd
import re
from pandas.core.frame import DataFrame
import periodictable

In [48]:
def search(query:str, max_results:int):
    """
    Search for results in Pubmed with a given query
    """
    Entrez.email = 'your.email@example.com'
    try:
        handle = Entrez.esearch(db='pubmed', 
                                sort='relevance', 
                                retmax=max_results,
                                retmode='xml', 
                                term=query
                                )
        results = Entrez.read(handle)
        return results
    except:
        return None

def fetch_details(id_list:list):
    """
    Get details from a given list of id papers
    """
    try:
        ids = ','.join(id_list)
        Entrez.email = 'your.email@example.com'
        handle = Entrez.efetch(db='pubmed',
                               retmode='xml',
                               id=ids)
        results = Entrez.read(handle)
        return results
    except:
        return None

# Search with some typical keywords

In [None]:
# Get some typical keywords
keywords_food = ['nutrition', 'nutriment', 'nourishment', 'sustenance', 'aliment', 'alimentation', 'victuals', 'food', 'nutrient']
keywords_mood = ['psychology', 'mental health', 'feel', 'mood', 'sensation']
combine_keywords = [p for p in itertools.product(*[keywords_food, keywords_mood])]
combine_keywords

[('nutrition', 'psychology'),
 ('nutrition', 'mental health'),
 ('nutrition', 'feel'),
 ('nutrition', 'mood'),
 ('nutrition', 'sensation'),
 ('nutriment', 'psychology'),
 ('nutriment', 'mental health'),
 ('nutriment', 'feel'),
 ('nutriment', 'mood'),
 ('nutriment', 'sensation'),
 ('nourishment', 'psychology'),
 ('nourishment', 'mental health'),
 ('nourishment', 'feel'),
 ('nourishment', 'mood'),
 ('nourishment', 'sensation'),
 ('sustenance', 'psychology'),
 ('sustenance', 'mental health'),
 ('sustenance', 'feel'),
 ('sustenance', 'mood'),
 ('sustenance', 'sensation'),
 ('aliment', 'psychology'),
 ('aliment', 'mental health'),
 ('aliment', 'feel'),
 ('aliment', 'mood'),
 ('aliment', 'sensation'),
 ('alimentation', 'psychology'),
 ('alimentation', 'mental health'),
 ('alimentation', 'feel'),
 ('alimentation', 'mood'),
 ('alimentation', 'sensation'),
 ('victuals', 'psychology'),
 ('victuals', 'mental health'),
 ('victuals', 'feel'),
 ('victuals', 'mood'),
 ('victuals', 'sensation'),
 ('fo

In [None]:
#https://pubmed.ncbi.nlm.nih.gov/?term=mental+health+and+nutrition&filter=pubt.systematicreview
from Bio import Entrez
i = 1
for (food, mood) in combine_keywords:
    query = food + ' and ' + mood
    results = search(query, 3)
    id_list = results['IdList']
    papers = fetch_details(id_list)
    print('-----------\n\nQuery:', query)
    if papers is not None:
        for i, paper in enumerate(papers['PubmedArticle']):
            print("\n{}) {}".format(i+1, paper['MedlineCitation']['Article']['ArticleTitle']))
    #        if (paper['MedlineCitation']['Article'].get('Abstract') is not None):
    #            print("----\n{}".format(paper['MedlineCitation']['Article'].get('Abstract').get('AbstractText')))
                #print("----\n{}".format(paper['MedlineCitation']['Article']))

-----------

Query: nutrition and psychology

1) Nutritional status, physical activity and associated nutrition knowledge of primary school learners.

2) Social networks and their influences on nutrient intake, nutritional status and physical function in community-dwelling ethnically diverse older adults: a mixed-methods longitudinal study.

3) A poorer nutritional status impacts quality of life in a sample population of elderly cancer patients.
-----------

Query: nutrition and mental health

1) Correlates associated with mental health and nutritional status in Lebanese older adults: A cross-sectional study.

2) Mental health symptoms and their relations with dietary diversity and nutritional status among mothers of young children in eastern Democratic Republic of the Congo.

3) Community canteen services for the rural elderly: determining impacts on general mental health, nutritional status, satisfaction with life, and social capital.
-----------

Query: nutrition and feel

1) Remain

-----------

Query: aliment and feel

1) Systematic review: psychosocial factors associated with pain in inflammatory bowel disease.

2) Social Representation of "Loud Music" in Young Adults: A Cross-Cultural Study.

3) Effects of disturbed sleep on gastrointestinal and somatic pain symptoms in irritable bowel syndrome.
-----------

Query: aliment and mood

1) Review article: bugs, inflammation and mood-a microbiota-based approach to psychiatric symptoms in inflammatory bowel diseases.

2) Letter: the relationship between diet, mood and mucosal healing in coeliac disease remains to be verified-authors' reply.

3) Letter: the relationship between diet, mood and mucosal healing in coeliac disease remains to be verified.
-----------

Query: aliment and sensation

1) Physicochemical and sensory analysis of craft beer made with soursop (Annona muricata L.).

2) An evaluation of the mechanical and sensory characteristics of selected foodstuffs for senior citizens.

3) Improvement of viabilit

#### ✎ Only some keywords that render the revelant results

#### Try the most effective keywords

In [None]:
effective_kw = ['nutrition and mental health', 'nutrition and feel', 'nutrition and mood',
               'food and psychology', 'food and mental health', 'food and mood',
               'nutrient and psychology', 'nutrient and mental health']

from Bio import Entrez

for kw in effective_kw:
    results = search(kw, 3)
    id_list = results['IdList']
    papers = fetch_details(id_list)
    print('-----------\n\nQuery:', kw)
    if papers is not None:
        for i, paper in enumerate(papers['PubmedArticle']):
            print("\n{}) {}".format(i+1, paper['MedlineCitation']['Article']['ArticleTitle']))
            if (paper['MedlineCitation']['Article'].get('Abstract') is not None):
                print("----\n{}".format(paper['MedlineCitation']['Article'].get('Abstract').get('AbstractText')))


-----------

Query: nutrition and mental health

1) Correlates associated with mental health and nutritional status in Lebanese older adults: A cross-sectional study.
----
[StringElement('To assess correlates of mental and nutritional health among elderly in Lebanon, inside nursing homes compared to their private homes.', attributes={'Label': 'OBJECTIVE'}), StringElement('This cross-sectional study was conducted between June and August 2016 on 500 elderly.', attributes={'Label': 'METHODS'}), StringElement('Higher somatic (Beta\u2009=\u20090.259) and cognitive anxiety (Beta\u2009=\u20090.508), increased age (Beta\u2009=\u20090.174) were significantly associated with higher depression, whereas having a secondary (Beta\u2009=\u2009-4.006) and a university (Beta\u2009=\u2009-6.829) levels of education compared to illiteracy, living home (Beta\u2009=\u2009-2.557) compared to living in a nursing home and male gender (Beta\u2009=\u2009-1.280) were significantly associated with lower depressio

-----------

Query: nutrition and feel

1) Remaining Socially Connected at 100 and Beyond Reduces Impact of Loneliness on Nutritional Status.
----
[StringElement("Understanding factors influencing centenarians' nutritional status can offer insight into effective nutrition interventions to improve quality of life among this population.", attributes={'Label': 'BACKGROUND', 'NlmCategory': 'UNASSIGNED'}), StringElement('This cross-sectional study was conducted to evaluate the role of social support and loneliness on nutritional status among Oklahoma centenarians (<i>N</i>\u2009=\u2009151).', attributes={'Label': 'OBJECTIVE', 'NlmCategory': 'UNASSIGNED'}), StringElement('Nutritional status was assessed with the Short Form Mini Nutrition Assessment (MNA-SF). Perceived social support was assessed with the 24-item Social Provisions Scale. Loneliness was examined with the 10-item UCLA loneliness scale.', attributes={'Label': 'METHODS', 'NlmCategory': 'UNASSIGNED'}), StringElement('Ordinal logis

-----------

Query: food and psychology

1) COVID-19 disease and nutritional choices: How will the pandemic reconfigure our food psychology and habits? A case study of the Italian population.
----
[StringElement("In Italy, the spread of the novel coronavirus (SARS-Cov-2) required lifestyle changes that have affected food choices and people's health condition. We explore people's perception of the role of food consumption as a preventive measure and how it reconfigures consumption habits.", attributes={'Label': 'BACKGROUND AND AIMS'}), StringElement('We conducted an online survey of a representative sample of 1004 Italian citizens. Around 40% of the population perceive that strengthening the immune defences through nutrition is not important to reduce the risk of coronavirus disease 2019 (COVID-19) infection. People with lower levels of perceived importance are slightly younger and have a less healthy lifestyle. They are less worried about the emergency. During the last months, they hav

-----------

Query: food and mood

1) Food for mood: Experimentally induced negative affect triggers loss of control over eating in adolescents with low inhibitory control.
----
[StringElement('Loss of control over eating (LOC) is common among adolescents and is associated with negative developmental outcomes. Low self-regulation, and specifically low inhibitory control, is increasingly emphasized as an underlying factor in LOC. However, the specific context in which these capacities fail remains unclear. The affect regulation model proposes that negative affect may trigger LOC; however, research has mostly assessed trait negative affect using questionnaires, whereas measuring state negative affect is needed to determine its triggering role. Therefore, this study examined the interaction between inhibitory control and state negative affect in predicting LOC among adolescents using an experimental mood-induction design.', attributes={'Label': 'OBJECTIVE'}), StringElement('Participants w

-----------

Query: nutrient and mental health

1) Mental health problems in relation to eating behavior patterns, nutrient intakes and health related quality of life among Iranian female adolescents.
----
[StringElement('To identify the association between mental health problems, eating behavior patterns, nutrient intakes and health related quality of life (HRQoL) among Iranian female adolescents.', attributes={'Label': 'AIMS'}), StringElement('The current cross-sectional study conducted among three high-schools randomly selected from 10-day-public high schools in the selected sub-county from Tabriz city-Iran between December 2015 through March 2016. Participants were a sample of 107 adolescent girls aged 15-17 years old. Anthropometric parameters were measured and assessments of HRQoL, mental health problems and eating behavioral patterns were performed by Short Form 36 (SF-36), Strengths and Difficulties Questionnaires (SDQ) and Eating Behavioral Pattern Questionnaire (EBPQ) respect

#### ✎ The abstract of results is not really clear for relationship between Food and Mental Health.

-------------------------

# Search From Particular Entities

## Utiles to get Entities From Ontologies

In [12]:
def readCSV(fileName:str)->DataFrame:
    """
    Read file csv
    """
    filePath = './resource_csv/' + fileName
    return pd.read_csv(filePath)

In [14]:
def getEntities(df:DataFrame, classes=[])->list:
    """
    Get all entities in an ontology dataframe
    """
    n = len(classes)
    i = 0
    while i < n:
      p = classes[i]
      classes = classes + [subclass for subclass in df.loc[df['Parents'].str.contains(f'.*{p}.*'), :]['Class ID'].tolist() if subclass not in classes]
      i += 1
      n = len(classes)
    
    vocabularies = []
    sample = (df[df['Class ID'].isin(classes)] if len(classes) > 0 else df)
    for idx, row in sample.iterrows():
        vocabularies.append(row['Preferred Label'])
        if isinstance(row['Synonyms'], str):
            vocabularies += row['Synonyms'].split('|')
    
    return vocabularies

In [95]:
def toTerm(word:str)->str:
    """
    Convert an ontology term to a short term
    """
    for el in periodictable.elements:
        if word == el.symbol:
            return el.name # Fe ==> iron
    
    regexes = [#r'(.*)\s+\(.*', r'^\([\+\−].*\)-(.*)',
               r'([^\(\,]+)', # berry (whole) | Mullus barbatus Linnaeus, 1758
               #r'.*\w*\(\w+\)\w*', 
               #r'obsolete: (.*)',
               #r'([^.]+\s)',
               #r'^(\w+)\(\d*\-\)',
               #r'https://en.wikipedia.org/wiki/(.*)'
              ]

    for regex in regexes:
        if len(re.compile(regex).findall(word)) > 0:
            return re.compile(regex).findall(word)[0].strip().lower()
    return word

def isMolecular(word:str)->bool:
    """
    Check if the given word is molecular
    """
    regex_molecular = '.*\d+.*(\d+|\w{1}).*'
    return True if len(re.compile(regex_molecular).findall(word)) > 0 else False

def getCleanTerms(terms:list)->list:
    """
    Get only clean terms in a list of terms
    """
    new_terms = set(terms.copy())

    # Remove molecular terms
    deleted_terms = []
    for term in new_terms:
        if isMolecular(term) or len(re.compile(r"(.+\. )|(.\)\-.+)").findall(term)) > 0:
            deleted_terms.append(term)
    new_terms.difference_update(deleted_terms)

    # Convert to term
    deleted_terms = []
    new_terms = set(map(lambda x: toTerm(x), list(new_terms)))
    for term in new_terms:
        if len(term) >= 1 and len(term) <= 3: # too short => trash term
            deleted_terms.append(term)
    new_terms.difference_update(deleted_terms)
    return new_terms
    

In [74]:
#toTerm('Menippe frontalis A. Milne Edwards, 1879')
toTerm('Mullus barbatus Linnaeus, 1758')

'mullus barbatus linnaeus'

In [85]:
isMolecular('naringenin 7-o-alpha-l-rhamnosyl-(1->2)-beta-d-glucoside'), isMolecular('naringenin')

(True, False)

---------------------

## Food Entities

### Get the all sub-classes these classes:
* **FODMAP diet** : http://purl.obolibrary.org/obo/ONS_1000048
* **coeliac diet**: http://purl.obolibrary.org/obo/ONS_1000047
* **daily average nutritional intake**: http://purl.obolibrary.org/obo/ONS_1000060
* **daily food intake**: http://purl.obolibrary.org/obo/ONS_1000059
* **non-organic agriculture diet**: http://purl.obolibrary.org/obo/ONS_1000035
* **organic agriculture diet**: http://purl.obolibrary.org/obo/ONS_1000034
* **diet by nutritional composition**: http://purl.obolibrary.org/obo/ONS_1000005
* **diet by food organism**: http://purl.obolibrary.org/obo/ONS_1000019
* **liquid diet**: http://purl.obolibrary.org/obo/ONS_1000033
* **ethnographic diet**: http://purl.obolibrary.org/obo/ONS_1000051
* **weight loss diet**: http://purl.obolibrary.org/obo/ONS_1000049
* **dietary pattern by nutritional composition**: http://purl.obolibrary.org/obo/ONS_2000005
* **dietary pattern by food organism**: http://purl.obolibrary.org/obo/ONS_2000019
* **dietary regimen**: http://purl.obolibrary.org/obo/ONS_1000062
* **fruit**: http://purl.obolibrary.org/obo/PO_0009001
* **chemical food component**: http://purl.obolibrary.org/obo/FOODON_03411041
* **mixture**: http://purl.obolibrary.org/obo/CHEBI_60004
* **food product organismal source**: http://purl.obolibrary.org/obo/FOODON_03411564
* **egg or egg component**: http://purl.obolibrary.org/obo/FOODON_03420194
* **honey**: http://purl.obolibrary.org/obo/UBERON_0036016
* **root**: http://purl.obolibrary.org/obo/PO_0009005
* **potato (whole)**: http://purl.obolibrary.org/obo/FOODON_03315354
* **yarm (whole)**: http://purl.obolibrary.org/obo/FOODON_00003448
* **meal**: http://purl.obolibrary.org/obo/NCIT_C80248
* **pharmaceutical**: http://purl.obolibrary.org/obo/CHEBI_52217
* **vitamin**: http://purl.obolibrary.org/obo/CHEBI_33229

In [16]:
# Read FOODON ontology
foodon_df = readCSV('FOODON.csv')
#classes = ['http://purl.obolibrary.org/obo/OBI_0100026', 'http://purl.obolibrary.org/obo/FOODON_00001002', 'http://purl.obolibrary.org/obo/FOODON_03411041']
classes_food = ['http://purl.obolibrary.org/obo/ONS_1000048', 'http://purl.obolibrary.org/obo/ONS_1000047', 'http://purl.obolibrary.org/obo/ONS_1000060',
                'http://purl.obolibrary.org/obo/ONS_1000059', 'http://purl.obolibrary.org/obo/ONS_1000035', 'http://purl.obolibrary.org/obo/ONS_1000034', 
                'http://purl.obolibrary.org/obo/ONS_1000005', 'http://purl.obolibrary.org/obo/ONS_1000019', 'http://purl.obolibrary.org/obo/ONS_1000033', 
                'http://purl.obolibrary.org/obo/ONS_1000051', 'http://purl.obolibrary.org/obo/ONS_1000049', 'http://purl.obolibrary.org/obo/ONS_2000005',
                'http://purl.obolibrary.org/obo/ONS_2000019', 'http://purl.obolibrary.org/obo/ONS_1000062', 'http://purl.obolibrary.org/obo/PO_0009001',
                'http://purl.obolibrary.org/obo/FOODON_03411041', 'http://purl.obolibrary.org/obo/FOODON_03411564', 'http://purl.obolibrary.org/obo/FOODON_03420194', 
                'http://purl.obolibrary.org/obo/UBERON_0036016', 'http://purl.obolibrary.org/obo/PO_0009005', 'http://purl.obolibrary.org/obo/FOODON_03315354', 
                'http://purl.obolibrary.org/obo/FOODON_00003448','http://purl.obolibrary.org/obo/NCIT_C80248']

classes_chebi = ['http://purl.obolibrary.org/obo/CHEBI_52217', 'http://purl.obolibrary.org/obo/CHEBI_60004', 'http://purl.obolibrary.org/obo/CHEBI_33229']

foodon_df['Parents'] = foodon_df['Parents'].fillna('')
food_entities = getEntities(foodon_df, classes_food)
chebi_entities = getEntities(foodon_df, classes_chebi)

  foodon_df = readCSV('FOODON.csv')


In [96]:
food_terms = getCleanTerms(food_entities)
chebi_terms = getCleanTerms(chebi_entities)
print(f"There are {len(food_terms)} NUTRITION terms and {len(chebi_terms)} CHEBI terms in FOODON.")

There are 11395 NUTRITION terms and 198 CHEBI terms in FOODON.


### Get terms with English only

In [93]:
import langdetect

In [97]:
# Get only english terms in food terms
food_terms_en = []
for term in food_terms:
    try:
        if langdetect.detect(term) == 'en':
            food_terms_en.append(term)
    except:
        continue
print('There are', len(food_terms_en), 'food terms.')

There are 2921 food terms.


---------------

## CHEBI Entities

### Get the all sub-classes these classes:
* allergen : http://purl.obolibrary.org/obo/CHEBI_50904
* carcinogenic agent: http://purl.obolibrary.org/obo/CHEBI_50903
* cardiotoxic agent: http://purl.obolibrary.org/obo/CHEBI_50912
* hepatotoxic agent: http://purl.obolibrary.org/obo/CHEBI_50908
* nephrotoxic agent: http://purl.obolibrary.org/obo/CHEBI_50909
* neurotoxin: http://purl.obolibrary.org/obo/CHEBI_50910
* antigen: http://purl.obolibrary.org/obo/CHEBI_59132
* antimicrobial agent: http://purl.obolibrary.org/obo/CHEBI_33281
* pharmacological role: http://purl.obolibrary.org/obo/CHEBI_52210
* food additive:http://purl.obolibrary.org/obo/CHEBI_64047
* poison: http://purl.obolibrary.org/obo/CHEBI_64909
* provitamin: http://purl.obolibrary.org/obo/CHEBI_50188
* vitamin: http://purl.obolibrary.org/obo/CHEBI_33229

In [98]:
# Read CHEBI ontology
chebi_df = readCSV('CHEBI.csv')
classes = ['http://purl.obolibrary.org/obo/CHEBI_50904', 'http://purl.obolibrary.org/obo/CHEBI_50903', 'http://purl.obolibrary.org/obo/CHEBI_50912',
          'http://purl.obolibrary.org/obo/CHEBI_50908', 'http://purl.obolibrary.org/obo/CHEBI_50909', 'http://purl.obolibrary.org/obo/CHEBI_50910',
          'http://purl.obolibrary.org/obo/CHEBI_59132', 'http://purl.obolibrary.org/obo/CHEBI_33281', 'http://purl.obolibrary.org/obo/CHEBI_52210',
           'http://purl.obolibrary.org/obo/CHEBI_64047', 'http://purl.obolibrary.org/obo/CHEBI_64909', 'http://purl.obolibrary.org/obo/CHEBI_50188',
           'http://purl.obolibrary.org/obo/CHEBI_33229']

chebi_df['Parents'] = chebi_df['Parents'].fillna('')
chebi_entities_from_CHEBI = getEntities(chebi_df, classes)

  chebi_df = readCSV('CHEBI.csv')


In [99]:
# Remove molecular terms in food set
chebi_terms.update(getCleanTerms(chebi_entities_from_CHEBI))
print(f"There are total {len(chebi_terms)} CHEBI terms from FOODON and CHEBI.")

There are total 1480 CHEBI terms from FOODON and CHEBI.


-------------------------------------

## All Nutrition Terms

In [112]:
print(f'There are {len(food_terms_en)} nutrition terms and {len(chebi_terms)} Chebi terms.')

There are 2921 nutrition terms and 1480 Chebi terms.


--------------------------------

## Mental Health Entities

2 Ontologies:
* **MFODM**: Mental disease Ontology is developed to facilitate representation for all aspects of mental disease.

*Source*: https://www.ebi.ac.uk/ols/ontologies/mfomd

* **Disorders Clusters**: Ontology about Disorders

*Source*: https://bioportal.bioontology.org/ontologies/APADISORDERS/

* **ASDPTO**: Autism Spectrum Disorder Phenotype Ontology

*Source*: https://bioportal.bioontology.org/ontologies/ASDPTO

------------------------

### MFOMD Ontology

In [117]:
mfomd_df = readCSV('MFOMD.csv')
mfomd_df['Parents'] = mfomd_df['Parents'].fillna('')
classes = [_class for _class in mfomd_df['Parents'].tolist() if len(re.compile(r'.*(MFOMD|DOID).*').findall(_class)) > 0]
mfomd_entities = getEntities(mfomd_df, classes)
print('There are', len(mfomd_entities), 'entities in MFOMD ontology')

There are 561 entities in MFOMD ontology


In [118]:
mfomd_terms = set(mfomd_entities)
print('There are', len(mfomd_entities), 'terms in MFOMD ontology')

There are 561 terms in MFOMD ontology


-----------------------------

### APADISORDERS
* **PsychologicalDisorders**: http://www.semanticweb.org/ontologies/2015/0/disorderscluster.owl#PsychologicalDisorders
* **Agitation**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Agitation
* **Amnesia**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Amnesia
* **Anhedonia**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Anhedonia
* **Anxiety**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Anxiety
* **Chronic_Stress**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Chronic_Stress
* **Craving**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Craving
* **Depersonalization**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Depersonalization
* **Distractibility**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Distractibility
* **Dyskinesia**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Dyskinesia
* **Emotional_Disturbances**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Emotional_Disturbances
* **Fatigue**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Fatigue
* **Headache**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Headache
* **Insomnia**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Insomnia
* **Intrusive Thoughts**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Intrusive_Thoughts
* **Mental_Confusion**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Mental_Confusion
* **Purging_Eating_Disorder**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Purging_(Eating_Disorders)
* **Restlessness**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Restlessness
* **Sleepiness**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Sleepiness
* **Sommatization**: http://ontology.apa.org/apaonto/termsonlyOUT%20(5).owl#Somatization

In [119]:
disorders_df = readCSV('APADISORDERS.csv')
classes = ['http://www.semanticweb.org/ontologies/2015/0/disorderscluster.owl#PsychologicalDisorders']
disorders_df['Parents'] = disorders_df['Parents'].fillna('')
disorders_entities = getEntities(disorders_df, classes)
print('There are', len(disorders_entities), 'entities in APADISORDERS ontology')

  classes = classes + [subclass for subclass in df.loc[df['Parents'].str.contains(f'.*{p}.*'), :]['Class ID'].tolist() if subclass not in classes]


There are 162 entities in APADISORDERS ontology


In [120]:
another_terms = ['Agitation', 'Amnesia', 'Anhedonia', 'Anxiety', 'Chronic Stress', 'Craving', 'Depersonalization', 'Distractibility',
                 'Dyskinesia', 'Emotional Disturbances', 'Fatigue', 'Headache', 'Insomnia', 'Intrusive Thoughts', 'Mental Confusion',
                 'Purging', 'Eating_Disorder', 'Restlessness', 'Sleepiness', 'Sommatization']
disorders_entities += another_terms

In [121]:
disorders_terms = set(map(lambda x: x.replace('_', ' '), disorders_entities))
print('There are', len(disorders_entities), 'terms in APADISORDERS ontology')

There are 182 terms in APADISORDERS ontology


------------------------------------

### ASDPTO
* **Mental Disorders**: http://cbmi.med.harvard.edu/asdphenotype#Class_455

In [122]:
asdpto_df = readCSV('ASDPTO.csv')
classes = ['http://cbmi.med.harvard.edu/asdphenotype#Class_455']
asdpto_df['Parents'] = asdpto_df['Parents'].fillna('')
asdpto_entities = getEntities(asdpto_df, classes)
print('There are', len(asdpto_entities), 'entities in ASDPTO ontology')

There are 15 entities in ASDPTO ontology


In [123]:
asdpto_terms = set(asdpto_entities)
print('There are', len(asdpto_terms), 'terms in ASDPTO ontology')

There are 15 terms in ASDPTO ontology


----------------------------

### Combine 3 list of terms

In [130]:
mental_health_terms = getCleanTerms(list(mfomd_terms.union(disorders_terms).union(asdpto_terms)))
print('There are', len(mental_health_terms), 'mental health terms that be collected.')
mental_health_terms

There are 676 mental health terms that be collected.


{'abnormal sexual function',
 'abstinence syndrome',
 'academic skill disorder',
 'acquired dyslexia',
 'acrophobia',
 'acute psychosis',
 'acute schizophrenia',
 'acute stress disorder',
 'addiction disorder',
 'addiction to use of a sunbed',
 'adhd',
 'adjustment disease',
 'adjustment disorder',
 'adjustment disorders',
 'adjustment reaction',
 'advanced sleep phase syndrome',
 'affective disorders',
 'affective flattening',
 'affective personality disorder',
 'affective psychosis',
 'agitation',
 'agnosia',
 'agoraphobia',
 'agraphia',
 'aids dementia complex',
 'aids phobia',
 'akinesia',
 'akinetopsia',
 'alcohol abuse',
 'alcohol dependence',
 'alcohol-induced mental disorder',
 'alcohol-related birth defect',
 'alcohol-related neurodevelopmental disorder',
 'alcoholic hallucinosis',
 'alcoholic psychoses',
 'alcoholic psychosis',
 'alexia',
 'alexithymia',
 'alogia',
 'alzheimer disease',
 "alzheimer's disease",
 'alzheimers dementia',
 'amnesia',
 'amnestic disorder',
 'amnest

------------------

## All Nutrition terms and Mental health Terms

In [133]:
print('There are', len(food_terms_en), 'nutrition terms.')
print('There are', len(chebi_terms), 'chebi terms.')
print('There are', len(mental_health_terms), 'mental health terms.')

There are 2921 nutrition terms.
There are 1480 chebi terms.
There are 676 mental health terms.


## Write entities to files

In [134]:
with open('./entities/nutrition_entities.txt', 'w', encoding='utf-8') as f:
    for t in food_terms_en:
        f.write(t + '\n')

with open('./entities/chebi_entities.txt', 'w', encoding='utf-8') as f:
        for t in chebi_terms:
            f.write(t + '\n')
        
with open('./entities/mental_health_entities.txt', 'w', encoding='utf-8') as f:
        for t in mental_health_terms:
            f.write(t + '\n')

# Get Papers with Food terms

## Search with some food_terms

In [None]:
from Bio import Entrez
keywords = list(food_terms)[:10].copy()
i = 9
for kw in keywords:
    print('\n'+kw)
    results = search(kw + '[Title/Abstract] AND ' + 'mental health[Title/Abstract]', 2)
    if results is not None:
        id_list = results['IdList']
        papers = fetch_details(id_list)
        if papers is not None:
            for i, paper in enumerate(papers['PubmedArticle']):
                print("\n{}) {}".format(i+1, paper['MedlineCitation']['Article']['ArticleTitle']))
                if (paper['MedlineCitation']['Article'].get('Abstract') is not None):
                    print("----\n{}".format(paper['MedlineCitation']['Article'].get('Abstract').get('AbstractText')))



(R,S)-Aspartic acid

1) Lumateperone: New Drug or Same Old Drug With a New Dress?
----
['Lumateperone (Caplyta<sup>®</sup>) is a drug recently approved by the U.S. Food and Drug Administration for the treatment of schizophrenia. But is it a new drug with promise, or a similar drug with new wrappings? This drug, similar to other second- and third-generation serotonin dopamine antagonists, is a potent antagonist at higher serotonin 2A receptors as well as brief binding to dopamine 1 and dopamine 2 (D2) receptors, but also has partial agonism at presynaptic D2 and indirect modulation of N-Methyl-D-aspartic acid (NMDA) and alpha-amino-3-hydroxy-5-methyl-isoxazolepropionic acid (AMPA) of the glutamine receptors. The current article reviews the putative effects of this novel mechanism of action on symptoms of schizophrenia as discussed in Phase II and III trials. A case study applies the information to a clinical situation. [Journal of Psychosocial Nursing and Mental Health Services, 58(6),

--------------------------------

# Get papers with Food terms and Mental Health terms

## Create keywords

**Notes:** These entity files have been manually modified some keywords.

In [None]:
with open('nutrition_entities.txt', 'r', encoding='utf-8') as f:
    foods = f.readlines()
len(foods)

2358

In [None]:
with open('mental_health_entities.txt', 'r', encoding='utf-8') as f:
    mental_healths = f.readlines()
len(mental_healths)

194

In [None]:
combine_keywords = [p for p in itertools.product(*[list([f.replace('\n', '') for f in foods]), list([m.replace('\n', '') for m in mental_healths])])]
print('There are totally', len(combine_keywords), 'keywords.')
print('Some examples:')
print(combine_keywords[:20])

There are totally 457452 keywords.
Some examples:
[('tribonyx', 'picks disease'), ('tribonyx', 'acrophobia'), ('tribonyx', 'factitious disorders'), ('tribonyx', 'mixed receptive-expressive language disorder'), ('tribonyx', 'flying phobia'), ('tribonyx', 'motor-verbal tic disorder'), ('tribonyx', 'phencyclidine abuse'), ('tribonyx', 'atypical autism'), ('tribonyx', 'mathematics disorder'), ('tribonyx', 'organic brain syndromes'), ('tribonyx', 'dysfunctional family'), ('tribonyx', 'substance abuse'), ('tribonyx', 'sexual addiction'), ('tribonyx', 'psychogenic tics'), ('tribonyx', 'claustrophobia'), ('tribonyx', 'stereotypic movement disorder'), ('tribonyx', 'symbiotic infantile psychosis'), ('tribonyx', 'separation anxiety'), ('tribonyx', 'autistic disorder of childhood onset'), ('tribonyx', 'intrusive thoughts')]


## Get Papers

In [None]:
!pip install biopython

Collecting biopython
  Downloading biopython-1.79-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (2.3 MB)
[K     |████████████████████████████████| 2.3 MB 8.7 MB/s 
Installing collected packages: biopython
Successfully installed biopython-1.79


In [None]:
# Split keywords into 20 groups
n = len(combine_keywords)//20
groups_keywords = [combine_keywords[x:x+n] for x in range(0, len(combine_keywords), n)]

In [None]:
len(groups_keywords[0])

22872

In [9]:
from Bio import Entrez
count = 19
abstracts = []
for (food, mood) in groups_keywords[count]:
    print('\n', food, '---', mood)
    results = search(food + '[Title/Abstract] AND ' + mood + '[Title/Abstract]', max_results=5)
    if results is not None:
        id_list = results['IdList']
        papers = fetch_details(id_list)
        if papers is not None:
            for i, paper in enumerate(papers['PubmedArticle']):
                print("\n{}) {}".format(i+1, paper['MedlineCitation']['Article']['ArticleTitle']))
                if (paper['MedlineCitation']['Article'].get('Abstract') is not None):
                    abstracts.append((paper['MedlineCitation']['Article']['ArticleTitle'], paper['MedlineCitation']['Article'].get('Abstract')['AbstractText'][0]))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

 type 2 cannabinoid receptor antagonists --- ophidiophobia

 type 2 cannabinoid receptor antagonists --- speech anxiety

 type 2 cannabinoid receptor antagonists --- process schizophrenia

 type 2 cannabinoid receptor antagonists --- alcoholic psychoses

 type 2 cannabinoid receptor antagonists --- inhibited sexual desire

 type 2 cannabinoid receptor antagonists --- schizophrenia and disorders with psychotic features

 type 2 cannabinoid receptor antagonists --- pathological gambling

 type 2 cannabinoid receptor antagonists --- sadomasochistic personality

 type 2 cannabinoid receptor antagonists --- cardiovascular malfunction arising from mental factors

 type 2 cannabinoid receptor antagonists --- borderline personality disorder

 type 2 cannabinoid receptor antagonists --- hypnagogic hallucinations

 type 2 cannabinoid receptor antagonists --- hallucinations

 type 2 cannabinoid receptor antagonists --- shared psych

In [10]:
len(abstracts)

3659

In [11]:
abstracts_df = pd.DataFrame(abstracts)
abstracts_df.columns = ['Title', 'Abstract']
abstracts_df.tail(5)

Unnamed: 0,Title,Abstract
3654,Pharmacotherapy for tics in adult patients wit...,Tourette syndrome (TS) and persistent motor/vo...
3655,Effects of age and surface instability on the ...,"During standing, posture can be controlled by ..."
3656,Expression and characterization of a family 45...,"To efficiently decompose biomass, fungi have d..."
3657,Food proteins in human breast milk and probabi...,Previous reports suggested that food proteins ...
3658,The proteome reveals the involvement of serine...,The self-incompatibility recognition mechanism...


In [12]:
from google.colab import files
import time
abstracts_df.to_csv(f'papers_{count}.csv', index=False)
time.sleep(5)
files.download(f'papers_{count}.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

-------------------------------------

## Filter and clean papers

In [2]:
# Get all papers and drop duplicates
old_papers = pd.concat([pd.read_csv("./papers_raw/final_papers.csv"), pd.read_csv("./papers_raw/final_papers_2.csv")]).reset_index(drop=True)
add_papers = []

# 1. Read all papers csv
for i in range(13,17):
    add_papers.append(pd.read_csv(f"./papers_raw/papers_{i}.csv"))

papers = pd.concat(add_papers).reset_index(drop=True)

# Drop duplicates and null values
papers = papers.drop_duplicates(keep='first')
papers = papers[papers['Title'].notna() & papers['Abstract'].notna()]
papers.drop(papers[papers['PMID'].isin(old_papers['PMID'].tolist())].index, inplace = True)
papers = papers.reset_index(drop=True)
papers.head(5)

Unnamed: 0,PMID,Title,Abstract
0,28470822,Pericyte-derived bone morphogenetic protein 4 ...,Subcortical small vessel disease (SVD) is char...
1,31792039,The risk of malnutrition in children with auti...,A 9-year-old boy presented with a 2-day histor...
2,31019473,"Neurological, Psychiatric, and Biochemical Asp...",Thiamine (vitamin B1) is an essential nutrient...
3,21453474,Prenatal exposure of a girl with autism spectr...,Autism is a complex neurodevelopmental disorde...
4,34990378,[Clinical characteristics and treatment of tre...,Depression represents the predominant mood pol...


In [3]:
# Check null values
papers.isnull().sum()

PMID        0
Title       0
Abstract    0
dtype: int64

In [4]:
len(papers)

8050

In [5]:
# remove html tag
def removeHTML(content:str)->str:
    """
    Remove the HTML tags in a given content
    """
    import re
    html_regex = r"\<[^>]*\>"
    return re.sub(html_regex, '', content)

In [6]:
# remove HTML tags for title and Abstract
papers['Abstract'] = papers['Abstract'].map(lambda t: removeHTML(t))
papers.head(5)

Unnamed: 0,PMID,Title,Abstract
0,28470822,Pericyte-derived bone morphogenetic protein 4 ...,Subcortical small vessel disease (SVD) is char...
1,31792039,The risk of malnutrition in children with auti...,A 9-year-old boy presented with a 2-day histor...
2,31019473,"Neurological, Psychiatric, and Biochemical Asp...",Thiamine (vitamin B1) is an essential nutrient...
3,21453474,Prenatal exposure of a girl with autism spectr...,Autism is a complex neurodevelopmental disorde...
4,34990378,[Clinical characteristics and treatment of tre...,Depression represents the predominant mood pol...


In [7]:
# how many papers finally?
papers.to_csv("final_papers_3.csv", index=False)
len(papers)

8050

# Conclusion

### What have been tried to get papers ?


|Methods     | Examples| Results | 
|:----       | :----   | :----   |
| General Keywords      | Food and Nutrition       | Abstracts didn't explain the particular relationship between food and mental health       |
|Specific food and 'Mental Health'|mixed veggies and Mental Health| There are few useful abstracts can be extracted by this methods, but not much.
|Specific food and Specific Mental Health|(R,S)-Aspartic acid and attention deficit hyperactivity disorder | Many abstracts are useful