In [98]:
pip install openai

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Defaulting to user installation because normal site-packages is not writeable
Collecting openai
  Downloading openai-1.56.0-py3-none-any.whl (389 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m389.8/389.8 KB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m[31m3.5 MB/s[0m eta [36m0:00:01[0m
Collecting jiter<1,>=0.4.0
  Downloading jiter-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (343 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m343.6/343.6 KB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m[31m2.7 MB/s[0m eta [36m0:00:01[0m
Installing collected packages: jiter, openai
Successfully installed jiter-0.8.0 openai-1.56.0
Note: you may need to restart the kernel to use updated packages.


# Processamento do Texto

Obtenção e processamento dos textos que serão usados para a construção do grafo

In [10]:
import wikipedia
wikipedia.set_lang('en')

pages_search = [ "Anxiety", "Psychological_stress", "Depression_(mood)", "Bipolar_disorder", 
                "Post-traumatic_stress_disorder", "Eating_disorder", "Neurodevelopmental_disorder", "Mental_health" ]

pages_content = []

for page_title in pages_search:
    page = wikipedia.page(title=page_title, auto_suggest=False)
    pages_content.append(page.content.lower())

text = "\n\n".join(pages_content)
len(text)

375196

Divisão do texto em parte menores para realizar o processamento de entidades

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [12]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
    separators=["\n\n","\n"]
)

chunks = text_splitter.split_text(text)
len(chunks)

1037

In [13]:
len(chunks[0])

397

In [14]:
chunks[2]

'\nthe emotion of anxiety can persist beyond the developmentally appropriate time-periods in response to specific events, and thus turning into one of the multiple anxiety disorders (e.g. generalized anxiety disorder, panic disorder). the difference between anxiety disorder (as mental disorder) and anxiety (as normal emotion), is that people with an anxiety disorder experience anxiety excessively or persistently during approximately 6 months, or even during shorter time-periods in children. anxiety disorders are among the most persistent mental problems and often last decades. anxiety can also be experienced within other mental disorders, e.g., obsessive-compulsive disorder, post-traumatic stress disorder.'

Carregando o modelo que será usado para NER (Named Entity Recognition)

In [9]:
from gliner import GLiNER
from tqdm import tqdm

model = GLiNER.from_pretrained("urchade/gliner_medium-v2.1")

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

model.safetensors:  86%|########5 | 668M/781M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



Definição dos labels que serão buscados no texto

In [21]:
labels = [ "symptom", "treatment", "disease", "feeling", "risk factor", "behavior" ]

In [39]:
chunks_entities = []
chunks_labels = []
entity_list = []
duplicates = set()

for text_chunk in tqdm(chunks):
    entities = model.predict_entities(text_chunk, labels)
    chunk_entities = set()
    chunk_labels = set()
    for entity in entities:
        chunk_entities.add((entity["text"],entity["label"]))
        chunk_labels.add(entity["label"])
        if entity["text"] in duplicates:
            continue
        duplicates.add(entity["text"])
        entity_list.append((entity["text"], "=>", entity["label"]))
    chunks_entities.append(list(chunk_entities))
    chunks_labels.append(list(chunk_labels))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1037/1037 [02:07<00:00,  8.13it/s]


In [23]:
tratamentos = []
doencas = []
sentimentos = []
sintomas = []
fatores = []
comportamentos = []

for entity in entity_list:
    subject, p, obj = entity

    if obj == "treatment":
        tratamentos.append(subject.lower())
    elif obj == "disease":
        doencas.append(subject.lower())
    elif obj == "feeling":
        sentimentos.append(subject.lower())
    elif obj == "symptom":
        sintomas.append(subject.lower())
    elif obj == "risk factor":
        fatores.append(subject.lower())
    elif obj == "behavior":
        comportamentos.append(subject.lower())

print(f'Tratamento: {len(tratamentos)}')
print(f'Doenças: {len(doencas)}')
print(f'Sentimentos: {len(sentimentos)}')
print(f'Sintomas: {len(sintomas)}')
print(f'Fatores de Risco: {len(fatores)}')
print(f'Comportamentos: {len(comportamentos)}')

Tratamento: 260
Doenças: 250
Sentimentos: 263
Sintomas: 233
Fatores de Risco: 33
Comportamentos: 296


In [38]:
entity_list[:10]

[('anxiety', '=>', 'feeling'),
 ('fear', '=>', 'feeling'),
 ('nervous behavior', '=>', 'behavior'),
 ('pacing back and forth', '=>', 'behavior'),
 ('somatic complaints', '=>', 'symptom'),
 ('rumination', '=>', 'behavior'),
 ('muscular tension', '=>', 'symptom'),
 ('restlessness', '=>', 'symptom'),
 ('fatigue', '=>', 'symptom'),
 ("inability to catch one's breath", '=>', 'symptom')]

In [46]:
def format_entities(entities):
    return "\n".join(e for e in entities)

def format_entities_with_labels(entities):
    return "\n".join(": ".join([e[0],e[1]]) for e in entities)

In [42]:
"\n".join(":".join([e[0],e[1]]) for e in chunks_entities[i])

'increase or decrease in food intake:behavior\nwithdrawal from situations:behavior\nfoot tapping:behavior\nchanges in sleeping patterns:behavior\nincreased motor tension:behavior\nnegative feelings:feeling\nanxiety:symptom\nchanges in habits:behavior'

In [145]:
i = 10
entity = format_entities_with_labels(chunks_entities[i])
text_relations = chunks[i]

print(entity)
print(text_relations)

increase or decrease in food intake: behavior
withdrawal from situations: behavior
foot tapping: behavior
changes in sleeping patterns: behavior
increased motor tension: behavior
negative feelings: feeling
anxiety: symptom
changes in habits: behavior

the behavioral effects of anxiety may include withdrawal from situations which have provoked anxiety or negative feelings in the past. other effects may include changes in sleeping patterns, changes in habits, increase or decrease in food intake, and increased motor tension (such as foot tapping).


# Relation Entities

disease related disease 
disease related symptom 
disease related behavior
disease related risk factor
disease related treatment
disease related feeling
behavior related feeling
behavior related symptom
behavior related risk factor
symptom related feeling



In [144]:
system_message = """Extract all relationships between the following entities ONLY based on the provided text using relationships by tag in entity.
Return a list of JSON objects, for example:

<Examples>
[{{ "subject": "anxiety", "relationship": "related", "object": "fear" }},
{{ "subject": "stress", "relationship: "related", "object": "depression" }}]
</Examples>

- ONLY return triples and nothing else. None of "subject", "relationship" and "object" can be empty.

Relationships:

disease_related_disease 
disease_related_symptom 
disease_related_behavior
disease_related_risk factor
disease_related_treatment
disease_related_feeling
behavior_related_feeling
behavior_related_symptom
behavior_related_risk_factor
symptom_related_feeling

Entities: \n\n{entities}
"""

user_message = """Context: {text}\n\nTriples:"""

In [146]:
print(system_message.format(entities=entity))

Extract all relationships between the following entities ONLY based on the provided text using relationships by tag in entity.
Return a list of JSON objects, for example:

<Examples>
[{ "subject": "anxiety", "relationship": "related", "object": "fear" },
{ "subject": "stress", "relationship: "related", "object": "depression" }]
</Examples>

- ONLY return triples and nothing else. None of "subject", "relationship" and "object" can be empty.

Relationships:

disease_related_disease 
disease_related_symptom 
disease_related_behavior
disease_related_risk factor
disease_related_treatment
disease_related_feeling
behavior_related_feeling
behavior_related_symptom
behavior_related_risk_factor
symptom_related_feeling

Entities: 

increase or decrease in food intake: behavior
withdrawal from situations: behavior
foot tapping: behavior
changes in sleeping patterns: behavior
increased motor tension: behavior
negative feelings: feeling
anxiety: symptom
changes in habits: behavior



In [147]:
from openai import OpenAI
import json
client = OpenAI(
    organization='',
    project='',
    api_key='',
)


completion = client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "system", "content": system_message.format(entities=entity)},
    {"role": "user", "content": user_message.format(text=text_relations)}
  ]
)

triples = json.loads(completion.choices[0].message.content)
triples

[{'subject': 'anxiety',
  'relationship': 'disease_related_behavior',
  'object': 'withdrawal from situations'},
 {'subject': 'anxiety',
  'relationship': 'disease_related_behavior',
  'object': 'changes in sleeping patterns'},
 {'subject': 'anxiety',
  'relationship': 'disease_related_behavior',
  'object': 'changes in habits'},
 {'subject': 'anxiety',
  'relationship': 'disease_related_behavior',
  'object': 'increase or decrease in food intake'},
 {'subject': 'anxiety',
  'relationship': 'disease_related_behavior',
  'object': 'increased motor tension'},
 {'subject': 'anxiety',
  'relationship': 'disease_related_feeling',
  'object': 'negative feelings'}]

In [55]:
import time

errors = []
all_triples = []

for i in tqdm(range(len(chunks_entities))):
    try:
        
        entity = format_entities_with_labels(chunks_entities[i])
        text_relations = chunks[i]

        completion = client.chat.completions.create(
          model="gpt-3.5-turbo",
          messages=[
            {"role": "system", "content": system_message.format(entities=entity)},
            {"role": "user", "content": user_message.format(text=text_relations)}
          ]
        )
        
        triples = json.loads(completion.choices[0].message.content)

        all_triples.append(triples)
        time.sleep(3)
        
    except Exception as e:
        print(f'Error chunk {i}: {e}')
        errors.append(completion.choices[0].message.content)
        all_triples.append([])


  1%|█                                                                                                                                           | 8/1037 [00:36<1:01:47,  3.60s/it]

Error chunk 7: Expecting value: line 1 column 1 (char 0)


  2%|██▌                                                                                                                                        | 19/1037 [01:26<1:05:24,  3.86s/it]

Error chunk 18: Expecting value: line 1 column 1 (char 0)


  4%|██████                                                                                                                                     | 45/1037 [03:22<1:08:29,  4.14s/it]

Error chunk 44: Expecting value: line 1 column 1 (char 0)


  5%|██████▉                                                                                                                                      | 51/1037 [03:43<51:52,  3.16s/it]

Error chunk 50: Expecting value: line 1 column 1 (char 0)


  6%|████████▏                                                                                                                                    | 60/1037 [04:30<56:22,  3.46s/it]

Error chunk 59: Expecting value: line 1 column 1 (char 0)


  7%|█████████▌                                                                                                                                   | 70/1037 [05:09<52:34,  3.26s/it]

Error chunk 69: Expecting value: line 1 column 1 (char 0)


  7%|█████████▋                                                                                                                                   | 71/1037 [05:10<40:13,  2.50s/it]

Error chunk 70: Expecting value: line 1 column 1 (char 0)


  8%|███████████                                                                                                                                  | 81/1037 [05:50<53:15,  3.34s/it]

Error chunk 80: Expecting value: line 1 column 1 (char 0)


  8%|███████████▏                                                                                                                                 | 82/1037 [05:51<41:07,  2.58s/it]

Error chunk 81: Expecting value: line 1 column 1 (char 0)


  8%|███████████▌                                                                                                                               | 86/1037 [06:12<1:04:32,  4.07s/it]

Error chunk 85: Expecting value: line 1 column 1 (char 0)


  9%|████████████▉                                                                                                                                | 95/1037 [06:52<53:08,  3.39s/it]

Error chunk 94: Expecting value: line 1 column 1 (char 0)


  9%|█████████████▎                                                                                                                               | 98/1037 [07:04<50:05,  3.20s/it]

Error chunk 97: Expecting value: line 1 column 1 (char 0)


 10%|█████████████▌                                                                                                                              | 100/1037 [07:10<45:15,  2.90s/it]

Error chunk 99: Expecting value: line 1 column 1 (char 0)


 10%|██████████████▌                                                                                                                             | 108/1037 [07:41<48:20,  3.12s/it]

Error chunk 107: Expecting value: line 1 column 1 (char 0)


 11%|███████████████▍                                                                                                                            | 114/1037 [08:06<49:57,  3.25s/it]

Error chunk 113: Expecting value: line 1 column 1 (char 0)


 12%|████████████████▏                                                                                                                           | 120/1037 [08:29<47:08,  3.08s/it]

Error chunk 119: Expecting value: line 1 column 1 (char 0)


 12%|████████████████▌                                                                                                                           | 123/1037 [08:37<40:07,  2.63s/it]

Error chunk 122: Expecting value: line 1 column 1 (char 0)


 12%|█████████████████                                                                                                                           | 126/1037 [08:48<44:54,  2.96s/it]

Error chunk 125: Expecting value: line 1 column 1 (char 0)


 13%|█████████████████▋                                                                                                                          | 131/1037 [09:08<49:21,  3.27s/it]

Error chunk 130: Expecting value: line 1 column 1 (char 0)


 13%|██████████████████▊                                                                                                                         | 139/1037 [09:36<43:11,  2.89s/it]

Error chunk 138: Expecting value: line 1 column 1 (char 0)


 14%|███████████████████                                                                                                                         | 141/1037 [09:42<41:18,  2.77s/it]

Error chunk 140: Expecting value: line 1 column 1 (char 0)


 14%|███████████████████▊                                                                                                                        | 147/1037 [10:02<39:59,  2.70s/it]

Error chunk 146: Expecting value: line 1 column 1 (char 0)


 15%|████████████████████▌                                                                                                                       | 152/1037 [10:20<46:07,  3.13s/it]

Error chunk 151: Expecting value: line 1 column 1 (char 0)


 15%|████████████████████▊                                                                                                                     | 156/1037 [10:39<1:00:47,  4.14s/it]

Error chunk 155: Expecting value: line 1 column 1 (char 0)


 15%|█████████████████████▏                                                                                                                      | 157/1037 [10:40<45:12,  3.08s/it]

Error chunk 156: Expecting value: line 1 column 1 (char 0)


 15%|█████████████████████▍                                                                                                                      | 159/1037 [10:45<36:42,  2.51s/it]

Error chunk 158: Expecting value: line 1 column 1 (char 0)


 16%|██████████████████████▌                                                                                                                     | 167/1037 [11:22<56:30,  3.90s/it]

Error chunk 166: Expecting value: line 1 column 1 (char 0)


 17%|███████████████████████▎                                                                                                                    | 173/1037 [11:43<45:56,  3.19s/it]

Error chunk 172: Expecting value: line 1 column 1 (char 0)


 17%|███████████████████████▊                                                                                                                    | 176/1037 [11:53<42:38,  2.97s/it]

Error chunk 175: Expecting value: line 1 column 1 (char 0)


 18%|████████████████████████▌                                                                                                                   | 182/1037 [12:20<57:02,  4.00s/it]

Error chunk 181: Expecting value: line 1 column 1 (char 0)


 18%|█████████████████████████▊                                                                                                                  | 191/1037 [12:55<48:23,  3.43s/it]

Error chunk 190: Expecting value: line 1 column 1 (char 0)


 19%|██████████████████████████                                                                                                                  | 193/1037 [13:00<38:12,  2.72s/it]

Error chunk 192: Expecting value: line 1 column 1 (char 0)


 20%|████████████████████████████                                                                                                                | 208/1037 [13:59<43:28,  3.15s/it]

Error chunk 207: Expecting value: line 1 column 1 (char 0)


 21%|█████████████████████████████▌                                                                                                              | 219/1037 [14:42<42:55,  3.15s/it]

Error chunk 218: Expecting value: line 1 column 1 (char 0)


 21%|█████████████████████████████▋                                                                                                              | 220/1037 [14:43<34:11,  2.51s/it]

Error chunk 219: Expecting value: line 1 column 1 (char 0)


 22%|██████████████████████████████                                                                                                              | 223/1037 [14:52<34:23,  2.54s/it]

Error chunk 222: Expecting value: line 1 column 1 (char 0)


 22%|██████████████████████████████▋                                                                                                             | 227/1037 [15:06<37:23,  2.77s/it]

Error chunk 226: Expecting value: line 1 column 1 (char 0)


 22%|███████████████████████████████▍                                                                                                            | 233/1037 [15:30<48:19,  3.61s/it]

Error chunk 232: Expecting value: line 1 column 1 (char 0)


 23%|███████████████████████████████▌                                                                                                            | 234/1037 [15:31<37:29,  2.80s/it]

Error chunk 233: Expecting value: line 1 column 1 (char 0)


 23%|███████████████████████████████▋                                                                                                            | 235/1037 [15:32<31:08,  2.33s/it]

Error chunk 234: Expecting value: line 1 column 1 (char 0)


 25%|███████████████████████████████████▏                                                                                                        | 261/1037 [17:26<44:30,  3.44s/it]

Error chunk 260: Expecting value: line 1 column 1 (char 0)


 27%|█████████████████████████████████████▍                                                                                                      | 277/1037 [18:36<44:35,  3.52s/it]

Error chunk 276: Expecting value: line 1 column 1 (char 0)


 28%|██████████████████████████████████████▉                                                                                                     | 288/1037 [19:20<40:49,  3.27s/it]

Error chunk 287: Expecting value: line 1 column 1 (char 0)


 29%|███████████████████████████████████████▉                                                                                                    | 296/1037 [19:56<46:53,  3.80s/it]

Error chunk 295: Expecting value: line 1 column 1 (char 0)


 30%|██████████████████████████████████████████▌                                                                                                 | 315/1037 [21:20<40:42,  3.38s/it]

Error chunk 314: Expecting value: line 1 column 1 (char 0)


 31%|██████████████████████████████████████████▉                                                                                                 | 318/1037 [21:32<39:25,  3.29s/it]

Error chunk 317: Expecting value: line 1 column 1 (char 0)


 31%|███████████████████████████████████████████▌                                                                                                | 323/1037 [21:53<41:24,  3.48s/it]

Error chunk 322: Expecting value: line 1 column 1 (char 0)


 32%|████████████████████████████████████████████▏                                                                                               | 327/1037 [22:08<38:26,  3.25s/it]

Error chunk 326: Expecting value: line 1 column 1 (char 0)


 32%|█████████████████████████████████████████████▎                                                                                              | 336/1037 [22:53<44:42,  3.83s/it]

Error chunk 335: Expecting value: line 1 column 1 (char 0)


 33%|█████████████████████████████████████████████▉                                                                                              | 340/1037 [23:09<39:05,  3.37s/it]

Error chunk 339: Expecting value: line 1 column 1 (char 0)


 34%|██████████████████████████████████████████████▉                                                                                             | 348/1037 [23:45<40:20,  3.51s/it]

Error chunk 347: Expecting value: line 1 column 1 (char 0)


 34%|███████████████████████████████████████████████▊                                                                                            | 354/1037 [24:07<37:15,  3.27s/it]

Error chunk 353: Expecting value: line 1 column 1 (char 0)


 34%|████████████████████████████████████████████████                                                                                            | 356/1037 [24:14<35:04,  3.09s/it]

Error chunk 355: Expecting value: line 1 column 1 (char 0)


 35%|████████████████████████████████████████████████▊                                                                                           | 362/1037 [24:39<36:04,  3.21s/it]

Error chunk 361: Expecting value: line 1 column 1 (char 0)


 35%|█████████████████████████████████████████████████▏                                                                                          | 364/1037 [24:44<30:15,  2.70s/it]

Error chunk 363: Expecting value: line 1 column 1 (char 0)


 35%|█████████████████████████████████████████████████▍                                                                                          | 366/1037 [24:50<30:23,  2.72s/it]

Error chunk 365: Expecting value: line 1 column 1 (char 0)


 36%|██████████████████████████████████████████████████▎                                                                                         | 373/1037 [25:21<36:49,  3.33s/it]

Error chunk 372: Expecting value: line 1 column 1 (char 0)


 36%|███████████████████████████████████████████████████                                                                                         | 378/1037 [25:42<36:37,  3.33s/it]

Error chunk 377: Expecting value: line 1 column 1 (char 0)


 37%|███████████████████████████████████████████████████▎                                                                                        | 380/1037 [25:47<31:37,  2.89s/it]

Error chunk 379: Expecting value: line 1 column 1 (char 0)


 37%|███████████████████████████████████████████████████▊                                                                                        | 384/1037 [26:02<32:18,  2.97s/it]

Error chunk 383: Expecting value: line 1 column 1 (char 0)


 37%|████████████████████████████████████████████████████                                                                                        | 386/1037 [26:09<32:41,  3.01s/it]

Error chunk 385: Expecting value: line 1 column 1 (char 0)


 38%|████████████████████████████████████████████████████▋                                                                                       | 390/1037 [26:24<33:30,  3.11s/it]

Error chunk 389: Expecting value: line 1 column 1 (char 0)


 38%|█████████████████████████████████████████████████████▏                                                                                      | 394/1037 [26:44<40:33,  3.78s/it]

Error chunk 393: Expecting value: line 1 column 1 (char 0)


 39%|██████████████████████████████████████████████████████▏                                                                                     | 401/1037 [27:17<49:56,  4.71s/it]

Error chunk 400: Expecting value: line 1 column 1 (char 0)


 39%|██████████████████████████████████████████████████████▌                                                                                     | 404/1037 [27:30<42:16,  4.01s/it]

Error chunk 403: Expecting value: line 1 column 1 (char 0)


 39%|███████████████████████████████████████████████████████                                                                                     | 408/1037 [27:45<36:53,  3.52s/it]

Error chunk 407: Expecting value: line 1 column 1 (char 0)


 39%|███████████████████████████████████████████████████████▏                                                                                    | 409/1037 [27:46<27:42,  2.65s/it]

Error chunk 408: Expecting value: line 1 column 1 (char 0)


 40%|███████████████████████████████████████████████████████▊                                                                                    | 413/1037 [28:01<32:48,  3.15s/it]

Error chunk 412: Expecting value: line 1 column 1 (char 0)


 40%|████████████████████████████████████████████████████████▌                                                                                   | 419/1037 [28:23<31:14,  3.03s/it]

Error chunk 418: Expecting value: line 1 column 1 (char 0)


 41%|████████████████████████████████████████████████████████▊                                                                                   | 421/1037 [28:31<32:21,  3.15s/it]

Error chunk 420: Expecting value: line 1 column 1 (char 0)


 41%|█████████████████████████████████████████████████████████                                                                                   | 423/1037 [28:38<33:13,  3.25s/it]

Error chunk 422: Expecting value: line 1 column 1 (char 0)


 42%|██████████████████████████████████████████████████████████▎                                                                                 | 432/1037 [29:32<43:51,  4.35s/it]

Error chunk 431: Expecting value: line 1 column 1 (char 0)


 42%|██████████████████████████████████████████████████████████▌                                                                                 | 434/1037 [29:37<33:07,  3.30s/it]

Error chunk 433: Expecting value: line 1 column 1 (char 0)


 43%|████████████████████████████████████████████████████████████▏                                                                               | 446/1037 [30:33<40:12,  4.08s/it]

Error chunk 445: Expecting value: line 1 column 1 (char 0)


 43%|████████████████████████████████████████████████████████████▍                                                                               | 448/1037 [30:38<31:13,  3.18s/it]

Error chunk 447: Expecting value: line 1 column 1 (char 0)


 44%|██████████████████████████████████████████████████████████████                                                                              | 460/1037 [31:27<36:10,  3.76s/it]

Error chunk 459: Expecting value: line 1 column 1 (char 0)


 45%|███████████████████████████████████████████████████████████████▎                                                                            | 469/1037 [32:04<32:58,  3.48s/it]

Error chunk 468: Expecting value: line 1 column 1 (char 0)


 45%|███████████████████████████████████████████████████████████████▍                                                                            | 470/1037 [32:05<25:04,  2.65s/it]

Error chunk 469: Expecting value: line 1 column 1 (char 0)


 47%|█████████████████████████████████████████████████████████████████▌                                                                          | 486/1037 [33:18<33:45,  3.68s/it]

Error chunk 485: Expecting value: line 1 column 1 (char 0)


 47%|█████████████████████████████████████████████████████████████████▋                                                                          | 487/1037 [33:19<24:59,  2.73s/it]

Error chunk 486: Expecting value: line 1 column 1 (char 0)


 49%|████████████████████████████████████████████████████████████████████▎                                                                       | 506/1037 [34:33<26:29,  2.99s/it]

Error chunk 505: Expecting value: line 1 column 1 (char 0)


 49%|████████████████████████████████████████████████████████████████████▋                                                                       | 509/1037 [34:42<23:44,  2.70s/it]

Error chunk 508: Expecting value: line 1 column 1 (char 0)


 49%|████████████████████████████████████████████████████████████████████▊                                                                       | 510/1037 [34:43<18:29,  2.11s/it]

Error chunk 509: Expecting value: line 1 column 1 (char 0)


 50%|█████████████████████████████████████████████████████████████████████▍                                                                      | 514/1037 [34:56<23:08,  2.65s/it]

Error chunk 513: Expecting value: line 1 column 1 (char 0)


 50%|██████████████████████████████████████████████████████████████████████▏                                                                     | 520/1037 [35:18<26:21,  3.06s/it]

Error chunk 519: Expecting value: line 4 column 1 (char 216)


 52%|████████████████████████████████████████████████████████████████████████▎                                                                   | 536/1037 [36:23<27:25,  3.28s/it]

Error chunk 535: Expecting value: line 1 column 1 (char 0)


 53%|██████████████████████████████████████████████████████████████████████████▎                                                                 | 550/1037 [37:20<24:39,  3.04s/it]

Error chunk 549: Expecting value: line 1 column 1 (char 0)


 54%|██████████████████████████████████████████████████████████████████████████▉                                                                 | 555/1037 [37:38<27:25,  3.41s/it]

Error chunk 554: Expecting value: line 1 column 1 (char 0)


 54%|███████████████████████████████████████████████████████████████████████████▍                                                                | 559/1037 [37:53<25:57,  3.26s/it]

Error chunk 558: Expecting value: line 1 column 1 (char 0)


 54%|████████████████████████████████████████████████████████████████████████████                                                                | 563/1037 [38:06<22:42,  2.87s/it]

Error chunk 562: Expecting value: line 1 column 1 (char 0)


 55%|████████████████████████████████████████████████████████████████████████████▍                                                               | 566/1037 [38:15<21:36,  2.75s/it]

Error chunk 565: Expecting value: line 1 column 1 (char 0)


 55%|████████████████████████████████████████████████████████████████████████████▊                                                               | 569/1037 [38:24<20:03,  2.57s/it]

Error chunk 568: Expecting value: line 1 column 1 (char 0)


 55%|█████████████████████████████████████████████████████████████████████████████                                                               | 571/1037 [38:29<18:37,  2.40s/it]

Error chunk 570: Expecting value: line 1 column 1 (char 0)


 55%|█████████████████████████████████████████████████████████████████████████████▋                                                              | 575/1037 [38:41<19:14,  2.50s/it]

Error chunk 574: Expecting value: line 1 column 1 (char 0)


 57%|███████████████████████████████████████████████████████████████████████████████▍                                                            | 588/1037 [39:33<22:49,  3.05s/it]

Error chunk 587: Expecting value: line 1 column 1 (char 0)


 57%|███████████████████████████████████████████████████████████████████████████████▉                                                            | 592/1037 [39:49<24:40,  3.33s/it]

Error chunk 591: Expecting value: line 1 column 1 (char 0)


 58%|█████████████████████████████████████████████████████████████████████████████████                                                           | 600/1037 [40:20<23:12,  3.19s/it]

Error chunk 599: Expecting value: line 1 column 1 (char 0)


 58%|█████████████████████████████████████████████████████████████████████████████████▏                                                          | 601/1037 [40:21<17:46,  2.45s/it]

Error chunk 600: Expecting value: line 1 column 1 (char 0)


 58%|█████████████████████████████████████████████████████████████████████████████████▍                                                          | 603/1037 [40:26<16:29,  2.28s/it]

Error chunk 602: Expecting value: line 1 column 1 (char 0)


 58%|█████████████████████████████████████████████████████████████████████████████████▋                                                          | 605/1037 [40:30<15:36,  2.17s/it]

Error chunk 604: Expecting value: line 1 column 1 (char 0)


 59%|█████████████████████████████████████████████████████████████████████████████████▉                                                          | 607/1037 [40:35<15:32,  2.17s/it]

Error chunk 606: Expecting value: line 1 column 1 (char 0)


 60%|████████████████████████████████████████████████████████████████████████████████████▏                                                       | 624/1037 [41:46<23:54,  3.47s/it]

Error chunk 623: Expecting value: line 1 column 1 (char 0)


 61%|█████████████████████████████████████████████████████████████████████████████████████▍                                                      | 633/1037 [42:22<23:05,  3.43s/it]

Error chunk 632: Expecting value: line 1 column 1 (char 0)


 62%|██████████████████████████████████████████████████████████████████████████████████████▏                                                     | 638/1037 [42:40<19:36,  2.95s/it]

Error chunk 637: Expecting value: line 1 column 1 (char 0)


 63%|███████████████████████████████████████████████████████████████████████████████████████▊                                                    | 650/1037 [43:39<30:13,  4.69s/it]

Error chunk 649: Expecting value: line 1 column 1 (char 0)


 63%|████████████████████████████████████████████████████████████████████████████████████████▎                                                   | 654/1037 [43:56<25:28,  3.99s/it]

Error chunk 653: Expecting value: line 1 column 1 (char 0)


 65%|██████████████████████████████████████████████████████████████████████████████████████████▉                                                 | 674/1037 [45:21<20:44,  3.43s/it]

Error chunk 673: Expecting value: line 1 column 1 (char 0)


 66%|████████████████████████████████████████████████████████████████████████████████████████████▏                                               | 683/1037 [46:00<20:21,  3.45s/it]

Error chunk 682: Expecting value: line 1 column 1 (char 0)


 66%|████████████████████████████████████████████████████████████████████████████████████████████▋                                               | 687/1037 [46:16<18:48,  3.22s/it]

Error chunk 686: Expecting value: line 1 column 1 (char 0)


 67%|█████████████████████████████████████████████████████████████████████████████████████████████▍                                              | 692/1037 [46:35<18:31,  3.22s/it]

Error chunk 691: Expecting value: line 1 column 1 (char 0)


 67%|█████████████████████████████████████████████████████████████████████████████████████████████▉                                              | 696/1037 [46:51<19:15,  3.39s/it]

Error chunk 695: Expecting value: line 1 column 1 (char 0)


 68%|██████████████████████████████████████████████████████████████████████████████████████████████▋                                             | 701/1037 [47:10<19:38,  3.51s/it]

Error chunk 700: Expecting value: line 1 column 1 (char 0)


 68%|██████████████████████████████████████████████████████████████████████████████████████████████▉                                             | 703/1037 [47:15<15:34,  2.80s/it]

Error chunk 702: Expecting value: line 1 column 1 (char 0)


 68%|███████████████████████████████████████████████████████████████████████████████████████████████▌                                            | 708/1037 [47:35<18:15,  3.33s/it]

Error chunk 707: Expecting value: line 1 column 1 (char 0)


 69%|████████████████████████████████████████████████████████████████████████████████████████████████▌                                           | 715/1037 [48:01<16:14,  3.03s/it]

Error chunk 714: Expecting value: line 1 column 1 (char 0)


 71%|███████████████████████████████████████████████████████████████████████████████████████████████████                                         | 734/1037 [49:22<18:29,  3.66s/it]

Error chunk 733: Expecting value: line 1 column 1 (char 0)


 71%|███████████████████████████████████████████████████████████████████████████████████████████████████▏                                        | 735/1037 [49:23<13:40,  2.72s/it]

Error chunk 734: Expecting value: line 1 column 1 (char 0)


 71%|███████████████████████████████████████████████████████████████████████████████████████████████████▍                                        | 737/1037 [49:27<11:26,  2.29s/it]

Error chunk 736: Expecting value: line 1 column 1 (char 0)


 71%|███████████████████████████████████████████████████████████████████████████████████████████████████▋                                        | 738/1037 [49:29<10:25,  2.09s/it]

Error chunk 737: Expecting value: line 1 column 1 (char 0)


 71%|███████████████████████████████████████████████████████████████████████████████████████████████████▊                                        | 739/1037 [49:29<08:11,  1.65s/it]

Error chunk 738: Expecting value: line 1 column 1 (char 0)


 72%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎                                      | 750/1037 [50:11<14:20,  3.00s/it]

Error chunk 749: Expecting value: line 1 column 1 (char 0)


 73%|█████████████████████████████████████████████████████████████████████████████████████████████████████▋                                      | 753/1037 [50:20<13:03,  2.76s/it]

Error chunk 752: Expecting value: line 1 column 1 (char 0)


 73%|██████████████████████████████████████████████████████████████████████████████████████████████████████▊                                     | 762/1037 [50:57<16:04,  3.51s/it]

Error chunk 761: Expecting value: line 1 column 1 (char 0)


 75%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                  | 780/1037 [52:16<13:49,  3.23s/it]

Error chunk 779: Expecting value: line 1 column 1 (char 0)


 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                           | 834/1037 [56:13<11:46,  3.48s/it]

Error chunk 833: Expecting value: line 1 column 1 (char 0)


 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                       | 864/1037 [58:20<09:38,  3.34s/it]

Error chunk 863: Expecting value: line 1 column 1 (char 0)


 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                       | 867/1037 [58:28<07:38,  2.70s/it]

Error chunk 866: Expecting value: line 1 column 1 (char 0)


 85%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                     | 881/1037 [59:29<08:28,  3.26s/it]

Error chunk 880: Expecting value: line 1 column 1 (char 0)


 85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                    | 883/1037 [59:34<07:07,  2.77s/it]

Error chunk 882: Expecting value: line 1 column 1 (char 0)


 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                    | 887/1037 [59:48<07:19,  2.93s/it]

Error chunk 886: Expecting value: line 1 column 1 (char 0)


 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                    | 888/1037 [59:52<08:36,  3.46s/it]

Error chunk 887: Expecting value: line 16 column 1 (char 1476)


 87%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                  | 902/1037 [1:00:50<07:30,  3.33s/it]

Error chunk 901: Expecting value: line 1 column 1 (char 0)


 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                 | 908/1037 [1:01:12<06:37,  3.08s/it]

Error chunk 907: Expecting value: line 1 column 1 (char 0)


 89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍              | 928/1037 [1:02:41<07:00,  3.85s/it]

Error chunk 927: Expecting value: line 1 column 1 (char 0)


 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊              | 930/1037 [1:02:46<05:17,  2.97s/it]

Error chunk 929: Expecting value: line 1 column 1 (char 0)


 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎             | 934/1037 [1:03:01<05:31,  3.21s/it]

Error chunk 933: Expecting value: line 1 column 1 (char 0)


 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊             | 938/1037 [1:03:19<05:58,  3.62s/it]

Error chunk 937: Expecting value: line 1 column 1 (char 0)


 91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏            | 941/1037 [1:03:29<05:03,  3.16s/it]

Error chunk 940: Expecting value: line 1 column 1 (char 0)


 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉           | 954/1037 [1:04:26<04:28,  3.23s/it]

Error chunk 953: Expecting value: line 1 column 1 (char 0)


 93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍         | 965/1037 [1:05:15<03:59,  3.32s/it]

Error chunk 964: Expecting value: line 1 column 1 (char 0)


 94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋        | 975/1037 [1:05:56<03:24,  3.29s/it]

Error chunk 974: Expecting value: line 1 column 1 (char 0)


 95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎      | 987/1037 [1:06:47<02:41,  3.23s/it]

Error chunk 986: Expecting value: line 1 column 1 (char 0)


 95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌      | 989/1037 [1:06:52<02:15,  2.82s/it]

Error chunk 988: Expecting value: line 1 column 1 (char 0)


 95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋      | 990/1037 [1:06:53<01:40,  2.13s/it]

Error chunk 989: Expecting value: line 1 column 1 (char 0)


 96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████      | 992/1037 [1:06:58<01:38,  2.19s/it]

Error chunk 991: Expecting value: line 1 column 1 (char 0)


 97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏    | 1001/1037 [1:07:32<01:49,  3.03s/it]

Error chunk 1000: Expecting value: line 1 column 1 (char 0)


 97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉    | 1006/1037 [1:07:52<01:41,  3.26s/it]

Error chunk 1005: Expecting value: line 1 column 1 (char 0)


 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏   | 1008/1037 [1:07:57<01:18,  2.72s/it]

Error chunk 1007: Expecting value: line 1 column 1 (char 0)


 98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋   | 1012/1037 [1:08:09<01:06,  2.66s/it]

Error chunk 1011: Expecting value: line 1 column 1 (char 0)


 98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉   | 1014/1037 [1:08:15<00:56,  2.45s/it]

Error chunk 1013: Expecting value: line 1 column 1 (char 0)


 98%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉  | 1021/1037 [1:08:40<00:51,  3.22s/it]

Error chunk 1020: Expecting value: line 1 column 1 (char 0)


 99%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 1030/1037 [1:09:16<00:24,  3.47s/it]

Error chunk 1029: Expecting value: line 1 column 1 (char 0)


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌| 1034/1037 [1:09:29<00:08,  2.85s/it]

Error chunk 1033: Expecting value: line 1 column 1 (char 0)


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1037/1037 [1:09:40<00:00,  4.03s/it]


In [56]:
len(all_triples)

1037

In [57]:
output_json = "../data/triples.json"

json_data = json.dumps(all_triples,indent=4)

with open(output_json,"w") as file:
    file.write(json_data)

In [114]:
import json

input_json = "../data/triples.json"

with open(input_json,"r") as file:
    all_triples = json.load(file)

all_triples[0]

[{'subject': 'anxiety',
  'relationship': 'disease_related_feeling',
  'object': 'dread'},
 {'subject': 'anxiety',
  'relationship': 'disease_related_behavior',
  'object': 'nervous behavior'},
 {'subject': 'anxiety',
  'relationship': 'disease_related_behavior',
  'object': 'pacing back and forth'},
 {'subject': 'anxiety',
  'relationship': 'disease_related_symptom',
  'object': 'somatic complaints'},
 {'subject': 'anxiety',
  'relationship': 'disease_related_behavior',
  'object': 'rumination'},
 {'subject': 'fear',
  'relationship': 'disease_related_feeling',
  'object': 'dread'}]

In [142]:
def get_color(n):
    type_to_color = {
        "symptom": "#fcce03", 
        "treatment": "#62fc03", 
        "disease": "#fc0303", 
        "feeling": "#0b03fc",
        "risk factor": "#fc7303",
        "behavior": "#03f4fc",
    }

    if n.lower() in sintomas:
        return type_to_color["symptom"]
    if n.lower() in sentimentos:
        return type_to_color["feeling"]
    if n.lower() in tratamentos:
        return type_to_color["treatment"]
    if n.lower() in doencas:
        return type_to_color["disease"]
    if n.lower() in fatores:
        return type_to_color["risk factor"]
    if n.lower() in comportamentos:
        return type_to_color["behavior"]

    return "purple" 

def get_size(n):
    type_to_size = {
        "disease": 50, 
        "risk factor": 40,
        "symptom": 30,
        "treatment": 20, 
        "feeling": 15,
        "behavior": 10
    }

    if n.lower() in sintomas:
        return type_to_size["symptom"]
    if n.lower() in sentimentos:
        return type_to_size["feeling"]
    if n.lower() in tratamentos:
        return type_to_size["treatment"]
    if n.lower() in doencas:
        return type_to_size["disease"]
    if n.lower() in fatores:
        return type_to_size["risk factor"]
    if n.lower() in comportamentos:
        return type_to_size["behavior"]

    return 5

In [59]:
from pyvis.network import Network
import networkx as nx


graph = nx.Graph()

for items in all_triples:
    for item in items:
        try:
            node_1 = item["subject"]
            node_2 = item["object"]
    
            graph.add_node(node_1, title=node_1, color=get_color(node_1), size=get_size(node_1), label=node_1)
            graph.add_node(node_2, title=node_2, color=get_color(node_2), size=get_size(node_2), label=node_2)
            graph.add_edge(node_1, node_2, title=item["relationship"])
        except Exception as e:
            print(e)
            print(f'Error in item: {item}')

string indices must be integers
Error in item: subject
string indices must be integers
Error in item: relationship
string indices must be integers
Error in item: object


In [60]:
len(graph.nodes)

3044

In [61]:
len(graph.edges)

3584

In [69]:
nt = Network(height="750px", width="100%")

nt.from_nx(graph)
nt.force_atlas_2based(central_gravity=0.015, gravity=-60)
nt.show_buttons(filter_=['physics'])
nt.show("../data/graph.html",notebook=False)

../data/graph.html
Opening in existing browser session.


In [71]:
from IPython.display import IFrame
IFrame("../data/graph.html", width=1000, height=800)

In [95]:
no_entities = []

for node in list(graph.nodes):
    no_entity = False
    for entity in entity_list:
        if entity[0] == node:
            no_entity = False
            break
        else:
            no_entity = True
    
    if no_entity:
        no_entities.append(node)

len(no_entities)

1893

In [105]:
for no_entity in no_entities:
    try:
        graph.remove_node(no_entity)
    except Exception as e:
        continue
print(f'Nodes: {graph.number_of_nodes()}')
print(f'Edges: {graph.number_of_edges()}')

Nodes: 1151
Edges: 1338


In [100]:
nt = Network(height="750px", width="100%")

nt.from_nx(graph)
nt.force_atlas_2based(central_gravity=0.015, gravity=-60)
nt.show_buttons(filter_=['physics'])
nt.show("../data/graph-clear.html",notebook=False)

../data/graph-clear.html
Opening in existing browser session.


In [102]:
from IPython.display import IFrame
IFrame("../data/graph-clear.html", width=1000, height=800)

In [106]:
title_edges = nx.get_edge_attributes(graph,"title")
title_edges

{('anxiety', 'dread'): 'related',
 ('anxiety', 'nervous behavior'): 'disease_related_behavior',
 ('anxiety', 'pacing back and forth'): 'disease_related_behavior',
 ('anxiety', 'somatic complaints'): 'disease_related_symptom',
 ('anxiety', 'rumination'): 'disease_related_behavior',
 ('anxiety', 'fear'): 'disease_related_symptom',
 ('anxiety', 'muscular tension'): 'disease_related_symptom',
 ('anxiety', 'restlessness'): 'disease_related_feeling',
 ('anxiety', 'fatigue'): 'disease_related_symptom',
 ('anxiety', "inability to catch one's breath"): 'disease_related_symptom',
 ('anxiety', 'tightness in the abdominal region'): 'disease_related_symptom',
 ('anxiety', 'nausea'): 'disease_related_symptom',
 ('anxiety', 'problems in concentration'): 'disease_related_symptom',
 ('anxiety', 'generalized anxiety disorder'): 'disease_related_disease',
 ('anxiety', 'panic disorder'): 'disease_related_symptom',
 ('anxiety', 'obsessive-compulsive disorder'): 'disease_related_disease',
 ('anxiety', 'post

- Número de Ocorrências
- Probabilidade
- Especificidade
- Confiabilidade

In [109]:
relationships = [
    "disease_related_disease",
    "disease_related_symptom",
    "disease_related_behavior",
    "disease_related_risk factor",
    "disease_related_treatment",
    "disease_related_feeling",
    "behavior_related_feeling",
    "behavior_related_symptom",
    "behavior_related_risk factor",
    "symptom_related_feeling"
]

len(relationships)

10

In [123]:
entities_list = [] 

for e in entity_list:
    entities_list.append(e[0])
    
entities_list[:10]

['anxiety',
 'fear',
 'nervous behavior',
 'pacing back and forth',
 'somatic complaints',
 'rumination',
 'muscular tension',
 'restlessness',
 'fatigue',
 "inability to catch one's breath"]

In [127]:
triples_clear = []

for triples in all_triples:
    for triple in triples:
        try:
            subject = triple['subject']
            predicate = triple["relationship"]
            obj = triple['object']
            
            if predicate in relationships and subject in entities_list and obj in entities_list:
                triples_clear.append((subject,predicate,obj))
        except Exception as e:
            print(e)
            continue
            
len(triples_clear)

string indices must be integers
string indices must be integers
string indices must be integers


1202

In [128]:
triples_clear[:10]

[('anxiety', 'disease_related_feeling', 'dread'),
 ('anxiety', 'disease_related_behavior', 'nervous behavior'),
 ('anxiety', 'disease_related_behavior', 'pacing back and forth'),
 ('anxiety', 'disease_related_symptom', 'somatic complaints'),
 ('anxiety', 'disease_related_behavior', 'rumination'),
 ('fear', 'disease_related_feeling', 'dread'),
 ('anxiety', 'disease_related_feeling', 'fear'),
 ('anxiety', 'disease_related_symptom', 'muscular tension'),
 ('anxiety', 'disease_related_symptom', 'restlessness'),
 ('anxiety', 'disease_related_symptom', 'fatigue')]

In [131]:
import pandas as pd

data = pd.DataFrame(
    triples_clear,
    columns=["subject", "predicate", "object"]
)

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1202 entries, 0 to 1201
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   subject    1202 non-null   object
 1   predicate  1202 non-null   object
 2   object     1202 non-null   object
dtypes: object(3)
memory usage: 28.3+ KB


In [138]:
data["subject"].value_counts()

subject
bipolar disorder                  169
ptsd                               87
anxiety                            73
eating disorders                   51
depression                         34
                                 ... 
re-experiencing                     1
heightened sense of threat          1
single-session debriefing           1
multiple session interventions      1
hiv/aids                            1
Name: count, Length: 327, dtype: int64

In [141]:
data["predicate"].value_counts()

predicate
disease_related_behavior        273
disease_related_treatment       250
disease_related_symptom         231
disease_related_disease         178
disease_related_feeling         129
disease_related_risk factor      71
behavior_related_feeling         28
symptom_related_feeling          21
behavior_related_symptom         15
behavior_related_risk factor      6
Name: count, dtype: int64

In [143]:
graph = nx.Graph()

for item in triples_clear:
    try:
        node_1 = item[0]
        node_2 = item[2]

        graph.add_node(node_1, title=node_1, color=get_color(node_1), size=get_size(node_1), label=node_1)
        graph.add_node(node_2, title=node_2, color=get_color(node_2), size=get_size(node_2), label=node_2)
        graph.add_edge(node_1, node_2, title=item[1])
    except Exception as e:
        print(e)
        print(f'Error in item: {item}')

nt = Network(height="750px", width="100%")

nt.from_nx(graph)
nt.force_atlas_2based(central_gravity=0.015, gravity=-60)
nt.show_buttons(filter_=['physics'])
nt.show("../data/graph-clear-entities.html",notebook=False)

../data/graph-clear-entities.html


Opening in existing browser session.


In [2]:
from IPython.display import IFrame
IFrame("../data/graph-clear-entities.html", width=1000, height=800)

In [151]:
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt

In [148]:
localy = nx.degree_centrality(graph)
localy

{'anxiety': 0.14520202020202022,
 'dread': 0.0025252525252525255,
 'nervous behavior': 0.0012626262626262627,
 'pacing back and forth': 0.0012626262626262627,
 'somatic complaints': 0.0012626262626262627,
 'rumination': 0.0012626262626262627,
 'fear': 0.012626262626262628,
 'muscular tension': 0.0012626262626262627,
 'restlessness': 0.0025252525252525255,
 'fatigue': 0.006313131313131314,
 "inability to catch one's breath": 0.0012626262626262627,
 'tightness in the abdominal region': 0.0012626262626262627,
 'nausea': 0.003787878787878788,
 'problems in concentration': 0.0012626262626262627,
 'generalized anxiety disorder': 0.0025252525252525255,
 'panic disorder': 0.006313131313131314,
 'obsessive-compulsive disorder': 0.0025252525252525255,
 'post-traumatic stress disorder': 0.012626262626262628,
 'mental disorder': 0.010101010101010102,
 'mental disorders': 0.007575757575757576,
 'depression': 0.06818181818181819,
 'self-harm': 0.005050505050505051,
 'suicide': 0.012626262626262628,


In [166]:

def sortTuple(node):
    return node[1]


In [167]:
localy_sort = []

for node in localy:
    localy_sort.append((node,localy[node]))

localy_sort.sort(reverse=True, key=sortTuple)

localy_sort

[('bipolar disorder', 0.1792929292929293),
 ('anxiety', 0.14520202020202022),
 ('ptsd', 0.14520202020202022),
 ('eating disorders', 0.08080808080808081),
 ('depression', 0.06818181818181819),
 ('anorexia nervosa', 0.042929292929292935),
 ('stress', 0.04040404040404041),
 ('depressed mood', 0.030303030303030304),
 ('anxiety disorders', 0.026515151515151516),
 ('bulimia nervosa', 0.025252525252525256),
 ('anorexia', 0.02398989898989899),
 ('mania', 0.01893939393939394),
 ('eating disorder', 0.01893939393939394),
 ('mental illness', 0.015151515151515152),
 ('bulimia', 0.01388888888888889),
 ('fear', 0.012626262626262628),
 ('post-traumatic stress disorder', 0.012626262626262628),
 ('suicide', 0.012626262626262628),
 ('prevention', 0.012626262626262628),
 ('treatment', 0.012626262626262628),
 ('antidepressants', 0.012626262626262628),
 ('adhd', 0.012626262626262628),
 ('neurodevelopmental disorders', 0.012626262626262628),
 ('depressive episodes', 0.011363636363636364),
 ('mental disorder'

In [149]:
closeness = nx.closeness_centrality(graph)
closeness

{'anxiety': 0.3729490022172949,
 'dread': 0.263069403714565,
 'nervous behavior': 0.26194276815261824,
 'pacing back and forth': 0.26194276815261824,
 'somatic complaints': 0.26194276815261824,
 'rumination': 0.26194276815261824,
 'fear': 0.2964790904684264,
 'muscular tension': 0.26194276815261824,
 'restlessness': 0.2795412996509889,
 'fatigue': 0.31592787377911347,
 "inability to catch one's breath": 0.26194276815261824,
 'tightness in the abdominal region': 0.26194276815261824,
 'nausea': 0.2645486001887386,
 'problems in concentration': 0.26194276815261824,
 'generalized anxiety disorder': 0.2631826005319981,
 'panic disorder': 0.3015958400573785,
 'obsessive-compulsive disorder': 0.2684970867587198,
 'post-traumatic stress disorder': 0.2642057726290988,
 'mental disorder': 0.26409169414350764,
 'mental disorders': 0.28621261751818605,
 'depression': 0.3743184599977746,
 'self-harm': 0.29748850371418467,
 'suicide': 0.32106895728942975,
 'withdrawal from situations': 0.26194276815

In [168]:
closeness_sort = []

for node in closeness:
    closeness_sort.append((node,closeness[node]))

closeness_sort.sort(reverse=True, key=sortTuple)

closeness_sort

[('bipolar disorder', 0.37523703290574456),
 ('depression', 0.3743184599977746),
 ('anxiety', 0.3729490022172949),
 ('stress', 0.3465361833633788),
 ('anxiety disorders', 0.34419604031309153),
 ('ptsd', 0.3426534250063662),
 ('eating disorders', 0.33186997484338776),
 ('suicide', 0.32106895728942975),
 ('cognitive behavioral therapy', 0.3197262747707076),
 ('fatigue', 0.31592787377911347),
 ('antidepressants', 0.3146277590722035),
 ('substance abuse', 0.3115824572778215),
 ('treatment', 0.31016042780748665),
 ('medications', 0.3078190053529761),
 ('stroke', 0.3053601416057731),
 ('symptoms', 0.3033910533910534),
 ('emdr', 0.3027902790279028),
 ('major depressive disorder', 0.3018935654671094),
 ('panic disorder', 0.3015958400573785),
 ('cbt', 0.2996748474455481),
 ('euphoria', 0.29908868637474995),
 ('pharmacotherapy', 0.2987964648931918),
 ('multiple sclerosis', 0.29850481387816674),
 ('medication', 0.2977781711958927),
 ('self-harm', 0.29748850371418467),
 ('obsessive–compulsive diso

In [150]:
betweenness = nx.betweenness_centrality(graph)
betweenness

{'anxiety': 0.24474662651722404,
 'dread': 0.0,
 'nervous behavior': 0.0,
 'pacing back and forth': 0.0,
 'somatic complaints': 0.0,
 'rumination': 0.0,
 'fear': 0.018304918687675392,
 'muscular tension': 0.0,
 'restlessness': 0.0,
 'fatigue': 0.00462580957909509,
 "inability to catch one's breath": 0.0,
 'tightness in the abdominal region': 0.0,
 'nausea': 0.019083092902194152,
 'problems in concentration': 0.0,
 'generalized anxiety disorder': 8.391549354273946e-05,
 'panic disorder': 0.0033977188185387985,
 'obsessive-compulsive disorder': 0.0030480429342629083,
 'post-traumatic stress disorder': 0.018997330946646518,
 'mental disorder': 0.006378108606821455,
 'mental disorders': 0.006931882058642351,
 'depression': 0.14037035480047205,
 'self-harm': 0.0006634666177063375,
 'suicide': 0.027095607207553985,
 'withdrawal from situations': 0.0,
 'negative feelings': 0.0,
 'changes in sleeping patterns': 0.0,
 'changes in habits': 0.0,
 'increase or decrease in food intake': 0.0,
 'incr

In [169]:
betweenness_sort = []

for node in betweenness:
    betweenness_sort.append((node,betweenness[node]))

betweenness_sort.sort(reverse=True, key=sortTuple)

betweenness_sort

[('bipolar disorder', 0.3256371640339387),
 ('anxiety', 0.24474662651722404),
 ('ptsd', 0.22926652204764203),
 ('eating disorders', 0.14633982676852156),
 ('depression', 0.14037035480047205),
 ('stress', 0.06446388144823373),
 ('anorexia nervosa', 0.06365244974628288),
 ('anxiety disorders', 0.04251832134306258),
 ('treatment', 0.03875346314661797),
 ('diabetes mellitus', 0.034734194026229424),
 ('neurodevelopmental disorders', 0.03287936252538023),
 ('anorexia', 0.031224526098917925),
 ('mania', 0.03075017761053322),
 ('suicide', 0.027095607207553985),
 ('bulimia nervosa', 0.024601553615172553),
 ('impulsivity', 0.02190042013050863),
 ('depressed mood', 0.021212543015405656),
 ('adhd', 0.01983701319984506),
 ('nausea', 0.019083092902194152),
 ('post-traumatic stress disorder', 0.018997330946646518),
 ('fear', 0.018304918687675392),
 ('eating disorder', 0.017415381238437642),
 ('celiac disease', 0.01728366125220781),
 ('bulimia', 0.014730842407904142),
 ('phenylketonuria', 0.0132456039