In [None]:
# ! pip install ipywidgets
# ! jupyter nbextension enable --py widgetsnbextension
# ! pip install jupyter_contrib_nbextensions
# ! jupyter contrib nbextension install --user


In [None]:
from transformers import pipeline

# Load a pre-trained NER pipeline.
# `ner_pipeline` is a module-level name that a later cell calls again.
ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")

# Example text (also reused by a later cell through the module-level name `text`)
text = """
Kundalini Yoga offers multiple benefits for cognition and memory in older women at risk of Alzheimer's Disease.
"""

# Entity extraction
entities = ner_pipeline(text)

# Show the results: one line per item returned by the pipeline, built from its
# "word", "entity" and "score" fields.
for entity in entities:
    print(f"Texto: {entity['word']}, Entidade: {entity['entity']}, Score: {entity['score']:.2f}")


In [None]:
def reconstruct_entities(entities):
    """Merge token-level NER predictions into whole entities.

    Tokens are consumed in order. A ``B-`` tag starts a new entity, an ``I-``
    tag extends the entity currently being built, and anything else closes it.
    Word-piece continuations (tokens starting with ``##``) are glued directly
    onto the previous piece; any other ``I-`` token is a new word of the same
    entity and is therefore joined with a single space, so multi-word entities
    such as "Kundalini Yoga" are not collapsed into "KundaliniYoga".

    Args:
        entities: Iterable of dicts with at least the keys ``"word"`` (str),
            ``"entity"`` (str tag such as ``"B-PER"``/``"I-PER"``) and
            ``"score"`` (number).

    Returns:
        A list of new dicts with keys ``"word"`` (the rebuilt text),
        ``"entity"`` (the tag of the opening ``B-`` token) and ``"score"``
        (the highest score among the merged tokens). The input dicts are not
        modified.
    """
    result = []
    current_entity = {"word": "", "entity": None, "score": 0.0}

    for entity in entities:
        tag = entity["entity"]
        if tag.startswith("B-"):
            # Flush the entity in progress before starting a new one.
            if current_entity["word"]:
                result.append(current_entity)
            current_entity = {"word": entity["word"], "entity": tag, "score": entity["score"]}
        elif tag.startswith("I-") and current_entity["entity"]:
            piece = entity["word"]
            if piece.startswith("##"):
                # Sub-word continuation: drop the marker and append directly.
                current_entity["word"] += piece[2:]
            elif current_entity["word"]:
                # A new word inside the same entity: keep the word boundary.
                current_entity["word"] += " " + piece
            else:
                current_entity["word"] = piece
            current_entity["score"] = max(current_entity["score"], entity["score"])
        else:
            # Not a valid continuation (e.g. an I- tag with no open entity):
            # close whatever is in progress and skip this token.
            if current_entity["word"]:
                result.append(current_entity)
            current_entity = {"word": "", "entity": None, "score": 0.0}

    # Flush the last entity, if any.
    if current_entity["word"]:
        result.append(current_entity)

    return result

# Use the function on a fresh extraction.
# Relies on `ner_pipeline` and `text` created in an earlier cell.
entities = ner_pipeline(text)
cleaned_entities = reconstruct_entities(entities)

# Show the results: one line per reconstructed entity.
for entity in cleaned_entities:
    print(f"Texto: {entity['word']}, Entidade: {entity['entity']}, Score: {entity['score']:.2f}")


In [None]:
from transformers import pipeline

# Function to rebuild entities that the tokenizer split into several pieces
def reconstruct_entities(entities):
    """Merge token-level NER predictions into whole entities.

    Tokens are consumed in order. A ``B-`` tag starts a new entity, an ``I-``
    tag extends the entity currently being built, and anything else closes it.
    Word-piece continuations (tokens starting with ``##``) are glued directly
    onto the previous piece; any other ``I-`` token is a new word of the same
    entity and is therefore joined with a single space, so multi-word entities
    such as "Kundalini Yoga" are not collapsed into "KundaliniYoga".

    Args:
        entities: Iterable of dicts with at least the keys ``"word"`` (str),
            ``"entity"`` (str tag such as ``"B-PER"``/``"I-PER"``) and
            ``"score"`` (number).

    Returns:
        A list of new dicts with keys ``"word"`` (the rebuilt text),
        ``"entity"`` (the tag of the opening ``B-`` token) and ``"score"``
        (the highest score among the merged tokens). The input dicts are not
        modified.
    """
    result = []
    current_entity = {"word": "", "entity": None, "score": 0.0}

    for entity in entities:
        tag = entity["entity"]
        if tag.startswith("B-"):
            # Flush the entity in progress before starting a new one.
            if current_entity["word"]:
                result.append(current_entity)
            current_entity = {"word": entity["word"], "entity": tag, "score": entity["score"]}
        elif tag.startswith("I-") and current_entity["entity"]:
            piece = entity["word"]
            if piece.startswith("##"):
                # Sub-word continuation: drop the marker and append directly.
                current_entity["word"] += piece[2:]
            elif current_entity["word"]:
                # A new word inside the same entity: keep the word boundary.
                current_entity["word"] += " " + piece
            else:
                current_entity["word"] = piece
            current_entity["score"] = max(current_entity["score"], entity["score"])
        else:
            # Not a valid continuation (e.g. an I- tag with no open entity):
            # close whatever is in progress and skip this token.
            if current_entity["word"]:
                result.append(current_entity)
            current_entity = {"word": "", "entity": None, "score": 0.0}

    # Flush the last entity, if any.
    if current_entity["word"]:
        result.append(current_entity)

    return result

# Load the NER pipeline. This re-binds the module-level `ner_pipeline` used by
# `process_text_file` below.
# NOTE(review): device=0 presumably selects the first accelerator device; this
# may fail on a machine without one — confirm, or make it configurable.
ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", device=0)

# Run NER over the contents of a text file and print the merged entities
def process_text_file(file_path):
    """Read a UTF-8 text file, run NER on it and print every rebuilt entity.

    The file contents are passed to the module-level ``ner_pipeline``; its
    output is merged with ``reconstruct_entities`` and written to standard
    output, one line per entity, after a header line.

    Args:
        file_path: Path of the text file to read.

    Returns:
        None. The function only prints.
    """
    with open(file_path, "r", encoding="utf-8") as source:
        contents = source.read()

    # Token-level predictions are merged into whole entities before printing.
    merged = reconstruct_entities(ner_pipeline(contents))

    print("Entidades Encontradas:")
    for item in merged:
        word, label, score = item['word'], item['entity'], item['score']
        print(f"Texto: {word}, Entidade: {label}, Score: {score:.2f}")

# Path to the text file.
# NOTE(review): this is an absolute path on one user's machine; the cell will
# not run elsewhere without editing it.
file_path = "/Users/edmundobrown/Documents/PosYoga/dados/Ingles/docsCientificos(text)/11387275.txt"  # Replace with the path to your text file

# Process the text file
process_text_file(file_path)


### Verificando se a sentença está relacionada a Yoga

In [None]:
from transformers import pipeline

# Load a text-classification pipeline.
# NOTE(review): "bert-base-uncased" looks like a base checkpoint rather than one
# fine-tuned for classification, so the predicted labels are presumably not
# meaningful — confirm before relying on this output.
classifier = pipeline("text-classification", model="bert-base-uncased")

# Example text
text = "Kundalini Yoga offers multiple benefits for cognition and memory."

# Classify the text
result = classifier(text)

# Show the result
print(result)


In [None]:
from transformers import pipeline

# Load the text-classification pipeline with a fine-tuned model.
# NOTE(review): the checkpoint name suggests SST-2 sentiment fine-tuning, so the
# labels presumably describe sentiment rather than whether the sentence is
# about Yoga — confirm this matches the goal of this section.
classifier = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")

# Example text
text = "Kundalini Yoga offers multiple benefits for cognition and memory."

# Classify the text
result = classifier(text)

# Show the result
print(result)


In [None]:
import spacy
from spacy.matcher import PhraseMatcher
import os
import pandas as pd

# Load the spaCy language model
nlp = spacy.load("en_core_web_sm")

# Phrases of interest
conditions = ["Alzheimer", "menopause", "cognitive decline"]
yoga_practices = ["Kundalini Yoga", "Yoga", "Pilates", "meditation"]

# Configure the PhraseMatchers.
# attr="LOWER" compares the lower-cased token text, so "yoga", "Menopause" or
# "Cognitive decline" are matched as well. With the default (exact text)
# matching only the exact capitalisation listed above would be found.
matcher_conditions = PhraseMatcher(nlp.vocab, attr="LOWER")
matcher_yoga = PhraseMatcher(nlp.vocab, attr="LOWER")
matcher_conditions.add("CONDITIONS", [nlp.make_doc(phrase) for phrase in conditions])
matcher_yoga.add("YOGA", [nlp.make_doc(phrase) for phrase in yoga_practices])

def extract_key_information(text):
    """
    Extract key information from the text using spaCy.

    The text is parsed with the module-level ``nlp`` pipeline and scanned with
    the module-level ``matcher_conditions`` and ``matcher_yoga`` matchers.

    Args:
        text: Raw text to analyse.

    Returns:
        A dict with two keys, ``"conditions"`` and ``"yoga_practices"``. Each
        value is a list of the distinct matched spans (as they appear in the
        text), sorted so that the result is identical from run to run.
    """
    doc = nlp(text)

    # Sets remove repeated mentions of the same span text.
    found_conditions = {doc[start:end].text for _, start, end in matcher_conditions(doc)}
    found_yoga = {doc[start:end].text for _, start, end in matcher_yoga(doc)}

    # Plain list(set(...)) has an order that depends on string hashing, which
    # changes between interpreter runs; sorting keeps the output reproducible.
    return {
        "conditions": sorted(found_conditions),
        "yoga_practices": sorted(found_yoga),
    }

# Directory containing the text files.
# NOTE(review): this is an absolute path on one user's machine; adjust it
# before running elsewhere.
input_folder = "/Users/edmundobrown/Documents/PosYoga/dados/Ingles/docsCientificos(text)"
results = []

# os.listdir returns names in arbitrary order; sorting keeps the CSV rows in
# the same order on every run.
for filename in sorted(os.listdir(input_folder)):
    if not filename.endswith(".txt"):
        continue
    file_path = os.path.join(input_folder, filename)
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()
    # The file is already closed here; only the text is needed for extraction.
    data = extract_key_information(text)
    results.append({
        "file": filename,
        "conditions": ", ".join(data["conditions"]),
        "yoga_practices": ", ".join(data["yoga_practices"]),
    })

# Save the results as CSV. Explicit columns keep the header row even when no
# .txt file was found.
df_results = pd.DataFrame(results, columns=["file", "conditions", "yoga_practices"])
output_file = "extracted_yoga_conditions.csv"
df_results.to_csv(output_file, index=False)
print(f"Resultados salvos em: {output_file}")
