In [17]:
# === Imports ===
import os
import requests
from dotenv import load_dotenv

from langchain_community.tools import ArxivQueryRun, WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.tools import tool
from langchain_groq import ChatGroq

from langchain_core.messages import HumanMessage, AnyMessage
from typing_extensions import TypedDict
from typing import Annotated
from langgraph.graph.message import add_messages
from langgraph.graph import StateGraph, START, END
from langgraph.prebuilt import ToolNode, tools_condition

# === Load environment variables ===
load_dotenv()
os.environ["TAVILY_API_KEY"] = os.getenv("TAVILY_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")


@tool
def check_retraction(paper_title: str) -> str:
    """
    Search Crossref for a paper title and determine if it has been retracted
    based on either relation fields or update-to metadata.
    """
    print(f"Querying Crossref for: {paper_title}")
    try:
        url = "https://api.crossref.org/works"
        params = {
            "query.title": paper_title,
            "rows": 5
        }
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        items = data.get("message", {}).get("items", [])
        if not items:
            return f"No papers found with title '{paper_title}'."

        for item in items:
            title = item.get("title", ["Unknown"])[0]
            doi = item.get("DOI", "N/A")

            # Check Crossref relation metadata
            relation = item.get("relation", {})
            if any(rel in relation for rel in ["is-retracted-by", "has-retraction"]):
                return f"The paper '{title}' (DOI: {doi}) appears to be retracted via relation metadata."

            # Check update-to metadata
            updates = item.get("update-to", [])
            if any(update.get("type") == "retraction" for update in updates):
                update_dois = [update.get("DOI", "unknown") for update in updates]
                return (
                    f"The paper '{title}' (DOI: {doi}) has been retracted "
                    f"via update-to record(s): {', '.join(update_dois)}."
                )

        return f"No retraction markers found for papers matching '{paper_title}'."

    except Exception as e:
        return f"Error checking retraction status: {str(e)}"



arxiv = ArxivQueryRun(api_wrapper=ArxivAPIWrapper(top_k_results=2, doc_content_chars_max=500))
wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=500))
tavily = TavilySearchResults()

tools = [arxiv, wiki, tavily, check_retraction]

llm = ChatGroq(model="qwen-qwq-32b")
llm_with_tools = llm.bind_tools(tools)

# === LangGraph State ===
class State(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]

def tool_calling_llm(state: State):
    return {"messages": [llm_with_tools.invoke(state["messages"])]}

# === Build LangGraph ===
builder = StateGraph(State)
builder.add_node("tool_calling_llm", tool_calling_llm)
builder.add_node("tools", ToolNode(tools))
builder.add_edge(START, "tool_calling_llm")
builder.add_conditional_edges("tool_calling_llm", tools_condition)
builder.add_edge("tools", END)

graph = builder.compile()
print("LangGraph compiled and ready.")

LangGraph compiled and ready.


In [22]:
query = "is this a retracted research?: The non-linear relationship between carbon dioxide emissions, financial development and energy consumption in developing European and Central Asian economies"

response = graph.invoke({"messages": [HumanMessage(content=query)]})

for m in response["messages"]:
    if m.type == "ai":
        content = getattr(m, "content", "").strip()
        print("\nAssistant:", content)
        if hasattr(m, "tool_calls") and m.tool_calls:
            print("Tools used:")
            for call in m.tool_calls:
                print("-", call["name"])
    elif m.type == "tool":
        print("Tool result:", m.content)


Querying Crossref for: The non-linear relationship between carbon dioxide emissions, financial development and energy consumption in developing European and Central Asian economies

Assistant: 
Tools used:
- check_retraction
Tool result: The paper 'Retraction Note: The non-linear relationship between carbon dioxide emissions, financial development and energy consumption in developing European and Central Asian economies' (DOI: 10.1007/s11356-024-32870-5) has been retracted via update-to record(s): 10.1007/s11356-021-15225-2.


In [23]:
query = "What is ML?"

response = graph.invoke({"messages": [HumanMessage(content=query)]})

In [24]:
response

{'messages': [HumanMessage(content='What is ML?', additional_kwargs={}, response_metadata={}, id='ef3d619b-208e-4bb5-a12a-4d85cf1b5e14'),
  AIMessage(content='', additional_kwargs={'reasoning_content': 'Okay, the user is asking "What is ML?" which stands for Machine Learning. I need to provide a clear and concise explanation. Let me start by recalling the basic definition: ML is a subset of AI that focuses on developing algorithms enabling computers to learn from data and make predictions or decisions without being explicitly programmed. But maybe I should check a reliable source to ensure accuracy.\n\nFirst, I can use the \'wikipedia\' function to look up "Machine learning". Wikipedia usually has a comprehensive entry. Alternatively, the \'tavily_search_results_json\' might give recent or more detailed information. Since Wikipedia is a common starting point, I\'ll try that first. The function requires a query parameter, so I\'ll set the query to "Machine learning". That should retriev

In [9]:
import requests
from pprint import pprint

# Fetch 5 retracted articles from Crossref
resp = requests.get(
    "https://api.crossref.org/works",
    params={"filter": "update-type:retraction", "rows": 5}
)
data = resp.json().get("message", {}).get("items", [])

# Display full metadata for each item
for i, item in enumerate(data, start=1):
    print(f"\n--- Retracted Article #{i} ---")

    # Title
    title = item.get("title", ["No title found"])[0]
    print("Title:", title)

    # DOI
    doi = item.get("DOI", "N/A")
    print("DOI:", doi)

    # Retraction updates
    updates = item.get("update-to") or item.get("update", [])
    if updates:
        for u in updates:
            print("Retracted by:", u.get("DOI", "Unknown DOI"))
            print("Source:", u.get("source", "Unknown source"))
            print("Updated on:", u.get("updated", "Unknown date"))
    else:
        print("No update info found (unexpected).")

    # Optional: show full raw data (uncomment below)
    # pprint(item)



--- Retracted Article #1 ---
Title: Retraction Note: evaluating the environmental impact and economic practicability of solar home lighting systems: a roadmap towards clean energy for ecological sustainability
DOI: 10.1007/s11356-024-33074-7
Retracted by: 10.1007/s11356-023-27928-9
Source: retraction-watch
Updated on: {'date-time': '2024-03-26T00:00:00Z', 'date-parts': [[2024, 3, 26]], 'timestamp': 1711411200000}

--- Retracted Article #2 ---
Title: Retraction Note: A dual hesitant q-rung orthopair enhanced MARCOS methodology under uncertainty to determine a used PPE kit disposal
DOI: 10.1007/s11356-024-34270-1
Retracted by: 10.1007/s11356-022-21601-3
Source: retraction-watch
Updated on: {'date-time': '2024-07-06T00:00:00Z', 'date-parts': [[2024, 7, 6]], 'timestamp': 1720224000000}

--- Retracted Article #3 ---
Title: Retraction Note: Ameliorative role of nanocurcumin against the toxicological effects of novel forms of Cuo as nanopesticides: a comparative study
DOI: 10.1007/s11356-024

In [15]:
import requests
from pprint import pprint

resp = requests.get(
    "https://api.crossref.org/works",
    params={"filter": "update-type:retraction", "rows": 40}
)
data = resp.json().get("message", {}).get("items", [])
pprint(data)


[{'DOI': '10.1007/s11356-024-33074-7',
  'ISSN': ['1614-7499'],
  'URL': 'https://doi.org/10.1007/s11356-024-33074-7',
  'alternative-id': ['33074'],
  'assertion': [{'group': {'label': 'Article History',
                           'name': 'ArticleHistory'},
                 'label': 'First Online',
                 'name': 'first_online',
                 'order': 1,
                 'value': '26 March 2024'},
                {'label': 'Free to read',
                 'name': 'free',
                 'value': 'This content has been made available to all.'}],
  'author': [{'affiliation': [],
              'family': 'Ali',
              'given': 'Shahid',
              'sequence': 'first'},
             {'affiliation': [],
              'family': 'Yan',
              'given': 'Qingyou',
              'sequence': 'additional'},
             {'ORCID': 'https://orcid.org/0000-0003-1446-583X',
              'affiliation': [],
              'authenticated-orcid': False,
              'family

In [11]:
import os
import re
import requests
from docx import Document

def check_retraction(paper_title: str) -> str:
    print(f"Querying Crossref for: {paper_title}")
    try:
        url = "https://api.crossref.org/works"
        params = {
            "query.title": paper_title,
            "rows": 5
        }
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        items = data.get("message", {}).get("items", [])
        if not items:
            return f"No papers found with title '{paper_title}'."

        for item in items:
            title = item.get("title", ["Unknown"])[0]
            doi = item.get("DOI", "N/A")
            relation = item.get("relation", {})
            if any(rel in relation for rel in ["is-retracted-by", "has-retraction"]):
                return f"The paper '{title}' (DOI: {doi}) appears to be retracted via relation metadata."

            updates = item.get("update-to", [])
            if any(update.get("type") == "retraction" for update in updates):
                update_dois = [update.get("DOI", "unknown") for update in updates]
                return f"The paper '{title}' (DOI: {doi}) has been retracted via update-to: {', '.join(update_dois)}."

        return f"No retraction markers found for papers matching '{paper_title}'."

    except Exception as e:
        return f"Error checking retraction status: {str(e)}"

def extract_text_from_docx(docx_path: str) -> str:
    doc = Document(docx_path)
    return "\n".join([para.text for para in doc.paragraphs])

def extract_references(text: str) -> list:
    match = re.search(r'(REFERENCES|References|Bibliography)', text)
    if not match:
        return []
    refs_text = text[match.end():]

    # Heuristic: split by numbered patterns
    references = re.split(r'\n\d+\.\s*|\[\d+\]', refs_text)
    references = [ref.strip() for ref in references if ref.strip()]
    return references

def process_documents(folder_path: str):
    for filename in os.listdir(folder_path):
        if filename.lower().endswith(".docx"):
            file_path = os.path.join(folder_path, filename)
            try:
                full_text = extract_text_from_docx(file_path)
            except Exception as e:
                print(f"Could not read {filename}: {e}")
                continue

            references = extract_references(full_text)
            if not references:
                print(f"No references found in '{filename}'.")
                continue

            print(f"\nChecking references in '{filename}':")
            for ref in references:
                result = check_retraction(ref)
                if "retracted" in result.lower():
                    print(f"RETRACTED in '{filename}': {ref}")
                    print(f" -> {result}\n")

if __name__ == "__main__":
    process_documents(folder_path="Documents")
    else:
        print("Invalid folder path.")


In [10]:
!pip install python-docx

Collecting python-docx
  Using cached python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Collecting lxml>=3.1.0 (from python-docx)
  Downloading lxml-5.4.0-cp312-cp312-macosx_10_9_universal2.whl.metadata (3.5 kB)
Collecting typing_extensions>=4.9.0 (from python-docx)
  Using cached typing_extensions-4.14.0-py3-none-any.whl.metadata (3.0 kB)
Downloading python_docx-1.2.0-py3-none-any.whl (252 kB)
Downloading lxml-5.4.0-cp312-cp312-macosx_10_9_universal2.whl (8.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.1/8.1 MB[0m [31m973.5 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hUsing cached typing_extensions-4.14.0-py3-none-any.whl (43 kB)
Installing collected packages: typing_extensions, lxml, python-docx
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [python-docx]
[1A[2KSuccessfully installed lxml-5.4.0 python-docx-1.2.0 typing_extensions-4.14.0
