### A multi-agent RAG system, intelligent and autonomous, capable of:

- Dynamically refining queries (via LLM).
- Retrieving and downloading documents.
- Extracting and ranking relevant information.
- Intelligently visualizing complex relationships.
- Proactively detecting, analyzing, and responding to security threats.

In [None]:
# Intelligent Autonomous Multi-Agent RAG System with GNN Visualization

import openai
import requests
import os
from scholarly import scholarly
import pdfplumber
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import networkx as nx
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer, util
from lime.lime_text import LimeTextExplainer
from sklearn.metrics.pairwise import cosine_similarity
from pathlib import Path

# Configuration
# Directory that receives downloaded PDFs; created when this cell runs.
DOCUMENTS_FOLDER = './ML_Threat_Documents/'
Path(DOCUMENTS_FOLDER).mkdir(parents=True, exist_ok=True)
# Sentence-embedding model shared by the functions below that reference `model`.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Step 1: Advanced LLM Query Refinement Agent
def refine_query(initial_query):
    """Ask the completion model to rewrite *initial_query* for academic search.

    Returns the text of the first completion choice, stripped of
    surrounding whitespace.
    """
    request = {
        "engine": "text-davinci-003",
        "prompt": f"Refine this query for academic research retrieval: '{initial_query}'",
        "temperature": 0.1,
        "max_tokens": 150,
    }
    completion = openai.Completion.create(**request)
    first_choice = completion.choices[0]
    return first_choice.text.strip()

# Step 2: Semantic Scholar Retrieval Agent
def query_semantic_scholar(query, limit=5):
    """Search Semantic Scholar and return open-access PDF URLs.

    Args:
        query: Search string sent as the ``query`` parameter.
        limit: Maximum number of papers requested from the API.

    Returns:
        A list of the non-empty ``openAccessPdf.url`` values found in the
        ``data`` array of the JSON response (empty when ``data`` is absent).
    """
    api_url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": limit, "fields": "openAccessPdf"}
    payload = requests.get(api_url, params=params).json()

    pdf_urls = []
    for paper in payload.get("data") or []:
        # ``openAccessPdf`` may be null, or an object whose ``url`` is
        # missing or empty; such entries have nothing to download.
        pdf_info = paper.get('openAccessPdf') or {}
        url = pdf_info.get('url')
        if url:
            pdf_urls.append(url)
    return pdf_urls

# Step 3: Google Scholar Retrieval Agent
def query_google_scholar(refined_query, num_results=5):
    """Return the ``eprint_url`` of the first Google Scholar hits that have one.

    Up to *num_results* publications are pulled from the search iterator;
    those without an ``eprint_url`` are ignored.
    """
    hits = scholarly.search_pubs(refined_query)
    # next(..., None) yields None once the iterator is exhausted.
    examined = (next(hits, None) for _ in range(num_results))
    return [pub['eprint_url'] for pub in examined if pub and 'eprint_url' in pub]

# Step 4: PDF Download Agent
def download_pdfs(pdf_urls, folder):
    """Fetch every URL in *pdf_urls* and store it as ``document_<n>.pdf``.

    ``n`` is the zero-based position of the URL. A failure for one URL is
    reported with ``print`` and the loop continues with the next one.
    """
    position = 0
    for source_url in pdf_urls:
        try:
            reply = requests.get(source_url)
            reply.raise_for_status()
            target = os.path.join(folder, f'document_{position}.pdf')
            with open(target, 'wb') as handle:
                handle.write(reply.content)
            print(f"Downloaded: {target}")
        except Exception as e:
            print(f"Failed to download {source_url}: {e}")
        position += 1

# Step 5: Extract texts and embed PDFs
def extract_texts_and_embed(folder):
    """Extract the text of every PDF in *folder* and embed it.

    Args:
        folder: Directory holding the downloaded PDFs.

    Returns:
        ``(texts, embeddings)``: ``texts[i]`` is the newline-joined page text
        of the i-th readable PDF (in file-name order) and ``embeddings`` is
        ``model.encode(texts)``.
    """
    texts = []
    # Sorted so document indices are stable between runs.
    for pdf_file in sorted(os.listdir(folder)):
        if not pdf_file.lower().endswith('.pdf'):
            continue  # ignore stray non-PDF files
        pdf_path = os.path.join(folder, pdf_file)
        try:
            with pdfplumber.open(pdf_path) as pdf:
                # extract_text() is expensive; call it once per page.
                page_texts = [page.extract_text() for page in pdf.pages]
        except Exception as e:
            # Downloads are best-effort, so a saved file may not be a valid
            # PDF (e.g. an HTML landing page); skip it instead of aborting.
            print(f"Skipping unreadable PDF {pdf_path}: {e}")
            continue
        texts.append('\n'.join(t for t in page_texts if t))
    # Reuse the module-level model rather than loading a second copy.
    embeddings = model.encode(texts)
    return texts, embeddings

# Step 6: Rank and Explain Top Documents
def rank_and_explain(query, texts, embeddings):
    """Rank documents by similarity to *query* and explain the top three.

    Args:
        query: Search query text.
        texts: Document texts, aligned with *embeddings*.
        embeddings: Embeddings of *texts* produced by ``model``.

    Returns:
        A list of ``(text, explanation)`` pairs for the (up to) three most
        similar documents, best first. ``explanation`` is LIME's
        ``as_list()`` output with five ``(word, weight)`` pairs.
    """
    import numpy as np  # local import: numpy is not imported by this cell

    if not texts:
        return []

    query_emb = model.encode([query])
    scores = util.cos_sim(query_emb, embeddings)[0]
    ranked_indices = [int(i) for i in scores.argsort(descending=True)[:3]]

    def relevance_proba(samples):
        # LIME needs an (n_samples, n_classes) probability array. Raw
        # embeddings would make it "explain" one arbitrary embedding
        # column, so expose query similarity as a two-class probability.
        sims = util.cos_sim(model.encode(list(samples)), query_emb)[:, 0]
        relevant = np.clip(sims.cpu().numpy(), 0.0, 1.0)
        return np.column_stack([1.0 - relevant, relevant])

    explainer = LimeTextExplainer(class_names=["not relevant", "relevant"])
    results = []
    for i in ranked_indices:
        explanation = explainer.explain_instance(texts[i], relevance_proba, num_features=5)
        results.append((texts[i], explanation.as_list()))
    return results

# Step 7: Intelligent GNN Graph Visualization
def intelligent_gnn_visualization(texts, embeddings):
    """Draw a document-similarity graph positioned by a GCN projection.

    Documents whose cosine similarity exceeds 0.5 are connected. Node
    positions are the 2-D output of a freshly constructed ``GCNConv`` layer
    applied to the embeddings (the layer is not trained here).

    Args:
        texts: Document texts; only their count is used.
        embeddings: One embedding per document.
    """
    n_docs = len(texts)
    if n_docs == 0:
        print("No documents to visualize.")
        return

    sim_matrix = cosine_similarity(embeddings)
    pairs = [
        [i, j]
        for i in range(n_docs)
        for j in range(n_docs)
        if i != j and sim_matrix[i, j] > 0.5
    ]
    # reshape(-1, 2) keeps the (2, E) layout GCNConv requires even when no
    # pair passes the threshold (E == 0).
    edge_index = torch.tensor(pairs, dtype=torch.long).reshape(-1, 2).t().contiguous()
    x = torch.tensor(embeddings, dtype=torch.float)

    graph_data = Data(x=x, edge_index=edge_index)
    gcn = GCNConv(graph_data.num_features, 2)
    embedding_2d = gcn(graph_data.x, graph_data.edge_index).detach().numpy()

    G = nx.Graph()
    pos = {}
    for i, coords in enumerate(embedding_2d):
        node = f'Doc_{i}'
        pos[node] = (coords[0], coords[1])
        G.add_node(node, pos=pos[node])

    for i, j in pairs:
        G.add_edge(f'Doc_{i}', f'Doc_{j}', weight=sim_matrix[i, j])

    plt.figure(figsize=(10, 8))
    nx.draw(G, pos, with_labels=True, node_size=500, node_color='skyblue', edge_color='gray')
    plt.title("GNN-powered Intelligent Visualization")
    plt.show()

# Step 8: Orchestrator Agent (Detection, Reasoning & Response)
def orchestrator(query, texts, max_context_chars=8000):
    """Flag threat-related queries and ask the LLM for an analysis.

    Args:
        query: The (refined) user query; matched case-insensitively against
            a fixed list of threat terms.
        texts: Document texts used as context for the analysis.
        max_context_chars: Upper bound on the number of context characters
            placed in the prompt.

    Returns:
        ``"Threat detected! Recommended actions: ..."`` when a threat term is
        found in *query*, otherwise ``"No threats detected."``.
    """
    threat_terms = ("attack", "threat", "vulnerability", "exploit")
    lowered = query.lower()
    if not any(term in lowered for term in threat_terms):
        return "No threats detected."

    # The joined text of whole papers would overrun the completion model's
    # context window, so only a bounded prefix is sent.
    context = ' '.join(texts)[:max_context_chars]
    analysis = openai.Completion.create(
        engine="text-davinci-003",
        prompt=f"Analyze the security threat in the context: {context}",
        temperature=0.1,
        max_tokens=150
    ).choices[0].text.strip()
    return f"Threat detected! Recommended actions: {analysis}"

# Main Workflow
def main_agent(initial_query):
    """Run the full pipeline for *initial_query*.

    Refines the query, gathers PDF URLs from both search backends, downloads
    and embeds the documents, prints the top-ranked ones with explanations,
    draws the GNN graph and prints the orchestrator's decision.
    """
    search_terms = refine_query(initial_query)
    semantic_hits = query_semantic_scholar(search_terms)
    scholar_hits = query_google_scholar(search_terms)
    download_pdfs(semantic_hits + scholar_hits, DOCUMENTS_FOLDER)

    corpus, vectors = extract_texts_and_embed(DOCUMENTS_FOLDER)
    for doc, explanation in rank_and_explain(search_terms, corpus, vectors):
        print(f"Document snippet: {doc[:300]}\nExplanation: {explanation}\n")

    intelligent_gnn_visualization(corpus, vectors)
    print(f"Orchestrator Decision: {orchestrator(search_terms, corpus)}")

# Example Execution
# Runs the whole pipeline (LLM call, searches, downloads, plot) when the cell executes.
initial_query = "Tactics, Techniques, and Procedures in ML Security"
main_agent(initial_query)


In [None]:
# Intelligent Autonomous Multi-Agent RAG System with GNN Visualization and Re-ranking

import openai
import requests
import os
from scholarly import scholarly
import pdfplumber
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import networkx as nx
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer, util
from lime.lime_text import LimeTextExplainer
from sklearn.metrics.pairwise import cosine_similarity
from pathlib import Path

# Configuration
# Download target for PDFs; the directory is created when this cell runs.
DOCUMENTS_FOLDER = './ML_Threat_Documents/'
Path(DOCUMENTS_FOLDER).mkdir(parents=True, exist_ok=True)
# Embedding model referenced as `model` by the ranking and explanation steps.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Step 1: Advanced LLM Query Refinement Agent
def refine_query(initial_query):
    """Return an LLM-refined version of *initial_query* for paper retrieval.

    Sends one completion request and returns the stripped text of the first
    choice.
    """
    completion = openai.Completion.create(
        engine="text-davinci-003",
        prompt=f"Refine this query for academic research retrieval: '{initial_query}'",
        temperature=0.1,
        max_tokens=150
    )
    refined_text = completion.choices[0].text
    return refined_text.strip()

# Step 2: Semantic Scholar Retrieval Agent
def query_semantic_scholar(query, limit=5):
    """Return open-access PDF URLs for papers matching *query*.

    Args:
        query: Search string for the Semantic Scholar paper-search endpoint.
        limit: Maximum number of papers to request.

    Returns:
        The non-empty ``openAccessPdf.url`` values of the returned papers.
    """
    api_url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": limit, "fields": "openAccessPdf"}
    response = requests.get(api_url, params=params).json()

    papers = response.get("data") or []
    # A paper's ``openAccessPdf`` can be null or carry an empty/missing
    # ``url``; only usable links are returned.
    candidate_urls = ((p.get('openAccessPdf') or {}).get('url') for p in papers)
    return [url for url in candidate_urls if url]

# Step 3: Google Scholar Retrieval Agent
def query_google_scholar(refined_query, num_results=5):
    """Collect ``eprint_url`` links from up to *num_results* Scholar hits."""
    result_iter = scholarly.search_pubs(refined_query)
    collected = []
    remaining = num_results
    while remaining > 0:
        remaining -= 1
        publication = next(result_iter, None)  # None once results run out
        if not publication:
            continue
        if 'eprint_url' in publication:
            collected.append(publication['eprint_url'])
    return collected

# Step 4: PDF Download Agent
def download_pdfs(pdf_urls, folder):
    """Save each URL's response body to ``<folder>/document_<idx>.pdf``.

    Errors for an individual URL are printed and do not stop the remaining
    downloads.
    """
    def fetch_and_store(idx, pdf_url):
        # Download one URL and write the bytes to its numbered file.
        pdf_response = requests.get(pdf_url)
        pdf_response.raise_for_status()
        filename = os.path.join(folder, f'document_{idx}.pdf')
        with open(filename, 'wb') as file:
            file.write(pdf_response.content)
        return filename

    for idx, pdf_url in enumerate(pdf_urls):
        try:
            saved_as = fetch_and_store(idx, pdf_url)
            print(f"Downloaded: {saved_as}")
        except Exception as e:
            print(f"Failed to download {pdf_url}: {e}")

# Step 5: Extract texts and embed PDFs
def extract_texts_and_embed(folder):
    """Read all PDFs in *folder*, returning their texts and embeddings.

    Args:
        folder: Directory containing the PDF files.

    Returns:
        ``(texts, embeddings)`` where each text is the newline-joined content
        of the pages that yielded text, and ``embeddings`` is
        ``model.encode(texts)``. Files are processed in file-name order.
    """
    pdf_names = sorted(
        name for name in os.listdir(folder) if name.lower().endswith('.pdf')
    )
    texts = []
    for name in pdf_names:
        path = os.path.join(folder, name)
        try:
            with pdfplumber.open(path) as pdf:
                # One extract_text() call per page (the call is costly).
                extracted = [page.extract_text() for page in pdf.pages]
        except Exception as e:
            # A best-effort download may have stored something that is not
            # a PDF; report it and keep going.
            print(f"Skipping unreadable PDF {path}: {e}")
            continue
        texts.append('\n'.join(chunk for chunk in extracted if chunk))
    # Use the shared module-level model instead of loading a new instance.
    embeddings = model.encode(texts)
    return texts, embeddings

# Step 6: Reranking Agent
def rerank_documents(query, texts, embeddings, top_k=5):
    """Return the *top_k* documents most similar to *query*.

    Each result is a ``(text, score)`` pair, ordered from the highest cosine
    similarity downwards.
    """
    similarity_scores = util.cos_sim(model.encode([query]), embeddings)[0]
    best_first = similarity_scores.argsort(descending=True)
    reranked = []
    for doc_idx in best_first[:top_k]:
        reranked.append((texts[doc_idx], similarity_scores[doc_idx].item()))
    return reranked

# Step 7: Explain Top Documents
def explain_top_documents(top_documents):
    """Produce one LIME explanation (``as_list()``, 5 features) per document.

    *top_documents* is an iterable of ``(text, score)`` pairs; the score is
    not used.

    NOTE(review): the prediction function given to LIME returns raw sentence
    embeddings rather than class probabilities — confirm this is intended.
    """
    lime_explainer = LimeTextExplainer()

    def embed(samples):
        return model.encode(samples)

    results = []
    for document, _score in top_documents:
        explanation = lime_explainer.explain_instance(document, embed, num_features=5)
        results.append(explanation.as_list())
    return results

# Step 8: Intelligent GNN Graph Visualization
def intelligent_gnn_visualization(texts, embeddings):
    """Plot documents as a graph whose layout comes from a GCN layer.

    An edge joins two documents when their cosine similarity is above 0.5.
    Coordinates are the 2-D output of a newly constructed ``GCNConv`` layer
    (not trained in this function).

    Args:
        texts: Document texts; only ``len(texts)`` is used.
        embeddings: One embedding per document.
    """
    if len(texts) == 0:
        print("No documents to visualize.")
        return

    sim_matrix = cosine_similarity(embeddings)
    doc_ids = range(len(texts))
    linked = [
        (src, dst)
        for src in doc_ids
        for dst in doc_ids
        if src != dst and sim_matrix[src, dst] > 0.5
    ]
    # Without the reshape an empty edge list becomes a 1-D tensor, which
    # GCNConv cannot consume; (2, 0) is the valid "no edges" shape.
    edge_index = (
        torch.tensor(linked, dtype=torch.long).reshape(-1, 2).t().contiguous()
    )
    x = torch.tensor(embeddings, dtype=torch.float)

    graph_data = Data(x=x, edge_index=edge_index)
    gcn = GCNConv(graph_data.num_features, 2)
    embedding_2d = gcn(graph_data.x, graph_data.edge_index).detach().numpy()

    G = nx.Graph()
    for i, coords in enumerate(embedding_2d):
        G.add_node(f'Doc_{i}', pos=(coords[0], coords[1]))
    for src, dst in linked:
        G.add_edge(f'Doc_{src}', f'Doc_{dst}', weight=sim_matrix[src, dst])

    pos = {node: attrs['pos'] for node, attrs in G.nodes(data=True)}
    plt.figure(figsize=(10, 8))
    nx.draw(G, pos, with_labels=True, node_size=500, node_color='skyblue', edge_color='gray')
    plt.title("GNN-powered Intelligent Visualization")
    plt.show()

# Step 9: Orchestrator Agent (Detection, Reasoning & Response)
def orchestrator(query, texts, max_context_chars=8000):
    """Detect threat-related queries and request an LLM analysis.

    Args:
        query: Query text, checked case-insensitively for the terms
            "attack", "threat", "vulnerability" and "exploit".
        texts: Document texts supplying context for the analysis.
        max_context_chars: Maximum number of context characters included in
            the prompt.

    Returns:
        A "Threat detected! ..." message containing the model's analysis, or
        ``"No threats detected."`` when no term matches.
    """
    query_lower = query.lower()
    is_threat = any(
        term in query_lower
        for term in ["attack", "threat", "vulnerability", "exploit"]
    )
    if is_threat:
        # Entire papers do not fit in one completion prompt; cap the
        # context so the request stays within the model's limits.
        context = ' '.join(texts)[:max_context_chars]
        analysis = openai.Completion.create(
            engine="text-davinci-003",
            prompt=f"Analyze the security threat in the context: {context}",
            temperature=0.1,
            max_tokens=150
        ).choices[0].text.strip()
        return f"Threat detected! Recommended actions: {analysis}"
    return "No threats detected."

# Main Workflow
def main_agent(initial_query):
    """Run retrieval, re-ranking, explanation, visualization and orchestration.

    Prints a snippet, score and explanation for each top document, shows the
    GNN graph, then prints the orchestrator's decision.
    """
    refined = refine_query(initial_query)
    urls = query_semantic_scholar(refined) + query_google_scholar(refined)
    download_pdfs(urls, DOCUMENTS_FOLDER)

    texts, embeddings = extract_texts_and_embed(DOCUMENTS_FOLDER)
    top_docs = rerank_documents(refined, texts, embeddings)
    explanations = explain_top_documents(top_docs)
    # explanations has one entry per top document, in the same order.
    for rank, (doc, score) in enumerate(top_docs):
        explanation = explanations[rank]
        print(f"Document snippet: {doc[:300]}\nScore: {score}\nExplanation: {explanation}\n")

    intelligent_gnn_visualization(texts, embeddings)
    outcome = orchestrator(refined, texts)
    print(f"Orchestrator Decision: {outcome}")

# Example Execution
# Executes the re-ranking pipeline end to end when the cell runs.
initial_query = "Tactics, Techniques, and Procedures in ML Security"
main_agent(initial_query)


In [None]:
''' Enhanced Graph Visualization Agent with Intelligence
 Below, we add intelligence to the Graph Visualization by incorporating insights from the Generative Reasoning Agent:

 What makes it Intelligent? 
 - It integrates the Generative Reasoner Agent to annotate each node (document) with meaningful insights extracted through NLP.
 - Uncovers subtle and nuanced patterns in the data using semantic similarity enhanced by contextual reasoning from the LLM.

'''

import networkx as nx
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer, util

# Embedding model used by intelligent_graph_visualization below.
model = SentenceTransformer('all-MiniLM-L6-v2')

def intelligent_graph_visualization(texts):
    """Draw a similarity graph whose nodes carry LLM-generated insights.

    Each document becomes a node ``Doc_<n>`` (1-based). Two nodes are linked
    when their cosine similarity is at least 0.5, and the edge is labelled
    with that similarity. Every node is annotated with the first 50
    characters of the answer returned by ``generative_reasoner`` for that
    document.

    Args:
        texts: Document texts to embed, compare and annotate.
    """
    if not texts:
        print("No documents to visualize.")
        return

    embeddings = model.encode(texts)
    sim_matrix = util.cos_sim(embeddings, embeddings).numpy()
    node_names = [f'Doc_{idx+1}' for idx in range(len(texts))]

    G = nx.Graph()
    G.add_nodes_from(node_names)

    # Add edges based on high similarity (threshold of 0.5)
    threshold = 0.5
    for i in range(len(texts)):
        for j in range(i + 1, len(texts)):
            if sim_matrix[i, j] >= threshold:
                G.add_edge(node_names[i], node_names[j], weight=sim_matrix[i, j])

    # Use Generative Reasoner to annotate nodes intelligently
    node_labels = {}
    for name, doc in zip(node_names, texts):
        insight = generative_reasoner(
            "Identify the TTPs, vulnerabilities, and lifecycle stages mentioned.", [doc]
        )
        # Only mark truncation when something was actually cut off.
        summary = insight if len(insight) <= 50 else insight[:50] + "..."
        node_labels[name] = f"{name}\n{summary}"

    plt.figure(figsize=(12, 8))
    pos = nx.spring_layout(G, k=0.3)
    # with_labels=False: the annotated labels are drawn just below; drawing
    # the default labels as well would print two texts over each node.
    nx.draw(G, pos, with_labels=False, node_color='skyblue', edge_color='gray', node_size=1500)
    nx.draw_networkx_labels(G, pos, labels=node_labels, font_size=8)

    edge_labels = nx.get_edge_attributes(G, 'weight')
    nx.draw_networkx_edge_labels(G, pos, edge_labels={k: f"{v:.2f}" for k, v in edge_labels.items()})

    plt.title("Intelligent Visualization of Relationships (TTPs, Vulnerabilities, ML Lifecycles)")
    plt.show()


In [None]:
'''
A complete Multi-Agent RAG Workflow
'''
def main_agent(initial_query):
    """Run the complete multi-agent RAG workflow for *initial_query*.

    Steps: refine the query, retrieve and download PDFs, extract and embed
    their text, print the top-ranked documents with explanations, draw the
    annotated relationship graph, and print the orchestrator's decision.
    """
    # Step 1: Refine Query
    refined_query = refine_query(initial_query)

    # Step 2 & 3: Retrieve PDFs (Semantic Scholar + Google Scholar)
    pdf_urls = query_semantic_scholar(refined_query) + query_google_scholar(refined_query)

    # Step 4: Download PDFs
    download_pdfs(pdf_urls, DOCUMENTS_FOLDER)

    # Step 5: Extract texts and embed PDFs
    texts, embeddings = extract_texts_and_embed(DOCUMENTS_FOLDER)

    # Step 6: Rank and explain top documents
    ranked_docs_with_explanations = rank_and_explain(refined_query, texts, embeddings)

    for doc, explanation in ranked_docs_with_explanations:
        print(f"Document snippet: {doc[:300]}")
        print(f"Explanation: {explanation}\n")

    # Step 7: Intelligent graph visualization
    intelligent_graph_visualization(texts)

    # Step 8: Proactive orchestration (threat detection & response)
    # The three-argument orchestrator takes (query, texts, embeddings);
    # passing the embeddings second would hand them to the text-joining step.
    decision = orchestrator(refined_query, texts, embeddings)
    print(f"Orchestrator Decision: {decision}")


In [None]:
# Intelligent Autonomous Multi-Agent RAG System

import openai
import requests
import os
import networkx as nx
import matplotlib.pyplot as plt
from scholarly import scholarly
from sentence_transformers import SentenceTransformer, util
from lime.lime_text import LimeTextExplainer
from sklearn.metrics.pairwise import cosine_similarity
import pdfplumber
from pathlib import Path

# Configuration
# Folder for downloaded PDFs, created when the cell runs.
DOCUMENTS_FOLDER = './ML_Threat_Documents/'
Path(DOCUMENTS_FOLDER).mkdir(parents=True, exist_ok=True)
# Shared embedding model used by intelligent_graph_visualization.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Step 1: LLM Query Refinement Agent
def refine_query_llm(initial_query):
    """Return the completion model's refinement of *initial_query*."""
    refinement_prompt = (
        f"Refine this query for academic research retrieval: '{initial_query}'"
    )
    return openai.Completion.create(
        engine="text-davinci-003",
        prompt=refinement_prompt,
        temperature=0.1,
        max_tokens=150,
    ).choices[0].text.strip()

# Step 2: Semantic Scholar Agent
def query_semantic_scholar(query, limit=3):
    """Look up papers on Semantic Scholar and return their open-access PDF links.

    Args:
        query: Search string.
        limit: Maximum number of papers to request.

    Returns:
        A list of non-empty ``openAccessPdf.url`` strings.
    """
    api_url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": limit, "fields": "openAccessPdf"}
    response = requests.get(api_url, params=params).json()

    urls = []
    for paper in response.get("data") or []:
        open_access = paper.get('openAccessPdf')
        if not open_access:
            continue
        # The object can be present while its url is empty or absent.
        link = open_access.get('url')
        if link:
            urls.append(link)
    return urls

# Step 3: Query Google Scholar
def query_google_scholar(refined_query, num_results=3):
    """Return e-print URLs from up to *num_results* Google Scholar results."""
    publications = scholarly.search_pubs(refined_query)
    links = []
    for _attempt in range(num_results):
        entry = next(publications, None)
        if not entry:
            # Iterator exhausted (or empty entry): nothing to record.
            continue
        if 'eprint_url' not in entry:
            continue
        links.append(entry['eprint_url'])
    return links

# Download PDFs
def download_pdfs(pdf_urls, folder):
    """Download each URL into *folder* as ``document_<idx>.pdf``.

    ``idx`` is the URL's zero-based position. Failures are printed and the
    loop proceeds to the next URL.
    """
    for idx, link in enumerate(pdf_urls):
        destination = None
        try:
            answer = requests.get(link)
            answer.raise_for_status()
            destination = os.path.join(folder, f'document_{idx}.pdf')
            with open(destination, 'wb') as out:
                out.write(answer.content)
            print(f"Downloaded: {destination}")
        except Exception as e:
            print(f"Failed to download {link}: {e}")

# Extract texts and embed PDFs
def extract_texts_and_embed(folder):
    """Return the text and embedding of every readable PDF in *folder*.

    Args:
        folder: Directory with the downloaded PDFs.

    Returns:
        ``(texts, embeddings)``; ``texts`` holds one newline-joined string per
        PDF (file-name order) and ``embeddings`` is ``model.encode(texts)``.
    """
    texts = []
    for entry in sorted(os.listdir(folder)):
        if not entry.lower().endswith('.pdf'):
            continue  # only PDF files are parsed
        full_path = os.path.join(folder, entry)
        try:
            with pdfplumber.open(full_path) as pdf:
                pages = []
                for page in pdf.pages:
                    # Extract once and reuse: the call is expensive.
                    content = page.extract_text()
                    if content:
                        pages.append(content)
        except Exception as e:
            # Saved downloads are not guaranteed to be valid PDFs.
            print(f"Skipping unreadable PDF {full_path}: {e}")
            continue
        texts.append('\n'.join(pages))
    # The module-level model is reused; constructing another is wasteful.
    embeddings = model.encode(texts)
    return texts, embeddings

# Generative Reasoning Agent
def generative_reasoner(query, context_docs, max_context_chars=8000):
    """Answer *query* with the completion model, grounded in *context_docs*.

    Args:
        query: The user question.
        context_docs: Iterable of document strings used as context.
        max_context_chars: Maximum number of context characters placed in the
            prompt.

    Returns:
        The stripped text of the first completion choice.
    """
    # Whole papers joined together do not fit in a single completion
    # prompt, so only a bounded prefix of the context is sent.
    context = ' '.join(context_docs)[:max_context_chars]
    prompt = f"Context: {context}\n\nUser Query: {query}\n\nDetailed Answer:"
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        temperature=0.2,
        max_tokens=200
    )
    return response.choices[0].text.strip()

# Detection Agent
def detect_attack(query):
    """Return True when *query* contains an attack keyword (case-insensitive)."""
    lowered = query.lower()
    for keyword in ("evasion", "extraction", "backdoor", "poison", "gradient masking"):
        if keyword in lowered:
            return True
    return False

# Orchestrator Agent
def orchestrator(query, texts, embeddings):
    """Return a threat report for *query*, or a no-threat message.

    When ``detect_attack(query)`` is true, the answer of
    ``generative_reasoner(query, texts)`` is embedded in the report.
    *embeddings* is accepted but not used.
    """
    if not detect_attack(query):
        return "No threats detected."
    findings = generative_reasoner(query, texts)
    return f"Threat detected! Actions recommended: {findings}"

# Intelligent Graph Visualization
def intelligent_graph_visualization(texts):
    """Plot a graph linking documents whose cosine similarity exceeds 0.5.

    Nodes are named ``Doc_<n>`` (1-based); each edge is labelled with the
    similarity formatted to two decimals.
    """
    doc_vectors = model.encode(texts)
    similarities = cosine_similarity(doc_vectors)
    total = len(texts)
    G = nx.Graph()

    for i in range(total):
        source = f"Doc_{i+1}"
        G.add_node(source)
        for j in range(i + 1, total):
            score = similarities[i, j]
            if score > 0.5:
                G.add_edge(source, f"Doc_{j+1}", weight=score)

    plt.figure(figsize=(10, 8))
    layout = nx.spring_layout(G)
    nx.draw(G, layout, with_labels=True, node_size=700, node_color="skyblue")
    weight_labels = {}
    for u, v, attrs in G.edges(data=True):
        weight_labels[(u, v)] = f"{attrs['weight']:.2f}"
    nx.draw_networkx_edge_labels(G, layout, edge_labels=weight_labels)
    plt.title("Intelligent Relationship Graph")
    plt.show()

# Main Workflow
def main_agent(initial_query):
    """Run retrieval, threat orchestration and graph visualization.

    Refines *initial_query*, downloads the PDFs found by both search
    backends, prints the orchestrator's decision and draws the relationship
    graph.
    """
    # This cell names its refinement step refine_query_llm.
    refined_query = refine_query_llm(initial_query)
    pdf_urls = query_semantic_scholar(refined_query) + query_google_scholar(refined_query)
    download_pdfs(pdf_urls, DOCUMENTS_FOLDER)
    texts, embeddings = extract_texts_and_embed(DOCUMENTS_FOLDER)
    orchestrator_decision = orchestrator(refined_query, texts, embeddings)
    print(f"Orchestrator Decision: {orchestrator_decision}")
    # The graph function defined in this cell is intelligent_graph_visualization.
    intelligent_graph_visualization(texts)

# Example Execution
# Runs this cell's pipeline (orchestration followed by the graph) on execution.
initial_query = "Tactics, Techniques, and Procedures in ML Security"
main_agent(initial_query)


In [None]:
import openai

# LLM-based query refinement
def refine_query_with_llm(initial_query):
    """Expand *initial_query* into a detailed academic query via the LLM.

    The prompt frames the model as a cybersecurity/ML specialist and asks
    for a query targeting adversarial threats across the ML lifecycle.
    Returns the stripped text of the first completion choice.
    """
    instructions = "".join([
        "You're an AI specialized in cybersecurity and machine learning.\n",
        "Refine and expand the following query into a structured and detailed academic-style query ",
        "that will retrieve research papers specifically about adversarial threats, vulnerabilities, ",
        "and attacks throughout the ML lifecycle:\n\n",
    ])
    prompt = f"{instructions}Initial Query: {initial_query}\n\nRefined Query:"

    completion = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        temperature=0.1,
        max_tokens=150
    )
    return completion.choices[0].text.strip()

# Example use
initial_query = "Tactics, Techniques, and Procedures in ML security"
# Call the function defined in this cell; it is named refine_query_with_llm.
refined_query = refine_query_with_llm(initial_query)
print(f"LLM Refined Query: {refined_query}")


### Semantic Scholar search

In [None]:
# Complete Multi-Agent RAG System with Semantic Scholar Integration

import openai
import requests
import os
import json
from urllib.parse import urlencode
from pathlib import Path

# Configuration
# Paper-search endpoint queried by query_semantic_scholar.
SEMANTIC_SCHOLAR_API = "https://api.semanticscholar.org/graph/v1/paper/search"
# Folder that receives the downloaded PDFs.
DOCUMENTS_FOLDER = './ML_Threat_Documents/'

Path(DOCUMENTS_FOLDER).mkdir(parents=True, exist_ok=True)

# Step 1: Query Refinement via LLM

def refine_query(initial_query):
    """Turn *initial_query* into a structured academic-database query via the LLM."""
    completion_args = dict(
        engine="text-davinci-003",
        prompt=f"Refine the following query into a structured search query suitable for academic databases: '{initial_query}'",
        temperature=0.1,
        max_tokens=150,
    )
    answer = openai.Completion.create(**completion_args)
    return answer.choices[0].text.strip()

# Semantic Scholar Query Agent
def query_semantic_scholar(refined_query, limit=5):
    """Query Semantic Scholar and return open-access PDF URLs.

    Args:
        refined_query: Search string.
        limit: Maximum number of papers to request.

    Returns:
        A list of non-empty ``openAccessPdf.url`` strings.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    params = {
        "query": refined_query,
        "limit": limit,
        "fields": "url,openAccessPdf"
    }
    # Let requests encode the query string rather than formatting it by hand.
    response = requests.get(SEMANTIC_SCHOLAR_API, params=params)
    response.raise_for_status()
    data = response.json()

    urls = []
    for paper in data.get("data") or []:
        # openAccessPdf may be null or have an empty/missing url.
        pdf_url = (paper.get("openAccessPdf") or {}).get("url")
        if pdf_url:
            urls.append(pdf_url)
    return urls

# PDF Download Agent
def _pdf_filename(url, index):
    """Derive a safe, unique ``.pdf`` file name for *url*."""
    from urllib.parse import urlparse  # local: the cell only imports urlencode

    # Use the path component only, so query strings and fragments never end
    # up in the file name.
    stem = os.path.basename(urlparse(url).path.rstrip('/'))
    if stem.lower().endswith('.pdf'):
        stem = stem[:-4]
    if not stem:
        stem = 'document'
    # The index prefix keeps two URLs with the same basename from colliding.
    return f"{index}_{stem}.pdf"


def download_pdfs(urls, folder):
    """Download every URL in *urls* into *folder*.

    The folder is created if needed. File names come from the URL path,
    prefixed with the URL's position. A failed download is printed and
    skipped.

    Args:
        urls: Iterable of PDF URLs.
        folder: Destination directory.
    """
    Path(folder).mkdir(parents=True, exist_ok=True)
    for index, url in enumerate(urls):
        try:
            pdf_response = requests.get(url)
            pdf_response.raise_for_status()
            filename = os.path.join(folder, _pdf_filename(url, index))
            with open(filename, 'wb') as f:
                f.write(pdf_response.content)
            print(f"Downloaded and saved: {filename}")
        except Exception as e:
            print(f"Failed to download {url}: {e}")

# Main workflow
def main_agent(initial_query, folder=Path(DOCUMENTS_FOLDER), limit=5):
    """Refine the query, search Semantic Scholar and download the PDFs found.

    Progress messages are printed before each stage. *folder* is created if
    it does not exist; *limit* is forwarded to the search.
    """
    refined = refine_query(initial_query)
    print(f"Refined query: {refined}")

    print("Querying Semantic Scholar...")
    found_urls = query_semantic_scholar(refined, limit=limit)

    print("Downloading PDFs...")
    target_dir = Path(folder)
    target_dir.mkdir(parents=True, exist_ok=True)
    download_pdfs(found_urls, folder)

# Example execution
initial_query = "TTPs for adversarial ML throughout the lifecycle"
DOCUMENTS_FOLDER = './ML_Threat_Documents/'
limit = 5
# Pass the settings explicitly: main_agent's defaults were bound when it was
# defined, so assigning these globals alone would not affect the call.
main_agent(initial_query, folder=DOCUMENTS_FOLDER, limit=limit)


### Google Scholar search

In [None]:
# Complete Multi-Agent RAG System with Google Scholar Integration

import openai
import requests
import os
from scholarly import scholarly
import urllib.request
from pathlib import Path

# Configuration
# Folder that receives downloaded PDFs; created when the cell runs.
DOCUMENTS_FOLDER = './ML_Threat_Documents/'
Path(DOCUMENTS_FOLDER).mkdir(parents=True, exist_ok=True)

# LLM-based query refinement
def refine_query(initial_query):
    """Return an LLM-produced structured search query for *initial_query*.

    Uses at most 100 completion tokens.
    """
    text = f"Refine the following query into a structured search query suitable for academic databases: '{initial_query}'"
    result = openai.Completion.create(
        engine="text-davinci-003", prompt=text, temperature=0.1, max_tokens=100
    )
    top_choice = result.choices[0]
    return top_choice.text.strip()

# Query Google Scholar using the scholarly library
def query_google_scholar(refined_query, num_results=5):
    """Collect download URLs from the first Google Scholar results.

    Up to *num_results* publications are examined. A publication's
    ``eprint_url`` is preferred; when it has none, its publication-page URL
    (``pub_url``) is used if present.

    Args:
        refined_query: Search string passed to ``scholarly.search_pubs``.
        num_results: Maximum number of publications to examine.

    Returns:
        A list of URL strings, in result order.
    """
    search_results = scholarly.search_pubs(refined_query)
    pdf_urls = []

    for _ in range(num_results):
        pub = next(search_results, None)
        if pub is None:
            break  # fewer results than requested

        # The e-print link is used unchanged: blindly appending ".pdf"
        # corrupts links that carry a query string or already serve a PDF.
        url = pub.get('eprint_url')
        if not url:
            # NOTE(review): scholarly appears to expose pub_url at the top
            # level of a publication — confirm; the nested lookup is kept
            # for compatibility with the previous code.
            url = pub.get('pub_url') or pub.get('bib', {}).get('pub_url')
        if url:
            pdf_urls.append(url)
    return pdf_urls

# Download PDFs
def download_pdfs(pdf_urls, folder):
    """Download each URL into *folder* as ``document_<n>.pdf`` (n from 1).

    A failed download is printed and the loop continues.

    Args:
        pdf_urls: Iterable of URLs to fetch.
        folder: Destination directory (must already exist).
    """
    # enumerate already starts at 1, so the counter is used as-is; adding
    # one more made the first file document_2.pdf.
    for number, pdf_url in enumerate(pdf_urls, 1):
        try:
            response = requests.get(pdf_url)
            response.raise_for_status()
            filename = os.path.join(folder, f'document_{number}.pdf')
            with open(filename, 'wb') as f:
                f.write(response.content)
            print(f"Downloaded and saved: {filename}")
        except Exception as e:
            print(f"Failed to download {pdf_url}: {e}")

# Main Workflow
def main_agent(initial_query, folder=DOCUMENTS_FOLDER):
    """Refine the query, search Google Scholar and download the results.

    Args:
        initial_query: Raw user query.
        folder: Directory the PDFs are written to.
    """
    refined_query = refine_query(initial_query)
    print(f"Refined query: {refined_query}")

    print("Querying Google Scholar...")
    pdf_urls = query_google_scholar(refined_query)

    print("Downloading PDFs...")
    # Honour the caller's folder instead of always using the global default.
    download_pdfs(pdf_urls, folder)

# Example usage
# Runs the Google Scholar retrieval-and-download flow when the cell executes.
initial_query = "TTPs for adversarial ML"
main_agent(initial_query)


In [None]:
# Comprehensive Multi-Agent Advanced RAG System with Semantic & Google Scholar Integration

import openai
import requests
import os
from scholarly import scholarly
import pdfplumber
from sentence_transformers import SentenceTransformer, util
from lime.lime_text import LimeTextExplainer
import shap
from pathlib import Path

# Configuration
# Download folder for PDFs; created when the cell runs.
DOCUMENTS_FOLDER = './ML_Threat_Documents/'
Path(DOCUMENTS_FOLDER).mkdir(parents=True, exist_ok=True)
# Embedding model used by extract_texts_and_embed and rank_and_explain.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Step 1: Query Refinement via LLM

def refine_query(initial_query):
    """Ask the LLM for a structured, database-ready form of *initial_query*."""
    template = "Refine the following query into a structured search query suitable for academic databases: '{}'"
    llm_reply = openai.Completion.create(
        engine="text-davinci-003",
        prompt=template.format(initial_query),
        temperature=0.1,
        max_tokens=150
    )
    return llm_reply.choices[0].text.strip()

# Step 2: Query Semantic Scholar
def query_semantic_scholar(refined_query, limit=5):
    """Return open-access PDF URLs for papers matching *refined_query*.

    Args:
        refined_query: Search string.
        limit: Maximum number of papers to request.

    Returns:
        A list of non-empty ``openAccessPdf.url`` strings.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    api_url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": refined_query, "limit": limit, "fields": "url,openAccessPdf"}
    response = requests.get(api_url, params=params)
    response.raise_for_status()
    papers = response.json().get("data") or []

    # openAccessPdf may be null, or present with an empty/missing url; keep
    # only links that can actually be downloaded.
    links = [(paper.get("openAccessPdf") or {}).get("url") for paper in papers]
    return [link for link in links if link]

# Step 3: Query Google Scholar
def query_google_scholar(refined_query, num_results=5):
    """Return ``eprint_url`` values from up to *num_results* Scholar results.

    Stops early when the search iterator is exhausted.
    """
    result_stream = scholarly.search_pubs(refined_query)
    found = []
    for _ in range(num_results):
        try:
            publication = next(result_stream)
        except StopIteration:
            break
        if 'eprint_url' in publication:
            found.append(publication['eprint_url'])
    return found

# Step 4: Download PDFs
def download_pdfs(pdf_urls, folder):
    """Write each URL's content to ``<folder>/document_<n>.pdf``, n from 1.

    Failures are printed per URL; the remaining downloads still run.
    """
    for number, link in enumerate(pdf_urls, start=1):
        try:
            reply = requests.get(link)
            reply.raise_for_status()
            out_path = os.path.join(folder, f'document_{number}.pdf')
            with open(out_path, 'wb') as sink:
                sink.write(reply.content)
            print(f"Downloaded: {out_path}")
        except Exception as e:
            print(f"Failed to download {link}: {e}")

# Step 5: Extract and Embed PDFs
def extract_texts_and_embed(folder):
    """Extract and embed the text of every readable PDF in *folder*.

    Args:
        folder: Directory containing the PDFs.

    Returns:
        ``(texts, embeddings)``; each text is the newline-joined content of
        the pages that produced text, and ``embeddings`` is
        ``model.encode(texts)``. Files are handled in file-name order.
    """
    texts = []
    for pdf_file in sorted(os.listdir(folder)):
        if not pdf_file.lower().endswith('.pdf'):
            continue
        pdf_path = os.path.join(folder, pdf_file)
        try:
            with pdfplumber.open(pdf_path) as pdf:
                # Call extract_text() once per page; it is expensive.
                page_texts = [page.extract_text() for page in pdf.pages]
        except Exception as e:
            # Best-effort downloads can leave non-PDF content on disk.
            print(f"Skipping unreadable PDF {pdf_path}: {e}")
            continue
        # Join with a newline so the last word of one page is not fused
        # with the first word of the next.
        texts.append('\n'.join(t for t in page_texts if t))
    embeddings = model.encode(texts)
    return texts, embeddings

# Step 6: Document Ranking and Explainability
def rank_and_explain(query, texts, embeddings):
    """Rank documents against *query* and explain the three best matches.

    Args:
        query: Search query text.
        texts: Document texts, aligned with *embeddings*.
        embeddings: Embeddings of *texts* produced by ``model``.

    Returns:
        ``(top_texts, explanations)``: the (up to) three most similar texts,
        best first, and for each the LIME ``as_list()`` explanation.
    """
    import numpy as np  # local import: numpy is not imported by this cell

    if not texts:
        return [], []

    query_embedding = model.encode([query])
    scores = util.cos_sim(query_embedding, embeddings)[0]
    top_indices = [int(i) for i in scores.argsort(descending=True)[:3]]

    def relevance_proba(samples):
        # LIME expects class probabilities of shape (n_samples, n_classes);
        # raw embeddings would make it explain one arbitrary embedding
        # column. Similarity to the query is exposed as P(relevant).
        sims = util.cos_sim(model.encode(list(samples)), query_embedding)[:, 0]
        relevant = np.clip(sims.cpu().numpy(), 0.0, 1.0)
        return np.column_stack([1.0 - relevant, relevant])

    explainer = LimeTextExplainer(class_names=["not relevant", "relevant"])
    explanations = []
    for idx in top_indices:
        exp = explainer.explain_instance(texts[idx], relevance_proba)
        explanations.append(exp.as_list())

    return [texts[i] for i in top_indices], explanations

# Main Workflow
def main_agent(initial_query):
    """Retrieve, download, rank and explain documents for *initial_query*.

    Prints the first 500 characters of each top document followed by its
    explanation.
    """
    refined = refine_query(initial_query)
    semantic_urls = query_semantic_scholar(refined)
    scholar_urls = query_google_scholar(refined)
    download_pdfs(semantic_urls + scholar_urls, DOCUMENTS_FOLDER)

    texts, embeddings = extract_texts_and_embed(DOCUMENTS_FOLDER)
    top_texts, explanations = rank_and_explain(refined, texts, embeddings)

    print("Top documents and explanations:")
    for pair in zip(top_texts, explanations):
        document, reasons = pair
        print(document[:500])
        print("Explanation:", reasons)

# Example usage
# Runs the rank-and-explain pipeline when the cell executes.
initial_query = "TTPs for adversarial ML security"
main_agent(initial_query)


In [None]:
# Enhanced Multi-Agent RAG System with Graph Visualization

import openai
import requests
import os
import networkx as nx
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer, util
from lime.lime_text import LimeTextExplainer
from sklearn.metrics.pairwise import cosine_similarity
from scholarly import scholarly
import pdfplumber
from pathlib import Path

# Configuration
# Download folder for PDFs; created when the cell runs.
DOCUMENTS_FOLDER = './ML_Threat_Documents/'
Path(DOCUMENTS_FOLDER).mkdir(parents=True, exist_ok=True)
# Embedding model shared by the extraction, ranking and graph steps.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Step 1: Query Refinement (hard-coded stand-in for the LLM call)
def refine_query(initial_query):
    """Return the fixed boolean search expression used by this example.

    *initial_query* is accepted for interface compatibility but not used.
    """
    topic = '"Tactics, Techniques, and Procedures in Machine Learning Security"'
    attack_terms = (
        '"adversarial attacks"',
        '"data poisoning"',
        '"evasion attack"',
        '"model tampering"',
        '"model inversion"',
        '"backdoor attack"',
        '"adversarial example"',
    )
    return f"(({topic}) AND ({' OR '.join(attack_terms)}))"

# Step 2: Query Semantic Scholar
# (Implementation not provided here, assumed from previous examples)

# Step 3: Query Google Scholar
def query_google_scholar(refined_query, num_results=5):
    """Gather e-print URLs from at most *num_results* Google Scholar hits.

    The loop ends early if the search runs out of results.
    """
    exhausted = object()  # sentinel marking the end of the result iterator
    hits = scholarly.search_pubs(refined_query)
    eprint_links = []
    for _ in range(num_results):
        hit = next(hits, exhausted)
        if hit is exhausted:
            break
        if 'eprint_url' in hit:
            eprint_links.append(hit['eprint_url'])
    return eprint_links

# PDF Download Agent
def download_pdfs(pdf_urls, folder):
    """Store each URL's content in *folder* as ``doc_<i>.pdf`` (i from 0).

    A failing URL is reported via ``print`` and skipped.
    """
    for i, pdf_url in enumerate(pdf_urls):
        short_name = f'doc_{i}.pdf'
        try:
            fetched = requests.get(pdf_url)
            fetched.raise_for_status()
            with open(Path(folder) / short_name, 'wb') as stream:
                stream.write(fetched.content)
            print(f"Downloaded: {short_name}")
        except Exception as e:
            print(f"Failed to download {pdf_url}: {e}")

# Extract texts and embed PDFs
# Embedding model used by extract_texts_and_embed (the same model name is
# also loaded in this cell's configuration section).
model = SentenceTransformer('all-MiniLM-L6-v2')
def extract_texts_and_embed(folder):
    """Extract and embed the text of every readable PDF in *folder*.

    Args:
        folder: Directory containing the PDFs.

    Returns:
        ``(texts, embeddings)``; each text is the newline-joined page content
        of one PDF (file-name order), ``embeddings`` is ``model.encode(texts)``.
    """
    texts = []
    for pdf_file in sorted(os.listdir(folder)):
        if not pdf_file.lower().endswith('.pdf'):
            continue  # skip anything that is not a PDF file
        pdf_path = Path(folder) / pdf_file
        try:
            with pdfplumber.open(pdf_path) as pdf:
                # extract_text() is costly, so run it once per page.
                page_texts = [page.extract_text() for page in pdf.pages]
        except Exception as e:
            # A best-effort download may not have produced a valid PDF.
            print(f"Skipping unreadable PDF {pdf_path}: {e}")
            continue
        texts.append('\n'.join(t for t in page_texts if t))
    embeddings = model.encode(texts)
    return texts, embeddings

# Rank and Explain Agent
def rank_and_explain(query, texts, embeddings):
    """Rank documents by similarity to *query* and explain the top three.

    Args:
        query: Search query text.
        texts: Document texts, aligned with *embeddings*.
        embeddings: Embeddings of *texts* produced by ``model``.

    Returns:
        A list of ``(text, explanation)`` pairs for the (up to) three best
        documents; ``explanation`` is LIME's ``as_list()`` output with five
        ``(word, weight)`` pairs.
    """
    import numpy as np  # local import: numpy is not imported by this cell

    if not texts:
        return []

    query_emb = model.encode([query])
    # The similarity must use the variable assigned above (query_emb); the
    # previous name query_embedding was never defined.
    scores = util.cos_sim(query_emb, embeddings)[0]
    top_indices = [int(i) for i in scores.argsort(descending=True)[:3]]

    def relevance_proba(samples):
        # LIME requires an (n_samples, n_classes) probability array; raw
        # embeddings would make it explain an arbitrary embedding column.
        sims = util.cos_sim(model.encode(list(samples)), query_emb)[:, 0]
        relevant = np.clip(sims.cpu().numpy(), 0.0, 1.0)
        return np.column_stack([1.0 - relevant, relevant])

    explainer = LimeTextExplainer(class_names=["not relevant", "relevant"])
    ranked = []
    for i in top_indices:
        explanation = explainer.explain_instance(texts[i], relevance_proba, num_features=5)
        ranked.append((texts[i], explanation.as_list()))
    return ranked

# Graph Visualization Agent
def visualize_relationships(texts, embeddings=None, threshold=0.5):
    """Plot a graph linking documents whose embeddings are similar.

    Every document becomes a node ``Doc_<index>``; two documents are joined
    by an edge, labelled with their cosine similarity, when that similarity
    exceeds ``threshold``.

    Args:
        texts: Document texts to compare.
        embeddings: Optional precomputed embeddings aligned with ``texts``;
            computed with ``model.encode(texts)`` when omitted.
        threshold: Minimum cosine similarity (exclusive) for drawing an edge.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    if not texts:
        # Nothing to compare or draw.
        return

    if embeddings is None:
        embeddings = model.encode(texts)
    sim_matrix = cosine_similarity(embeddings)

    G = nx.Graph()
    # Add all documents up front so unconnected ones still appear in the plot.
    G.add_nodes_from(f"Doc_{i}" for i in range(len(texts)))
    for i, row in enumerate(sim_matrix):
        for j, sim_score in enumerate(row):
            # The matrix is symmetric, so each pair is visited once (j > i);
            # this also skips the self-similarity on the diagonal.
            if j > i and sim_score > threshold:
                G.add_edge(f"Doc_{i}", f"Doc_{j}", weight=sim_score)

    plt.figure(figsize=(12, 8))
    pos = nx.spring_layout(G, k=0.3)
    nx.draw(G, pos, with_labels=True, node_color='skyblue', edge_color='gray')
    labels = nx.get_edge_attributes(G, 'weight')
    nx.draw_networkx_edge_labels(G, pos, edge_labels={k: f'{v:.2f}' for k, v in labels.items()})
    plt.title("Relationships Between TTPs, Vulnerabilities, and ML lifecycle stages")
    plt.show()

# Main Workflow
def main_agent(initial_query):
    """Run the retrieval pipeline end to end for one user query.

    Refines the query, gathers PDF links from Semantic Scholar and Google
    Scholar, downloads them into ``DOCUMENTS_FOLDER``, embeds the extracted
    texts, prints the top-ranked documents with their explanations and
    finally triggers the graph visualization.

    Args:
        initial_query: Raw user query.
    """
    search_query = refine_query(initial_query)

    # Semantic Scholar links come first, followed by Google Scholar links.
    semantic_links = query_semantic_scholar(search_query)
    scholar_links = query_google_scholar(search_query)
    download_pdfs(semantic_links + scholar_links, DOCUMENTS_FOLDER)

    texts, embeddings = extract_texts_and_embed(DOCUMENTS_FOLDER)
    ranked = rank_and_explain(search_query, texts, embeddings)

    print("Top Documents and Explanations:")
    for doc_text, explanation in ranked:
        print(doc_text[:300])
        print("Explanation:", explanation)

    # Graph Visualization
    # NOTE(review): `visualize_graph` is not defined in the visible part of
    # this cell (only `visualize_relationships(texts)` is) — confirm that it
    # is provided by an earlier cell.
    visualize_graph(texts, embeddings)

# Example Execution
initial_query = "TTPs in Machine Learning Security"
main_agent(initial_query)


In [None]:
# Complete Agentic RAG System Example

import openai
import faiss
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Mock Document store: represents simplified knowledge-base.
# Each entry is one short description of an attack or defense; the retriever
# returns these strings verbatim as context.
DOCUMENTS = [
    "Evasion attacks manipulate test samples at inference time to evade detection.",
    "Model extraction attacks use queries to steal ML models.",
    "Backdoor attacks poison ML training data with hidden triggers.",
    "Gradient masking defends ML models by obfuscating gradients.",
    "Parseval networks improve robustness by controlling the Lipschitz constant.",
    "Transferability attacks craft adversarial examples on surrogate models to attack unknown models.",
    "Membership inference attacks infer if data points belong to training sets.",
]

# Initialize TTP knowledge base vector embeddings (simplified)
# TF-IDF matrix with one row per entry of DOCUMENTS. The fitted vectorizer is
# not kept, so anything projecting a query into this space must refit one on
# the same documents.
vector_db = TfidfVectorizer().fit_transform(DOCUMENTS)

# Retriever Agent: fetches relevant information
def retrieve_docs(query, vector_db, documents, top_k=3):
    """Return the ``top_k`` documents most similar to ``query``.

    Args:
        query: Free-text query.
        vector_db: TF-IDF matrix with one row per entry of ``documents``.
        documents: The texts ``vector_db`` was built from.
        top_k: Number of documents to return.

    Returns:
        Up to ``top_k`` document strings, most similar first.
    """
    # The vectorizer that produced vector_db is not passed in, so refit one on
    # the same documents to obtain a matching vocabulary for the query.
    query_vec = TfidfVectorizer().fit(documents).transform([query])
    # Compare the *vectorized* query (not the raw string) against the matrix.
    scores = cosine_similarity(query_vec, vector_db).flatten()
    top_indices = scores.argsort()[::-1][:top_k]
    return [documents[i] for i in top_indices]

# Generative Reasoning Agent: makes sense of retrieved docs
def generative_reasoner(query, context_docs):
    """Ask the completion model to answer ``query`` from retrieved context.

    Args:
        query: The user's question.
        context_docs: Retrieved document strings; they are joined with single
            spaces to form the prompt context.

    Returns:
        The first completion choice's text with surrounding whitespace
        stripped.
    """
    context_blob = ' '.join(context_docs)
    completion = openai.Completion.create(
        engine="text-davinci-003",
        prompt=f"Context: {context_blob}\n\nUser Query: {query}\n\nAnswer:",
        temperature=0.2,
        max_tokens=100,
    )
    first_choice = completion.choices[0]
    return first_choice.text.strip()

# Detection Agent: determines if query indicates an adversarial attack
def detect_attack(query):
    """Report whether ``query`` mentions any known attack keyword.

    Matching is a case-insensitive substring test.

    Args:
        query: Text to scan.

    Returns:
        True if at least one keyword occurs in ``query``, otherwise False.
    """
    lowered = query.lower()
    for keyword in ("evasion", "extraction", "backdoor", "poison", "gradient masking"):
        if keyword in lowered:
            return True
    return False

# Orchestrator Agent: decides on response based on detection results
def orchestrator(query, vector_db, documents):
    """Decide how to respond to ``query`` based on threat detection.

    When the detection agent flags the query, related documents are
    retrieved and the generative agent's analysis is embedded in the
    returned decision.

    Args:
        query: Incoming text to assess.
        vector_db: TF-IDF matrix built from ``documents``.
        documents: Knowledge-base texts aligned with ``vector_db``.

    Returns:
        A human-readable decision string.
    """
    if not detect_attack(query):
        return "No threats detected. No action needed."

    # Use the documents passed in so they stay aligned with vector_db.
    context_docs = retrieve_docs(query, vector_db, documents)
    analysis = generative_reasoner(query, context_docs=context_docs)
    return f"Threat detected! Taking security action. Details: {analysis}"

# Learner Agent: placeholder to demonstrate continual learning
def update_knowledge_base(new_doc, documents):
    """Add a document to the knowledge base and rebuild its TF-IDF matrix.

    Args:
        new_doc: Text to add.
        documents: Knowledge-base list; it is extended in place.

    Returns:
        TF-IDF matrix freshly fitted on the extended ``documents``.
    """
    documents.append(new_doc)
    refreshed_matrix = TfidfVectorizer().fit_transform(documents)
    return refreshed_matrix

# Main workflow
def main_agent(query):
    """Handle one incoming query and print the orchestrator's decision.

    Args:
        query: Incoming text to assess.
    """
    print(f"Received query: {query}")
    # The orchestrator retrieves context itself when a threat is detected; a
    # separate retrieve_docs() call here only repeated that work and its
    # result was never used.
    response = orchestrator(query, vector_db, DOCUMENTS)
    print(response)

# Example query (model extraction threat scenario)
query = "We observed numerous queries potentially indicative of model extraction."
main_response = orchestrator(query, vector_db, DOCUMENTS)
print(main_response)

In [None]:
# Complete Agentic RAG System for ML Security

import openai
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Knowledge Base: Security knowledge docs
# Bound to DOCUMENTS because every use in this cell (the vectorizer fit below
# and main_agent) reads that name; the list is closed with `]` (the original
# `}` was a syntax error).
DOCUMENTS = [
    "Evasion attacks manipulate inputs to evade detection by ML models.",
    "Model extraction attacks use queries to steal ML model information.",
    "Backdoor attacks poison ML training data with hidden triggers.",
    "Gradient masking defends ML models by obfuscating gradients.",
    "Parseval networks improve robustness by controlling the Lipschitz constant.",
    "Adversarial examples exploit vulnerabilities in ML models.",
    "Data poisoning introduces malicious data to corrupt ML training processes.",
    "Evasion attacks involve subtle perturbations to avoid detection at inference.",
]
# The list was originally assigned to `documents`; keep that name as an alias.
documents = DOCUMENTS

# Vectorize documents for semantic retrieval
# The fitted vectorizer is kept so queries can be projected into the same space.
vectorizer = TfidfVectorizer()
vector_db = vectorizer.fit_transform(DOCUMENTS)

# Retriever Agent
def retrieve_docs(query, vectorizer, vector_db, documents, top_k=3):
    """Return the ``top_k`` documents most similar to ``query``.

    Args:
        query: Free-text query.
        vectorizer: The fitted vectorizer that produced ``vector_db``.
        vector_db: TF-IDF matrix with one row per entry of ``documents``.
        documents: Texts aligned with the rows of ``vector_db``.
        top_k: Number of documents to return.

    Returns:
        Up to ``top_k`` document strings, most similar first.
    """
    query_vec = vectorizer.transform([query])
    scores = cosine_similarity(query_vec, vector_db).flatten()
    top_indices = scores.argsort()[::-1][:top_k]
    # Iterate the ranked indices; ``top_k`` itself is just a count.
    return [documents[i] for i in top_indices]

# Generative Reasoning Agent (LLM)
def generative_reasoning(query, context_docs):
    """Ask the completion model to answer ``query`` from retrieved context.

    Args:
        query: The user's question.
        context_docs: Retrieved document strings; they are joined with single
            spaces to form the prompt context.

    Returns:
        The first completion choice's text with surrounding whitespace
        stripped.
    """
    joined_context = ' '.join(context_docs)
    request = {
        "engine": "text-davinci-003",
        "prompt": f"Context: {joined_context}\n\nUser Query: {query}\n\nAnswer:",
        "temperature": 0.2,
        "max_tokens": 150,
    }
    completion = openai.Completion.create(**request)
    return completion.choices[0].text.strip()

# Detection Agent
def detect_attack(query):
    """Report whether ``query`` mentions any known threat keyword.

    Matching is a case-insensitive substring test.

    Args:
        query: Text to scan.

    Returns:
        True if at least one keyword occurs in ``query``, otherwise False.
    """
    lowered = query.lower()
    for keyword in ("data poisoning", "evasion", "model extraction", "tampering", "gradient masking"):
        if keyword in lowered:
            return True
    return False

# Orchestrator Agent
def orchestrator(query, vectorizer, vector_db, documents):
    """Combine threat detection with a generated analysis of ``query``.

    Context is retrieved and analysed whether or not a threat is detected;
    detection only selects the action line that prefixes the analysis.

    Args:
        query: Text to assess.
        vectorizer: Fitted vectorizer that produced ``vector_db``.
        vector_db: TF-IDF matrix aligned with ``documents``.
        documents: Knowledge-base texts.

    Returns:
        The action line followed by the generated detailed analysis.
    """
    context_docs = retrieve_docs(query, vectorizer, vector_db, documents)
    if detect_attack(query):
        action = "Adversarial threat detected. Initiating defensive actions."
    else:
        action = "No immediate threat detected. Proceeding with standard operations."
    # The generative agent in this cell is `generative_reasoning`.
    generated_response = generative_reasoning(query, context_docs)
    return f"{action}\n\nDetailed Analysis:\n{generated_response}"

# Query Refinement Agent (to adapt initial user query into detailed RAG queries)
def refine_query(initial_query):
    """Return the fixed boolean search expression used for retrieval.

    Args:
        initial_query: The user's query. It is accepted but not used: the
            returned expression is the same for every input.

    Returns:
        Three parenthesised OR-groups joined by ``AND``.
    """
    clauses = (
        "(adversarial OR threat OR vulnerability)",
        "(machine learning security OR ML security OR AI security OR deep learning security)",
        "(data poisoning OR evasion attack OR model tampering)",
    )
    return " AND ".join(clauses)

# Main Workflow Agent
def main_agent(initial_query):
    """Refine a user query, run it through the orchestrator, print the result.

    Args:
        initial_query: Raw user query.
    """
    print(f"Received initial query: {initial_query}")
    analysis = orchestrator(refine_query(initial_query), vectorizer, vector_db, DOCUMENTS)
    print(analysis)

# Example Execution
initial_query = "TTPs for machine learning security threats"
main_agent(initial_query)


In [None]:
## TODO:

# Comprehensive Multi-Agent RAG System

import openai
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import requests
from pathlib import Path

# Document Repository
# Directory the pipeline reports as its document store; the path is relative
# to the current working directory.
DOCUMENTS_FOLDER = './ML_Threat_Documents/'

# Initial Query Refinement via Retrieval Augmentation
def refine_query(initial_query):
    """Return the fixed boolean search expression used for retrieval.

    Args:
        initial_query: The user's query. It is accepted but not used: the
            returned expression is the same for every input.

    Returns:
        Three parenthesised OR-groups joined by ``AND``.
    """
    clauses = (
        "(adversarial attack OR threat OR vulnerability OR advers*)",
        "(machine learning security OR ML security OR AI security OR deep learning security)",
        "(data poisoning OR evasion attack OR model tampering OR data extraction)",
    )
    return " AND ".join(clauses)

# Document Retriever: Fetch and Store PDFs
def download_and_store_documents(query):
    """Placeholder for the document retrieval step.

    No documents are fetched or written: the function only prints the query
    and the configured ``DOCUMENTS_FOLDER``. A real implementation would talk
    to a database or search API (e.g. Google Scholar).

    Args:
        query: The search expression documents would be retrieved for.
    """
    retrieval_notice = f"Documents retrieved for query: {query}"
    print(retrieval_notice)
    storage_notice = f"Storing documents in: {DOCUMENTS_FOLDER}"
    print(storage_notice)

# Document Processing & Vector Database Update
def process_documents(folder):
    """Build the TF-IDF knowledge base from a fixed sample corpus.

    Args:
        folder: Accepted for interface symmetry but not read; the corpus is
            hard-coded below.

    Returns:
        Tuple ``(vector_db, documents)``: the TF-IDF matrix and the list of
        texts it was fitted on.
    """
    corpus = [
        "Evasion attacks manipulate test-time data to fool ML classifiers.",
        "Data poisoning affects training datasets to manipulate model training.",
        "Backdoor attacks introduce secret triggers into ML models.",
    ]
    tfidf_matrix = TfidfVectorizer().fit_transform(corpus)
    return tfidf_matrix, corpus

# Retriever Agent
def retrieve_docs(query, vector_db, documents, top_k=3):
    """Return the ``top_k`` documents most similar to ``query``.

    Args:
        query: Free-text query.
        vector_db: TF-IDF matrix with one row per entry of ``documents``.
        documents: The texts ``vector_db`` was built from.
        top_k: Number of documents to return.

    Returns:
        Document strings ordered from most to least similar.
    """
    # A fresh vectorizer is fitted on the same documents to project the query.
    query_vectorizer = TfidfVectorizer()
    query_vectorizer.fit(documents)
    query_vec = query_vectorizer.transform([query])

    similarity = cosine_similarity(query_vec, vector_db).flatten()
    ascending = similarity.argsort()
    # Take the last ``top_k`` (highest) indices and flip them to best-first.
    # Note that ``top_k=0`` selects every index, because ``[-0:]`` is the
    # whole array.
    best_first = ascending[-top_k:][::-1]
    return [documents[idx] for idx in best_first]

# Generative Reasoning Agent (GRA)
def generate_response(query, context_docs):
    """Ask the completion model to answer ``query`` from retrieved context.

    Args:
        query: The user's question.
        context_docs: Retrieved document strings; they are joined with single
            spaces to form the prompt context.

    Returns:
        The first completion choice's text, exactly as returned (it is not
        stripped).
    """
    joined_context = ' '.join(context_docs)
    completion = openai.Completion.create(
        engine="text-davinci-003",
        prompt=f"Context: {joined_context}\n\nUser Query: {query}\n\nAnswer:",
        temperature=0.3,
        max_tokens=200,
    )
    first_choice = completion.choices[0]
    return first_choice.text

# Sentry/Detection Agent
def detect_attack(query):
    """Report whether ``query`` mentions any known threat keyword.

    Matching is a case-insensitive substring test.

    Args:
        query: Text to scan.

    Returns:
        True if at least one keyword occurs in ``query``, otherwise False.
    """
    lowered = query.lower()
    for keyword in ("data poisoning", "evasion", "backdoor", "model extraction", "tampering", "attack"):
        if keyword in lowered:
            return True
    return False

# Orchestrator Agent
def orchestrator(query, vector_db, documents):
    """Print a threat verdict for ``query``.

    When the detection agent flags the query, related documents are
    retrieved and a generated answer is printed alongside the verdict.

    Args:
        query: Text to assess.
        vector_db: TF-IDF matrix aligned with ``documents``.
        documents: Knowledge-base texts.

    Returns:
        None. The verdict is written to stdout.
    """
    if not detect_attack(query):
        print("No threats detected.")
        return

    context_docs = retrieve_docs(query, vector_db, documents)
    # The generative agent in this cell is `generate_response`.
    answer = generate_response(query, context_docs)
    print(f"Threat detected: {answer}")

# Learner Agent
def update_knowledge_base(new_doc, documents):
    """Add a document to the knowledge base and rebuild its TF-IDF matrix.

    Args:
        new_doc: Text to add.
        documents: Knowledge-base list; it is extended in place.

    Returns:
        TF-IDF matrix freshly fitted on the extended ``documents``.
    """
    documents.append(new_doc)
    refreshed_matrix = TfidfVectorizer().fit_transform(documents)
    return refreshed_matrix

# Main Workflow
def main_agent(initial_query):
    """Run the full workflow for one user query.

    Refines the query, runs the (placeholder) document retrieval step,
    builds the knowledge base and hands the refined query to the
    orchestrator.

    Args:
        initial_query: Raw user query.
    """
    refined_query = refine_query(initial_query)
    # Call the helpers under the names they are defined with in this cell.
    download_and_store_documents(refined_query)
    vector_db, documents = process_documents(DOCUMENTS_FOLDER)
    response = orchestrator(refined_query, vector_db, documents)
    # The orchestrator prints its own verdict; only echo a return value if it
    # actually provides one, so a bare "None" is never printed.
    if response is not None:
        print(response)

# Example execution
initial_query = "TTPs in ML security"
main_agent(initial_query)


In [None]:
import os
import PyPDF2
from sentence_transformers import SentenceTransformer

# Function to read all PDFs from a folder
def read_pdfs_from_folder(folder_path):
    """Extract the text of every page of every PDF in a folder.

    Args:
        folder_path: Directory to scan. Only entries whose name ends in
            ``.pdf`` are read; sub-directories are not searched.

    Returns:
        List with one string per PDF page that yielded text, ordered by file
        name and then by page. Pages without extractable text are skipped.
    """
    documents = []
    # os.listdir() returns entries in arbitrary order; sort so repeated runs
    # produce the same document order.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".pdf"):
            continue
        file_path = os.path.join(folder_path, filename)
        with open(file_path, mode='rb') as file:
            reader = PyPDF2.PdfReader(file)
            for pdf_page in reader.pages:
                page_text = pdf_page.extract_text()
                if page_text:
                    documents.append(page_text)
    return documents

# Generate embeddings from documents
def generate_embeddings(documents):
    """Encode texts with the ``all-MiniLM-L6-v2`` sentence transformer.

    The encoder is instantiated on every call.

    Args:
        documents: Texts to encode.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for ``documents``.
    """
    encoder = SentenceTransformer('all-MiniLM-L6-v2')
    return encoder.encode(documents)

# Script entry point: read the PDFs under folder_path and embed their text.
if __name__ == "__main__":
    folder_path = "path/to/pdf/folder"  # Update with your PDFs folder path
    # Texts extracted from the PDFs found in folder_path.
    documents = read_pdfs_from_folder(folder_path)
    # Embeddings of those texts; they are only held in memory here, not saved.
    embeddings = generate_embeddings(documents)
    print("Embeddings generated for all PDF documents.")


In [None]:
import openai
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Document Embedding
def build_embeddings(docs):
    """Encode documents with the ``all-MiniLM-L6-v2`` sentence transformer.

    The encoder is created locally and is not exposed to other functions.

    Args:
        docs: Texts to encode.

    Returns:
        Tuple ``(embeddings, docs)``: the encoded texts and the input,
        passed through unchanged.
    """
    encoder = SentenceTransformer('all-MiniLM-L6-v2')
    return encoder.encode(docs), docs

# Step 2: Retrieval Agent
def retrieve_context(query, embeddings, docs, top_k=3, model=None):
    """Return the ``top_k`` documents most similar to ``query``.

    Args:
        query: Free-text query.
        embeddings: Document embeddings aligned with ``docs``.
        docs: Document texts.
        top_k: Number of documents to return.
        model: Optional sentence encoder used to embed the query. It must be
            the encoder that produced ``embeddings``. When omitted, the
            ``all-MiniLM-L6-v2`` checkpoint used by ``build_embeddings`` is
            loaded; pass a model to avoid reloading it on every call.

    Returns:
        Up to ``top_k`` document strings, most similar first.
    """
    if model is None:
        # build_embeddings() keeps its encoder local, so there is no shared
        # module-level `model` to rely on.
        model = SentenceTransformer('all-MiniLM-L6-v2')
    query_vec = model.encode([query])[0]
    similarities = cosine_similarity([query_vec], embeddings)[0]
    # Reverse first, then slice: unlike ``[-top_k:]`` this yields an empty
    # result for ``top_k=0`` instead of every document.
    indices = similarities.argsort()[::-1][:top_k]
    return [docs[i] for i in indices]

# Step 3: Generative Reasoning Agent
def generate_response(query, context):
    """Ask the chat model to answer ``query`` using retrieved context.

    Args:
        query: The user's question.
        context: Retrieved document strings.

    Returns:
        The content of the first returned chat message.
    """
    context_text = "\n\n".join(context)
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an AI assistant helping identify and explain ML threats."},
            # Send the joined text; interpolating the list itself would put
            # its Python repr (brackets and quotes) into the prompt.
            {"role": "user", "content": f"Query: {query}\n\nContext: {context_text}\n\nResponse:"}
        ]
    )
    return completion.choices[0].message.content

# Step 4: Detection Agent
def detect_threat(input_data, detection_model, threshold=0.7):
    """Flag an input based on the detector's highest class probability.

    Args:
        input_data: A single sample; it is wrapped in a one-element batch for
            ``detection_model.predict_proba``.
        detection_model: Object exposing ``predict_proba``.
        threshold: Cut-off the highest class probability is compared with.

    Returns:
        Truthy when the highest class probability is below ``threshold``.
    """
    class_probabilities = detection_model.predict_proba([input_data])[0]
    top_probability = class_probabilities.max()
    # NOTE(review): the flag is raised when the score is *below* the
    # threshold, and callers treat a truthy flag as "threat detected" —
    # confirm this direction is intended.
    return top_probability < threshold

# Step 5: Decision Agent
def orchestrate_response(detection_flag, insights):
    """Turn a detection result into the final user-facing message.

    Args:
        detection_flag: Truthy when a threat was detected.
        insights: Recommended response, embedded in the alert message.

    Returns:
        The alert message including ``insights`` when ``detection_flag`` is
        truthy, otherwise the all-clear message.
    """
    if not detection_flag:
        return "✅ Input appears safe. No action required."
    return f"🚨 Threat Detected! Recommended Response: {insights}"

# Main Execution Example
# Main Execution Example: embed -> retrieve -> generate -> detect -> decide.
if __name__ == "__main__":
    documents = ["content from your PDFs goes here"]
    embeddings, docs = build_embeddings(documents)

    query = "Describe vulnerabilities related to model extraction."

    context_docs = retrieve_context(query, embeddings, docs, top_k=3)

    # The generative agent in this cell is `generate_response(query, context)`.
    insights = generate_response(query, context_docs)

    class _PlaceholderDetector:
        """Stand-in for a trained detector; replace with a real classifier.

        Treats the sample's single feature as the probability of the second
        class so the example can run without a trained model.
        """

        def predict_proba(self, rows):
            return np.array([[1.0 - row[0], row[0]] for row in rows])

    sample_input = [0.85]  # Example detection confidence
    # detect_threat() needs a model exposing predict_proba(); the earlier
    # keyword-based detect_attack() does not accept these arguments.
    detection_flag = detect_threat(sample_input, _PlaceholderDetector(), threshold=0.90)

    final_response = orchestrate_response(detection_flag, insights)
    print(final_response)



---

###  **Creating an End-to-End Integration:**
- Embed your provided PDF contents using sentence transformers and store them in vector databases like FAISS.
- Continuously integrate new research or data.
- Apply real-time monitoring and alerting through dashboards (Grafana, Prometheus).

This structured system enables effective use of retrieval-augmented generation in an agentic manner, proactively responding to ML security threats in real time.


In [None]:
{
  "lifecycle_stages": {
    "Deployment": ["Evasion Attacks", "Model Extraction", "Side-Channel Attacks", "Sponge Attacks", "Graph-Based Threat Exploits", "HOUYI Prompt Injection Attacks", "QA-Prompt", "MASTERKEY Automated Jailbreaking", "Semantic Firewall Bypass (Self-Deception Attacks)", "Dynamic Role Hijacking Attacks", "LLM-System Exploits (LLMs)", "System Development", "LLM Deployment Exploitation"],
    "Test time": ["Evasion Attacks", "Side-Channel Attacks", "Sponge Attacks", "Universal Adversarial Texts (UATs)", "Gradient-Based NLP Adversarial Attacks", "HOUYI Prompt Injection Attacks", "Visual Adversarial Examples in Multimodal Models", "MASTERKEY Automated Jailbreaking", "Systematic Jailbreak Prompts", "Semantic Firewall Bypass (Self-Deception Attacks)", "RAIN Gradient-Based Obstinate Adversarial Attacks", "Dynamic Role Hijacking Attacks"],
    "Fine-tuning": ["Prompt Injection Attacks", "Universal Prompt Vulnerabilities", "Adversarial Alignment Challenges", "Parameter-Efficient NLP Vulnerabilities", "Visual Adversarial Examples in Multimodal Models", "Instruction-Tuning Dataset Errors (DONKII)", "Adversarial Alignment Challenges", "Universal Prompt Vulnerabilities"],
    "Training": ["Backdoor Attacks", "Poisoning Attacks", "Federated Learning Poisoning", "Vertical Federated Learning Vulnerabilities", "Insertion-Based Backdoor Attacks", "Imperceptible Backdoor Attacks", "Instruction-Tuning Dataset Errors (DONKII)", "Exploiting Machine Unlearning for Backdoor Attacks (BAU)", "DP-Forward Robust Training", "Adversarial Alignment Challenges"],
    "Test time": ["Evasion Attacks", "Side-Channel Attacks", "Adversarial Examples", "Sponge Attacks", "Universal Adversarial Texts (UATs)", "Gradient-Based NLP Adversarial Attacks", "Prompt Injection Attacks", "RAIN Gradient-Based Obstinate Adversarial Attacks"],
    "Federated Aggregation": ["Federated Learning Poisoning", "Vertical Federated Learning Vulnerabilities", "FedSecurity"],
    "Data Preparation": ["Poisoning Attacks", "Insertion-Based Backdoor Attacks", "Instruction-Tuning Dataset Errors (DONKII)", "Preprocessing"],
    "Inference": ["DP-Forward Robust Training", "Gradient-Based Obstinate Adversarial Attacks"],
    "Preprocessing": ["Insertion-Based Backdoor Attacks", "Instruction-Tuning Dataset Errors (DONKII)"],
    "Post-training": ["Exploiting Machine Unlearning for Backdoor Attacks (BAU)"],
    "Pretraining": ["Universal Prompt Vulnerabilities", "Insertion-Based Backdoor Attacks", "Imperceptible Backdoor Attacks", "Universal Prompt Vulnerabilities"],
    "Federated Aggregation": ["Federated Learning Poisoning", "Federated Learning Poisoning with Federated LLMs (FedSecurity)", "Vertical Federated Learning Vulnerabilities"],
    "System Modeling": ["Graph-Based Threat Exploits"],
    "System Development": ["LLM Prompt Injection Attacks", "LLM System Exploits (LLMsmith)", "Trust Exploitation in LLM"],
    "Data Collection": ["Instruction-Tuning Dataset Errors (DONKII)"]
},
"TTPs": {
    "Evasion Attacks": ["Deployment", "Test time"],
    "Model Extraction": ["Deployment", "Inference"],
    "Backdoor Attacks": ["Training", "Data Preparation"],
    "Membership Inference": ["Deployment", "Inference"],
    "Transferability Attacks": ["Test time"],
    "Poisoning Attacks": ["Training", "Data Preparation"],
    "Side-Channel Attacks": ["Deployment", "Test time"],
    "Universal Adversarial Texts": ["Test time"],
    "Federated Learning Poisoning": ["Training", "Federated Aggregation"]
},
"Vulnerabilities": [
    "Weak Adversarial Defenses", "Insufficient Monitoring", "Data Poisoning", "Unencrypted Model Parameters",
    "Privacy Violations", "Exposed Training Data", "Biased Data Sources", "Semantic Exploitation",
    "Resource Exhaustion", "Trigger Sensitivity", "Input Validation Gaps", "Model Overfitting",
    "Federated Model Poisoning", "Backdoor Exploits", "Model Overfitting", "Exposed Training Data"
],
"Critical_Vulnerabilities": [
    "Data Poisoning", "Exposed Training Data", "Privacy Violations", "Model Overfitting",
    "Federated Model Poisoning", "Backdoor Exploits"
]
}
