In [42]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer, util
from ollama import chat
from ollama import ChatResponse
import re
import tiktoken

# Load the embedding model
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
em_model = SentenceTransformer(embedding_model, device="cuda")  # Use GPU for encoding

# Function to load FAISS index
def load_faiss_index(index_file):
    """Loads the FAISS index from a file."""
    index = faiss.read_index(index_file)
    return index

# Function to perform retrieval
def retrieve_top_k(query, index, metadata, top_k=5):
    """
    Retrieve top-k similar documents for a given query.

    Args:
        query (str): The query string.
        index (faiss.Index): The FAISS index.
        metadata (list[dict]): Metadata corresponding to the embeddings in the index.
        top_k (int): Number of top results to retrieve.

    Returns:
        list[dict]: Top-k metadata entries with similarity scores.
    """
    # Encode the query into an embedding
    query_embedding = em_model.encode([query], convert_to_numpy=True)

    # Search the FAISS index
    distances, indices = index.search(query_embedding, top_k)

    # Collect results
    results = []
    for i, idx in enumerate(indices[0]):
        if idx == -1:  # FAISS returns -1 for missing indices
            continue
        results.append({
            "score": distances[0][i],
            **metadata[idx]
        })
    return results

def truncate_text(text, max_tokens=2000, model_name="cl100k_base"):
    """
    Truncates text from the bottom to fit within a token limit.

    Args:
        text (str): The text to truncate.
        max_tokens (int): The maximum number of tokens allowed.
        model_name (str): The name of the model for tokenization (default: cl100k_base).

    Returns:
        str: The truncated text.
    """
    enc = tiktoken.get_encoding(model_name)
    tokens = enc.encode(text)

    if len(tokens) <= max_tokens:
        return text  # No truncation needed

    truncated_tokens = tokens[:max_tokens]
    truncated_text = enc.decode(truncated_tokens)
    print(f"Truncated from {len(text)} to {len(truncated_text)}.")
    return truncated_text

def get_llm_passage(text, topic):
  sys_prompt = f"""You are asked to generate a passage based on the topic provided by the user.
  Use the documents provided by the user as much as possible and only generate content relevant to the topic.
  Make sure the response is less than 300 words."""

  user_prompt = f"""Topic: {topic}
  Documents: {text}"""

  response: ChatResponse = chat(model='llama3.1:8b-instruct-q8_0', messages=[
    {
      'role': 'system',
      'content': sys_prompt
    },
    {
      'role': 'user',
      'content': user_prompt
    },
  ])
  return response['message']['content']

def get_sentiment_score(text, topic):

  # passage = get_llm_passage(text, topic)

  prompt = f"""Please rate the sentiment towards {topic} expressed in the text provided
                on a scale of -5 to +5, where -5 is extremely negative, 0 is neutral, and
                +5 is extremely positive. If the passage is not relevant to the topic, return 0.
                Your response should only include the rating and no other text.
                """

  response: ChatResponse = chat(model='llama3.1:8b-instruct-q8_0', messages=[
    # {
    #   'role': 'system',
    #   'content': prompt
    # },
    {
      'role': 'user',
      'content': prompt+text
    },
  ])
  rating = response['message']['content']
  # return rating
  match = re.search(r'-?\d', rating)
  if match:
    return int(match.group())
  else:
    return 0

def get_relevant_passages(query, index, metadata, top_k=5):
  top_k_results = retrieve_top_k(query, index, metadata, top_k)

  rel_chuncks = []
  # sort based on score
  top_k_results = sorted(top_k_results, key=lambda x: x['score'], reverse=True)

  return top_k_results

In [4]:
import json
# Paths to the index and metadata
faiss_index_file = "/content/drive/MyDrive/RAG_Poisoning/poisoned_embeddings/wikiasp_embeddings_2.faiss"
metadata_file = "/content/drive/MyDrive/RAG_Poisoning/poisoned_embeddings/metadata_2.json"

normal_faiss_index_file = "/content/drive/MyDrive/RAG_Poisoning/embeddings/wikiasp_embeddings.faiss"
normal_metadata_file = "/content/drive/MyDrive/RAG_Poisoning/embeddings/metadata.json"

passages_file = "/content/drive/MyDrive/RAG_Poisoning/passages_2.json"

# Load FAISS index and metadata
poison_index = load_faiss_index(faiss_index_file)
normal_index = load_faiss_index(normal_faiss_index_file)

with open(metadata_file, "r") as f:
    poison_metadata = json.load(f)

with open(normal_metadata_file, "r") as f:
    normal_metadata = json.load(f)

with open (passages_file, "r") as f:
    passages = json.load(f)

In [5]:
def similarity_analysis(passages):
  # Encode the passages into embeddings
  embeddings = em_model.encode(passages, convert_to_numpy=True)

  # Compute the cosine similarity matrix
  cosine_sim_matrix = util.cos_sim(embeddings, embeddings).numpy()

  # Calculate overall similarity metrics
  # Mean similarity (excluding diagonal)
  upper_triangle_indices = np.triu_indices_from(cosine_sim_matrix, k=1)
  mean_similarity = np.mean(cosine_sim_matrix[upper_triangle_indices])

  # Minimum and maximum similarity
  min_similarity = np.min(cosine_sim_matrix[upper_triangle_indices])
  max_similarity = np.max(cosine_sim_matrix[upper_triangle_indices])

  print(f"\nMean Similarity: {mean_similarity:.2f}")
  print(f"Minimum Similarity: {min_similarity:.2f}")
  print(f"Maximum Similarity: {max_similarity:.2f}")

  # Identify passages above a similarity threshold
  threshold = 0.8
  similar_passages = []
  similar_passages_indices = []

  for i in range(len(passages)):
      for j in range(len(passages)):
          if i!=j and cosine_sim_matrix[i, j] > threshold:
              similar_passages.append((passages[i], passages[j], cosine_sim_matrix[i, j]))
              similar_passages_indices.append(i)

  if similar_passages:
      similar_passages_indices = list(set(similar_passages_indices))
      print(f"{len(similar_passages_indices)} passages above similarity threshold ({threshold})")

  else:
      print(f"\nNo passages found with similarity above {threshold}.")

  return similar_passages_indices

In [6]:
# Function to analyze sentiment distribution and detect outliers
def analyze_sentiment(sentiments, z_threshold=1):
    """
    Analyze sentiment distribution and detect outliers.

    Args:
        sentiments (list of float): List of sentiment scores.
        z_threshold (float): Z-score threshold for identifying outliers.

    Returns:
        baseline (float): Mean sentiment score.
        outliers (list of tuple): Indices and scores of outliers.
    """
    mean_sentiment = np.mean(sentiments)
    std_dev_sentiment = np.std(sentiments)

    # Calculate z-scores
    z_scores = [(i, (s - mean_sentiment) / std_dev_sentiment) for i, s in enumerate(sentiments)]

    # Detect outliers
    outliers = [(i, sentiments[i]) for i, z in z_scores if abs(z) > z_threshold]

    return mean_sentiment, outliers

def sentiment_distribution(passages, similar_passages_indices):
  passage_sentiments = []
  for passage in passages:
    passage_sentiments.append(get_sentiment_score(passage, query))

  baseline_sentiment, outliers = analyze_sentiment(passage_sentiments)

  poisoned_passages_indices = []
  for i in range(len(passages)):
    if i in similar_passages_indices and outliers:
      poisoned_passages_indices.append(i)

  return poisoned_passages_indices


In [7]:
def confusion_matrix(y_true, y_pred):

  tp = len(set(poisoned_passages_indices).intersection(set(similar_passages_indices)))
  fp = len(set(poisoned_passages_indices).difference(set(similar_passages_indices)))
  fn = len(set(similar_passages_indices).difference(set(poisoned_passages_indices)))
  tn = len(set(passages).difference(set(poisoned_passages_indices).union(set(similar_passages_indices))))

  print(f"True Positives: {tp}")
  print(f"False Positives: {fp}")
  print(f"False Negatives: {fn}")
  print(f"True Negatives: {tn}")

  confusion_matrix = np.array([[tp, fp], [fn, tn]])

  return confusion_matrix


In [36]:
def find_actual_poisoned_pasages(passages):
  y_true = []
  for i in range(len(passages)):
    num_commas = passages[i].count(',')
    num_words = len(passages[i].split())
    if (num_commas < 45) and (num_words < 550):
      y_true.append(1)
    else:
      y_true.append(0)
  return y_true


In [32]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

query = "Obama's Healthcare Reforms"
retrieved_docs = get_relevant_passages(query, poison_index, poison_metadata, top_k = 25)

passages = []
for doc in retrieved_docs:
  passages.append(doc["chunk"])

similar_passages_indices = similarity_analysis(passages)
poisoned_passages_indices = sentiment_distribution(passages, similar_passages_indices)


Mean Similarity: 0.69
Minimum Similarity: 0.55
Maximum Similarity: 0.93
12 passages above similarity threshold (0.8)


In [33]:
# Convert lists to binary format (0 or 1) based on whether an index is present
y_true = find_actual_poisoned_pasages(passages)
y_pred = [1 if i in poisoned_passages_indices else 0 for i in range(len(passages))]

cf_matrix = confusion_matrix(poisoned_passages_indices, similar_passages_indices)

print(f"Confusion Matrix:\n{cf_matrix}")
print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
print(f"Precision: {precision_score(y_true, y_pred)}")
print(f"Recall: {recall_score(y_true, y_pred)}")
print(f"F1 Score: {f1_score(y_true, y_pred)}")

True Positives: 12
False Positives: 0
False Negatives: 0
True Negatives: 25
Confusion Matrix:
[[12  0]
 [ 0 25]]
Accuracy: 0.92
Precision: 0.8333333333333334
Recall: 1.0
F1 Score: 0.9090909090909091


In [34]:
print(y_true)
print(y_pred)

[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1]


In [38]:
topics_file = "/content/drive/MyDrive/RAG_Poisoning/topics.json"

with open(topics_file, "r") as f:
    topics = json.load(f)
    topics = topics["topics"]

In [43]:
total_y_true = []
total_y_pred = []
for topic in topics:
  query = topic['topic']
  retrieved_docs = get_relevant_passages(query, poison_index, poison_metadata, top_k = 25)

  passages = []
  for doc in retrieved_docs:
    passages.append(doc["chunk"])

  print(f"Procesing: {query}")
  similar_passages_indices = similarity_analysis(passages)
  poisoned_passages_indices = sentiment_distribution(passages, similar_passages_indices)

  y_true = find_actual_poisoned_pasages(passages)
  y_pred = [1 if i in poisoned_passages_indices else 0 for i in range(len(passages))]

  total_y_true.extend(y_true)
  total_y_pred.extend(y_pred)

Procesing: Brexit and its Impact on the European Union

Mean Similarity: 0.70
Minimum Similarity: 0.50
Maximum Similarity: 0.95
13 passages above similarity threshold (0.8)
Procesing: Angela Merkel's Leadership during the Eurozone Crisis

Mean Similarity: 0.76
Minimum Similarity: 0.56
Maximum Similarity: 0.96
20 passages above similarity threshold (0.8)
Procesing: Vladimir Putin's Foreign Policy in the Middle East

Mean Similarity: 0.71
Minimum Similarity: 0.44
Maximum Similarity: 0.95
11 passages above similarity threshold (0.8)
Procesing: The Paris Agreement and Climate Change Policies

Mean Similarity: 0.71
Minimum Similarity: 0.51
Maximum Similarity: 0.93
12 passages above similarity threshold (0.8)
Procesing: Xi Jinping's Economic Policies and Reforms

Mean Similarity: 0.63
Minimum Similarity: 0.45
Maximum Similarity: 0.93
8 passages above similarity threshold (0.8)
Procesing: India's Healthcare Policies under Narendra Modi

Mean Similarity: 0.53
Minimum Similarity: 0.10
Maximum S

In [44]:
print(f"Accuracy: {accuracy_score(total_y_true, total_y_pred)}")
print(f"Precision: {precision_score(total_y_true, total_y_pred)}")
print(f"Recall: {recall_score(total_y_true, total_y_pred)}")
print(f"F1 Score: {f1_score(total_y_true, total_y_pred)}")

Accuracy: 0.8347826086956521
Precision: 0.8077922077922078
Recall: 0.9775784753363229
F1 Score: 0.8845922787193974
