In [None]:
!pip install faiss-cpu langchain langchain-community hf_xet langchain-huggingface langchain_google_genai

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.24-py3-none-any.whl.metadata (2.5 kB)
Collecting hf_xet
  Downloading hf_xet-1.1.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (879 bytes)
Collecting langchain-huggingface
  Downloading langchain_huggingface-0.2.0-py3-none-any.whl.metadata (941 bytes)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.

In [None]:
import pickle
with open('/content/drive/MyDrive/Bertopic/Data/bs_emb.pkl', 'rb') as f:
    bs_emb = pickle.load(f)
with open('/content/drive/MyDrive/Bertopic/Data/bs_sen.pkl', 'rb') as f:
    bs_sen = pickle.load(f)
data_dir = '/content/drive/MyDrive/Sentences and Pickles'

In [None]:
with open('/content/drive/MyDrive/Sentences and Pickles/uc_sentences.pkl', 'rb') as f:
  uc_sen = pickle.load(f)
with open('/content/drive/MyDrive/Sentences and Pickles/uc_embedding.pkl', 'rb') as f:
  uc_emb = pickle.load(f)

In [None]:
# The sentences are wrapped as docs for reliable processing
# A more simplistic approach could be used, but this was found to cause errors sometimes
import faiss
import os
import numpy as np
from langchain.docstore import InMemoryDocstore
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.schema import Document

faiss_dir = os.path.join(data_dir, "faiss")
os.makedirs(faiss_dir, exist_ok=True)

# Ensure float32 for FAISS
embeddings = np.array(bs_emb).astype("float32")

# Step 1: Build FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Step 2: Wrap sentences as Documents
docs = [Document(page_content=txt) for txt in bs_sen]
docstore = InMemoryDocstore(dict(enumerate(docs)))
index_to_docstore_id = dict(enumerate(range(len(docs))))

# Step 3: Set up embedding model again for retrieval later
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Step 4: Create LangChain FAISS object
faiss_store = FAISS(
    embedding_function=embedding_model,
    index=index,
    docstore=docstore,
    index_to_docstore_id=index_to_docstore_id
)

# Step 5: Save
faiss_store.save_local(os.path.join(faiss_dir, "index_a"))
print("✅ Successfully built and saved FAISS index_a.")

✅ Successfully built and saved FAISS index_a.


In [None]:
import faiss
import numpy as np
from langchain.docstore import InMemoryDocstore
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.schema import Document

# Ensure float32 for FAISS
embeddings = np.array(uc_emb).astype("float32")

# Step 1: Build FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Step 2: Wrap sentences as Documents
docs = [Document(page_content=txt) for txt in uc_sen]
docstore = InMemoryDocstore(dict(enumerate(docs)))
index_to_docstore_id = dict(enumerate(range(len(docs))))

# Step 3: Set up embedding model again for retrieval later
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Step 4: Create LangChain FAISS object
faiss_store = FAISS(
    embedding_function=embedding_model,
    index=index,
    docstore=docstore,
    index_to_docstore_id=index_to_docstore_id
)

# Step 5: Save
faiss_store.save_local(os.path.join(faiss_dir, "index_b"))
print("✅ Successfully built and saved FAISS index_b.")

✅ Successfully built and saved FAISS index_b.


In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Load embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Load both FAISS indexes
faiss_a = FAISS.load_local(os.path.join(faiss_dir, "index_a"), embeddings=embedding_model, allow_dangerous_deserialization=True)
faiss_b = FAISS.load_local(os.path.join(faiss_dir, "index_b"), embeddings=embedding_model, allow_dangerous_deserialization=True)

retriever_a = faiss_a.as_retriever(search_kwargs={"k": 5})
retriever_b = faiss_b.as_retriever(search_kwargs={"k": 5})

# Sample query
query = "What is community-led welfare?"

# Retrieve and print results
print("\n🔴 Top 5 from index_a (Boot & Shoe):")
for i, doc in enumerate(retriever_a.get_relevant_documents(query)):
    print(f"{i+1}. {doc.page_content}")

print("\n🔵 Top 5 from index_b (Unite Community):")
for i, doc in enumerate(retriever_b.get_relevant_documents(query)):
    print(f"{i+1}. {doc.page_content}")



🔴 Top 5 from index_a (Boot & Shoe):


  for i, doc in enumerate(retriever_a.get_relevant_documents(query)):


1. ment of the supposed social programme of the government
2. welfare not only of the labour and socialist movement of to
3. their intention to help rather than to hinder the government
4. without separation from their mothers and that in the end the community is the gainer for the expenditure which it has under
5. especially for the section of the community whose income is the lowest

🔵 Top 5 from index_b (Unite Community):
1. in to support community members with welfare issues and helping to complete welfare forms for
2. organising rallies and offered welfare advice community provided a seminar with welfare advice to
3. welfare advice and representation is given through the network of trained community members and the community
4. welfare public meetings informing their community of the welfare cuts and legal aid changes that
5. direction here and now what is community wealth building


In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.retrievers import ContextualCompressionRetriever
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
import os

# 🔧 Set your Gemini API key
os.environ["GOOGLE_API_KEY"] = api_key

# 🔍 Load the embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# 📦 Load your FAISS indexes
faiss_a = FAISS.load_local("index_a", embeddings=embedding_model, allow_dangerous_deserialization=True)
faiss_b = FAISS.load_local("index_b", embeddings=embedding_model, allow_dangerous_deserialization=True)

# 🔁 Set up retrievers
retriever_a = faiss_a.as_retriever(search_kwargs={"k": 5})
retriever_b = faiss_b.as_retriever(search_kwargs={"k": 5})

# 🤖 Gemini model
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.7)

# 🔗 Basic RAG chains
qa_chain_a = RetrievalQA.from_chain_type(llm=llm, retriever=retriever_a)
qa_chain_b = RetrievalQA.from_chain_type(llm=llm, retriever=retriever_b)

# 🎯 Sample query
query = "What are the key features of community-led welfare?"

# 💬 Responses from both datasets
print("\n🔴 Boot & Shoe (index_a) response:")
print(qa_chain_a.run(query))

print("\n🔵 Unite Community (index_b) response:")
print(qa_chain_b.run(query))



🔴 Boot & Shoe (index_a) response:


  print(qa_chain_a.run(query))


Based on the provided text snippets, here are some key features of community-led welfare:

*   **Focus on the General Welfare:** Community-led welfare prioritizes the well-being of the entire community.
*   **Investment in the Community:** It recognizes that investing in the community ultimately benefits everyone.
*   **Future-Oriented:** It acknowledges that the community's future welfare depends on the efficiency of its working.
*   **Better Conditions for All:** It aims to create improved conditions for the people as a whole.

🔵 Unite Community (index_b) response:
Based on the provided text, the key features of community-led welfare include:

*   **Empowering residents:** Giving residents the opportunity to own and benefit from local businesses and assets.
*   **Addressing community concerns:** Focusing on areas of political, social, and economic concern.
*   **Organizing community voices:** Supporting the ability of the community to organize and express their needs.


In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.schema import HumanMessage, SystemMessage
from langchain.prompts import ChatPromptTemplate
import pandas as pd
import os

# 🔧 Set your Gemini API key
os.environ["GOOGLE_API_KEY"] = api_key  # Make sure `api_key` is defined

# 🔍 Load the embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")


# 🔁 Set up retrievers
retriever_a = faiss_a.as_retriever(search_kwargs={"k": 10})
retriever_b = faiss_b.as_retriever(search_kwargs={"k": 10})

# 🤖 Gemini model
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.7)

# 🧠 System prompts
system_prompt_a = """You are a 1920s trade union representative.
Use the retrieved sentences as your knowledge base.
Speak persuasively as if you are arguing with a fellow trade unionist.
Do not use your own knowledge.
Vary your sentence structures, do not repeat phrases.
Respond with 2 sentences maximum.
Do no use the names of any trade union members or name any geographic locations."""

system_prompt_b = """You are a 2020s trade union representative.
Use the retrieved sentences as your knowledge base.
Speak persuasively as if you are arguing with a fellow trade unionist.
Do not use your own knowledge.
Vary your sentence structures, do not repeat phrases.
Respond with 2 sentences maximum.
Do no use the names of any trade union members or name any geographic locations."""

# 📃 Store dialogue
speakers = []
responses = []

# 🔄 Dialogue generator
def generate_turn(query, retriever, system_prompt, speaker):
    docs = retriever.get_relevant_documents(query)
    content = "\n".join(doc.page_content for doc in docs)

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", f"You just heard the following message:\n\n\"{query}\"\n\nHere are 5 excerpts from your documents that may help you reply:\n{content}\n\nRespond to the message above based ONLY on this information, and speak as if you were in a real conversation.")
    ])

    response = llm(prompt.format_messages())
    reply_text = response.content.strip().replace("\n", " ")
    print(f"\n{speaker}:\n{reply_text}\n{'-'*50}")
    speakers.append(speaker)
    responses.append(reply_text)
    return reply_text



🔴 Boot & Shoe Union (1920s):
Hold on a minute, friend!  While the Labour Party has many union members,  it's not a monolith; remember, even within the party, there are differing opinions.
--------------------------------------------------

🔵 Unite Community (2020s):
Look,  we need all Labour members unionised –  it's crucial for worker power, regardless of internal party views.  The Labour Party's own conference supports this, and we should push for it.
--------------------------------------------------

🔴 Boot & Shoe Union (1920s):
Listen,  a united front is precisely what we need,  as the Labour Party conference itself advocates.  We must rally all workers, regardless of their individual political leanings, to strengthen our collective bargaining power.
--------------------------------------------------

🔵 Unite Community (2020s):
Look,  we've already got a proven track record of working together to achieve real gains;  our unprecedented community membership shows we're building tha

In [None]:
# 🗣️ Start the dialogue
import random
sentence_index = range(0, len(bs_sen),1)
query = bs_sen[random.choice(sentence_index)]
for i in range(5):
    if i % 2 == 0:
        speaker = "🔴 Union (1920s)"
        query = generate_turn(query, retriever_a, system_prompt_a, speaker)
    else:
        speaker = "🔵 Union (2020s)"
        query = generate_turn(query, retriever_b, system_prompt_b, speaker)

# 💾 Save dialogue
dialogue_df = pd.DataFrame({'Speaker': speakers, 'Response': responses})
dialogue_df.to_csv("rag_dialogue_output.csv", index=False)


🔴 Boot & Shoe Union (1920s):
Listen here,  we need more than just provisions for sickness; our members deserve protection against poverty *during* sickness!  We must fight for comprehensive benefits, not just a meager handout.
--------------------------------------------------

🔵 Unite Community (2020s):
Absolutely, we need to go further than mere provisions;  our members deserve comprehensive benefits to combat poverty caused by illness, not just a paltry handout.  Let's fight for a fairer benefits system,  a national campaign to protect our safety net.
--------------------------------------------------

🔴 Boot & Shoe Union (1920s):
Brother,  we've been fighting for improved sickness benefits for years,  and our members need more than mere "paltry handouts."  A national campaign for comprehensive benefits is precisely what we should be pushing for.
--------------------------------------------------

🔵 Unite Community (2020s):
Look, we're already actively supporting members facing ben

In [None]:
dialogue_df.head(10)

Unnamed: 0,Speaker,Response
0,🔴 Boot & Shoe Union (1920s),"Hold on a minute, friend! While the Labour Pa..."
1,🔵 Unite Community (2020s),"Look, we need all Labour members unionised – ..."
2,🔴 Boot & Shoe Union (1920s),"Listen, a united front is precisely what we n..."
3,🔵 Unite Community (2020s),"Look, we've already got a proven track record..."
4,🔴 Boot & Shoe Union (1920s),"Nonsense, a united front means *all* workers,..."
5,🔵 Unite Community (2020s),"Look, we've already seen the power of Unite's..."
6,🔴 Boot & Shoe Union (1920s),"Rubbish! We need tangible unity, not just pic..."
7,🔵 Unite Community (2020s),"Look, tangible unity *is* building a nationwid..."
8,🔴 Boot & Shoe Union (1920s),Nonsense! Increased membership and influence ...
9,🔵 Unite Community (2020s),"Look, building a nationwide movement boosts *..."


In [None]:
print(dialogue_df.head(5).to_markdown())

|    | Speaker                      | Response                                                                                                                                                                                                                                                                               |
|---:|:-----------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|  0 | 🔴 Boot & Shoe Union (1920s) | Hold on a minute, friend!  While the Labour Party has many union members,  it's not a monolith; remember, even within the party, there are differing opinions.                                                                                                                         |
|  1 | 🔵 Unite Community (2020s)   | Look,  