In [None]:
# Install the runtime dependencies (run once per environment).
pip install kagglehub[pandas-datasets] langchain faiss-cpu sentence-transformers transformers
pip install -U langchain-community
# `langchain_huggingface` (imported further down for HuggingFaceEmbeddings) ships in its
# own distribution and is not pulled in by `langchain` or `langchain-community`.
pip install -U langchain-huggingface
# `accelerate` is required by transformers when a model is loaded with `device_map="auto"`.
pip install -U sentence-transformers transformers torch sympy accelerate
pip install kagglehub
# Wiki data
import kagglehub
from kagglehub import KaggleDatasetAdapter
# Name of the CSV file to read from inside the Kaggle dataset.
file_path = "test.csv"

# Fetch that file through the pandas adapter so the result is a DataFrame.
df = kagglehub.load_dataset(
    KaggleDatasetAdapter.PANDAS,
    "thedevastator/wikipedia-biographies-text-generation-dataset",
    file_path,
)

# Keep only the first 50 rows for the rest of the pipeline.
df50 = df.iloc[:50]
# Convert the rows to LangChain documents
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

# One Document per row: the biography text becomes the page content, and the
# row's "name" is kept as metadata (falling back to the row index as a string
# when the row has no "name" entry).
docs = [
    Document(
        page_content=record["input_text"],
        metadata={"name": record.get("name", str(idx))},
    )
    for idx, record in df50.iterrows()
]

# Break the documents into chunks of size 500 that overlap by 100, so each
# piece is small enough to embed and retrieve on its own.
splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=500)
split_docs = splitter.split_documents(docs)
# Vector DB with embeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Sentence-transformers model used to embed every chunk (and, later, each query).
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Embed all chunks and index them in FAISS.
# NOTE: this step takes a lot of time.
vectorstore = FAISS.from_documents(documents=split_docs, embedding=embedding_model)

# Retriever configured to return 3 chunks per query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))
# Load the LLM from Hugging Face (runs locally)
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline

# Hugging Face model id; any other instruct model can be substituted here.
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# Load the tokenizer and weights, letting the library pick dtype and device placement.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="auto",
)

# Text-generation pipeline limited to 128 newly generated tokens per call,
# wrapped so LangChain can use it as an LLM.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,
)
llm = HuggingFacePipeline(pipeline=pipe)
# Prompt
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Prompt text sent to the model. `{query}` receives the user's description and
# `{context}` receives the retrieved similar-profile text.
template = """
You are a security assistant trained to create creative passwords.
You will be given a short person description. First, you will receive a few similar profiles.
Then, you must create a strong password that combines:
- important traits from the original description
- references to similar people
- use a mix of symbols, digits, and capitalization

Original description:
"{query}"

Similar people:
{context}

Now generate a password that encodes the theme.
Only return the password, nothing else.
"""

# Bind the template to its two variables, then chain it with the local LLM.
prompt = PromptTemplate(
    template=template,
    input_variables=["query", "context"],
)
rag_chain = LLMChain(prompt=prompt, llm=llm)
# Password generation func
def generate_contextual_password(description: str) -> str:
    """Generate a password themed on a description and similar profiles.

    The description is used as the retrieval query. The page content of the
    retrieved chunks is joined with blank lines and passed to the prompt as
    ``context``, alongside the description itself as ``query``.

    NOTE(review): the result is free-form LLM output derived from the given
    description, not cryptographically random data — confirm this is
    acceptable before using it to protect anything real.

    Args:
        description: Short free-text description of a person.

    Returns:
        The text produced by the chain, with surrounding whitespace removed.
    """
    # `invoke` is the supported entry point for retrievers and chains;
    # `get_relevant_documents` and `Chain.run` are deprecated in LangChain.
    similar_docs = retriever.invoke(description)
    context = "\n\n".join(doc.page_content for doc in similar_docs)
    result = rag_chain.invoke({"query": description, "context": context})
    # The chain returns a dict; the generated text sits under its output key.
    # Strip it because leading/trailing whitespace is never part of the password.
    return result[rag_chain.output_key].strip()
# Test
if __name__ == "__main__":
    # Smoke test: run the full retrieve-then-generate flow on one sample description.
    desc = "A female british works in computer science"
    password = generate_contextual_password(description=desc)
    print("Generated Password:", password)