In [3]:
#!pip install ollama nltk rouge-score

In [4]:
from ollama import Client
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer

# Initialize Ollama client
ollama = Client(host='http://localhost:11434')

In [5]:
# Example task â€” summarize log errors (realistic for devs/engineers)
context = """
2025-10-15 12:02:31 ERROR ConnectionTimeout: Database connection failed after 30s.
2025-10-15 12:02:32 INFO Retrying connection...
2025-10-15 12:02:35 ERROR AuthenticationFailed: Invalid DB credentials.
2025-10-15 12:02:40 INFO Shutting down pipeline gracefully.
"""
reference_summary = "Database connection failed due to timeout and authentication issues."

# Helper: Generate response
def ask_model(prompt):
    res = ollama.chat(model='mistral', messages=[{'role': 'user', 'content': prompt}])
    return res['message']['content'].strip()

In [6]:
# -------------------- Prompt Variants ----------------------
#Zero-shot
zero_shot_prompt = f"Summarize the following server log in one sentence:\n{context}"
zero_shot_output = ask_model(zero_shot_prompt)

In [7]:
zero_shot_output

'The server log indicates that the database connection failed due to incorrect credentials, triggering a retry attempt, and the pipeline is shutting down gracefully as a result.'

In [None]:
#One-shot
one_shot_prompt = f"""
Example:
Log: "2025-10-14 08:01:10 ERROR APIError: Token expired."
Summary: API request failed due to expired token.

Now summarize the following:
{context}
"""
one_shot_output = ask_model(one_shot_prompt)

In [None]:
one_shot_output

In [None]:
#Few-shot
few_shot_prompt = f"""
Examples:
Log: "2025-10-14 08:01:10 ERROR APIError: Token expired."
Summary: API request failed due to expired token.
---
Log: "2025-10-12 22:10:05 ERROR DiskFull: Cannot write to /tmp."
Summary: Disk was full preventing file writes.
---
Now summarize the following:
{context}
"""
few_shot_output = ask_model(few_shot_prompt)

In [None]:
few_shot_output

In [None]:
# Chain-of-Thought (CoT)
cot_prompt = f"""
Let's reason step by step.
1. Identify key errors and their causes.
2. Summarize them concisely.

Logs:
{context}
"""
cot_output = ask_model(cot_prompt)

In [None]:
print(cot_output)

In [None]:
#Self-consistency: multiple reasoning samples averaged
import random

sc_outputs = []
for i in range(3):  # 3 reasoning paths
    sc_prompt = cot_prompt + f"\nReasoning attempt {i+1}:"
    sc_outputs.append(ask_model(sc_prompt))

# choose most frequent / best overlap summary (simple heuristic)
from collections import Counter
final_sc_output = Counter(sc_outputs).most_common(1)[0][0]

In [None]:
print(final_sc_output)

In [None]:
Counter(sc_outputs).most_common()[0][0]

In [None]:
print(sc_outputs)

In [None]:
sc_outputs

In [None]:
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from rouge_score import rouge_scorer

def evaluate(prediction, reference):
    # Tokenize
    reference_tokens = [reference.split()]
    prediction_tokens = prediction.split()
    
    # BLEU with smoother to prevent 0s
    try:
        bleu = corpus_bleu([reference_tokens], [prediction_tokens],
                           smoothing_function=SmoothingFunction().method1)
    except TypeError:
        bleu = 0.0  # fallback if NLTK bug appears

    # ROUGE
    rouge = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    rouge_scores = rouge.score(reference, prediction)
    rouge1 = rouge_scores['rouge1'].fmeasure
    rougeL = rouge_scores['rougeL'].fmeasure

    return bleu, rouge1, rougeL

In [None]:
# -------------------- Evaluate ------------------------------
print("=== Evaluation Results ===\n")
for label, output in [
    ("Zero-shot", zero_shot_output),
    ("One-shot", one_shot_output),
    ("Few-shot", few_shot_output),
    ("Chain-of-Thought", cot_output),
    ("Self-Consistency", final_sc_output)
]:
    bleu, rouge1, rougeL = evaluate(output, reference_summary)
    print(f"{label}:\nOutput: {output}\nBLEU={bleu:.3f}, ROUGE-1={rouge1:.3f}, ROUGE-L={rougeL:.3f}\n")

In [None]:
reference_comment = "Function to connect to database and return connection object."

prompt = """
# Write a Python function to connect to a database.
# Generate a one-line docstring describing its purpose clearly.

def connect_db():
    pass
"""

pred = ask_model(prompt)
bleu, rouge1, rougeL = evaluate(pred, reference_comment)
print("Model output:", pred)
print(f"BLEU={bleu:.3f}, ROUGE-1={rouge1:.3f}")

In [None]:
reference_sentence = "ETL job completed successfully for customer data pipeline."

prompt = """
Rephrase this ETL status message in the same wording style:
"ETL job completed successfully for customer data pipeline."
"""

pred = ask_model(prompt)
bleu, rouge1, rougeL = evaluate(pred, reference_sentence)
print(pred)
print(f"BLEU={bleu:.3f}, ROUGE-1={rouge1:.3f}")

In [None]:
#pip install langchain langchain-community chromadb sentence-transformers streamlit

In [2]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("all-MiniLM-L6-v2")
print("Model loaded OK")

Model loaded OK


In [None]:
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader

loader = TextLoader("reports/fin_report.txt")
docs = loader.load()

embedding = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# In-memory mode â€“ no persistence directory
vectordb = Chroma.from_documents(docs, embedding, persist_directory=None)
print("In-memory vector store created successfully!")

In [2]:
# rag_prepare.py
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader

# Load
loader = TextLoader("reports/fin_report.txt")
docs = loader.load()
'''
# Embed & store
embedding = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectordb = Chroma.from_documents(docs, embedding, persist_directory="db")
vectordb.persist()

print("Vector store ready")'''

from langchain_community.vectorstores import FAISS

embedding = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectordb = FAISS.from_documents(docs, embedding)
print("FAISS index created")

  embedding = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")


FAISS index created


In [3]:
vectordb.save_local("faiss_index")
print("FAISS vector store created and saved to ./faiss_index")

FAISS vector store created and saved to ./faiss_index


In [5]:
import pickle
d = {'a':10,'b':'del'}

In [9]:
ser

b'\x80\x04\x95\x15\x00\x00\x00\x00\x00\x00\x00}\x94(\x8c\x01a\x94K\n\x8c\x01b\x94\x8c\x03del\x94u.'

In [7]:
ser=pickle.dumps(d)

In [8]:
pickle.loads(ser)

{'a': 10, 'b': 'del'}

In [None]:
FAISS.load_local

In [5]:
# rag_query.py
from langchain.chains import RetrievalQA
from langchain_community.llms import Ollama
from langchain_community.vectorstores import Chroma

llm = Ollama(model="mistral")
#vectordb = Chroma(persist_directory="db")
vectordb = FAISS.load_local("faiss_index", embedding, allow_dangerous_deserialization=True)

retriever = vectordb.as_retriever(search_kwargs={"k": 3})

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

query = "Summarize the risk section of the report"
result = qa(query)
print(result["result"])

  llm = Ollama(model="mistral")
  result = qa(query)


 The risk section of the Q3 FY2025 report identifies four major risks that the company is currently facing:

1. Market Volatility: The derivatives desk may be affected by fluctuations in global interest rates and commodity prices.
2. Credit Risk: There's an increase in counterparty risk exposure within the corporate lending portfolio.
3. Operational Risk: The system migration to cloud infrastructure resulted in intermittent downtime in two regional data centers.
4. Regulatory Risk: Additional compliance reviews are being conducted by the Monetary Authority regarding anti-money laundering (AML) practices.


In [None]:
#pip install langchain_core.memory

In [None]:
#!python -m pip install --upgrade pip

In [None]:
# Ask a question
query = "Summarize the major risk factors mentioned in the report."
result = qa(query)

print("Query:", query)
print("Answer:\n", result["result"])

In [None]:
# ========================================
# Step 3 â€“ Interactive Streamlit App
# ========================================
with open("rag_faiss_app.py", "w", encoding="utf-8") as f:
    f.write("""
import streamlit as st
from langchain_community.llms import Ollama
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.chains import RetrievalQA

st.title("Financial Report Q&A (Ollama + FAISS + LangChain)")
query = st.text_input("Ask a question about the financial report:")

if st.button("Get Answer") and query:
    with st.spinner("Analyzing..."):
        embedding = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
        vectordb = FAISS.load_local("faiss_index", embedding, allow_dangerous_deserialization=True)
        retriever = vectordb.as_retriever(search_kwargs={"k": 3})
        llm = Ollama(model="mistral")
        qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
        answer = qa.run(query)
        st.write("### Answer:")
        st.write(answer)
""")

In [None]:
pwd

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import SentenceTransformerEmbeddings
import os

# Create embedding model
embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Load multiple text files
folder_path = "data"
docs = []
for file in os.listdir(folder_path):
    if file.endswith(".txt"):
        loader = TextLoader(os.path.join(folder_path, file))
        docs.extend(loader.load())

# Create FAISS index
vectordb = FAISS.from_documents(docs, embedding_model)
vectordb.save_local("faiss_index")
print(f"âœ… FAISS index created with {len(docs)} documents.")

In [None]:
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA

embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectordb = FAISS.load_local("faiss_index", embedding_model, allow_dangerous_deserialization=True)

retriever = vectordb.as_retriever(search_kwargs={"k": 3})
llm = Ollama(model="mistral")

qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")

query = "What are the main risk factors discussed in the financial and credit documents?"
response = qa_chain.run(query)
print("ðŸ§© Response:\n", response)

In [None]:
results = vectordb.similarity_search(query, k=3)
for i, doc in enumerate(results):
    print(f"\nðŸ“˜ Document {i+1} source: {doc.metadata.get('source', 'unknown')}")
    print(doc.page_content[:250], "...")

In [None]:
queries = [
    "Summarize the market outlook from the reports.",
    "List all credit risk factors mentioned.",
    "How do financial forecasts differ across reports?"
]

for q in queries:
    print(f"\nðŸ’¬ Query: {q}")
    print("ðŸ§  Answer:", qa_chain.run(q))

In [None]:
new_loader = TextLoader("data/new_policy_doc.txt")
new_docs = new_loader.load()
new_vdb = FAISS.from_documents(new_docs, embedding_model)

# Merge with existing FAISS DB
vectordb.merge_from(new_vdb)
vectordb.save_local("faiss_index")