In [None]:
!pip install sentence_transformers textstat spacy -q
!python -m spacy download en_core_web_md --force


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


## Curate data set

In [78]:
# Hand-curated FAQ answers to a single question: how to apply for a credit card.
# Each record is a 4-item list in the order question, answer, bank, url
# (the same order as COLS, defined where the DataFrame is built).

# UBS: numbered E-Banking steps; the "<< ... >>" lines describe screenshots.
ubs = ["""How do I apply for a Credit Card?""",
"""
On your computer
1. Log in to E-Banking

2. Navigate to Products and click the option Cards

<< Annotated Image of E-Banking Screenshot: Menu - cards. Shows where to click. >>
3. Choose the card that fulfills your needs and click Apply for it

<< Annotated Image of  of E-Banking Screenshot: Apply for card >>

Your card will be sent to you by mail.""", 'UBS', 
'https://www.ubs.com/ch/en/help/creditcard/order.html?campID=ubs_Smart_Search']


# UOB: short answer with separate paths for new and existing customers.
uob = ["""How do I apply for a Credit Card?""",
"""For new customers, you may submit your application online through our website at http://www.uob.com.sg/cards.

For existing customers, please log in to your UOB Personal Internet Banking / UOB TMRW to apply for a new card.""",
'UOB', 'https://www.uob.com.sg/personal/customer-service/credit-card.page'

]

# DBS: longer eligibility FAQ that covers debit cards as well as credit cards.
dbs = ["""How do I apply for a Credit Card?""", 
"""Applying for a DBS Debit Card or Credit Card
Am I eligible for a debit card?

You must be at least 16 years old and have a POSB Savings Account, DBS Savings Plus Account, DBS Autosave Account or DBS Current Account. To open one of these accounts, click here.

If you are a foreigner, please apply for a debit card at any of our branches. You will need to present your passport and an employment pass that is valid for at least 6 months.


Am I eligible for a credit card?

If you're Singaporean or have permanent residency and you're over 21 years of age, you can apply for a DBS Credit Card. You'll need to earn at least S$30,000 a year. If you're a foreigner with a valid employment pass, you'll need to earn at least S$45,000 a year unless otherwise stated. For DBS Vantage Card, you'll need to earn S$120,000 annually.


What do I need to apply?

The type of document you’ll need varies. Click here for detailed list.
""",
"DBS", 'https://www.dbs.com.sg/personal/cards/cards-faqs.page']

# Same question, but the answer text is about Paris and there is no URL —
# presumably a deliberately off-topic control for the metrics (confirm).
bad_bank =  ["How do I apply for a Credit Card?",
             "The capital of France is Paris. Paris is the capital of France. Paris is known for its art and culture.",
             "bad_bank",
             None
             ]


# pandas is used below but is not imported by any of the cells above, so a
# fresh kernel would fail with NameError without this import.
import pandas as pd

# Column labels, in the same order as the items of each record list
COLS = ['question', 'answer', 'bank', 'url']
# One row per bank record. A single constructor call replaces building a
# one-row frame per record (transpose) and concatenating them.
df = pd.DataFrame([ubs, dbs, uob, bad_bank], columns=COLS)
df

Unnamed: 0,question,answer,bank,url
0,How do I apply for a Credit Card?,\nOn your computer\n1. Log in to E-Banking\n\n...,UBS,https://www.ubs.com/ch/en/help/creditcard/orde...
1,How do I apply for a Credit Card?,Applying for a DBS Debit Card or Credit Card\n...,DBS,https://www.dbs.com.sg/personal/cards/cards-fa...
2,How do I apply for a Credit Card?,"For new customers, you may submit your applica...",UOB,https://www.uob.com.sg/personal/customer-servi...
3,How do I apply for a Credit Card?,The capital of France is Paris. Paris is the c...,bad_bank,


# Q&A Evaluative Metrics 

## Relevance

In [79]:

from sentence_transformers import SentenceTransformer, util


# Sentence-embedding model used by compute_relevance; loaded once when this cell runs.
# NOTE(review): the "-dot-v1" suffix suggests this checkpoint is intended for
# dot-product scoring, while compute_relevance uses cosine similarity — confirm
# the pairing against the model card (a "-cos-v1" variant may be the better fit).
MODEL_NAME = "multi-qa-mpnet-base-dot-v1"
model = SentenceTransformer(MODEL_NAME)

def compute_relevance(query: str, faq_text: str) -> float:
    """
    Score how closely an FAQ text matches a query.

    Both strings are embedded with the module-level ``model`` (query first,
    then the FAQ text) and the cosine similarity of the two embeddings is
    returned as a plain Python number.
    """
    embedded_query, embedded_faq = (
        model.encode(text, convert_to_tensor=True) for text in (query, faq_text)
    )
    # cos_sim yields a single-element result; .item() unwraps it to a scalar
    return util.cos_sim(embedded_query, embedded_faq).item()

# Smoke test: score one hand-written query/answer pair and print the result
if __name__ == "__main__":
    query, faq_text = (
        "How to apply for a credit card?",
        "You can apply for a credit card online by visiting our website and filling out the application form.",
    )

    relevance_score = compute_relevance(query, faq_text)
    print(f"Relevance Score: {relevance_score:.4f}")

Relevance Score: 0.8146


## Clarity

In [80]:

import textstat

# Clarity score based on the Flesch Reading Ease readability formula
def compute_clarity(text):
    """
    Rate how easy ``text`` is to read on a 0-1 scale (higher = easier).

    The score is textstat's Flesch Reading Ease value divided by 100. The raw
    value is not confined to 0-100 (hard text can score below 0, very simple
    text above 100), so the result is clamped to [0, 1]. This keeps it on the
    same scale as the other metrics it is combined with.

    Args:
        text: The text to rate.

    Returns:
        A float in [0, 1].
    """
    reading_ease = textstat.flesch_reading_ease(text)
    # Clamp: dividing by 100 alone does not guarantee a 0-1 result
    return min(1.0, max(0.0, reading_ease / 100))


## Conciseness

In [81]:
import spacy
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load NLP models once when this cell runs; the functions below read them as globals.
# spaCy pipeline: used below for sentence splitting, POS tags and stop-word flags
nlp = spacy.load("en_core_web_md")
# Sentence-embedding model used by calculate_redundancy
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")  # Lightweight but powerful

def calculate_redundancy(text):
    """
    Measure how much the sentences of ``text`` repeat one another.

    The text is split into sentences with spaCy, each sentence is embedded
    with SBERT, and the mean cosine similarity over every ordered pair of
    distinct sentences is returned (higher = more redundant). A text with
    fewer than two non-empty sentences scores 0.0.
    """
    stripped = (sent.text.strip() for sent in nlp(text).sents)
    sentences = [sentence for sentence in stripped if sentence]
    n_sentences = len(sentences)

    # A lone sentence has nothing to be redundant with
    if n_sentences < 2:
        return 0.0

    sentence_vectors = sbert_model.encode(sentences, convert_to_tensor=True)
    pairwise = cosine_similarity(sentence_vectors.cpu().numpy())

    # Average the off-diagonal entries only; the diagonal compares each
    # sentence with itself and would inflate the score.
    off_diagonal = ~np.eye(n_sentences, dtype=bool)
    return np.mean(pairwise[off_diagonal])

def calculate_semantic_density(text):
    """
    Weighted share of content words among all tokens of ``text``.

    Non-stop-word nouns, verbs, adjectives and adverbs each contribute a
    per-POS weight (verbs highest, adverbs lowest); the total is divided by
    the number of tokens spaCy produced. Empty input gives 0.0.
    """
    pos_weights = {"NOUN": 1.2, "VERB": 1.5, "ADJ": 1.0, "ADV": 0.8}
    tokens = nlp(text)
    token_count = len(tokens)

    # Nothing to measure; also avoids dividing by zero
    if token_count == 0:
        return 0.0

    weighted_total = sum(
        pos_weights[tok.pos_]
        for tok in tokens
        if not tok.is_stop and tok.pos_ in pos_weights
    )
    return weighted_total / token_count

def calculate_conciseness_score(text, weights: tuple = (0.3, 0.5, 0.2)) -> float:
    """
    Blend three signals into one conciseness score for ``text``.

    ``weights`` holds, in order, the weight of: non-redundancy
    (1 - calculate_redundancy), semantic density, and a brevity term that
    falls linearly from 1 to 0 as the whitespace-separated word count goes
    from 0 to 100 and stays at 0 beyond that.
    """
    redundancy_weight, density_weight, length_weight = weights

    # Redundancy is "lower is better", so flip it before weighting
    non_redundancy = 1 - calculate_redundancy(text)
    density = calculate_semantic_density(text)

    word_count = len(text.split())
    brevity = max(0, 1 - word_count / 100)

    return (
        redundancy_weight * non_redundancy
        + density_weight * density
        + length_weight * brevity
    )


In [82]:
import spacy
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load spaCy model.
# NOTE(review): this cell loads the same pipeline as the previous cell and
# re-defines that cell's three conciseness functions; the definitions here run
# later, so they are the ones the scoring cell ends up calling.
nlp = spacy.load("en_core_web_md")

# Loaded SBERT models keyed by model name, so repeated calls share one instance
_SBERT_MODELS = {}


def _get_sbert_model(model_name="all-MiniLM-L6-v2"):
    """Return the SBERT model for ``model_name``, loading it on first use only."""
    if model_name not in _SBERT_MODELS:
        _SBERT_MODELS[model_name] = SentenceTransformer(model_name)
    return _SBERT_MODELS[model_name]


def calculate_redundancy(text):
    """
    Compute redundancy as the mean pairwise cosine similarity of sentence embeddings.

    The text is split into sentences with spaCy and each sentence is embedded
    with SBERT. The result is the average similarity over every ordered pair
    of distinct sentences (higher = more redundant).

    The SBERT model is loaded lazily on the first call that needs it and then
    reused, instead of being constructed again on every call.

    Args:
        text: The text to analyse.

    Returns:
        A Python float; 0.0 when the text has fewer than two non-empty sentences.
    """
    sentences = [sent.text.strip() for sent in nlp(text).sents if sent.text.strip()]
    if len(sentences) < 2:
        return 0.0

    embeddings = _get_sbert_model().encode(sentences, convert_to_tensor=True).cpu().numpy()
    similarity_matrix = cosine_similarity(embeddings)

    # Skip the diagonal: each sentence compared with itself would inflate the mean
    off_diagonal = ~np.eye(len(sentences), dtype=bool)
    # float(...) so both return paths give the same plain-float type
    return float(np.mean(similarity_matrix[off_diagonal]))

def calculate_semantic_density(text):
    """
    Weighted share of content words among the tokens of ``text``.

    Every non-stop-word token adds the weight of its part of speech (nouns,
    verbs, adjectives and adverbs carry weight, anything else adds 0). The
    total is divided by the token count. An empty document gives 0.0.
    """
    pos_weights = {"NOUN": 1.2, "VERB": 1.5, "ADJ": 1.0, "ADV": 0.8}
    tokens = nlp(text)

    # Empty document: nothing to weigh, and no division by zero
    if not tokens:
        return 0.0

    non_stop_tokens = (tok for tok in tokens if not tok.is_stop)
    weighted_total = sum(pos_weights.get(tok.pos_, 0) for tok in non_stop_tokens)
    return weighted_total / len(tokens)

def calculate_conciseness_score(text, weights=(0.3, 0.5, 0.2)):
    """
    Weighted blend of non-redundancy, semantic density and brevity.

    ``weights`` gives the three weights in that order. Brevity is
    1 - words/100, floored at 0, where words is the whitespace-separated
    word count of ``text``.
    """
    redundancy_weight, density_weight, length_weight = weights

    words = text.split()
    brevity = max(0, 1 - len(words) / 100)

    # Redundancy counts against conciseness, so it enters as (1 - redundancy)
    uniqueness = 1 - calculate_redundancy(text)
    density = calculate_semantic_density(text)

    weighted_terms = (
        redundancy_weight * uniqueness,
        density_weight * density,
        length_weight * brevity,
    )
    return weighted_terms[0] + weighted_terms[1] + weighted_terms[2]


In [83]:
# Weighted sum of the three per-answer metrics
def compute_combined_score(relevance, clarity, conciseness, weights):
    """
    Combine relevance, clarity and conciseness into a single score.

    ``weights`` is a mapping with the keys "relevance", "clarity" and
    "conciseness"; each metric is multiplied by its weight and the three
    products are added together.
    """
    weighted_relevance = weights["relevance"] * relevance
    weighted_clarity = weights["clarity"] * clarity
    weighted_conciseness = weights["conciseness"] * conciseness
    return weighted_relevance + weighted_clarity + weighted_conciseness

# Relative importance of each criterion when blending the three scores
weights = {"relevance": 0.6, "clarity": 0.3, "conciseness": 0.1}

# Query that every FAQ entry is scored against
user_query = "How do I apply for a Credit Card?"

# Relevance is measured on the question and answer joined by a space.
# NOTE(review): every curated row uses this exact query as its question, so the
# question part adds the same text to each row — confirm that is intended.
df["relevance_score"] = [
    compute_relevance(user_query, f"{question} {answer}")
    for question, answer in zip(df["question"], df["answer"])
]
# Clarity and conciseness look at the answer text only
df["clarity_score"] = df["answer"].apply(compute_clarity)
df["conciseness_score"] = df["answer"].apply(calculate_conciseness_score)
df["combined_score"] = [
    compute_combined_score(relevance, clarity, conciseness, weights)
    for relevance, clarity, conciseness in zip(
        df["relevance_score"], df["clarity_score"], df["conciseness_score"]
    )
]

# Best combined score first; the ranking uses the unrounded values
df = df.sort_values("combined_score", ascending=False).reset_index(drop=True)
# Round every score column to 2 decimals for display
score_cols = df.filter(regex='score').columns
df[score_cols] = df[score_cols].round(2)
df.drop(columns=['url'])

Unnamed: 0,question,answer,bank,relevance_score,clarity_score,conciseness_score,combined_score
0,How do I apply for a Credit Card?,\nOn your computer\n1. Log in to E-Banking\n\n...,UBS,0.79,0.76,0.47,0.75
1,How do I apply for a Credit Card?,Applying for a DBS Debit Card or Credit Card\n...,DBS,0.57,0.75,0.38,0.61
2,How do I apply for a Credit Card?,"For new customers, you may submit your applica...",UOB,0.69,0.46,0.42,0.59
3,How do I apply for a Credit Card?,The capital of France is Paris. Paris is the c...,bad_bank,0.43,0.82,0.36,0.54
