In [2]:
import pandas as pd
import numpy as np
import faiss
import pickle
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Read the raw loan-application table from the CSV next to the notebook
df = pd.read_csv(filepath_or_buffer="Training Dataset.csv")

In [4]:
# Keep only rows with no missing values, then renumber the index from zero
df = (
    df
    .dropna(axis=0, how="any")
    .reset_index(drop=True)
)
print(f"✅ Loaded {df.shape[0]} valid rows.")

✅ Loaded 480 valid rows.


In [5]:
def row_to_text(row):
    """Render one applicant record as a single English sentence.

    Args:
        row: Any mapping-like object indexable by the keys
            'Credit_History', 'ApplicantIncome', 'LoanAmount',
            'Education' and 'Loan_Status'.

    Returns:
        A sentence embedding those five values, formatted with their
        default string representation.
    """
    credit_history = row['Credit_History']
    income = row['ApplicantIncome']
    loan_amount = row['LoanAmount']
    education = row['Education']
    loan_status = row['Loan_Status']

    sentence = f"Applicant has a credit history of {credit_history}, "
    sentence += f"income of {income}, loan amount {loan_amount}, "
    sentence += f"education level is {education}, and loan status is {loan_status}."
    return sentence

# One sentence per applicant, in DataFrame row order
texts = list(map(row_to_text, (record for _label, record in df.iterrows())))

In [6]:
# Encode every applicant sentence with the MiniLM sentence-embedding model
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embed_model.encode(sentences=texts)

# Flat L2 index sized to the embedding width (last axis of the matrix)
index = faiss.IndexFlatL2(embeddings.shape[-1])
index.add(np.array(embeddings))

In [7]:
# Persist the sentences first, then the FAISS index that was built from them
with open("texts.pkl", "wb") as texts_file:
    pickle.dump(texts, texts_file)

faiss.write_index(index, "faiss.index")
print("✅ FAISS index built with SentenceTransformer embeddings.")

✅ FAISS index built with SentenceTransformer embeddings.


In [8]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the seq2seq generator, move it to the chosen device, then load its tokenizer
gen_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
gen_model = gen_model.to(device)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [9]:
def answer_query(query, top_k=5):
    """Answer a question using retrieved applicant sentences as context.

    The query is embedded with ``embed_model``, its nearest neighbours are
    looked up in the FAISS ``index``, the matching entries of ``texts`` are
    joined into a context block, and ``gen_model`` generates an answer from
    the resulting prompt.

    Args:
        query: Question text.
        top_k: Number of neighbours requested from the index.

    Returns:
        The decoded generation, with special tokens stripped, as a string.
    """
    # Embed the query; FAISS searches float32 vectors
    query_vec = np.asarray(embed_model.encode([query]), dtype="float32")
    _, indices = index.search(query_vec, top_k)

    # FAISS pads the id array with -1 when it finds fewer than top_k
    # neighbours (e.g. top_k larger than the index). Without this filter
    # texts[-1] would silently inject the last row into the context.
    context = "\n".join(texts[i] for i in indices[0] if i >= 0)

    # Prepare prompt
    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"

    # Tokenize and generate
    # NOTE(review): the question sits at the end of the prompt, so if the
    # tokenizer truncates from the right a very large top_k could push it out
    # of the 512-token window — confirm before raising top_k substantially.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(device)
    outputs = gen_model.generate(**inputs, max_new_tokens=100)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return answer

In [10]:
# Demo questions that are fed through the pipeline
sample_questions = [
    "What factors lead to loan rejection?",
    "Does high income always mean loan approval?",
    "What happens if credit history is 0?",
    "Is graduate education preferred for loans?",
]
print("Ask your Loan Approval-related questions below:")

Ask your Loan Approval-related questions below:


In [11]:
# Print each sample question followed by the generated answer
for question in sample_questions:
    print(f"\n❓ {question}")
    reply = answer_query(question)
    print("💬", reply)


❓ What factors lead to loan rejection?
💬 education level is Not Graduate, and loan status is N

❓ Does high income always mean loan approval?
💬 high income always mean loan approval

❓ What happens if credit history is 0?
💬 No education level is Not Graduate

❓ Is graduate education preferred for loans?
💬 Not Graduate
