In [None]:
import pandas as pd
# Load the raw schemes table into `df`.
# NOTE(review): absolute "/content/" path -- presumably the Colab working directory; adjust when running elsewhere.
df=pd.read_csv("/content/ConvoProject_CustomMadeDataset.csv")

In [None]:
# Quick look at the raw data: first rows, column labels, and (rows, columns).
print(df.head())
print(df.columns)
print(df.shape)

                                        Scheme name      Category  \
0  PM-JANMAN (Pradhan Mantri Janjati Adivasi Nyay...        Rural   
1           Pradhan Mantri Fasal Bima Yojana (PMFBY)  Agriculture   
2                   PM KISAN SAMMAN NIDHI (PM-KISAN)  Agriculture   
3              Agriculture Infrastructure Fund (AIF)  Agriculture   
4                                      AgriSURE Fund  Agriculture   

                                         description Unnamed: 3  
0  PM-JANMAN (Pradhan Mantri Janjati Adivasi Nyay...        NaN  
1  Third largest insurance scheme globally that s...        NaN  
2  A Central Sector scheme providing income suppo...        NaN  
3  A medium-long term debt financing facility for...        NaN  
4  A Category-II Alternative Investment Fund prov...        NaN  
Index(['Scheme name ', 'Category', 'description', 'Unnamed: 3'], dtype='object')
(126, 4)


In [None]:
# Normalise column labels to snake_case: trim stray whitespace, lower-case,
# then turn inner spaces into underscores (e.g. 'Scheme name ' -> 'scheme_name').
df.columns=df.columns.str.strip().str.lower().str.replace(" ", "_")

print(df.columns)


Index(['scheme_name', 'category', 'description', 'unnamed:_3'], dtype='object')


In [None]:
import re

def clean_text(text):
    """Normalise a free-text cell for embedding.

    Steps: lower-case, drop numeric citation markers such as ``[12]``,
    replace every run of non-ASCII characters with a space, then collapse all
    whitespace (including newlines and tabs) to single spaces and trim.

    Args:
        text: The raw cell value. Non-string values are converted with ``str``.

    Returns:
        The cleaned string. Missing values (``None`` or float NaN) yield ``""``
        instead of the literal words "none"/"nan".
    """
    # A missing cell is None or float NaN (NaN is the only float that differs
    # from itself); str() would otherwise turn it into searchable junk text.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    text=str(text).lower()
    text=re.sub(r'\[[0-9]+\]', '', text)
    # Strip non-ASCII BEFORE collapsing whitespace; doing it afterwards leaves
    # runs of spaces behind (e.g. "a <rupee sign> b" -> "a   b").
    text=re.sub(r'[^\x00-\x7F]+', ' ', text)
    # \s+ already covers \n and \t, so no separate newline/tab pass is needed.
    text=re.sub(r'\s+', ' ', text)
    return text.strip()

# Descriptions are fully normalised with clean_text; scheme names keep their
# original casing and only have surrounding whitespace trimmed.
df["description"]=df["description"].apply(clean_text)
df["scheme_name"]=df["scheme_name"].str.strip()


In [None]:
# Sanity check after cleaning. df.sample(5) is called without random_state,
# so the rows shown differ from run to run.
print(df.shape)
df.sample(5)


(126, 4)


Unnamed: 0,scheme_name,category,description,unnamed:_3
104,Rashtriya Gram Swaraj Abhiyan(RGSA ),Rural,rashtriya gram swaraj abhiyan(rgsa national vi...,
28,Swayam Prabha,Education,swayam prabha cs moe 2017 education a group of...,
106,Lakhpati Didi Scheme,Rural,empowers rural women in self-help groups to ea...,
47,National Pension System 2004,Finance,national pension system mof 2004 pension con...,
9,Krishonnati Yojana CSS MoAFW 2017 Agriculture ...,Agriculture,krishonnati yojana css moafw 2017 agriculture ...,


In [None]:
# Lower-case the text columns used for retrieval and display.
df["description"]=df["description"].str.lower()
# Strip before lower-casing: some raw category cells carry trailing whitespace
# (e.g. 'Rural '), which would otherwise survive as a distinct 'rural ' label.
df["category"]=df["category"].str.strip().str.lower()
df.head()

Unnamed: 0,scheme_name,category,description,unnamed:_3
0,PM-JANMAN (Pradhan Mantri Janjati Adivasi Nyay...,rural,pm-janman (pradhan mantri janjati adivasi nyay...,
1,Pradhan Mantri Fasal Bima Yojana (PMFBY),agriculture,third largest insurance scheme globally that s...,
2,PM KISAN SAMMAN NIDHI (PM-KISAN),agriculture,a central sector scheme providing income suppo...,
3,Agriculture Infrastructure Fund (AIF),agriculture,a medium-long term debt financing facility for...,
4,AgriSURE Fund,agriculture,a category-ii alternative investment fund prov...,


In [None]:
# Drop the empty trailing column picked up from the CSV. errors="ignore" keeps
# this cell re-runnable: without it a second execution raises KeyError because
# the column is already gone.
df=df.drop(columns=["unnamed:_3"], errors="ignore")


In [None]:
# Persist the cleaned table for the indexing step; index=False leaves the row index out of the file.
df.to_csv("Final_Govt_Schemes_Dataset.csv", index=False)


In [None]:
!pip install -q --no-cache-dir faiss-cpu sentence-transformers pandas

import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pickle
import os

# Build the retrieval artefacts: embed every scheme description, store the
# vectors in a FAISS index, and save the index plus the table/texts to disk.
DATA_PATH="Final_Govt_Schemes_Dataset.csv"

# If the cleaned CSV is missing this cell only prints a message; nothing below
# is created, so later cells that load the artefacts will fail.
if not os.path.exists(DATA_PATH):
    print("Database not found.")
else:
    df_schemes=pd.read_csv(DATA_PATH)

    print(f"Loaded {len(df_schemes)} schemes.")
    print(df_schemes.columns)

    print("Loading SBERT Model (all-MiniLM-L6-v2)...")
    model=SentenceTransformer("all-MiniLM-L6-v2")

    # NOTE(review): astype(str) turns a missing description into the literal
    # text "nan", which would then be embedded like any other description.
    scheme_texts=df_schemes["description"].astype(str).tolist()

    print("Generating embeddings...")
    embeddings=model.encode(
        scheme_texts,
        show_progress_bar=True,
        normalize_embeddings=True
    )
    # Cast to float32 before handing the vectors to FAISS.
    embeddings=embeddings.astype("float32")

    print(f"Embeddings shape: {embeddings.shape}")
    dimension=embeddings.shape[1]
    # Exhaustive (flat) L2 index. NOTE(review): with normalize_embeddings=True
    # the vectors are presumably unit length, in which case L2 ranking matches
    # cosine-similarity ranking -- confirm if the distances are ever used.
    index=faiss.IndexFlatL2(dimension)
    index.add(embeddings)

    faiss.write_index(index, "schemes_faiss.index")

    # Row order of the pickled table matches the order vectors were added, so
    # a FAISS result position can be used as a positional row lookup.
    df_schemes.to_pickle("schemes_data.pkl")
    with open("scheme_texts.pkl", "wb") as f:
        pickle.dump(scheme_texts, f)

    print("FAISS index and data saved successfully ")


In [None]:
import faiss
import pandas as pd
import pickle
import numpy as np
from sentence_transformers import SentenceTransformer
# Reload the saved index, table and texts so retrieval can run without
# re-embedding. Note that `df` is rebound here to the pickled schemes table.
index=faiss.read_index("schemes_faiss.index")

df=pd.read_pickle("schemes_data.pkl")
# Same snake_case normalisation as the cleaning step, applied again to the loaded table.
df.columns=df.columns.str.strip().str.lower().str.replace(" ", "_")
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# pickles that this notebook produced itself.
with open("scheme_texts.pkl", "rb") as f:
    scheme_texts=pickle.load(f)
# Same model name as the one used to build the index, so query vectors are comparable.
model=SentenceTransformer("all-MiniLM-L6-v2")
print("System Ready ")


In [None]:
def search_schemes(user_query, top_k=3):
    """Return the schemes whose descriptions are closest to a query.

    Uses the notebook-level ``model`` (sentence encoder), ``index`` (FAISS
    index) and ``df`` (schemes table, rows in the order they were indexed).

    Args:
        user_query: Free-text question from the user.
        top_k: Maximum number of schemes to return.

    Returns:
        A list of at most ``top_k`` dicts with the keys ``scheme_name``,
        ``category`` and ``description``, nearest first. Empty when
        ``top_k`` is not positive.
    """
    if top_k <= 0:
        return []

    query_vector=model.encode(
        [user_query],
        normalize_embeddings=True
    ).astype("float32")
    _distances, indices=index.search(query_vector, top_k)

    results=[]
    for idx in indices[0]:
        # FAISS pads the result with -1 when the index holds fewer than top_k
        # vectors; df.iloc[-1] would silently return the LAST row instead.
        if idx < 0:
            continue
        row=df.iloc[int(idx)]  # one positional lookup per hit
        results.append({
            "scheme_name": row["scheme_name"],
            "category": row["category"],
            "description": row["description"]
        })

    return results


In [None]:
# Smoke-test retrieval with three plain-language queries (default top_k=3)
# and print the raw result lists.
results_1=search_schemes("I need money for my crops and farming tools")
results_2=search_schemes("I want to start a new company and need a loan")
results_3=search_schemes("My father is sick and needs hospital treatment")
print(results_1)
print(results_2)
print(results_3)


[{'scheme_name': 'Agriculture Infrastructure Fund (AIF)', 'category': 'agriculture', 'description': 'a medium-long term debt financing facility for investment in post-harvest management infrastructure and community farming assets.'}, {'scheme_name': 'Credit Guarantee Scheme for e-NWR', 'category': 'agriculture', 'description': 'helps farmers access post-harvest loans by leveraging electronic warehouse receipts as collateral to avoid distress selling.'}, {'scheme_name': 'Lakhpati Didi Scheme', 'category': 'rural ', 'description': 'empowers rural women in self-help groups to earn a sustainable income of at least 1 lakh per annum.'}]
[{'scheme_name': 'Pradhan Mantri Mudra Yojana (PMMY)', 'category': 'finance', 'description': 'provides loans up to 20 lakh (tarun plus) to non-corporate, non-farm small and micro enterprises.'}, {'scheme_name': 'PM SVANidhi', 'category': 'urban development', 'description': 'provides collateral-free working capital loans of 10,000 to 50,000 to street vendors t

In [None]:
import google.generativeai as genai
from sentence_transformers import SentenceTransformer
import faiss
import pandas as pd
import pickle
import numpy as np
from IPython.display import display, Markdown
import os
# Never hardcode the API key in the notebook. Reuse a key that is already in
# the environment; otherwise prompt for it without echoing, so the secret is
# neither saved in the cell source nor shown in its output.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"]=getpass("Enter your Google API key: ")


In [None]:
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Use the first listed model whose name contains "gemini" and that supports
# generateContent; fall back to "gemini-pro" when none is listed. Resolving the
# fallback into model_name keeps the message below truthful (it previously
# printed "None" while actually using the fallback).
model_name=next(
    (
        m.name
        for m in genai.list_models()
        if "generateContent" in m.supported_generation_methods and "gemini" in m.name
    ),
    "gemini-pro",
)
gemini_model=genai.GenerativeModel(model_name)
print(f"Using Gemini model: {model_name}")


In [None]:
# Load everything the RAG pipeline needs: the query encoder, the saved FAISS
# index, and the pickled schemes table (rebinding `index` and `df`).
embedding_model=SentenceTransformer("all-MiniLM-L6-v2")
index=faiss.read_index("schemes_faiss.index")
df=pd.read_pickle("schemes_data.pkl")
# Same snake_case normalisation as the cleaning step, applied again to the loaded table.
df.columns=df.columns.str.strip().str.lower().str.replace(" ", "_")
print("Database loaded successfully ")


In [None]:
def search_schemes(user_query, top_k=3):
    """Retrieve the closest schemes as prompt-ready text blocks.

    Uses the notebook-level ``embedding_model``, ``index`` (FAISS index) and
    ``df`` (schemes table, rows in the order they were indexed).

    Args:
        user_query: Free-text question from the user.
        top_k: Maximum number of schemes to return.

    Returns:
        A list of at most ``top_k`` strings, nearest first, each listing the
        scheme name, category and description on separate lines. Empty when
        ``top_k`` is not positive.
    """
    if top_k <= 0:
        return []

    query_vector=embedding_model.encode(
        [user_query],
        normalize_embeddings=True
    ).astype("float32")

    _distances, indices=index.search(query_vector, top_k)

    results=[]
    for idx in indices[0]:
        # FAISS pads the result with -1 when the index holds fewer than top_k
        # vectors; df.iloc[-1] would silently return the LAST row instead.
        if idx < 0:
            continue
        row=df.iloc[int(idx)]
        results.append(
            f"""
Scheme Name: {row['scheme_name']}
Category: {row['category']}
Description: {row['description']}
"""
        )
    return results


In [None]:
def generate_rag_response(user_query, retrieved_context):
    """Ask Gemini to answer a question using only the retrieved scheme text.

    Args:
        user_query: The user's question, inserted verbatim into the prompt.
        retrieved_context: Scheme data text placed under "SCHEME DATA".

    Returns:
        The ``text`` of the response from the notebook-level ``gemini_model``.
    """
    prompt=f"""
You are a Government Scheme Consultant.
Answer STRICTLY using the provided data only.

SCHEME DATA:
{retrieved_context}

USER QUESTION:
{user_query}

RULES:
- Do NOT invent schemes.
- Mention exact scheme name.
- Mention exact monetary benefits if present.
- If eligibility is unclear, say "Maybe" and explain why.

FORMAT:
- **Scheme Identified**
- **Benefit Amount**
- **Eligibility Verdict**
- **Reasoning**
- **Key Benefits**
"""
    return gemini_model.generate_content(prompt).text


In [None]:
def run_test_case(query):
    """Run one query end to end and render the answer as Markdown.

    Echoes the query, retrieves the top 3 schemes, joins them with newlines
    as context for ``generate_rag_response``, and displays the result.
    """
    print(f"\nUser Query:{query}")
    retrieved=search_schemes(query, top_k=3)
    answer=generate_rag_response(query, "\n".join(retrieved))
    display(Markdown(answer))


In [None]:
# End-to-end check: retrieve schemes for one query and render Gemini's answer as Markdown.
run_test_case("I am a farmer with 2 acres of land. Can I get financial help?")



User Query: I am a farmer with 2 acres of land. Can I get financial help?


**Scheme Identified: PM KISAN SAMMAN NIDHI (PM-KISAN)**
*   **Benefit Amount**: 6,000 per year in three equal installments.
*   **Eligibility Verdict**: Yes.
*   **Reasoning**: This scheme provides income support to all land-holding farmer families. As a farmer with 2 acres of land, you fall under the category of a land-holding farmer family.
*   **Key Benefits**: Income support to land-holding farmer families.

---

**Scheme Identified: Agriculture Infrastructure Fund (AIF)**
*   **Benefit Amount**: Not specified; it is a medium-long term debt financing facility.
*   **Eligibility Verdict**: Maybe.
*   **Reasoning**: The scheme is a debt financing facility for investment in post-harvest management infrastructure and community farming assets. While you are a farmer, the provided data does not explicitly state that individual farmers with 2 acres are directly eligible for this debt financing, nor does it specify the exact eligibility criteria related to the nature of the applicant or their landholding for general "financial help." Eligibility would depend on your specific plans to invest in post-harvest management infrastructure or community farming assets.
*   **Key Benefits**: Medium-long term debt financing for investment in post-harvest management infrastructure and community farming assets.

In [None]:
def evaluate_response(user_query, bot_response, expected_fact):
    """Have Gemini grade a chatbot answer against an expected fact.

    Args:
        user_query: The question that was put to the chatbot.
        bot_response: The chatbot's answer to be graded.
        expected_fact: The fact the answer is expected to state explicitly.

    Returns:
        The judge's raw text; the prompt requests a "Score: <1-5>" line
        followed by a one-sentence "Reason:" line.
    """
    rubric=f"""
You are an impartial evaluator.
QUESTION:
{user_query}
EXPECTED FACT (exact number or equivalent wording allowed):
{expected_fact}
CHATBOT RESPONSE:
{bot_response}

RULES:
- Expected fact must be EXPLICITLY present.
- Vague mentions are NOT acceptable.

OUTPUT FORMAT (STRICT):
Score: <1-5>
Reason: <one sentence>
"""
    verdict=gemini_model.generate_content(rubric)
    return verdict.text


In [None]:
def extract_score(judge_output):
    """Pull the 1-5 score out of the judge's text.

    Returns:
        The digit following the first "Score:" as an int, or ``None`` when no
        such marker followed by a digit in 1-5 is found.
    """
    found=re.search(r"Score:\s*([1-5])", judge_output)
    if found is None:
        return None
    return int(found.group(1))


In [None]:
import pandas as pd
import time
import re
from google.api_core import exceptions
# Hand-written evaluation cases, one per scheme category.
# "question" is sent through retrieval and generation; "expected_key_fact" is
# the keyword the judge is asked to look for.
# NOTE(review): "expected_scheme" is not read by the evaluation loop in this
# cell -- presumably kept for reference or a future retrieval check.
test_dataset=[
    {
        "category": "Housing",
        "question": "I do not have a pukka house and I want financial help to build one.",
        "expected_scheme": "PMAY",
        "expected_key_fact": "subsidy"
    },
    {
        "category": "Education",
        "question": "I am a meritorious student from a poor family looking for a scholarship.",
        "expected_scheme": "Scholarship",
        "expected_key_fact": "financial assistance"
    },
    {
        "category": "Pension",
        "question": "I am an unorganized worker concerned about income after old age.",
        "expected_scheme": "Atal Pension",
        "expected_key_fact": "pension"
    },
    {
        "category": "Business",
        "question": "I need a small loan of 50,000 rupees to start a shop.",
        "expected_scheme": "MUDRA",
        "expected_key_fact": "collateral"
    },
    {
        "category": "Health",
        "question": "My family needs insurance for hospitalization expenses up to 5 lakh.",
        "expected_scheme": "Ayushman Bharat",
        "expected_key_fact": "5 lakh"
    }
]
def safe_generate_content(model, prompt, retries=3, wait_seconds=30):
    """Call ``model.generate_content`` with retries on quota errors.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has a
            ``text`` attribute.
        prompt: Prompt text to send.
        retries: Maximum number of attempts.
        wait_seconds: Pause between attempts after a quota error.

    Returns:
        The response text on success. On failure an error string is returned
        instead of raising: ``"Error: <message>"`` for any non-quota exception
        (no retry), or ``"Error: Failed"`` once every attempt hit the quota.
    """
    for attempt in range(retries):
        try:
            return model.generate_content(prompt).text
        except exceptions.TooManyRequests:
            # Only wait when another attempt will follow; sleeping after the
            # final attempt just delays the failure.
            if attempt == retries - 1:
                break
            print(f"  Quota hit! Waiting {wait_seconds}s to cool down...")
            time.sleep(wait_seconds)
        except Exception as e:
            return f"Error: {e}"
    return "Error: Failed"
# Evaluation run: for every test case retrieve context, generate an answer,
# then have the same Gemini model grade it (LLM-as-judge) on a 1-5 scale.
results=[]
print(f"Running evaluation on {len(test_dataset)} diverse test cases...\n")

for i, test in enumerate(test_dataset):
    cat=test["category"]
    q=test["question"]
    fact=test["expected_key_fact"]

    print(f"Test #{i+1} [{cat}]: '{q}'")

    schemes=search_schemes(q, top_k=3)
    context="\n\n".join(schemes)
    bot_prompt=f"""
    You are a Government Scheme Consultant.
    SCHEME DATA: {context}
    USER QUESTION: {q}
    INSTRUCTIONS:
    - Identify the most relevant scheme.
    - Mention specific benefits (numbers, amounts).
    - If no exact scheme matches, suggest the closest one.
    """
    bot_ans=safe_generate_content(gemini_model, bot_prompt)

    # Short pause between the answer call and the judge call to stay under the rate limit.
    time.sleep(5)
    judge_prompt=f"""
    You are an impartial Judge.

    USER QUERY: "{q}"
    EXPECTED KEYWORD/FACT: "{fact}"
    BOT RESPONSE: "{bot_ans}"

    TASK:
    1. Did the bot mention the Expected Keyword (or a synonym)?
    2. Did it provide a helpful answer?

    OUTPUT FORMAT:
    Score: [1-5]
    Reason: [Short explanation]
    """
    judge_output=safe_generate_content(gemini_model, judge_prompt)
    # The prompt shows the score in brackets, so accept both "Score: 4" and "Score: [4]".
    match=re.search(r"Score:\s*\[?([1-5])", judge_output)
    # None (not 0) marks judge output that could not be parsed, e.g. an
    # "Error: ..." string from safe_generate_content. Recording 0 would count
    # an API failure as the worst possible grade and drag the average down.
    score=int(match.group(1)) if match else None

    if score is None:
        print("  -> Score: could not be parsed")
    else:
        print(f"  -> Score: {score}/5")

    results.append({
        "Category": cat,
        "Question": q,
        "Numeric Score": score,
        "Judge Output": judge_output
    })
    # Cool down only between test cases; there is nothing to wait for after the last one.
    if i < len(test_dataset) - 1:
        print("   (Cooling down 15s...)\n")
        time.sleep(15)
print("\n" + "="*50)
print("FINAL REPORT CARD")
print("="*50)

# Explicit columns keep the selection below valid even when no test case ran.
df_results=pd.DataFrame(results, columns=["Category", "Question", "Numeric Score", "Judge Output"])
display(df_results[["Category", "Question", "Numeric Score", "Judge Output"]])
# Average only the cases that actually received a score.
valid_scores=pd.to_numeric(df_results["Numeric Score"], errors="coerce").dropna()
if not valid_scores.empty:
    avg=valid_scores.mean()
    print(f"\n Average System Score: {avg:.2f} / 5.0 ({len(valid_scores)}/{len(df_results)} responses scored)")
else:
    print("\n No valid scores.")

Running evaluation on 5 diverse test cases...

Test #1 [Housing]: 'I do not have a pukka house and I want financial help to build one.'
   -> Score: 4/5
   (Cooling down 15s...)

Test #2 [Education]: 'I am a meritorious student from a poor family looking for a scholarship.'
   -> Score: 5/5
   (Cooling down 15s...)

Test #3 [Pension]: 'I am an unorganized worker concerned about income after old age.'
   -> Score: 5/5
   (Cooling down 15s...)

Test #4 [Business]: 'I need a small loan of 50,000 rupees to start a shop.'
   -> Score: 4/5
   (Cooling down 15s...)

Test #5 [Health]: 'My family needs insurance for hospitalization expenses up to 5 lakh.'
   -> Score: 5/5
   (Cooling down 15s...)


FINAL REPORT CARD


Unnamed: 0,Category,Question,Numeric Score,Judge Output
0,Housing,I do not have a pukka house and I want financi...,4,Score: 4\nReason: The bot provided a very help...
1,Education,I am a meritorious student from a poor family ...,5,Score: 5\nReason: The bot provided a highly he...
2,Pension,I am an unorganized worker concerned about inc...,5,Score: 5\nReason: The bot explicitly mentioned...
3,Business,"I need a small loan of 50,000 rupees to start ...",4,Score: 4\nReason: The bot did not explicitly m...
4,Health,My family needs insurance for hospitalization ...,5,Score: 5\nReason: The bot correctly identified...



✅ Average System Score: 4.60 / 5.0


In [None]:

!pip install -q gradio
import gradio as gr

def scheme_chat_logic(user_message, history):
    """Chat handler for the Gradio interface.

    Retrieves the top 3 schemes for the message, asks Gemini to answer from
    that data, and returns the reply text.

    Args:
        user_message: The text the user typed.
        history: Prior conversation supplied by Gradio; not used here, so
            every message is answered independently.

    Returns:
        The reply text. Empty or whitespace-only input gets a short prompt to
        type a question; any exception is reported as "System Error: ..."
        rather than raised.
    """
    # Whitespace-only input carries no question; answer locally instead of
    # spending an embedding lookup and an LLM call on it.
    if not user_message or not str(user_message).strip():
        return "Please type a question."
    try:
        schemes=search_schemes(user_message, top_k=3)

        if not schemes:
            return "I couldn't find any relevant schemes in the database. Please try asking about farming, health, or business loans."

        context_block="\n\n".join(schemes)


        prompt=f"""
        You are a friendly Government Scheme Advisor.

        USER QUESTION: "{user_message}"

        OFFICIAL SCHEME DATA:
        {context_block}

        INSTRUCTIONS:
        - Answer the user's question directly using the provided data.
        - Use bullet points for key benefits.
        - If the user asks something irrelevant, politely decline.
        - Be encouraging and clear.
        """
        response=gemini_model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Deliberate best-effort: surface the failure in the chat window.
        return f"System Error: {e}"
# Chat UI wired to scheme_chat_logic. The title previously held mojibake
# (UTF-8 bytes of the India flag emoji decoded with the wrong codec).
demo=gr.ChatInterface(
    fn=scheme_chat_logic,
    title="🇮🇳 YojanaSetu AI Assistant",
    description="Ask about PM-KISAN, Student Scholarships, Startup Loans, and more.",
    examples=[
        "I am a farmer with 2 acres. What benefits can I get?",
        "I want to start a new business. Any loans available?",
        "Are there any schemes for girl child education?",
        "I need a house loan, I am from a low income group."
    ],
    theme="soft"
)
print("Starting the App... Click the link below!")
# share=True publishes a temporary public URL; debug=True keeps this cell
# running so errors and logs show up in the notebook.
demo.launch(share=True,debug=True)

  self.chatbot = Chatbot(


Starting the App... Click the link below!
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://80c2f78eb84e031f0a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
