#Chatbot

In [None]:
# Install necessary packages
!pip install spacy transformers scikit-learn pandas openpyxl thefuzz --quiet

import pandas as pd
import spacy
from transformers import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re
from thefuzz import fuzz, process  # Using thefuzz instead of fuzzywuzzy (Updated)

# Load the schemes spreadsheet into a DataFrame. The functions below read its
# 'Scheme Name' and 'Objective' columns and, when present, 'Benefits',
# 'Eligibility', 'Application Process' and 'Challenges/Limitations'.
df = pd.read_excel('/content/indian_farmer_schemes_large.xlsx')  # File is read from /content (NOTE(review): the earlier "Kaggle path" wording did not match this path)

# Load SpaCy model
# NOTE(review): `nlp` is not referenced by any function in this cell; text
# normalization below is done with `re`, not SpaCy.
nlp = spacy.load("en_core_web_sm")

# Step 1: Preprocessing Scheme Data
def normalize_text(text):
    """Return a lowercase, punctuation-free, whitespace-collapsed copy of ``text``.

    Args:
        text: Value to normalize. ``None`` and float NaN (e.g. a blank
            DataFrame cell) are treated as empty text; any other non-string
            value is converted with ``str()`` first.

    Returns:
        str: The normalized text containing only ASCII letters, digits and
        single spaces; ``''`` for missing input.
    """
    # NaN is the only float that is not equal to itself; it has no .lower().
    if text is None or (isinstance(text, float) and text != text):
        return ''
    text = str(text).lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove special characters
    text = re.sub(r'\s+', ' ', text).strip()  # Remove extra whitespace
    return text

# Normalized scheme name: used as the choice list for fuzzy matching.
df['Scheme Name Normalized'] = df['Scheme Name'].apply(normalize_text)
# Normalized "name + objective" text: the document that TF-IDF is fitted on.
# Missing objectives are replaced by '' before normalization.
df['Combined Text Normalized'] = (df['Scheme Name Normalized'] + ' ' + df['Objective'].fillna('').apply(normalize_text))

# Step 2: TF-IDF Vectorization for Matching User Queries to Dataset
# The vectorizer is fitted on, and then transforms, the same column, so row i
# of `scheme_vectors` corresponds to row position i of `df`.
vectorizer = TfidfVectorizer().fit(df['Combined Text Normalized'])
scheme_vectors = vectorizer.transform(df['Combined Text Normalized'])

# Step 3: Matching User Queries Using TF-IDF Cosine Similarity and Fuzzy Matching
def match_query_to_schemes(user_input):
    """Find the dataset row that best matches a free-text user query.

    Candidates come from two sources: rows whose TF-IDF cosine similarity to
    the query exceeds 0.3, and rows whose normalized scheme name has a fuzzy
    ``token_sort_ratio`` above 75. Each candidate keeps its highest score
    (the fuzzy score is divided by 100 so both lie on a 0..1 scale) and the
    top-scoring row is returned. Putting the two scores on one scale is a
    heuristic, not a calibrated ranking.

    Args:
        user_input: Raw query text typed by the user.

    Returns:
        A one-row DataFrame holding the best match, or ``None`` when no row
        passes either threshold.
    """
    user_input_normalized = normalize_text(user_input)
    user_vector = vectorizer.transform([user_input_normalized])
    cosine_similarities = cosine_similarity(user_vector, scheme_vectors).flatten()

    threshold = 0.3  # Minimum cosine similarity for a TF-IDF candidate
    fuzzy_cutoff = 75  # Minimum fuzzy score for a name candidate

    # Row position -> best relevance score seen so far.
    candidate_scores = {
        int(position): float(cosine_similarities[position])
        for position in np.where(cosine_similarities > threshold)[0]
    }

    # Use fuzzy matching on scheme names to tolerate typos and word reordering.
    scheme_names = df['Scheme Name Normalized']
    fuzzy_matches = process.extract(user_input_normalized, scheme_names, scorer=fuzz.token_sort_ratio, limit=5)
    for match in fuzzy_matches:
        matched_name, fuzzy_score = match[0], match[1]
        if fuzzy_score <= fuzzy_cutoff:
            continue
        # Resolve to a row *position* so it is comparable with the cosine
        # positions and valid for `iloc` whatever the DataFrame index is.
        position = int(np.flatnonzero((scheme_names == matched_name).to_numpy())[0])
        candidate_scores[position] = max(candidate_scores.get(position, 0.0), fuzzy_score / 100)

    if not candidate_scores:
        return None

    # Pick the highest-scoring candidate rather than an arbitrary set element.
    best_match_index = max(candidate_scores, key=candidate_scores.get)
    return df.iloc[[best_match_index]]

# Step 4: Extract Intent with Improved Approach
# Zero-shot classifier used by `extract_intent` when no keyword rule matches.
# NOTE: no `device` argument is passed here; the captured cell output reports
# that the model is therefore placed on CPU.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Candidate intent labels handed to the classifier. `get_response` routes
# "small_talk" and "farming_advice" to canned replies; every other label is
# answered from the dataset by `get_scheme_details`.
labels = [
    "financial_support",
    "benefits",
    "eligibility",
    "limitations",
    "application_process",
    "small_talk",
    "farming_advice",
    "objective"
]

def extract_intent(user_input):
    """Classify a user message into one of the chatbot's intent labels.

    Keyword rules are tried first: small-talk phrases must appear as whole
    words (so "hi" no longer fires inside "this" or "which"), and
    farming-advice keywords must start a word (so "suggestions" and
    "recommendations" still count). If neither rule matches, the zero-shot
    ``classifier`` ranks ``labels`` and the top label is returned.

    Args:
        user_input: Raw message text typed by the user.

    Returns:
        str: ``"small_talk"``, ``"farming_advice"`` or the classifier's
        highest-ranked label.
    """
    # Adding simple keyword-based identification for small talk and schemes
    small_talk_keywords = ["hello", "hi", "how are you", "greetings", "tell me something interesting", "what is your name"]
    # All keywords are lowercase because they are compared to lowercased input.
    farming_advice_keywords = [
        "how to", "what's the best", "best practices", "how to improve", "advice",
        "recommendation", "how can i", "suggest", "guide"
    ]

    text = user_input.lower()

    def mentions(keywords, whole_word):
        # Anchor on word boundaries instead of testing for a raw substring.
        suffix = r'\b' if whole_word else ''
        return any(
            re.search(r'\b' + re.escape(keyword) + suffix, text)
            for keyword in keywords
        )

    if mentions(small_talk_keywords, whole_word=True):
        return "small_talk"
    if mentions(farming_advice_keywords, whole_word=False):
        return "farming_advice"

    # Default to classifier for specific scheme-related intents
    result = classifier(user_input, labels)
    return result['labels'][0]

# Step 5: Retrieve Information from Dataset Based on Intent
def get_scheme_details(matching_schemes, intent):
    """Format a reply describing the first matched scheme for the given intent.

    The reply always starts with the scheme name and objective. A specific
    intent adds only its own field; any other intent adds benefits,
    eligibility, application process and limitations. Missing columns and
    blank (NaN) cells are both rendered as "Information not available".

    Args:
        matching_schemes: DataFrame of matched rows (only the first row is
            used), or ``None`` when nothing matched.
        intent: Intent label such as ``"benefits"`` or ``"eligibility"``.

    Returns:
        str: The multi-line reply, or an apology when there is no match.
    """
    if matching_schemes is None or matching_schemes.empty:
        return "I couldn't find details about the scheme you mentioned. Please provide more specific information."

    scheme_info = matching_schemes.iloc[0]
    not_available = 'Information not available'

    def field(column):
        # A blank cell is present but NaN, so the `.get` default alone is not
        # enough to avoid printing "nan".
        value = scheme_info.get(column, not_available)
        if value is None or (not isinstance(value, str) and pd.isna(value)):
            return not_available
        return value

    # Intent -> (label shown to the user, dataset column) pairs.
    intent_fields = {
        "financial_support": [("Financial Support", "Benefits")],
        "benefits": [("Benefits", "Benefits")],
        "eligibility": [("Eligibility", "Eligibility")],
        "limitations": [("Limitations", "Challenges/Limitations")],
        "application_process": [("Application Process", "Application Process")],
    }
    default_fields = [
        ("Benefits", "Benefits"),
        ("Eligibility", "Eligibility"),
        ("Application Process", "Application Process"),
        ("Limitations", "Challenges/Limitations"),
    ]

    lines = [
        f"Scheme Name: {scheme_info['Scheme Name']}",
        f"Objective: {field('Objective')}",
    ]
    lines.extend(
        f"{label}: {field(column)}"
        for label, column in intent_fields.get(intent, default_fields)
    )
    return "\n".join(lines).strip()

# Step 6: Formal Small Talk Handling
def handle_small_talk(user_input):
    """Provide formal and simple responses for small talk.

    The message is lowercased and stripped of punctuation before lookup, so
    "How are you?" finds the "how are you" reply. If the whole message is not
    a known phrase, the longest known phrase appearing in it as whole words
    is used.

    Args:
        user_input: Raw message text typed by the user.

    Returns:
        str: The canned reply, or a generic greeting when nothing matches.
    """
    small_talk_responses = {
        "hello": "Hello! How can I assist you today?",
        "hi": "Hi there! How may I help you?",
        "how are you": "I am doing great! What about you?",
        "greetings": "Greetings! Please let me know how I can assist you.",
        "good morning": "Good morning! How can I assist you today?",
        "good afternoon": "Good afternoon! How can I assist you today?",
        "good evening": "Good evening! How can I help you?",
        "what is your name": "I am your assistant chatbot, here to help you with information.",
        "tell me something interesting": "Did you know there are numerous government schemes to support farmers in India? Ask me about them!"
    }

    # Turn punctuation into spaces and collapse whitespace before matching.
    text = ' '.join(re.sub(r'[^a-z0-9\s]', ' ', user_input.lower()).split())
    if text in small_talk_responses:
        return small_talk_responses[text]

    # Longest phrase first so a specific phrase wins over a bare "hi".
    for phrase in sorted(small_talk_responses, key=len, reverse=True):
        if re.search(r'\b' + re.escape(phrase) + r'\b', text):
            return small_talk_responses[phrase]
    return "Hello! How can I assist you?"

# Step 7: Formal Farming Advice Handling
def handle_farming_advice(user_input):
    """Provide formal responses for farming advice.

    The message is lowercased and stripped of punctuation, then answered with
    the first known question it contains. This lets "How to improve soil
    health?" and "Please tell me how to save water in farming." find their
    canned answers.

    Args:
        user_input: Raw message text typed by the user.

    Returns:
        str: The canned advice, or a fallback message for unknown topics.
    """
    farming_advice_responses = {
        "how to improve soil health": "Use organic compost and practice crop rotation to maintain healthy soil.",
        "how to reduce soil erosion": "Consider using cover crops and building terraces to reduce soil erosion.",
        "how to save water in farming": "Use drip irrigation systems to conserve water effectively.",
        "best practices for pest control": "Consider using natural predators or organic pesticides to manage pests.",
        "how to increase crop yield": "Implement crop rotation and use quality seeds to increase crop yield.",
        # Add more responses as needed...
    }

    # Turn punctuation into spaces and collapse whitespace before matching.
    text = ' '.join(re.sub(r'[^a-z0-9\s]', ' ', user_input.lower()).split())
    for question, answer in farming_advice_responses.items():
        if question in text:
            return answer
    return "I'm still learning about that topic. Please ask me about another farming practice."

# Step 8: Chatbot Logic
def get_response(user_input):
    """Return the chatbot's reply to one user message.

    The intent is detected first. Small talk and farming advice go to their
    canned-response handlers; every other intent is answered from the dataset
    row that best matches the message.
    """
    intent = extract_intent(user_input)

    # Conversational intents are answered without consulting the dataset.
    if intent == "small_talk":
        reply = handle_small_talk(user_input)
    elif intent == "farming_advice":
        reply = handle_farming_advice(user_input)
    else:
        # Scheme-related intent: look the scheme up, then format its details.
        reply = get_scheme_details(match_query_to_schemes(user_input), intent)
    return reply

# Step 9: Interactive Chat Function for Colab Notebook
def chat_with_bot():
    """Run a console chat loop until the user types 'exit' or input ends.

    Each non-blank line is passed to ``get_response`` and the reply printed.
    Surrounding whitespace is ignored, so "exit " still ends the session and
    blank lines are skipped rather than sent to the intent classifier. A
    closed input stream or a keyboard interrupt ends the chat cleanly instead
    of raising. Errors while answering are reported and the loop continues.
    """
    print("Chatbot is ready! Type 'exit' to end the conversation.")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # No more input is coming (stream closed or cell interrupted).
            print("\nChatbot: Goodbye!")
            break
        if not user_input:
            continue
        if user_input.lower() == 'exit':
            print("Chatbot: Goodbye!")
            break
        try:
            response = get_response(user_input)
        except Exception as e:
            # Top-level boundary: report the failure and keep the session alive.
            print(f"Error occurred: {e}")
        else:
            print(f"Chatbot: {response}")

# Start the interactive chat
if __name__ == "__main__":
    chat_with_bot()


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.6/3.1 MB[0m [31m16.8 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m3.1/3.1 MB[0m [31m55.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m38.5 MB/s[0m eta [36m0:00:00[0m
[?25h

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


Chatbot is ready! Type 'exit' to end the conversation.
You: Soil Health Card Scheme
Chatbot: Scheme Name: Soil Health Card Scheme
Objective: Promote soil health and fertility
Benefits: Low-interest credit for agricultural needs
You: exit
Chatbot: Goodbye!


#Converting it into models

In [None]:
# Install necessary packages
!pip install spacy transformers scikit-learn pandas openpyxl thefuzz joblib --quiet

import pandas as pd
import spacy
from transformers import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re
from thefuzz import fuzz, process
import joblib

# Step 1: Create the Chatbot Model as a Class
class FarmerChatbot:
    """Retrieval-based assistant answering questions about farmer schemes.

    On construction the scheme spreadsheet is loaded and a TF-IDF index is
    built over each scheme's normalized name and objective. ``get_response``
    detects the intent of a message (keyword rules first, then a zero-shot
    classifier) and answers with a canned reply or with details of the
    best-matching scheme.

    Attributes set by ``__init__``: ``df``, ``nlp``, ``classifier``,
    ``vectorizer`` and ``scheme_vectors``.
    """

    def __init__(self, data_path='/content/indian_farmer_schemes_large.xlsx'):
        """Load the dataset and models and build the TF-IDF index.

        Args:
            data_path: Path of the Excel file with the scheme data. Defaults
                to the location used throughout this notebook.
        """
        # Load the dataset
        self.df = pd.read_excel(data_path)

        # Load SpaCy model
        # NOTE(review): `self.nlp` is not used by any method of this class.
        self.nlp = spacy.load("en_core_web_sm")

        # Load Transformers pipeline
        self.classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

        # Preprocess the dataset
        self.preprocess_dataset()

        # Fit the TF-IDF vectorizer; row i of `scheme_vectors` corresponds to
        # row position i of `self.df`.
        self.vectorizer = TfidfVectorizer().fit(self.df['Combined Text Normalized'])
        self.scheme_vectors = self.vectorizer.transform(self.df['Combined Text Normalized'])

    def preprocess_dataset(self):
        """Add the normalized name and combined-text columns to ``self.df``."""
        # Reuse the instance-level normalizer so dataset text and user queries
        # are always cleaned the same way.
        self.df['Scheme Name Normalized'] = self.df['Scheme Name'].apply(self.normalize_text)
        self.df['Combined Text Normalized'] = (
            self.df['Scheme Name Normalized'] + ' ' +
            self.df['Objective'].fillna('').apply(self.normalize_text)
        )

    def match_query_to_schemes(self, user_input):
        """Return the best-matching scheme row for a query, or ``None``.

        Candidates are rows with TF-IDF cosine similarity above 0.3 and rows
        whose normalized name has a fuzzy ``token_sort_ratio`` above 75. Each
        candidate keeps its highest score (fuzzy scores are divided by 100 so
        both lie on a 0..1 scale; this is a heuristic) and the top one is
        returned as a one-row DataFrame.
        """
        user_input_normalized = self.normalize_text(user_input)
        user_vector = self.vectorizer.transform([user_input_normalized])
        cosine_similarities = cosine_similarity(user_vector, self.scheme_vectors).flatten()

        threshold = 0.3
        fuzzy_cutoff = 75

        # Row position -> best relevance score seen so far.
        candidate_scores = {
            int(position): float(cosine_similarities[position])
            for position in np.where(cosine_similarities > threshold)[0]
        }

        # Fuzzy matching
        scheme_names = self.df['Scheme Name Normalized']
        fuzzy_matches = process.extract(user_input_normalized, scheme_names, scorer=fuzz.token_sort_ratio, limit=5)
        for match in fuzzy_matches:
            matched_name, fuzzy_score = match[0], match[1]
            if fuzzy_score <= fuzzy_cutoff:
                continue
            # Resolve to a row *position* so it is valid for `iloc` and
            # comparable with the cosine positions.
            position = int(np.flatnonzero((scheme_names == matched_name).to_numpy())[0])
            candidate_scores[position] = max(candidate_scores.get(position, 0.0), fuzzy_score / 100)

        if not candidate_scores:
            return None

        # Highest-scoring candidate, not an arbitrary element of a set.
        best_match_index = max(candidate_scores, key=candidate_scores.get)
        return self.df.iloc[[best_match_index]]

    def extract_intent(self, user_input):
        """Return the intent label for a message.

        Small-talk phrases must appear as whole words and farming-advice
        keywords must start a word; otherwise the zero-shot classifier's top
        label is returned.
        """
        small_talk_keywords = ["hello", "hi", "how are you", "greetings", "tell me something interesting", "what is your name"]
        # All keywords are lowercase because they are compared to lowercased input.
        farming_advice_keywords = [
            "how to", "what's the best", "best practices", "how to improve", "advice",
            "recommendation", "how can i", "suggest", "guide"
        ]

        text = user_input.lower()

        def mentions(keywords, whole_word):
            # Word-boundary anchors stop "hi" from matching inside "this".
            suffix = r'\b' if whole_word else ''
            return any(
                re.search(r'\b' + re.escape(keyword) + suffix, text)
                for keyword in keywords
            )

        if mentions(small_talk_keywords, whole_word=True):
            return "small_talk"
        if mentions(farming_advice_keywords, whole_word=False):
            return "farming_advice"

        labels = [
            "financial_support",
            "benefits",
            "eligibility",
            "limitations",
            "application_process",
            "small_talk",
            "farming_advice",
            "objective"
        ]
        result = self.classifier(user_input, labels)
        return result['labels'][0]

    def get_response(self, user_input):
        """Return the chatbot's reply to one user message."""
        intent = self.extract_intent(user_input)

        if intent == "small_talk":
            return self.handle_small_talk(user_input)
        elif intent == "farming_advice":
            return self.handle_farming_advice(user_input)
        else:
            matching_schemes = self.match_query_to_schemes(user_input)
            return self.get_scheme_details(matching_schemes, intent)

    def normalize_text(self, text):
        """Lowercase ``text``, drop non-alphanumerics and collapse whitespace.

        ``None`` and float NaN (e.g. a blank DataFrame cell) yield ``''``;
        other non-string values are converted with ``str()`` first.
        """
        # NaN is the only float that is not equal to itself.
        if text is None or (isinstance(text, float) and text != text):
            return ''
        text = str(text).lower()
        text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text

    @staticmethod
    def _lookup_key(user_input):
        """Lowercase, turn punctuation into spaces and collapse whitespace."""
        return ' '.join(re.sub(r'[^a-z0-9\s]', ' ', user_input.lower()).split())

    def get_scheme_details(self, matching_schemes, intent):
        """Format a reply describing the first matched scheme for ``intent``.

        Missing columns and blank (NaN) cells are both rendered as
        "Information not available". Returns an apology string when
        ``matching_schemes`` is ``None`` or empty.
        """
        if matching_schemes is None or matching_schemes.empty:
            return "I couldn't find details about the scheme you mentioned. Please provide more specific information."

        scheme_info = matching_schemes.iloc[0]
        not_available = 'Information not available'

        def field(column):
            # A blank cell is present but NaN, so the `.get` default alone
            # would still print "nan".
            value = scheme_info.get(column, not_available)
            if value is None or (not isinstance(value, str) and pd.isna(value)):
                return not_available
            return value

        # Intent -> (label shown to the user, dataset column) pairs.
        intent_fields = {
            "financial_support": [("Financial Support", "Benefits")],
            "benefits": [("Benefits", "Benefits")],
            "eligibility": [("Eligibility", "Eligibility")],
            "limitations": [("Limitations", "Challenges/Limitations")],
            "application_process": [("Application Process", "Application Process")],
        }
        default_fields = [
            ("Benefits", "Benefits"),
            ("Eligibility", "Eligibility"),
            ("Application Process", "Application Process"),
            ("Limitations", "Challenges/Limitations"),
        ]

        lines = [
            f"Scheme Name: {scheme_info['Scheme Name']}",
            f"Objective: {field('Objective')}",
        ]
        lines.extend(
            f"{label}: {field(column)}"
            for label, column in intent_fields.get(intent, default_fields)
        )
        return "\n".join(lines).strip()

    def handle_small_talk(self, user_input):
        """Return a canned small-talk reply, tolerant of punctuation and extra words."""
        small_talk_responses = {
            "hello": "Hello! How can I assist you today?",
            "hi": "Hi there! How may I help you?",
            "how are you": "I am doing great! What about you?",
            "greetings": "Greetings! Please let me know how I can assist you.",
            "good morning": "Good morning! How can I assist you today?",
            "good afternoon": "Good afternoon! How can I assist you today?",
            "good evening": "Good evening! How can I help you?",
            "what is your name": "I am your assistant chatbot, here to help you with information.",
            "tell me something interesting": "Did you know there are numerous government schemes to support farmers in India? Ask me about them!"
        }
        text = self._lookup_key(user_input)
        if text in small_talk_responses:
            return small_talk_responses[text]
        # Longest phrase first so a specific phrase wins over a bare "hi".
        for phrase in sorted(small_talk_responses, key=len, reverse=True):
            if re.search(r'\b' + re.escape(phrase) + r'\b', text):
                return small_talk_responses[phrase]
        return "Hello! How can I assist you?"

    def handle_farming_advice(self, user_input):
        """Return canned advice for the first known question found in the message."""
        farming_advice_responses = {
            "how to improve soil health": "Use organic compost and practice crop rotation to maintain healthy soil.",
            "how to reduce soil erosion": "Consider using cover crops and building terraces to reduce soil erosion.",
            "how to save water in farming": "Use drip irrigation systems to conserve water effectively.",
            "best practices for pest control": "Consider using natural predators or organic pesticides to manage pests.",
            "how to increase crop yield": "Implement crop rotation and use quality seeds to increase crop yield.",
        }
        text = self._lookup_key(user_input)
        for question, answer in farming_advice_responses.items():
            if question in text:
                return answer
        return "I'm still learning about that topic. Please ask me about another farming practice."

# Save the chatbot model and required artifacts
# Constructing the chatbot loads the spreadsheet, the SpaCy model and the
# zero-shot pipeline, then fits the TF-IDF vectorizer (see __init__).
chatbot = FarmerChatbot()
# The fitted vectorizer and the scheme matrix are saved on their own...
joblib.dump(chatbot.vectorizer, 'tfidf_vectorizer.pkl')
joblib.dump(chatbot.scheme_vectors, 'scheme_vectors.pkl')
# ...and the whole instance is saved too. The instance holds `df`, `nlp`,
# `classifier`, `vectorizer` and `scheme_vectors`, so this file also contains
# the two artifacts above.
# NOTE(review): loading this pickle presumably needs a `FarmerChatbot` class
# to be defined in the loading session - confirm against the pickle docs.
joblib.dump(chatbot, 'farmer_chatbot.pkl')


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


['farmer_chatbot.pkl']

Testing

In [None]:
# Install necessary packages
!pip install spacy transformers scikit-learn pandas openpyxl thefuzz joblib --quiet

import joblib

# Step 1: Load the Saved Artifacts
# Paths are relative to the current working directory, matching the save cell.
vectorizer = joblib.load('tfidf_vectorizer.pkl')
scheme_vectors = joblib.load('scheme_vectors.pkl')
chatbot = joblib.load('farmer_chatbot.pkl')

# Update the chatbot instance with loaded artifacts
# NOTE(review): the saved instance already had `vectorizer` and
# `scheme_vectors` attributes when it was dumped, so these assignments look
# redundant unless the separate files are newer - confirm.
chatbot.vectorizer = vectorizer
chatbot.scheme_vectors = scheme_vectors

# Step 2: Test Function to Interact with Chatbot
def chat_with_bot():
    """Run a console chat loop against the loaded ``chatbot`` instance.

    Each non-blank line is passed to ``chatbot.get_response`` and the reply
    printed. Surrounding whitespace is ignored, so "exit " still ends the
    session and blank lines are skipped. A closed input stream or a keyboard
    interrupt ends the chat cleanly instead of raising. Errors while
    answering are reported and the loop continues.
    """
    print("Chatbot is ready! Type 'exit' to end the conversation.")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # No more input is coming (stream closed or cell interrupted).
            print("\nChatbot: Goodbye!")
            break
        if not user_input:
            continue
        if user_input.lower() == 'exit':
            print("Chatbot: Goodbye!")
            break
        try:
            response = chatbot.get_response(user_input)
        except Exception as e:
            # Top-level boundary: report the failure and keep the session alive.
            print(f"Error occurred: {e}")
        else:
            print(f"Chatbot: {response}")

# Step 3: Start the Interactive Chat for Testing
if __name__ == "__main__":
    chat_with_bot()


Chatbot is ready! Type 'exit' to end the conversation.
You: exit
Chatbot: Goodbye!


#Model info

In [None]:
# --------------------------------------------
# Chatbot Model Development Overview Comments
# --------------------------------------------

# 1. Necessary Installations:
# Install the following Python packages that are required to create, save, and use the chatbot:
# - pandas: To work with dataset and data frames.
# - spacy: For natural language processing.
# - transformers: For the zero-shot classification model.
# - scikit-learn: For TF-IDF vectorization and cosine similarity.
# - openpyxl: For reading Excel files.
# - thefuzz: For fuzzy string matching (replacement for fuzzywuzzy).
# - joblib: For saving and loading Python objects like models and vectors.
#
# Installation Command:
# !pip install spacy transformers scikit-learn pandas openpyxl thefuzz joblib --quiet

# 2. Dataset Preparation:
# Load the dataset using pandas. Make sure the dataset contains columns such as:
# - Scheme Name: The name of each farmer scheme.
# - Objective: The objectives or details of each scheme.
# These columns will be used to create a vectorized text representation to match user input.

# 3. Preprocessing Steps:
# Clean and normalize text with regular expressions (Python's `re` module) so it is suitable for vectorization.
# This step lowercases the text, removes special characters, and collapses extra whitespace.
# The `normalize_text()` function is used for this purpose; the SpaCy model is loaded but is not used for normalization.

# 4. Text Vectorization:
# Use TF-IDF vectorization to convert scheme descriptions into a numerical representation.
# - TfidfVectorizer: Converts the combined text of scheme names and objectives to vectors.
# - Vector representation is used for similarity matching with user input.
#
# After fitting the vectorizer on scheme data:
# - Save the vectorizer using `joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')`
# - Save the resulting vectors using `joblib.dump(scheme_vectors, 'scheme_vectors.pkl')`

# 5. Matching User Queries:
# Use cosine similarity to find the best matching scheme based on user input.
# Use fuzzy matching (`thefuzz`) to enhance the accuracy of the match by adding tolerance for textual variations.
# The function `match_query_to_schemes()` handles this matching process.

# 6. Intent Classification:
# A zero-shot classification model from HuggingFace is used to classify user queries into different intents:
# - financial_support, benefits, eligibility, etc.
# Use the `extract_intent()` function to determine what the user wants to know.

# 7. Handle Specific Intents:
# Based on the identified intent, retrieve scheme information or respond appropriately.
# For example:
# - Provide details of financial support if the user asks about financial support.
# - Handle small talk (e.g., greeting messages) separately using the `handle_small_talk()` function.
# This makes the chatbot capable of responding in different contexts.

# 8. Formal Responses:
# Write predefined responses for small talk and farming advice to enhance user engagement.
# - Use the `handle_small_talk()` and `handle_farming_advice()` functions to handle these intents.

# 9. Save the Chatbot as a Model:
# Convert the entire chatbot logic into a class called `FarmerChatbot`.
# The class includes all steps: vectorization, matching, intent extraction, and responding.
# After building and testing the chatbot, save the complete model using `joblib`:
# - `joblib.dump(chatbot, 'farmer_chatbot.pkl')`
# This makes the chatbot model portable for future use.

# 10. Loading the Saved Model:
# When you want to use the saved chatbot model again, load the saved files:
# - Load the saved TF-IDF vectorizer: `vectorizer = joblib.load('tfidf_vectorizer.pkl')`
# - Load the scheme vectors: `scheme_vectors = joblib.load('scheme_vectors.pkl')`
# - Load the chatbot model: `chatbot = joblib.load('farmer_chatbot.pkl')`
# Update the loaded chatbot instance with the vectorizer and scheme vectors:
# - `chatbot.vectorizer = vectorizer`
# - `chatbot.scheme_vectors = scheme_vectors`

# 11. Test the Chatbot Model:
# Use the `chat_with_bot()` function to test the saved chatbot.
# Interact with the chatbot by asking questions related to the farmer schemes or other intents.
# The chatbot should load the model, match user input to relevant schemes, and respond based on the intent.

# --------------------------------------------
# Example Flow for Saving, Loading, and Using Chatbot
# --------------------------------------------
# 1. Train the Chatbot:
# - Preprocess the dataset.
# - Create vectors using TF-IDF vectorizer.
# - Create an instance of the chatbot class and fit the vectors.
# - Save the trained artifacts (vectorizer, scheme vectors, chatbot class).

# 2. Use the Chatbot:
# - Load all saved artifacts.
# - Initialize the chatbot instance with required attributes.
# - Interact with the chatbot using the `chat_with_bot()` function.

# --------------------------------------------
# Summary:
# - This approach enables you to save the trained chatbot as a model that you can use anywhere.
# - You need the saved files and, because `farmer_chatbot.pkl` is a pickled `FarmerChatbot` instance, a `FarmerChatbot` class definition in scope before loading it.
# - This makes the chatbot solution portable and easy to deploy across different environments.
# --------------------------------------------


In [None]:
from google.colab import files

# Specify the file paths that you want to download
# These are the three files written by the `joblib.dump` calls in the
# "Converting it into models" cell, addressed under /content.
file_paths = [
    "/content/scheme_vectors.pkl",
    "/content/farmer_chatbot.pkl",
    "/content/tfidf_vectorizer.pkl"
]

# Iterate over the file paths and download each one
# One `files.download` call is made per file, in list order.
for file_path in file_paths:
    files.download(file_path)


#for other notebooks

In [None]:
# Define the FarmerChatbot class (or use an appropriate dummy placeholder if you do not need full functionality)
class FarmerChatbot:
    """Lightweight stand-in for the full ``FarmerChatbot`` class.

    It exists so that a notebook without the full class can still load
    ``farmer_chatbot.pkl`` and use the result as an intent classifier. The
    instance saved by the training cell has a ``classifier`` attribute (the
    zero-shot pipeline). When the loaded object has that attribute, calls are
    forwarded to it; otherwise a fixed placeholder result is returned.
    """

    def __init__(self):
        pass

    def __call__(self, user_input, labels):
        """Classify ``user_input`` against ``labels``.

        Args:
            user_input: Text to classify.
            labels: Candidate intent labels.

        Returns:
            The result of ``self.classifier(user_input, labels)`` when a
            callable ``classifier`` attribute is present; otherwise the
            placeholder ``{"labels": ["farming_advice"]}``.
        """
        classifier = getattr(self, "classifier", None)
        if callable(classifier):
            # Use the real classifier carried by the loaded instance instead
            # of answering "farming_advice" for every message.
            return classifier(user_input, labels)
        # You could add any dummy or mock behavior here to simulate the original functionality.
        return {"labels": ["farming_advice"]}

# Load the farmer chatbot model after defining its class
# NOTE(review): this cell does not import `joblib`; it relies on an earlier
# cell in the same session having done so.
farmer_chatbot = joblib.load('/content/farmer_chatbot.pkl')


In [None]:
# Install necessary packages
!pip install spacy transformers scikit-learn pandas openpyxl thefuzz --quiet

import pandas as pd
import spacy
from transformers import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re
from thefuzz import fuzz, process  # Using thefuzz instead of fuzzywuzzy (Updated)
import joblib

# Load SpaCy model
# NOTE(review): `nlp` is not referenced by any function in this cell.
nlp = spacy.load("en_core_web_sm")

# Step 1: Load Pretrained Models
# The fitted TF-IDF vectorizer, the precomputed scheme matrix and the pickled
# chatbot object (used below for intent classification).
tfidf_vectorizer = joblib.load('/content/tfidf_vectorizer.pkl')
scheme_vectors = joblib.load('/content/scheme_vectors.pkl')
farmer_chatbot = joblib.load('/content/farmer_chatbot.pkl')

# Load the dataset
# `match_query_to_schemes` uses row positions of `scheme_vectors` to index
# `df`, so this file must have the same rows, in the same order, as when the
# vectors were created.
df = pd.read_excel('/content/indian_farmer_schemes_large.xlsx')

# Step 2: Preprocessing Function
def normalize_text(text):
    """Return a lowercase, punctuation-free, whitespace-collapsed copy of ``text``.

    Args:
        text: Value to normalize. ``None`` and float NaN (e.g. a blank
            DataFrame cell) are treated as empty text; any other non-string
            value is converted with ``str()`` first.

    Returns:
        str: The normalized text containing only ASCII letters, digits and
        single spaces; ``''`` for missing input.
    """
    # NaN is the only float that is not equal to itself; it has no .lower().
    if text is None or (isinstance(text, float) and text != text):
        return ''
    text = str(text).lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove special characters
    text = re.sub(r'\s+', ' ', text).strip()  # Remove extra whitespace
    return text

# Add Normalized columns
# 'Scheme Name Normalized' is the choice list for fuzzy matching below.
df['Scheme Name Normalized'] = df['Scheme Name'].apply(normalize_text)
# Normalized "name + objective" text; missing objectives become ''.
# NOTE(review): nothing later in this cell reads this column, because the
# TF-IDF vectors are loaded from disk rather than recomputed.
df['Combined Text Normalized'] = (df['Scheme Name Normalized'] + ' ' + df['Objective'].fillna('').apply(normalize_text))

# Step 3: Matching User Queries Using TF-IDF Cosine Similarity and Fuzzy Matching
def match_query_to_schemes(user_input):
    """Find the dataset row that best matches a free-text user query.

    Candidates come from two sources: rows whose TF-IDF cosine similarity to
    the query (computed with the loaded ``tfidf_vectorizer`` and
    ``scheme_vectors``) exceeds 0.3, and rows whose normalized scheme name
    has a fuzzy ``token_sort_ratio`` above 75. Each candidate keeps its
    highest score (the fuzzy score is divided by 100 so both lie on a 0..1
    scale) and the top-scoring row is returned. Putting the two scores on one
    scale is a heuristic, not a calibrated ranking.

    Args:
        user_input: Raw query text typed by the user.

    Returns:
        A one-row DataFrame holding the best match, or ``None`` when no row
        passes either threshold.
    """
    user_input_normalized = normalize_text(user_input)
    user_vector = tfidf_vectorizer.transform([user_input_normalized])
    cosine_similarities = cosine_similarity(user_vector, scheme_vectors).flatten()

    threshold = 0.3  # Minimum cosine similarity for a TF-IDF candidate
    fuzzy_cutoff = 75  # Minimum fuzzy score for a name candidate

    # Row position -> best relevance score seen so far.
    candidate_scores = {
        int(position): float(cosine_similarities[position])
        for position in np.where(cosine_similarities > threshold)[0]
    }

    # Use fuzzy matching on scheme names to tolerate typos and word reordering.
    scheme_names = df['Scheme Name Normalized']
    fuzzy_matches = process.extract(user_input_normalized, scheme_names, scorer=fuzz.token_sort_ratio, limit=5)
    for match in fuzzy_matches:
        matched_name, fuzzy_score = match[0], match[1]
        if fuzzy_score <= fuzzy_cutoff:
            continue
        # Resolve to a row *position* so it is comparable with the cosine
        # positions and valid for `iloc` whatever the DataFrame index is.
        position = int(np.flatnonzero((scheme_names == matched_name).to_numpy())[0])
        candidate_scores[position] = max(candidate_scores.get(position, 0.0), fuzzy_score / 100)

    if not candidate_scores:
        return None

    # Pick the highest-scoring candidate rather than an arbitrary set element.
    best_match_index = max(candidate_scores, key=candidate_scores.get)
    return df.iloc[[best_match_index]]

# Step 4: Extract Intent with Improved Approach
# Candidate intent labels passed to the classifier in `extract_intent`.
# `get_response` routes "small_talk" and "farming_advice" to canned replies;
# every other label is answered from the dataset by `get_scheme_details`.
labels = [
    "financial_support",
    "benefits",
    "eligibility",
    "limitations",
    "application_process",
    "small_talk",
    "farming_advice",
    "objective"
]

def extract_intent(user_input):
    """Classify a user message into one of the chatbot's intent labels.

    Keyword rules are tried first: small-talk phrases must appear as whole
    words (so "hi" no longer fires inside "this" or "which"), and
    farming-advice keywords must start a word (so "suggestions" and
    "recommendations" still count). If neither rule matches, the loaded
    chatbot's zero-shot classifier ranks ``labels`` and the top label is
    returned.

    Args:
        user_input: Raw message text typed by the user.

    Returns:
        str: ``"small_talk"``, ``"farming_advice"`` or the classifier's
        highest-ranked label.
    """
    # Adding simple keyword-based identification for small talk and schemes
    small_talk_keywords = ["hello", "hi", "how are you", "greetings", "tell me something interesting", "what is your name"]
    # All keywords are lowercase because they are compared to lowercased input.
    farming_advice_keywords = [
        "how to", "what's the best", "best practices", "how to improve", "advice",
        "recommendation", "how can i", "suggest", "guide"
    ]

    text = user_input.lower()

    def mentions(keywords, whole_word):
        # Anchor on word boundaries instead of testing for a raw substring.
        suffix = r'\b' if whole_word else ''
        return any(
            re.search(r'\b' + re.escape(keyword) + suffix, text)
            for keyword in keywords
        )

    if mentions(small_talk_keywords, whole_word=True):
        return "small_talk"
    if mentions(farming_advice_keywords, whole_word=False):
        return "farming_advice"

    # Default to classifier for specific scheme-related intents.
    # The saved chatbot stores its zero-shot pipeline as `.classifier`; prefer
    # it, and fall back to calling the object itself for a callable stand-in.
    classify = getattr(farmer_chatbot, "classifier", farmer_chatbot)
    result = classify(user_input, labels)
    return result['labels'][0]

# Step 5: Retrieve Information from Dataset Based on Intent
def get_scheme_details(matching_schemes, intent):
    """Format a reply describing the first matched scheme for the given intent.

    The reply always starts with the scheme name and objective. A specific
    intent adds only its own field; any other intent adds benefits,
    eligibility, application process and limitations. Missing columns and
    blank (NaN) cells are both rendered as "Information not available".

    Args:
        matching_schemes: DataFrame of matched rows (only the first row is
            used), or ``None`` when nothing matched.
        intent: Intent label such as ``"benefits"`` or ``"eligibility"``.

    Returns:
        str: The multi-line reply, or an apology when there is no match.
    """
    if matching_schemes is None or matching_schemes.empty:
        return "I couldn't find details about the scheme you mentioned. Please provide more specific information."

    scheme_info = matching_schemes.iloc[0]
    not_available = 'Information not available'

    def field(column):
        # A blank cell is present but NaN, so the `.get` default alone is not
        # enough to avoid printing "nan".
        value = scheme_info.get(column, not_available)
        if value is None or (not isinstance(value, str) and pd.isna(value)):
            return not_available
        return value

    # Intent -> (label shown to the user, dataset column) pairs.
    intent_fields = {
        "financial_support": [("Financial Support", "Benefits")],
        "benefits": [("Benefits", "Benefits")],
        "eligibility": [("Eligibility", "Eligibility")],
        "limitations": [("Limitations", "Challenges/Limitations")],
        "application_process": [("Application Process", "Application Process")],
    }
    default_fields = [
        ("Benefits", "Benefits"),
        ("Eligibility", "Eligibility"),
        ("Application Process", "Application Process"),
        ("Limitations", "Challenges/Limitations"),
    ]

    lines = [
        f"Scheme Name: {scheme_info['Scheme Name']}",
        f"Objective: {field('Objective')}",
    ]
    lines.extend(
        f"{label}: {field(column)}"
        for label, column in intent_fields.get(intent, default_fields)
    )
    return "\n".join(lines).strip()

# Step 6: Formal Small Talk Handling
def handle_small_talk(user_input):
    """Provide formal and simple responses for small talk.

    The message is lowercased and stripped of punctuation before lookup, so
    "Good morning!" finds the "good morning" reply. If the whole message is
    not a known phrase, the longest known phrase appearing in it as whole
    words is used.

    Args:
        user_input: Raw message text typed by the user.

    Returns:
        str: The canned reply, or a generic greeting when nothing matches.
    """
    small_talk_responses = {
        "hello": "Hello! How can I assist you today?",
        "hi": "Hi there! How may I help you?",
        "how are you": "I am doing great! What about you?",
        "greetings": "Greetings! Please let me know how I can assist you.",
        "good morning": "Good morning! How can I assist you today?",
        "good afternoon": "Good afternoon! How can I assist you today?",
        "good evening": "Good evening! How can I help you?",
        "what is your name": "I am your assistant chatbot, here to help you with information.",
        "tell me something interesting": "Did you know there are numerous government schemes to support farmers in India? Ask me about them!"
    }

    # Turn punctuation into spaces and collapse whitespace before matching.
    text = ' '.join(re.sub(r'[^a-z0-9\s]', ' ', user_input.lower()).split())
    if text in small_talk_responses:
        return small_talk_responses[text]

    # Longest phrase first so a specific phrase wins over a bare "hi".
    for phrase in sorted(small_talk_responses, key=len, reverse=True):
        if re.search(r'\b' + re.escape(phrase) + r'\b', text):
            return small_talk_responses[phrase]
    return "Hello! How can I assist you?"

# Step 7: Formal Farming Advice Handling
def handle_farming_advice(user_input):
    """Provide formal responses for farming advice.

    The message is lowercased and stripped of punctuation, then answered with
    the first known question it contains. This lets "How to reduce soil
    erosion?" and "Can you share best practices for pest control" find their
    canned answers.

    Args:
        user_input: Raw message text typed by the user.

    Returns:
        str: The canned advice, or a fallback message for unknown topics.
    """
    farming_advice_responses = {
        "how to improve soil health": "Use organic compost and practice crop rotation to maintain healthy soil.",
        "how to reduce soil erosion": "Consider using cover crops and building terraces to reduce soil erosion.",
        "how to save water in farming": "Use drip irrigation systems to conserve water effectively.",
        "best practices for pest control": "Consider using natural predators or organic pesticides to manage pests.",
        "how to increase crop yield": "Implement crop rotation and use quality seeds to increase crop yield.",
        # Add more responses as needed...
    }

    # Turn punctuation into spaces and collapse whitespace before matching.
    text = ' '.join(re.sub(r'[^a-z0-9\s]', ' ', user_input.lower()).split())
    for question, answer in farming_advice_responses.items():
        if question in text:
            return answer
    return "I'm still learning about that topic. Please ask me about another farming practice."

# Step 8: Chatbot Logic
def get_response(user_input):
    """Route a user message to the handler that matches its intent.

    Small talk and farming advice have dedicated handlers; every other
    intent is answered from the scheme matched against the message.
    """
    detected_intent = extract_intent(user_input)

    if detected_intent == "small_talk":
        return handle_small_talk(user_input)
    if detected_intent == "farming_advice":
        return handle_farming_advice(user_input)

    # Anything else is treated as a scheme question.
    candidates = match_query_to_schemes(user_input)
    return get_scheme_details(candidates, detected_intent)

# Step 9: Interactive Chat Function for Colab Notebook
def chat_with_bot():
    """Run a console chat loop until the user types 'exit' (any letter case).

    Errors raised while answering a message are printed and the loop
    carries on with the next prompt.
    """
    print("Chatbot is ready! Type 'exit' to end the conversation.")
    chatting = True
    while chatting:
        message = input("You: ")
        if message.lower() != 'exit':
            try:
                reply = get_response(message)
                print(f"Chatbot: {reply}")
            except Exception as err:
                print(f"Error occurred: {err}")
        else:
            print("Chatbot: Goodbye!")
            chatting = False

# Start the interactive chat
# (only when this code runs as the main module, not when it is imported)
if __name__ == "__main__":
    chat_with_bot()




Chatbot is ready! Type 'exit' to end the conversation.
You: exit
Chatbot: Goodbye!


#Hosting

In [None]:
!pip install gradio


Collecting gradio
  Downloading gradio-5.5.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.2 (from gradio)
  Downloading gradio_client-1.4.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.26.2-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Col

In [None]:
!pip install spacy transformers scikit-learn pandas openpyxl thefuzz --quiet


[31mERROR: Operation cancelled by user[0m[31m
[0m

In [None]:
!pip install thefuzz




In [None]:
# Install necessary packages
!pip install spacy transformers scikit-learn pandas openpyxl thefuzz --quiet

import pandas as pd
import spacy
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re
from thefuzz import fuzz, process  # Using thefuzz instead of fuzzywuzzy (Updated)
import joblib

# Load the saved models
# NOTE: joblib.load unpickles the files, so only load artifacts you created yourself.
classifier = joblib.load('/content/farmer_chatbot.pkl')
# match_query_to_schemes calls vectorizer.transform(...) and compares the result
# with scheme_vectors, so each object must come from the file named after it
# (the two paths were previously swapped).
vectorizer = joblib.load('/content/tfidf_vectorizer.pkl')
scheme_vectors = joblib.load('/content/scheme_vectors.pkl')

# Load SpaCy model
nlp = spacy.load("en_core_web_sm")

# Load the dataset
df = pd.read_excel('/content/indian_farmer_schemes_large.xlsx')

# Step 1: Preprocessing Scheme Data
def normalize_text(text):
    """Return *text* lower-cased, without special characters, single-spaced."""
    lowered = text.lower()
    # Remove special characters
    without_symbols = re.sub(r'[^a-zA-Z0-9\s]', '', lowered)
    # Remove extra whitespace
    return re.sub(r'\s+', ' ', without_symbols).strip()

# Step 2: Matching User Queries Using TF-IDF Cosine Similarity and Fuzzy Matching
def match_query_to_schemes(user_input):
    """Return the single best-matching scheme row for a user query.

    Candidates come from two sources: TF-IDF cosine similarity above 0.3 and
    fuzzy matching of the query against the normalized scheme names with a
    score above 75. Each candidate is ranked by the larger of its cosine
    similarity and its fuzzy score scaled to 0-1, and the top one is returned.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        A one-row DataFrame with the best match, or None when nothing
        passes either threshold.
    """
    user_input_normalized = normalize_text(user_input)
    user_vector = vectorizer.transform([user_input_normalized])
    cosine_similarities = cosine_similarity(user_vector, scheme_vectors).flatten()

    # This cell reloads df, so the normalized-name column may not exist yet.
    if 'Scheme Name Normalized' in df.columns:
        normalized_names = df['Scheme Name Normalized']
    else:
        normalized_names = df['Scheme Name'].apply(normalize_text)

    # Use fuzzy matching to enhance matching results.
    # Passing a {row position: name} mapping makes each result a
    # (name, score, row position) triple, so positions line up with both
    # df.iloc and cosine_similarities.
    choices = dict(enumerate(normalized_names))
    fuzzy_matches = process.extract(user_input_normalized, choices, scorer=fuzz.token_sort_ratio, limit=5)

    candidate_scores = {}
    for _name, fuzzy_score, position in fuzzy_matches:
        if fuzzy_score > 75:
            candidate_scores[position] = fuzzy_score / 100.0

    # Get all matching schemes above a certain threshold
    threshold = 0.3  # Set a higher threshold to limit irrelevant results
    for position in np.where(cosine_similarities > threshold)[0]:
        position = int(position)
        similarity = float(cosine_similarities[position])
        candidate_scores[position] = max(candidate_scores.get(position, 0.0), similarity)

    if candidate_scores:
        # Select only the best match (highest combined score), not an arbitrary one.
        best_match_index = max(candidate_scores, key=candidate_scores.get)
        return df.iloc[[best_match_index]]
    return None

# Step 3: Extract Intent with Improved Approach
# Candidate intent labels. extract_intent passes this list to the classifier
# and returns the first label of the classifier's result.
labels = [
    "financial_support",
    "benefits",
    "eligibility",
    "limitations",
    "application_process",
    "small_talk",
    "farming_advice",
    "objective"
]

def extract_intent(user_input):
    """Determine the intent of a user message.

    Small talk is detected first by whole-word/phrase keywords, then farming
    advice by substring keywords; everything else is delegated to the
    classifier with the module-level ``labels``.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        "small_talk", "farming_advice", or the first label of the
        classifier's result.
    """
    # Adding simple keyword-based identification for small talk and schemes
    small_talk_keywords = ["hello", "hi", "how are you", "greetings", "tell me something interesting", "what is your name"]
    # Keywords are compared with lower-cased input, so they must be lower-case too.
    farming_advice_keywords = [
        "how to", "what's the best", "best practices", "how to improve", "advice",
        "recommendation", "how can i", "suggest", "guide"
    ]

    text = user_input.lower()

    # Word boundaries keep short keywords such as "hi" from matching inside
    # unrelated words ("which", "this", "machine").
    small_talk_pattern = r'\b(?:' + '|'.join(re.escape(keyword) for keyword in small_talk_keywords) + r')\b'
    if re.search(small_talk_pattern, text):
        return "small_talk"
    if any(keyword.lower() in text for keyword in farming_advice_keywords):
        return "farming_advice"

    # Default to classifier for specific scheme-related intents
    result = classifier(user_input, labels)
    return result['labels'][0]

# Step 4: Retrieve Information from Dataset Based on Intent
def _scheme_field(scheme_info, column):
    """Return the value of *column* for a scheme row, or a placeholder when it is missing or NaN."""
    value = scheme_info.get(column)
    if value is None or pd.isna(value):
        return 'Information not available'
    return value


def get_scheme_details(matching_schemes, intent):
    """Format the details of the first matching scheme for the given intent.

    The reply always starts with the scheme name and objective. A specific
    intent adds the one matching field; any other intent adds benefits,
    eligibility, application process and limitations. Columns that are
    absent or empty (NaN) are reported as 'Information not available'.

    Args:
        matching_schemes: DataFrame of matched schemes (only the first row
            is used), or None.
        intent: Intent label for the user message.

    Returns:
        The formatted multi-line reply, or a "could not find" message when
        there is no matching scheme.
    """
    if matching_schemes is None or matching_schemes.empty:
        return "I couldn't find details about the scheme you mentioned. Please provide more specific information."

    scheme_info = matching_schemes.iloc[0]

    # intent -> (label shown to the user, dataset column)
    intent_fields = {
        "financial_support": [("Financial Support", "Benefits")],
        "benefits": [("Benefits", "Benefits")],
        "eligibility": [("Eligibility", "Eligibility")],
        "limitations": [("Limitations", "Challenges/Limitations")],
        "application_process": [("Application Process", "Application Process")],
    }
    overview_fields = [
        ("Benefits", "Benefits"),
        ("Eligibility", "Eligibility"),
        ("Application Process", "Application Process"),
        ("Limitations", "Challenges/Limitations"),
    ]

    lines = [
        f"Scheme Name: {scheme_info['Scheme Name']}",
        f"Objective: {_scheme_field(scheme_info, 'Objective')}",
    ]
    for label, column in intent_fields.get(intent, overview_fields):
        lines.append(f"{label}: {_scheme_field(scheme_info, column)}")

    return "\n".join(lines).strip()

# Step 5: Chatbot Logic
def get_response(user_input):
    """Answer a user message according to its detected intent.

    Small talk and farming advice get fixed replies; every other intent is
    answered from the scheme matched against the message.
    """
    detected_intent = extract_intent(user_input)

    # Intents that are answered with a fixed sentence.
    fixed_replies = {
        "small_talk": "Hello! How can I assist you today?",
        "farming_advice": "Use organic compost and crop rotation for healthy farming!",
    }
    if detected_intent in fixed_replies:
        return fixed_replies[detected_intent]

    candidates = match_query_to_schemes(user_input)
    return get_scheme_details(candidates, detected_intent)

# Step 6: Interactive Chat Function for Colab Notebook
def chat_with_bot():
    """Read messages from the console and print the bot's replies.

    The loop ends when the user types 'exit' (any letter case). Errors raised
    while answering are printed and the conversation continues.
    """
    print("Chatbot is ready! Type 'exit' to end the conversation.")
    while True:
        message = input("You: ")
        if message.lower() != 'exit':
            try:
                reply = get_response(message)
                print(f"Chatbot: {reply}")
            except Exception as err:
                print(f"Error occurred: {err}")
            continue
        print("Chatbot: Goodbye!")
        return

# Start the interactive chat
# (only when this code runs as the main module, not when it is imported)
if __name__ == "__main__":
    chat_with_bot()


Chatbot is ready! Type 'exit' to end the conversation.
You: hi
Chatbot: Hello! How can I assist you today?
You: Soil Health Card Scheme
Chatbot: Use organic compost and crop rotation for healthy farming!
You: tell me about scheme
Chatbot: Use organic compost and crop rotation for healthy farming!
You: Kisan Credit Card (KCC)
Chatbot: Use organic compost and crop rotation for healthy farming!
You: exit
Chatbot: Goodbye!


In [None]:
!pip install pyngrok --quiet



In [None]:
!pip install pyngrok --quiet

# Import ngrok
from pyngrok import ngrok

# Authenticate ngrok with your authtoken.
# The token is a secret, so it is read from the NGROK_AUTHTOKEN environment
# variable instead of being written into the notebook. Any token that was
# previously committed here should be revoked in the ngrok dashboard.
import os

_ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN")
if not _ngrok_authtoken:
    raise RuntimeError("Set the NGROK_AUTHTOKEN environment variable before starting ngrok.")
ngrok.set_auth_token(_ngrok_authtoken)


In [None]:
# Open an ngrok tunnel to local port 5000 (the port the Flask app in this notebook listens on).
# NOTE: the value printed is the tunnel object's representation, which
# includes both the public URL and the local address it forwards to.
public_url = ngrok.connect(5000)
print("Public URL:", public_url)


Public URL: NgrokTunnel: "https://bcf7-34-19-27-184.ngrok-free.app" -> "http://localhost:5000"


In [None]:
# Install necessary packages
!pip install Flask spacy transformers scikit-learn pandas openpyxl thefuzz --quiet

import pandas as pd
import spacy
from transformers import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re
from thefuzz import fuzz, process
from flask import Flask, request, jsonify

# Load the dataset and models
# df holds the schemes spreadsheet that every lookup below reads from.
df = pd.read_excel('/content/indian_farmer_schemes_large.xlsx')
# NOTE(review): nlp is loaded here but not referenced by the other code visible in this cell.
nlp = spacy.load("en_core_web_sm")

# Preprocess and fit vectorizer
def normalize_text(text):
    """Lower-case *text*, remove non-alphanumeric characters and collapse whitespace."""
    symbols_removed = re.sub(r'[^a-zA-Z0-9\s]', '', text.lower())
    single_spaced = re.sub(r'\s+', ' ', symbols_removed)
    return single_spaced.strip()

# Normalized scheme name, plus name + objective as the text that gets indexed
# (a missing objective is treated as an empty string).
df['Scheme Name Normalized'] = df['Scheme Name'].apply(normalize_text)
df['Combined Text Normalized'] = df['Scheme Name Normalized'] + ' ' + df['Objective'].fillna('').apply(normalize_text)

# Fit TF-IDF on the combined text and pre-compute one vector per scheme row;
# match_query_to_schemes compares each query against scheme_vectors.
vectorizer = TfidfVectorizer().fit(df['Combined Text Normalized'])
scheme_vectors = vectorizer.transform(df['Combined Text Normalized'])
# Zero-shot classifier that extract_intent calls with the labels below.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Candidate intent labels handed to the classifier.
labels = [
    "financial_support", "benefits", "eligibility", "limitations",
    "application_process", "small_talk", "farming_advice", "objective"
]

def match_query_to_schemes(user_input):
    """Return the scheme row most similar to the user query.

    The query is normalized, vectorized with the fitted TF-IDF vectorizer
    and compared with every scheme vector by cosine similarity.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        A one-row DataFrame holding the highest-scoring scheme when its
        similarity exceeds 0.3, otherwise None.
    """
    user_input_normalized = normalize_text(user_input)
    user_vector = vectorizer.transform([user_input_normalized])
    cosine_similarities = cosine_similarity(user_vector, scheme_vectors).flatten()
    threshold = 0.3
    if cosine_similarities.size == 0:
        return None
    # Pick the highest-scoring row rather than the first row that happens to
    # clear the threshold.
    best_match_index = int(np.argmax(cosine_similarities))
    if cosine_similarities[best_match_index] > threshold:
        return df.iloc[[best_match_index]]
    return None

def extract_intent(user_input):
    """Classify *user_input* against ``labels`` and return the first label of the result."""
    return classifier(user_input, labels)['labels'][0]

def get_response(user_input):
    """Return a greeting for small talk, otherwise the name of the matched scheme."""
    if extract_intent(user_input) == "small_talk":
        return "Hello! How can I assist you today?"

    found = match_query_to_schemes(user_input)
    if found is None:
        return "Scheme not found."
    return found['Scheme Name'].iloc[0]

# Flask Application Setup
app = Flask(__name__)

@app.route('/chat', methods=['POST'])
def chat():
    """Answer a chat message posted as JSON.

    Expects a body such as {"message": "<text>"} and replies with
    {"response": "<answer>"}. A body that is not JSON, or that lacks a
    non-empty string "message", gets a 400 response with an "error" field
    instead of failing inside the classifier.
    """
    # silent=True yields None for a missing or malformed JSON body instead of raising.
    payload = request.get_json(silent=True)
    user_input = payload.get("message") if isinstance(payload, dict) else None
    if not isinstance(user_input, str) or not user_input.strip():
        return jsonify({"error": "Request body must be JSON with a non-empty 'message' string."}), 400
    response = get_response(user_input)
    return jsonify({"response": response})

# Start the Flask server on all interfaces, port 5000, when run as the main module.
# NOTE: app.run keeps serving until it is interrupted, so nothing after it runs in the meantime.
if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [13/Nov/2024 06:07:07] "[31m[1mGET /chat HTTP/1.1[0m" 405 -
INFO:werkzeug:127.0.0.1 - - [13/Nov/2024 06:07:07] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [13/Nov/2024 06:07:10] "[31m[1mGET /chat HTTP/1.1[0m" 405 -
INFO:werkzeug:127.0.0.1 - - [13/Nov/2024 06:07:40] "[31m[1mGET /chat HTTP/1.1[0m" 405 -


In [None]:
# Install necessary packages
!pip install Flask spacy transformers scikit-learn pandas openpyxl thefuzz --quiet

import pandas as pd
import spacy
from transformers import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re
from thefuzz import fuzz, process
from flask import Flask, request, jsonify
import subprocess  # To run server as a subprocess

# Load the dataset and models
# df holds the schemes spreadsheet that every lookup below reads from.
df = pd.read_excel('/content/indian_farmer_schemes_large.xlsx')
# NOTE(review): nlp is loaded here but not referenced by the other code visible in this cell.
nlp = spacy.load("en_core_web_sm")

# Preprocess and fit vectorizer
def normalize_text(text):
    """Return *text* lower-cased, stripped of special characters and extra whitespace."""
    cleaned = text.lower()
    # First drop special characters, then squeeze runs of whitespace.
    for pattern, replacement in ((r'[^a-zA-Z0-9\s]', ''), (r'\s+', ' ')):
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# Normalized scheme name, plus name + objective as the text that gets indexed
# (a missing objective is treated as an empty string).
df['Scheme Name Normalized'] = df['Scheme Name'].apply(normalize_text)
df['Combined Text Normalized'] = df['Scheme Name Normalized'] + ' ' + df['Objective'].fillna('').apply(normalize_text)

# Fit TF-IDF on the combined text and pre-compute one vector per scheme row;
# match_query_to_schemes compares each query against scheme_vectors.
vectorizer = TfidfVectorizer().fit(df['Combined Text Normalized'])
scheme_vectors = vectorizer.transform(df['Combined Text Normalized'])
# Zero-shot classifier that extract_intent calls with the labels below.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Candidate intent labels handed to the classifier.
labels = [
    "financial_support", "benefits", "eligibility", "limitations",
    "application_process", "small_talk", "farming_advice", "objective"
]

def match_query_to_schemes(user_input):
    """Find the scheme whose indexed text is closest to the user query.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        A one-row DataFrame with the scheme that has the highest TF-IDF
        cosine similarity to the query, provided that similarity is above
        0.3; None otherwise.
    """
    user_input_normalized = normalize_text(user_input)
    user_vector = vectorizer.transform([user_input_normalized])
    cosine_similarities = cosine_similarity(user_vector, scheme_vectors).flatten()
    threshold = 0.3
    if cosine_similarities.size == 0:
        return None
    # The best match is the row with the maximum similarity, not merely the
    # first row above the threshold.
    best_match_index = int(np.argmax(cosine_similarities))
    if cosine_similarities[best_match_index] <= threshold:
        return None
    return df.iloc[[best_match_index]]

def extract_intent(user_input):
    """Run the classifier on *user_input* with ``labels`` and return the first label it reports."""
    reported_labels = classifier(user_input, labels)['labels']
    return reported_labels[0]

def get_response(user_input):
    """Reply with a greeting for small talk, else with the matched scheme's name."""
    detected_intent = extract_intent(user_input)
    if detected_intent != "small_talk":
        found = match_query_to_schemes(user_input)
        if found is not None:
            return found['Scheme Name'].iloc[0]
        return "Scheme not found."
    return "Hello! How can I assist you today?"

# Flask Application Setup
app = Flask(__name__)

@app.route('/chat', methods=['POST'])
def chat():
    """Answer a chat message posted as JSON.

    Expects a body such as {"message": "<text>"} and replies with
    {"response": "<answer>"}. A body that is not JSON, or that lacks a
    non-empty string "message", gets a 400 response with an "error" field
    instead of failing inside the classifier.
    """
    # silent=True yields None for a missing or malformed JSON body instead of raising.
    payload = request.get_json(silent=True)
    user_input = payload.get("message") if isinstance(payload, dict) else None
    if not isinstance(user_input, str) or not user_input.strip():
        return jsonify({"error": "Request body must be JSON with a non-empty 'message' string."}), 400
    response = get_response(user_input)
    return jsonify({"response": response})

# Save the Flask code to a Python script to run it in background.
# The script text is a raw string so the regex backslashes are written to the
# file exactly as typed, rather than relying on an invalid "\s" escape being
# passed through by this literal.
# The embedded script returns the highest-scoring scheme (not the first one
# above the threshold) and rejects requests without a usable "message".
with open("flask_app.py", "w", encoding="utf-8") as f:
    f.write(r"""
from flask import Flask, request, jsonify
import pandas as pd
import spacy
from transformers import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re
from thefuzz import fuzz, process

# Load the dataset and models
df = pd.read_excel('/content/indian_farmer_schemes_large.xlsx')
nlp = spacy.load("en_core_web_sm")

# Preprocess and fit vectorizer
def normalize_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

df['Scheme Name Normalized'] = df['Scheme Name'].apply(normalize_text)
df['Combined Text Normalized'] = df['Scheme Name Normalized'] + ' ' + df['Objective'].fillna('').apply(normalize_text)

vectorizer = TfidfVectorizer().fit(df['Combined Text Normalized'])
scheme_vectors = vectorizer.transform(df['Combined Text Normalized'])
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

labels = [
    "financial_support", "benefits", "eligibility", "limitations",
    "application_process", "small_talk", "farming_advice", "objective"
]

def match_query_to_schemes(user_input):
    user_input_normalized = normalize_text(user_input)
    user_vector = vectorizer.transform([user_input_normalized])
    cosine_similarities = cosine_similarity(user_vector, scheme_vectors).flatten()
    threshold = 0.3
    if cosine_similarities.size == 0:
        return None
    # Best match = highest similarity, not the first row above the threshold
    best_match_index = int(np.argmax(cosine_similarities))
    if cosine_similarities[best_match_index] > threshold:
        return df.iloc[[best_match_index]]
    return None

def extract_intent(user_input):
    result = classifier(user_input, labels)
    return result['labels'][0]

def get_response(user_input):
    intent = extract_intent(user_input)
    if intent == "small_talk":
        return "Hello! How can I assist you today?"
    else:
        matching_schemes = match_query_to_schemes(user_input)
        return matching_schemes['Scheme Name'].iloc[0] if matching_schemes is not None else "Scheme not found."

# Flask Application Setup
app = Flask(__name__)

@app.route('/chat', methods=['POST'])
def chat():
    # silent=True yields None for a missing or malformed JSON body
    payload = request.get_json(silent=True)
    user_input = payload.get("message") if isinstance(payload, dict) else None
    if not isinstance(user_input, str) or not user_input.strip():
        return jsonify({"error": "Request body must be JSON with a non-empty 'message' string."}), 400
    response = get_response(user_input)
    return jsonify({"response": response})

if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)
""")

# Run the Flask app in the background.
# The server's output goes to flask_app.log: with stdout/stderr attached to
# pipes that nobody reads, the child would block once a pipe buffer fills up,
# and its start-up errors would be invisible.
# NOTE(review): the name `process` also rebinds the `process` module imported
# from thefuzz in this cell; the name is kept so any later use of `process`
# as the server handle keeps working.
with open("flask_app.log", "w") as flask_log:
    # The child keeps its own copy of the file descriptor after this block closes ours.
    process = subprocess.Popen(["python3", "flask_app.py"], stdout=flask_log, stderr=subprocess.STDOUT)

print("Flask app is now running in the background.")



Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


Flask app is now running in the background.


In [None]:
import requests

# The chat endpoint is served at /chat; posting to the server root returns a
# non-JSON error page, which is what made response.json() fail.
url = "http://localhost:5000/chat"  # Replace the host with your ngrok URL if needed
payload = {
    "message": "Tell me about Soil Health Card Scheme"
}
headers = {
    "Content-Type": "application/json"
}

# Send POST request (bounded wait so a server that is still starting cannot hang the cell)
response = requests.post(url, json=payload, headers=headers, timeout=120)
# Surface HTTP errors (404, 405, 500, ...) directly instead of as a JSON decoding error
response.raise_for_status()

# Print response
print("Chatbot Response:", response.json().get("response"))


JSONDecodeError: Expecting value: line 1 column 1 (char 0)