In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/image-2/image-2.jpg
/kaggle/input/agri-dataset/text2.pdf
/kaggle/input/agri-dataset/text3.pdf
/kaggle/input/agri-dataset/text1.pdf
/kaggle/input/image-vision/tomato_leaf.jpg


**Necessary installations and imports**

In [None]:
!pip install -q pdfplumber nltk
!pip install -U bitsandbytes
!pip install -q sentence-transformers huggingface_hub transformers faiss-cpu

In [2]:
import pdfplumber
import re
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


**Data Preprocessing** 

In [3]:
# Settings
chunk_size = 400  # approx words per chunk
pdf_files = ['/kaggle/input/agri-dataset/text1.pdf', '/kaggle/input/agri-dataset/text2.pdf', '/kaggle/input/agri-dataset/text3.pdf']

# Helper function: Read PDF and extract text
def extract_text_from_pdf(pdf_path):
    full_text = ''
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if text:
                full_text += text + '\n'
    return full_text

# Helper function: Clean the extracted text
def clean_text(text):
    # Remove multiple spaces, tabs, and newlines
    text = re.sub(r'\s+', ' ', text)
    # Remove unwanted characters if needed (example: form feeds)
    text = text.replace('\x0c', '')
    return text.strip()

# Helper function: Chunk text into small passages
def chunk_text(text, chunk_size=400):
    sentences = sent_tokenize(text)
    chunks = []
    current_chunk = ''
    current_count = 0
    
    for sentence in sentences:
        words = sentence.split()
        if current_count + len(words) <= chunk_size:
            current_chunk += ' ' + sentence
            current_count += len(words)
        else:
            chunks.append(current_chunk.strip())
            current_chunk = sentence
            current_count = len(words)
    
    # Add last chunk
    if current_chunk:
        chunks.append(current_chunk.strip())
    
    return chunks

In [4]:
# Main pipeline
all_chunks = []

for pdf_path in pdf_files:
    print(f"Processing: {pdf_path}")
    raw_text = extract_text_from_pdf(pdf_path)
    cleaned_text = clean_text(raw_text)
    chunks = chunk_text(cleaned_text, chunk_size=chunk_size)
    all_chunks.extend(chunks)

print(f"\nTotal Chunks Created: {len(all_chunks)}")

# Save chunks if needed
import json
with open('agriculture_chunks.json', 'w', encoding='utf-8') as f:
    json.dump(all_chunks, f, ensure_ascii=False, indent=2)

Processing: /kaggle/input/agri-dataset/text1.pdf
Processing: /kaggle/input/agri-dataset/text2.pdf
Processing: /kaggle/input/agri-dataset/text3.pdf

Total Chunks Created: 46


**Model Loading Code (Embedding + Generation)**

In [5]:
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

2025-04-29 17:17:18.778545: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745947038.972962     107 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745947039.029307     107 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [6]:
# DEVICE setting
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

Using device: cuda


In [7]:
# 1. Load Embedding Model (for Retrieval)
# (Very lightweight, fast on Kaggle GPUs)
embedding_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
embedder = SentenceTransformer(embedding_model_name, device=device)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [8]:
# Safer, smaller model (for memory constraints on Kaggle):
generation_model_name = "mistralai/Mistral-7B-Instruct-v0.2"

In [None]:
# Install the Hugging Face CLI if not installed
!pip install -q huggingface_hub

# Login to Hugging Face
from huggingface_hub import login

# Put your Hugging Face token here
# Get your token from https://huggingface.co/settings/tokens
login("your hugging face access token")

In [10]:
# Load model and tokenizer
from transformers import BitsAndBytesConfig
tokenizer = AutoTokenizer.from_pretrained(generation_model_name, trust_remote_code=True)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,    # Load model in 4-bit quantized mode
    bnb_4bit_compute_dtype=torch.float16,
)

generation_model = AutoModelForCausalLM.from_pretrained(
    generation_model_name,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto",
)

# Optional: Wrap in a text generation pipeline
generator = pipeline("text-generation", model=generation_model, tokenizer=tokenizer)

print("\nEmbedding model and Generation model loaded successfully!")

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Device set to use cuda:0



Embedding model and Generation model loaded successfully!


In [11]:
!pip install sacremoses

Collecting sacremoses
  Downloading sacremoses-0.1.1-py3-none-any.whl.metadata (8.3 kB)
Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.5/897.5 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hInstalling collected packages: sacremoses
Successfully installed sacremoses-0.1.1


**Load Translation Model**

In [12]:
# Load translation model (English to Hindi)
translator = pipeline('translation_en_to_hi', model="Helsinki-NLP/opus-mt-en-hi")
def translate_to_hindi(text):
    """
    Translate English text to Hindi.
    """
    translated = translator(text, max_length=512)
    return translated[0]['translation_text']

config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/306M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/306M [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/812k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/1.07M [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.10M [00:00<?, ?B/s]

Device set to use cuda:0


**FAISS Indexing + Retrieval Code**

In [13]:
# Imports
import faiss
import numpy as np
import json

# Load your chunks (from preprocessing step)
with open('agriculture_chunks.json', 'r', encoding='utf-8') as f:
    chunks = json.load(f)

# 1. Generate embeddings for all chunks
print("Generating embeddings for chunks...")
chunk_embeddings = embedder.encode(chunks, batch_size=32, show_progress_bar=True, convert_to_numpy=True)

# 2. Build FAISS Index
embedding_dim = chunk_embeddings.shape[1]  # should be 384 for MiniLM
index = faiss.IndexFlatL2(embedding_dim)
index.add(chunk_embeddings)

print(f"✅ FAISS index created with {index.ntotal} documents.")

# 3. Save FAISS Index + Chunk Mapping (Optional)
faiss.write_index(index, "agriculture_faiss.index")

with open('agriculture_chunk_mapping.json', 'w', encoding='utf-8') as f:
    json.dump(chunks, f, ensure_ascii=False, indent=2)

print("✅ Saved FAISS index and chunk mapping files.")

Generating embeddings for chunks...


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

✅ FAISS index created with 46 documents.
✅ Saved FAISS index and chunk mapping files.


**Retrieval + Augmentation + Generation Code**

In [14]:
# Helper function: Retrieve Top-k Chunks
def retrieve_chunks(query, top_k=5):
    # Encode query
    query_embedding = embedder.encode([query], convert_to_numpy=True)
    # Search in FAISS index
    distances, indices = index.search(query_embedding, top_k)
    # Fetch corresponding chunks
    retrieved_chunks = [chunks[idx] for idx in indices[0]]
    return retrieved_chunks

# Helper function: Build Augmented Prompt
def build_prompt(query, retrieved_chunks):
    context = "\n\n".join(retrieved_chunks)
    prompt = f"""You are an agricultural expert helping farmers.
Use the following context to answer the question.

Context:
{context}

Question:
{query}

Answer:"""
    return prompt

def generate_answer(query):
    retrieved_chunks = retrieve_chunks(query)
    augmented_prompt = build_prompt(query, retrieved_chunks)
    
    output = generator(augmented_prompt, max_new_tokens=100, temperature=0.7, do_sample=True)[0]['generated_text']
    
    # Optional: Post-process to remove prompt from output
    answer_start = output.find("Answer:") + len("Answer:")
    final_answer = output[answer_start:].strip()
    return final_answer

# ✅ Ready! Example Usage:

# Farmer Query Example
user_query = "How can I protect my rice crop from brown spot disease?"

# Generate answer
answer = generate_answer(user_query)
print("\n🧠 Final Answer:\n", answer)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



🧠 Final Answer:
 Brown spot disease is a fungal disease that affects rice crops. It is caused by the fungus Pyricularia grisea. To protect your rice crop from brown spot disease, consider implementing the following integrated pest management (IPM) strategies:

1. Cultural practices:
- Use disease-resistant rice varieties.
- Maintain proper irrigation and drainage to prevent water stagnation.
- Avoid overuse of nitrogen fertil


In [15]:
def get_crop_care_plan(crop_name, region=None, language='en'):
    """
    Generate a detailed crop care plan.
    
    Parameters:
    - crop_name (str): Name of the crop (e.g., "Rice", "Tomato").
    - region (str, optional): Region for localized advice (e.g., "Punjab").
    - language (str): 'en' for English, 'hi' for Hindi.
    
    Returns:
    - care_plan (str): Final care plan in the specified language.
    """
    # Step 1: Build prompt
    care_prompt = f"""You are an agricultural expert helping farmers. 
Create a detailed 3-month care plan for growing "{crop_name}".
{f"Consider typical weather and soil conditions in {region}." if region else ""}

Structure the plan week-by-week including:
- Soil preparation
- Watering schedule
- Fertilization
- Pest and disease management
- Harvest tips

Use simple language suitable for farmers.

Care Plan:"""

    # Step 2: Generate care plan (in English)
    output = generator(care_prompt, max_new_tokens=500, temperature=0.7, do_sample=False)[0]['generated_text']
    care_start = output.find("Care Plan:") + len("Care Plan:")
    care_plan = output[care_start:].strip()
    
    # Step 3: Translate if needed
    if language == 'hi':
        care_plan = translate_to_hindi(care_plan)
    
    return care_plan


In [16]:
# Example 1: English Plan
plan_en = get_crop_care_plan(crop_name="Wheat", region="Haryana", language='en')
print("\n Care Plan (English):\n", plan_en)

# Example 2: Hindi Plan
plan_hi = get_crop_care_plan(crop_name="Tomato", region="Madhya Pradesh", language='hi')
print("\n देखभाल योजना (हिंदी):\n", plan_hi)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Care Plan (English):
 Growing Wheat in Haryana

Month 1: Preparation and Planting (November - December)

Week 1:
1. Soil Preparation: Plow the land to a depth of 15-20 cm to loosen the soil and kill weeds.
2. Watering: Wait for the first monsoon rains.

Week 2-3:
1. Soil Preparation: Harrow the land to remove stones and level the soil.
2. Watering: Continue waiting for monsoon rains.

Week 4:
1. Soil Preparation: Rake the soil to remove any remaining debris and create a smooth, level surface.
2. Planting: As soon as the rains start, sow wheat seeds at a depth of 1-2 cm and a spacing of 15-25 cm between rows.

Week 5-6:
1. Watering: Water the field if rains are insufficient.
2. Fertilization: Apply 25-30 kg of NPK (Nitrogen, Phosphorus, Potassium) fertilizer per hectare.

Month 2: Growth and Maintenance (December - January)

Week 1-2:
1. Watering: Continue irrigating if rains are scarce.
2. Fertilization: Apply 15-20 kg of NPK fertilizer per hectare.
3. Pest and Disease Management: Wa

In [17]:
from transformers import AutoModelForImageClassification, AutoImageProcessor
from transformers import AutoFeatureExtractor
from transformers import AutoModelForImageClassification

model = AutoModelForImageClassification.from_pretrained("linkanjarad/mobilenet_v2_1.0_224-plant-disease-identification")
processor = AutoImageProcessor.from_pretrained("linkanjarad/mobilenet_v2_1.0_224-plant-disease-identification")
disease_model = AutoModelForImageClassification.from_pretrained("linkanjarad/mobilenet_v2_1.0_224-plant-disease-identification")
image_processor = AutoFeatureExtractor.from_pretrained("linkanjarad/mobilenet_v2_1.0_224-plant-disease-identification")

# Helper to predict disease
def predict_disease(image_path):
    image = Image.open(image_path).convert("RGB")
    inputs = image_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = disease_model(**inputs)
    logits = outputs.logits
    predicted_class_idx = logits.argmax(-1).item()
    predicted_disease = disease_model.config.id2label[predicted_class_idx]
    return predicted_disease

config.json:   0%|          | 0.00/3.57k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/9.34M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/408 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


model.safetensors:   0%|          | 0.00/9.26M [00:00<?, ?B/s]

In [18]:
def get_treatment_advice(disease_name, crop_name=None, language='en'):
    """
    Generate treatment advice based on the detected disease.
    """
    prompt = f"""You are an agricultural expert.
A farmer's crop is affected by the disease "{disease_name}" {f"on {crop_name}" if crop_name else ""}.
Suggest a simple, practical treatment plan.
Include both organic and chemical control options where appropriate.

Treatment Plan:"""
    
    output = generator(prompt, max_new_tokens=500, temperature=0.7, do_sample=False)[0]['generated_text']
    advice_start = output.find("Treatment Plan:") + len("Treatment Plan:")
    advice = output[advice_start:].strip()
    
    if language == 'hi':
        advice = translate_to_hindi(advice)
    
    return advice

In [19]:
from PIL import Image

In [20]:
# Upload or pass image
image_path = "/kaggle/input/image-vision/tomato_leaf.jpg"

# Step 1: Predict disease
predicted_disease = predict_disease(image_path)
print(" Detected Disease:", predicted_disease)

 Detected Disease: Strawberry with Leaf Scorch


In [21]:
# Step 2: Get treatment advice
advice = get_treatment_advice(predicted_disease, crop_name="Strawberry", language='en')
print("\n Treatment Advice:\n", advice)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Treatment Advice:
 1. Identification and Diagnosis:
   * Confirm the diagnosis of "Strawberry Leaf Scorch" disease by observing the symptoms such as:
      * Leaves turning yellow, brown, or red, starting from the margins and progressing towards the center.
      * Leaf edges may become brittle and dry, eventually leading to leaf drop.
      * Fruits may be small, misshapen, or have a red or purple tinge.
      * Plants may wilt or die.

2. Cultural Practices:
   * Improve soil drainage to prevent waterlogging, as the disease thrives in wet conditions.
   * Practice crop rotation with non-host plants to reduce the buildup of the disease in the soil.
   * Prune and remove infected plants to prevent the spread of the disease.
   * Maintain proper spacing between plants to ensure good air circulation and sunlight penetration.

3. Organic Control:
   * Use neem oil or horticultural oil to smother the fungus that causes the disease. Apply when new leaves emerge and repeat every 7-10 days.

In [22]:
# Upload or pass image
image_path = "/kaggle/input/image-2/image-2.jpg"

# Step 1: Predict disease
predicted_disease = predict_disease(image_path)
print(" Detected Disease:", predicted_disease)

 Detected Disease: Bell Pepper with Bacterial Spot


In [23]:
# Step 2: Get treatment advice
advice = get_treatment_advice(predicted_disease, crop_name="Bell Pepper", language='en')
print("\n Treatment Advice:\n", advice)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Treatment Advice:
 1. Identification and Early Detection:
   * Inspect the bell pepper plants regularly for early signs of bacterial spot, such as water-soaked lesions on leaves, stems, and fruits.
   * Quarantine and remove infected plants to prevent the spread of the disease.

2. Cultural Practices:
   * Practice good hygiene in the field, such as removing plant debris and using clean tools to prevent the spread of the disease.
   * Avoid overhead irrigation to minimize the splash of water and the spread of the disease.
   * Use mulch to maintain soil moisture and reduce the need for frequent irrigation.
   * Rotate crops to prevent the buildup of the bacterial inoculum in the soil.

3. Organic Control:
   * Use neem oil or copper-based fungicides as a preventative measure. These treatments can be applied as a foliar spray or as a soil drench.
   * Use sulfur or lime sulfur as a foliar spray to control the disease. These treatments can be applied at the first sign of the disease.
 

In [24]:
# Improved Field Report Simulation
def simulate_field_report(crop, region, season):
    prompt = f"""
As an expert agricultural officer, provide a DAILY FIELD REPORT.

Crop: {crop}
Region: {region}
Season: {season}

Include:
- Current crop growth stage
- Observed pest or disease issues (if any)
- Soil and moisture conditions
- Weather impact on the crop
- Clear and actionable advice to farmers (short points)

Only provide the final report. DO NOT restate the prompt.
"""
    response = generator(prompt, max_new_tokens=250, temperature=0.7, do_sample=True, top_p=0.9)[0]['generated_text']
    return response

# Improved Pest Prediction Simulation
def simulate_pest_prediction(crop, region, current_weather):
    prompt = f"""
As an agricultural pest prediction model:

Crop: {crop}
Region: {region}
Current Weather: {current_weather}

Predict likely pests based on climate conditions.
Advise farmers in short actionable points on how to prevent pest outbreaks.

Avoid restating the input, only give prediction and suggestions.
"""
    response = generator(prompt, max_new_tokens=200, temperature=0.7, do_sample=True, top_p=0.9)[0]['generated_text']
    return response

# Improved Smart Irrigation Advice Simulation
def simulate_irrigation_advice(crop, soil_moisture, upcoming_weather):
    prompt = f"""
You are a smart irrigation advisor.

Crop: {crop}
Soil Moisture: {soil_moisture}
Upcoming Weather: {upcoming_weather}

Give specific and technical irrigation advice:
- How much water to apply (mm or inches)
- Best time of day to irrigate
- Special precautions based on weather

Answer clearly without repeating the question.
"""
    response = generator(prompt, max_new_tokens=180, temperature=0.7, do_sample=True, top_p=0.9)[0]['generated_text']
    return response

# Example Usage:
crop = "Wheat"
region = "Punjab"
season = "Rabi"

print("\n Field Report:\n", simulate_field_report(crop, region, season))

print("\n Pest Prediction:\n", simulate_pest_prediction(crop, region, "humid, 30°C"))

print("\n Smart Irrigation Advice:\n", simulate_irrigation_advice(crop, "Low", "Dry and sunny next week"))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Field Report:
 
As an expert agricultural officer, provide a DAILY FIELD REPORT.

Crop: Wheat
Region: Punjab
Season: Rabi

Include:
- Current crop growth stage
- Observed pest or disease issues (if any)
- Soil and moisture conditions
- Weather impact on the crop
- Clear and actionable advice to farmers (short points)

Only provide the final report. DO NOT restate the prompt.

---

Date: 15th March 2023

Subject: Daily Field Report - Wheat Crop, Punjab, Rabi Season

Current Crop Growth Stage: The wheat crop in Punjab has entered the dough stage. The grains have become soft and doughy in texture, and the plants are turning golden brown. The crop is almost ready for harvest.

Observed Pest or Disease Issues: Minimal pest or disease issues have been reported in the wheat crop. The occasional occurrence of Hessian fly infestation and Fusarium head blight has been noted in some areas. Farmers are advised to scout their fields regularly and take necessary measures to control these pests and

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Pest Prediction:
 
As an agricultural pest prediction model:

Crop: Wheat
Region: Punjab
Current Weather: humid, 30°C

Predict likely pests based on climate conditions.
Advise farmers in short actionable points on how to prevent pest outbreaks.

Avoid restating the input, only give prediction and suggestions.

Based on the current climate conditions in Punjab, the following pests may pose a risk to wheat crops:
1. Armyworms (Mydesia spp.)
   - Actionable points:
     - Scout fields regularly for armyworms, as they can be difficult to detect until large infestations occur.
     - Consider using insecticidal baits to control armyworms, especially in areas with a history of infestations.
     - Implement cultural practices, such as crop rotation and tillage, to reduce the risk of armyworm infestations.

2. Stem rust (Puccinia graminis)
   - Actionable points:
     - Monitor fields closely for early signs of rust, such as orange or red pustules on the stems and leaves.
     - Consider us

In [25]:
def generate_farmer_conversation(crop, issue):
    prompt = f"""
Create a realistic and natural-sounding conversation between two farmers about a problem with their {crop} crop.

The issue they are discussing is: {issue}.

Farmer 1 should describe the symptoms or problem they are seeing in the crop.
Farmer 2 should respond with personal experience, possible causes, and simple advice.

Use simple, rural-style English like real farmers would speak.
Don't make it too formal. Keep it casual, relatable, and practical.

Start like this:
Farmer 1: Hey, have you noticed something strange with the {crop} plants lately?
"""

    response = generator(prompt, max_new_tokens=350, temperature=0.85, do_sample=True)[0]['generated_text']
    return response.strip()

In [26]:
conversation = generate_farmer_conversation("tomato", "leaf curl disease")
print("👨‍🌾 Farmer Conversation:\n", conversation)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


👨‍🌾 Farmer Conversation:
 Create a realistic and natural-sounding conversation between two farmers about a problem with their tomato crop.

The issue they are discussing is: leaf curl disease.

Farmer 1 should describe the symptoms or problem they are seeing in the crop.
Farmer 2 should respond with personal experience, possible causes, and simple advice.

Use simple, rural-style English like real farmers would speak.
Don't make it too formal. Keep it casual, relatable, and practical.

Start like this:
Farmer 1: Hey, have you noticed something strange with the tomato plants lately?

Farmer 2: Yeah, I have. Some of mine are lookin' kindafunny these days.

Farmer 1: (nods) I'm seein' the same thing. Mostly the leaves are curlin' up, and they got these odd streaks on 'em.

Farmer 2: That's leaf curl disease, my friend. I've had it back in '98, and it ain't nothing pleasant.

Farmer 1: (frowning) I thought I took care of all the common ailments, but I guess not. What causes it?

Farmer 2: 

**Agricultural Chatbot**

In [27]:
# Pest Control Advice: "How do I control aphids on my beans?"
# Best Crop Practices: "What are the best irrigation practices for wheat?"
# Fertilizer Recommendations: "How much fertilizer should I use for corn?"
# Seasonal Tips: "When is the best time to plant rice in Madhya Pradesh?"

In [None]:
while True:
    query = input("👩‍🌾 Ask your agriculture-related question (or type 'exit' to quit): ")
    if query.lower() == 'exit':
        print("👋 Goodbye!")
        break
    answer = generate_answer(query)  # <-- Your function from RAG
    print("🤖 Chatbot:", answer)

👩‍🌾 Ask your agriculture-related question (or type 'exit' to quit):  How do I control aphids on my beans?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


🤖 Chatbot: To control aphids on your beans, consider using a combination of beneficial insects and preventive measures. Release ladybird beetles (Coccinella septempunctata) and lacewings (Chrysoperla rufilabris) every 3 weeks as adults. Ladybird beetles prey on aphids and can sustain large populations during aphid outbreaks. Green lacewings produce rapid eradication of aphids and are
