In [13]:
import json
import re
import time
from typing import Any, Dict, List, Optional, Tuple

import requests

class AyurvedaQAGenerator:
    """Generate question/answer pairs about Ayurvedic herbs with an Ollama model.

    For each herb record the generator builds a prompt from the herb's name,
    scientific name and filtered ailment/remedy list, sends it to the Ollama
    ``/api/generate`` endpoint, and parses the ``Qn:`` / ``An:`` formatted reply
    into a list of ``{"question": ..., "answer": ...}`` dictionaries.
    """

    # Seconds to wait for a single Ollama response.
    REQUEST_TIMEOUT_S = 120
    # Seconds to pause between failed attempts inside query_ollama.
    RETRY_DELAY_S = 5
    # Seconds to pause between herbs in process_all_herbs.
    INTER_REQUEST_DELAY_S = 2
    # Upper bound on generated tokens per request (sent as Ollama's `num_predict`).
    MAX_RESPONSE_TOKENS = 2000

    # Label matchers for the model reply. `\b` stops a match in the middle of a
    # word (e.g. "FAQ1:"), and the optional asterisks absorb markdown emphasis
    # the model may wrap around a label (e.g. "**Q1:**").
    _QUESTION_LABEL = re.compile(r'\*{0,2}\bQ\d+:\*{0,2}')
    _ANSWER_LABEL = re.compile(r'\*{0,2}\bA\d+:\*{0,2}')

    def __init__(self, ollama_url="http://localhost:11434", model_name="gemma3n:e4b"):
        """Store the Ollama base URL and model name used for every request."""
        self.ollama_url = ollama_url
        self.model_name = model_name
        # Filled with the successful per-herb results by process_all_herbs.
        self.generated_qa = []

    def format_herb_data(self, herb_data: Dict) -> Tuple[str, str, str]:
        """Extract the prompt fields from one herb record.

        Args:
            herb_data: Herb record; the keys read are ``herb``,
                ``scientific_name`` and ``ailments`` (a list of records with
                ``name`` and ``remedies``).

        Returns:
            ``(herb_name, scientific_name, uses_text)`` where ``uses_text`` has
            one ``"- <ailment>: <remedy>\\n"`` line per ailment that passes the
            filters below (empty string when none pass).
        """
        # `or ""` also covers keys that are present but null.
        herb_name = herb_data.get("herb") or ""
        scientific_name = herb_data.get("scientific_name") or ""

        use_lines = []
        for ailment in herb_data.get("ailments") or []:
            ailment_name = ailment.get("name") or ""
            remedy = ailment.get("remedies") or ""

            # Skip malformed entries: very short names or remedies.
            if len(ailment_name) <= 3 or len(remedy) <= 10:
                continue
            # Skip entries that are clearly OCR errors or scientific names.
            if ailment_name.startswith("(") or "officinale" in ailment_name:
                continue
            use_lines.append(f"- {ailment_name}: {remedy}\n")

        return herb_name, scientific_name, "".join(use_lines)

    def generate_herb_prompt(self, herb_data: Dict) -> str:
        """Build the generation prompt for one herb record.

        Args:
            herb_data: Herb record, interpreted by ``format_herb_data``.

        Returns:
            The full prompt text asking for 15 numbered Q&A pairs.
        """
        herb_name, scientific_name, uses_text = self.format_herb_data(herb_data)

        # The template lines are deliberately flush-left: everything inside the
        # triple-quoted string, including indentation, is sent to the model.
        return f"""You are creating educational Q&A pairs for a traditional medicine app that helps Indian families with home remedies. Based on this Ayurvedic herb information, generate exactly 15 question-answer pairs that would help a mother learn about natural treatments.
Each question must include a reference to Ayurveda, Ayurvedic remedy, or home remedy context.

Herb Information:
Herb: {herb_name}
Scientific Name: {scientific_name}
Traditional Uses:
{uses_text}

Generate 15 Q&A pairs in this format:
Q1: [practical question a mother might ask]
A1: [helpful answer with specific remedy and dosage]

Q2: [another practical question]
A2: [helpful answer with specific remedy and dosage]

Continue this pattern for all 15 pairs. Cover different scenarios: family health issues, children's ailments, preparation methods, dosages, safety warnings, and prevention. Make questions natural and answers practical for home use with proper traditional Indian context."""

    def query_ollama(self, prompt: str, max_retries: int = 3) -> str:
        """Send a prompt to Ollama and return the generated text.

        Args:
            prompt: Prompt text to send.
            max_retries: Maximum number of attempts.

        Returns:
            The ``response`` field of the first HTTP 200 reply, or ``""`` when
            every attempt fails.
        """
        endpoint = f"{self.ollama_url.rstrip('/')}/api/generate"
        payload = {
            "model": self.model_name,
            "prompt": prompt,
            "stream": False,
            "options": {
                "temperature": 0.7,
                "top_p": 0.9,
                # `num_predict` is Ollama's option for limiting generated
                # tokens; `max_tokens` is not among its documented options,
                # so the intended limit was not being applied.
                "num_predict": self.MAX_RESPONSE_TOKENS,
            },
        }

        for attempt in range(max_retries):
            try:
                response = requests.post(
                    endpoint, json=payload, timeout=self.REQUEST_TIMEOUT_S
                )
                if response.status_code == 200:
                    return response.json().get("response") or ""
                print(f"HTTP Error {response.status_code}: {response.text}")
            except Exception as e:
                # Deliberately broad: generation is best-effort, a failed herb
                # is reported and skipped by the caller.
                print(f"Attempt {attempt + 1} failed: {e}")

            # Back off before the next attempt for HTTP errors and exceptions
            # alike, but not after the final attempt.
            if attempt < max_retries - 1:
                time.sleep(self.RETRY_DELAY_S)

        return ""

    def parse_qa_response(self, response: str) -> List[Dict[str, str]]:
        """Parse ``Qn:`` / ``An:`` formatted text into question/answer pairs.

        Args:
            response: Raw model output.

        Returns:
            One ``{"question": ..., "answer": ...}`` dict per question section
            that contains an answer label and has non-empty text on both
            sides. Text before the first question label is ignored.
        """
        qa_pairs = []
        if not response:
            return qa_pairs

        # The first element is whatever precedes the first question label.
        for section in self._QUESTION_LABEL.split(response)[1:]:
            parts = self._ANSWER_LABEL.split(section, maxsplit=1)
            if len(parts) != 2:
                continue  # question without an answer label
            question, answer = (part.strip() for part in parts)
            if question and answer:
                qa_pairs.append({"question": question, "answer": answer})

        return qa_pairs

    def generate_qa_for_herb(self, herb_data: Dict) -> Optional[Dict]:
        """Generate Q&A pairs for a single herb record.

        Args:
            herb_data: Herb record, interpreted by ``format_herb_data``; the
                optional ``page`` key is copied into the result.

        Returns:
            A dict with ``herb``, ``scientific_name``, ``qa_pairs``, ``source``,
            ``page`` and ``ailment_count``, or ``None`` when the herb has no
            usable ailments, the model returns nothing, or nothing can be
            parsed from the reply.
        """
        herb_name, scientific_name, uses_text = self.format_herb_data(herb_data)
        # Records with a blank herb name still get a readable log label.
        label = herb_name or scientific_name or "<unnamed herb>"

        print(f"\n🌿 Processing herb: {label}")

        # Skip if no valid uses found
        if not uses_text.strip():
            print(f"⚠️  Skipping {label} - no valid ailments found")
            return None

        response = self.query_ollama(self.generate_herb_prompt(herb_data))
        if not response:
            print(f"❌ Failed to generate Q&A for {label}")
            return None

        qa_pairs = self.parse_qa_response(response)
        if not qa_pairs:
            print(f"❌ Failed to parse Q&A for {label}")
            return None

        print(f"✅ Generated {len(qa_pairs)} Q&A pairs for {label}")

        return {
            "herb": herb_name,
            "scientific_name": scientific_name,
            "qa_pairs": qa_pairs,
            "source": "Ayurveda",
            "page": herb_data.get("page", ""),
            # Counts ailments by name length only, which is looser than the
            # filter applied when building the prompt.
            "ailment_count": sum(
                1
                for ailment in (herb_data.get("ailments") or [])
                if len(ailment.get("name") or "") > 3
            ),
        }

    def process_all_herbs(self, json_file_path: str, output_file_path: str):
        """Generate Q&A for every herb in a JSON file and save the results.

        Args:
            json_file_path: Path of the input JSON file holding the herb records.
            output_file_path: Path the successful results are written to as JSON.

        Load and save errors are printed rather than raised. The successful
        results are also kept on ``self.generated_qa``.
        """
        print(f"📚 Loading data from: {json_file_path}")

        try:
            with open(json_file_path, 'r', encoding='utf-8') as f:
                herbs_data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both JSON and text-decoding errors.
            print(f"❌ Error loading JSON file: {e}")
            return

        total_herbs = len(herbs_data)
        print(f"📊 Found {total_herbs} herbs to process")

        all_qa_data = []
        for position, herb_data in enumerate(herbs_data, start=1):
            print(f"\n📍 Progress: {position}/{total_herbs}")

            herb_qa = self.generate_qa_for_herb(herb_data)
            if herb_qa:
                all_qa_data.append(herb_qa)

            # Add delay between requests to be nice to the API; there is
            # nothing left to throttle after the last herb.
            if position < total_herbs:
                time.sleep(self.INTER_REQUEST_DELAY_S)

        self.generated_qa = all_qa_data

        try:
            with open(output_file_path, 'w', encoding='utf-8') as f:
                json.dump(all_qa_data, f, indent=2, ensure_ascii=False)

            print("\n🎉 SUCCESS!")
            print(f"✅ Processed {len(all_qa_data)}/{total_herbs} herbs successfully")
            print(f"💾 Saved to: {output_file_path}")

            total_qa = sum(len(herb["qa_pairs"]) for herb in all_qa_data)
            print(f"📊 Total Q&A pairs generated: {total_qa}")

        except (OSError, TypeError, ValueError) as e:
            print(f"❌ Error saving results: {e}")

In [14]:
# Dataset locations, relative to the kernel's working directory instead of one
# user's home directory so the notebook runs on other machines.
# NOTE(review): assumes the notebook is started from a folder that is a sibling
# of `datasets/` — confirm before running.
DATA_DIR = "../datasets/ayurveda"
input_file = f"{DATA_DIR}/ayurveda_structured_data_extract.json"
output_file = f"{DATA_DIR}/ayurveda_qa_dataset.json"

In [15]:
# Create the generator with the constructor's default Ollama URL and model name.
qa_generator = AyurvedaQAGenerator()

In [16]:
# Load the structured herb records that the later cells iterate over.
with open(input_file, mode='r', encoding='utf-8') as herbs_fh:
    herbs_data = json.load(herbs_fh)

In [17]:
# Smoke test: run generation for the first herb record only.
first_herb = herbs_data[0]
test_result = qa_generator.generate_qa_for_herb(first_herb)


🌿 Processing herb: Adrak/ Sonth
✅ Generated 15 Q&A pairs for Adrak/ Sonth


In [18]:
# Display the result of the single-herb smoke test for manual inspection.
test_result

{'herb': 'Adrak/ Sonth',
 'scientific_name': '( Zingiber officinale',
 'qa_pairs': [{'question': "My child has a persistent cough and cold. In Ayurveda, what's a natural home remedy using Adrak that can help soothe their respiratory system?",
   'answer': 'In Ayurveda, Adrak (ginger) is excellent for cough and cold. Give your child 2-5 gm of dry ginger powder mixed with jaggery, three times a day in divided doses. You can also prepare a 10ml decoction from a piece of ginger to give in the morning to help prevent recurrent attacks.'},
  {'question': 'My mother-in-law complains of stomach pain and indigestion. Can Adrak be used as a home remedy in Ayurveda for this?',
   'answer': 'Yes, Adrak is traditionally used for indigestion. She can take 5 gm of crushed Adrak rhizome with salt or jaggery twice daily, especially before meals. This helps stimulate digestion.'},
  {'question': 'My voice is hoarse. Does Ayurveda recommend any home remedy involving Adrak for this?',
   'answer': 'Yes, f

In [None]:

    # Initialize generator
    qa_generator = AyurvedaQAGenerator()
    
    # Test with first herb only (for debugging)
    print("🧪 Testing with first herb...")
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            herbs_data = json.load(f)
        
        if herbs_data:
            test_result = qa_generator.generate_qa_for_herb(herbs_data[0])
            if test_result:
                print(f"✅ Test successful! Generated {len(test_result['qa_pairs'])} Q&A pairs")
                
                # Show sample Q&A
                if test_result['qa_pairs']:
                    sample_qa = test_result['qa_pairs'][0]
                    print(f"\n📝 Sample Q&A:")
                    print(f"Q: {sample_qa['question']}")
                    print(f"A: {sample_qa['answer'][:150]}...")
                
                # Ask user if they want to continue with all herbs
                continue_all = input("\n🤔 Continue with all herbs? (y/n): ").lower().strip()
                if continue_all == 'y':
                    qa_generator.process_all_herbs(input_file, output_file)
                else:
                    print("✋ Stopped after test. Adjust settings if needed.")
            else:
                print("❌ Test failed. Check your Ollama setup and model.")
        
    except Exception as e:
        print(f"❌ Error: {e}")

if __name__ == "__main__":
    main()

In [20]:
# Generate Q&A for all herbs
all_qa_results = []

for herb in herbs_data:
    qa_result = qa_generator.generate_qa_for_herb(herb)
    all_qa_results.append(qa_result)


🌿 Processing herb: Adrak/ Sonth
✅ Generated 15 Q&A pairs for Adrak/ Sonth

🌿 Processing herb: ATYAIN
✅ Generated 15 Q&A pairs for ATYAIN

🌿 Processing herb: ANAR
✅ Generated 15 Q&A pairs for ANAR

🌿 Processing herb: AMLA
✅ Generated 15 Q&A pairs for AMLA

🌿 Processing herb: DALCHINI
✅ Generated 15 Q&A pairs for DALCHINI

🌿 Processing herb: DHANIA
✅ Generated 15 Q&A pairs for DHANIA

🌿 Processing herb: ELAICHI
✅ Generated 15 Q&A pairs for ELAICHI

🌿 Processing herb: GHEE
✅ Generated 15 Q&A pairs for GHEE

🌿 Processing herb: HALDI
✅ Generated 15 Q&A pairs for HALDI

🌿 Processing herb: HING
✅ Generated 15 Q&A pairs for HING

🌿 Processing herb: TAYPHAL
✅ Generated 15 Q&A pairs for TAYPHAL

🌿 Processing herb: 
✅ Generated 15 Q&A pairs for 

🌿 Processing herb: KALIMIRACH
✅ Generated 15 Q&A pairs for KALIMIRACH

🌿 Processing herb: KARELA
✅ Generated 15 Q&A pairs for KARELA

🌿 Processing herb: LAHSUN
✅ Generated 15 Q&A pairs for LAHSUN

🌿 Processing herb: LAUNG
✅ Generated 15 Q&A pairs for LA

In [21]:
# Scratch expression left over from interactive use: it echoes 1 and binds no name.
1

1

In [22]:
# Spot-check one generated entry (index 10 is TAYPHAL in the recorded run).
all_qa_results[10]

{'herb': 'TAYPHAL',
 'scientific_name': '(Myristica fragmns',
 'qa_pairs': [{'question': "My child is suffering from diarrhea. I've heard of natural remedies in Ayurveda. Can you tell me how Tayphal can help?",
   'answer': 'Yes, in Ayurveda, Tayphal is known to help with diarrhea in children. You can give a pinch of Tayphal powder or fruit rubbed on a clean surface, mixed with milk or water, 3-4 times a day.'},
  {'question': 'My daughter has been very irritable lately, especially during her periods. Is there a home remedy using Tayphal that might help calm her down?',
   'answer': "Absolutely. Tayphal has mild sedative properties. Give her 1-2 pinches of Tayphal powder mixed with milk, 3-4 times a day, if she's restless and irritable. This is a common Ayurvedic practice for calming the mind."},
  {'question': "I've noticed some dark spots on my face, particularly around my eyes. Is this common, and can Tayphal help?",
   'answer': 'Yes, dark pigmentation is often seen in women, espec

In [None]:
# Write the dataset to the path configured in `output_file` rather than a
# second hard-coded copy of it, and drop empty entries (herbs that were skipped
# or failed) so the file contains no `null` records.
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump([result for result in all_qa_results if result], f, ensure_ascii=False, indent=2)