In [1]:
import json
import os
import re
import time
from pathlib import Path
from typing import Any, Dict, List, Optional

import requests

class DisasterManagementQAGenerator:
    """Generate disaster-management Q&A pairs by prompting a model served by Ollama.

    Each input record is a dict with the keys ``disaster_type``, ``content``,
    ``pages`` and ``source``. For every record a prompt is built, sent to the
    Ollama ``/api/generate`` endpoint, and the ``Q<n>: ... / A<n>: ...`` text
    that comes back is parsed into ``{"question", "answer"}`` dicts.
    """

    # Substring of ``disaster_type`` identifying the emergency-response record,
    # which gets its own prompt and the "emergency_system" category.
    EMERGENCY_SYSTEM_MARKER = "Emergency Response Support System"

    # Number of Q&A pairs requested from the model for every record.
    QA_PAIRS_PER_RECORD = 20

    # "Q<n>:" / "A<n>:" labels. ``\b`` stops a label from matching inside a
    # longer word (e.g. "FAQ1:"); the optional asterisks absorb markdown bold
    # wrapped around the label ("**Q1:**", "**Q1**:").
    _QUESTION_LABEL = re.compile(r'\*{0,2}\bQ\d+\*{0,2}:\*{0,2}')
    _ANSWER_LABEL = re.compile(r'\*{0,2}\bA\d+\*{0,2}:\*{0,2}')

    def __init__(self, ollama_url="http://localhost:11434", model_name="gemma3n:e4b"):
        """Store the Ollama base URL and model name used for every request."""
        self.ollama_url = ollama_url
        self.model_name = model_name
        # Filled by process_all_disasters() with the per-record results.
        self.generated_qa = []

    def format_disaster_data(self, record: Dict) -> tuple:
        """Extract the fields used for prompting from ``record``.

        Returns:
            ``(disaster_type, content, pages, source)``. A key that is missing
            or explicitly ``None`` falls back to ``""``, ``""``, ``[]`` and
            ``"Disaster Management India"`` respectively.
        """
        def field(key: str, default: Any) -> Any:
            # A JSON ``null`` would otherwise reach ``.strip()`` / ``in`` checks
            # downstream and raise.
            value = record.get(key, default)
            return default if value is None else value

        return (
            field("disaster_type", ""),
            field("content", ""),
            field("pages", []),
            field("source", "Disaster Management India"),
        )

    def generate_disaster_prompt(self, record: Dict) -> str:
        """Build the prompt asking for ``QA_PAIRS_PER_RECORD`` Q&A pairs.

        Records whose ``disaster_type`` contains ``EMERGENCY_SYSTEM_MARKER``
        get the emergency-response prompt; every other record gets the
        disaster-preparedness prompt.
        """
        disaster_type, content, pages, source = self.format_disaster_data(record)
        n_pairs = self.QA_PAIRS_PER_RECORD

        # The support-system record is not a disaster and needs different framing.
        if self.EMERGENCY_SYSTEM_MARKER in disaster_type:
            return f"""You are creating educational Q&A pairs for Indian citizens learning about emergency response systems. Based on this information about India's emergency response support system, generate exactly {n_pairs} question-answer pairs that would help Indian citizens understand and use emergency services effectively.

Each question must reference emergency response, emergency services, or emergency preparedness in Indian context.

Emergency System Information:
System: {disaster_type}
Content: {content}
Source: {source}

Generate {n_pairs} Q&A pairs in this format:
Q1: [question]
A1: [answer]

Q2: [question]
A2: [answer]

Continue for all {n_pairs} pairs. Cover comprehensive aspects of emergency response system including how to use 112, mobile app features, different emergency services, response procedures, and citizen guidance for Indian emergency management."""

        return f"""You are creating educational Q&A pairs for Indian citizens learning about disaster management and preparedness. Based on this disaster management information, generate exactly {n_pairs} question-answer pairs that would help Indian citizens prepare for, respond to, and recover from disasters.

Each question must reference disaster management, disaster preparedness, or emergency response in Indian context.

Disaster Information:
Disaster Type: {disaster_type}
Content: {content}
Source: {source}

Generate {n_pairs} Q&A pairs in this format:
Q1: [question]
A1: [answer]

Q2: [question]
A2: [answer]

Continue for all {n_pairs} pairs. Cover comprehensive aspects of {disaster_type} including preparation, safety measures, response actions, evacuation procedures, post-disaster recovery, and safety guidelines for Indian disaster management."""

    def query_ollama(self, prompt: str, max_retries: int = 10,
                     timeout: float = 1000, retry_delay: float = 60) -> str:
        """Send ``prompt`` to Ollama's ``/api/generate`` and return the generated text.

        Args:
            prompt: Full prompt text.
            max_retries: Maximum number of attempts.
            timeout: Per-request timeout in seconds (1000 s is roughly 16.7 minutes).
            retry_delay: Seconds to wait between attempts.

        Returns:
            The ``response`` field of the JSON reply, or ``""`` when every
            attempt failed or the request was rejected.
        """
        for attempt in range(max_retries):
            try:
                response = requests.post(
                    f"{self.ollama_url}/api/generate",
                    json={
                        "model": self.model_name,
                        "prompt": prompt,
                        "stream": False,
                        "options": {
                            "temperature": 0.7,
                            "top_p": 0.9,
                            # Ollama's option for the output-token limit is
                            # ``num_predict``; ``max_tokens`` is not recognised.
                            "num_predict": 4000,
                        },
                    },
                    timeout=timeout,
                )

                if response.status_code == 200:
                    return response.json().get("response", "")

                print(f"HTTP Error {response.status_code}: {response.text}")
                if 400 <= response.status_code < 500:
                    # A client error (e.g. unknown model) will not change on a
                    # retry of the identical request, so stop immediately.
                    print("❌ Request rejected by Ollama; not retrying.")
                    return ""

            except Exception as e:
                # Deliberately broad: generation is best-effort and any failure
                # (connection, timeout, malformed JSON) is handled by retrying.
                print(f"Attempt {attempt + 1}/{max_retries} failed: {e}")

            # Back off before the next attempt, whether the failure was an
            # exception or a server-side HTTP error.
            if attempt < max_retries - 1:
                print(f"⏳ Waiting {retry_delay} seconds for Ollama to recover...")
                time.sleep(retry_delay)

        print(f"❌ Failed after {max_retries} attempts. Ollama may need manual restart.")
        return ""

    def parse_qa_response(self, response: str) -> List[Dict[str, str]]:
        """Parse ``Q<n>: ... A<n>: ...`` text into question/answer dicts.

        Text before the first question label is ignored, as is any section
        that lacks an answer label or has an empty question or answer.
        Markdown bold around the labels is removed.
        """
        if not response:
            return []

        qa_pairs = []
        # The first piece is whatever preceded the first "Q<n>:" label.
        for section in self._QUESTION_LABEL.split(response)[1:]:
            parts = self._ANSWER_LABEL.split(section, maxsplit=1)
            if len(parts) != 2:
                continue
            question, answer = (part.strip() for part in parts)
            if question and answer:
                qa_pairs.append({"question": question, "answer": answer})

        return qa_pairs

    def generate_qa_for_disaster(self, record: Dict) -> Optional[Dict]:
        """Generate and parse the Q&A pairs for a single record.

        Returns:
            A dict with ``disaster_type``, ``category`` (``"emergency_system"``
            or ``"disaster"``), ``qa_pairs``, ``pages``, ``source`` and
            ``content_length``; or ``None`` when the record has no content,
            the model returned nothing, or no pair could be parsed.
        """
        disaster_type, content, pages, source = self.format_disaster_data(record)

        print(f"\n🚨 Processing: {disaster_type}")

        if not content.strip():
            print(f"⚠️  Skipping {disaster_type} - no content found")
            return None

        response = self.query_ollama(self.generate_disaster_prompt(record))
        if not response:
            print(f"❌ Failed to generate Q&A for {disaster_type}")
            return None

        qa_pairs = self.parse_qa_response(response)
        if not qa_pairs:
            print(f"❌ Failed to parse Q&A for {disaster_type}")
            return None

        print(f"✅ Generated {len(qa_pairs)} Q&A pairs for {disaster_type}")
        if len(qa_pairs) != self.QA_PAIRS_PER_RECORD:
            # Surface truncated or over-long generations instead of silently
            # storing a short set.
            print(f"⚠️  Expected {self.QA_PAIRS_PER_RECORD} Q&A pairs for {disaster_type}, parsed {len(qa_pairs)}")

        is_emergency_system = self.EMERGENCY_SYSTEM_MARKER in disaster_type
        return {
            "disaster_type": disaster_type,
            "category": "emergency_system" if is_emergency_system else "disaster",
            "qa_pairs": qa_pairs,
            "pages": pages,
            "source": source,
            "content_length": len(content)
        }

    def _write_results(self, qa_data: List[Dict], output_file_path: str) -> bool:
        """Write ``qa_data`` to ``output_file_path`` as JSON; return True on success."""
        try:
            with open(output_file_path, 'w', encoding='utf-8') as f:
                json.dump(qa_data, f, indent=2, ensure_ascii=False)
        except Exception as e:
            print(f"❌ Error saving results: {e}")
            return False
        return True

    def process_all_disasters(self, json_file_path: str, output_file_path: str,
                              delay_seconds: float = 3):
        """Generate Q&A pairs for every record in a JSON file and save them.

        Args:
            json_file_path: Input JSON file whose root is a list of records.
            output_file_path: Destination JSON file. It is rewritten after
                every successful record so an interrupted run keeps its
                progress.
            delay_seconds: Pause between consecutive records.

        The collected results are also stored on ``self.generated_qa``.
        """
        print(f"🚨 Loading disaster management data from: {json_file_path}")

        try:
            with open(json_file_path, 'r', encoding='utf-8') as f:
                records = json.load(f)
        except Exception as e:
            print(f"❌ Error loading JSON file: {e}")
            return

        if not isinstance(records, list):
            print(f"❌ Error loading JSON file: expected a list of records, got {type(records).__name__}")
            return

        print(f"📊 Found {len(records)} disaster management records to process")

        all_qa_data = []
        self.generated_qa = all_qa_data
        successful_disasters = 0
        disaster_count = 0
        emergency_system_count = 0

        # Process each disaster/emergency system
        for i, record in enumerate(records):
            if not isinstance(record, dict):
                print(f"⚠️  Skipping record {i+1}/{len(records)} - expected a JSON object, got {type(record).__name__}")
                continue

            disaster_type = record.get("disaster_type", f"Record_{i}")
            print(f"\n📍 Progress: {i+1}/{len(records)} - {disaster_type}")

            disaster_qa = self.generate_qa_for_disaster(record)

            if disaster_qa:
                all_qa_data.append(disaster_qa)
                successful_disasters += 1

                if disaster_qa["category"] == "emergency_system":
                    emergency_system_count += 1
                else:
                    disaster_count += 1

                # Checkpoint: each generation can take minutes, so do not
                # wait for the end of the run to persist it.
                self._write_results(all_qa_data, output_file_path)

            # Rate limiting between requests; pointless after the last record.
            if i < len(records) - 1:
                time.sleep(delay_seconds)

        # Final save (also covers the case where no record succeeded).
        if not self._write_results(all_qa_data, output_file_path):
            return

        print(f"\n🎉 SUCCESS!")
        print(f"✅ Processed {successful_disasters}/{len(records)} records successfully")
        print(f"🚨 Disasters processed: {disaster_count}")
        print(f"📞 Emergency systems processed: {emergency_system_count}")
        print(f"💾 Saved to: {output_file_path}")

        total_qa = sum(len(item["qa_pairs"]) for item in all_qa_data)
        print(f"📊 Total Q&A pairs generated: {total_qa}")
        print(f"📈 Expected: {len(records)} × {self.QA_PAIRS_PER_RECORD} = {len(records) * self.QA_PAIRS_PER_RECORD}")

# # Usage example:
# if __name__ == "__main__":
#     # Initialize the generator
#     qa_generator = DisasterManagementQAGenerator()
    
#     # Process all disaster records
#     input_file = "disaster_management.json"  # Your input file
#     output_file = "disaster_qa_results.json"  # Output file
    
#     qa_generator.process_all_disasters(input_file, output_file)
    
#     # Or test single record:
#     sample_record = {
#         "disaster_type": "Cyclone: Do's & Dont's",
#         "content": "Before the Cyclone season: Check the house; secure loose tiles and carry out repairs of doors and windows Remove dead branches or dying trees close to the house...",
#         "pages": [1, 2, 3],
#         "source": "Disaster Management India"
#     }
    
#     # Test single disaster (20 Q&A)
#     print("🧪 Testing single disaster...")
#     test_result = qa_generator.generate_qa_for_disaster(sample_record)
#     if test_result:
#         print(f"✅ Test successful! Generated {len(test_result['qa_pairs'])} Q&A pairs")
#         print(f"Category: {test_result['category']}")
#         for i, qa in enumerate(test_result['qa_pairs'][:2], 1):
#             print(f"\nQ{i}: {qa['question']}")
#             print(f"A{i}: {qa['answer'][:100]}...")
#     else:
#         print("❌ Test failed")



In [2]:
# Generator bound to the local Ollama server; both values equal the constructor defaults.
qa_generator = DisasterManagementQAGenerator(
    ollama_url="http://localhost:11434",
    model_name="gemma3n:e4b",
)

In [3]:
# Dataset directory. Set the DISASTER_QA_DATA_DIR environment variable to run
# the notebook on another machine; the default keeps the original location.
DATA_DIR = Path(os.environ.get(
    "DISASTER_QA_DATA_DIR",
    "/Users/saikumarallaka/kaggle/gemma_3n_impact_challenge/datasets/disaster_management",
))
input_file = str(DATA_DIR / "disaster_management_structured_data_extract.json")
output_file = str(DATA_DIR / "disaster_management_qa_dataset.json")

In [4]:
# Run the full pipeline: read the extracted records, query the model, write the Q&A dataset.
qa_generator.process_all_disasters(
    json_file_path=input_file,
    output_file_path=output_file,
)

🚨 Loading disaster management data from: /Users/saikumarallaka/kaggle/gemma_3n_impact_challenge/datasets/disaster_management/disaster_management_structured_data_extract.json
📊 Found 8 disaster management records to process

📍 Progress: 1/8 - Cyclone: Do's & Dont's

🚨 Processing: Cyclone: Do's & Dont's
✅ Generated 20 Q&A pairs for Cyclone: Do's & Dont's

📍 Progress: 2/8 - Tsunami

🚨 Processing: Tsunami
✅ Generated 20 Q&A pairs for Tsunami

📍 Progress: 3/8 - Heat wave: Do's & Dont's

🚨 Processing: Heat wave: Do's & Dont's
✅ Generated 20 Q&A pairs for Heat wave: Do's & Dont's

📍 Progress: 4/8 - Landslide: Do's & Dont's

🚨 Processing: Landslide: Do's & Dont's
✅ Generated 20 Q&A pairs for Landslide: Do's & Dont's

📍 Progress: 5/8 - Urban Floods: Do’s & Don’ts

🚨 Processing: Urban Floods: Do’s & Don’ts
✅ Generated 20 Q&A pairs for Urban Floods: Do’s & Don’ts

📍 Progress: 6/8 - Floods: Do’s & Don’ts

🚨 Processing: Floods: Do’s & Don’ts
✅ Generated 20 Q&A pairs for Floods: Do’s & Don’ts

📍 Pro

In [37]:
1  # NOTE(review): stray scratch expression; execution count 37 is out of sequence with the cells above

1

In [None]:
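# # Usage example (NOTE: appears to be left over from a rice-disease notebook; RiceDiseaseQAGenerator is not defined in the cells shown here):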
# if __name__ == "__main__":
#     # Initialize the generator
#     qa_generator = RiceDiseaseQAGenerator()
    
#     # Process all rice disease records
#     input_file = "rice_diseases.json"  # Your input file
#     output_file = "rice_disease_qa_results.json"  # Output file
    
#     qa_generator.process_all_rice_diseases(input_file, output_file)
    
    # # Or test single record:
    # sample_record = {
    #     "disease": "Blast",
    #     "causal_organism": "Pyricularia oryzae", 
    #     "main_heading": "Symptoms",
    #     "sub_heading": "",
    #     "content": "All aboveground parts of the rice plant (leaves, leaf collar, culm, culm nodes, neck, and panicle) are attacked by the fungus initial symptoms are white to gray-green lesions or spots with brown borders Small specks originate on leaves - subsequently enlarge into spindle shaped spots(0.5 to 1.5cm length, 0.3 to 0.5cm width) with ashy center.",
    #     "pages": [1, 2],
    #     "images": [],
    #     "source": "Rice Diseases"
    # }
    
    # # Test single record (5 Q&A)
    # print("🧪 Testing single record...")
    # test_result = qa_generator.generate_qa_for_record(sample_record)
    # if test_result:
    #     print(f"✅ Test successful! Generated {len(test_result['qa_pairs'])} Q&A pairs")
    #     for i, qa in enumerate(test_result['qa_pairs'][:2], 1):
    #         print(f"\nQ{i}: {qa['question']}")
    #         print(f"A{i}: {qa['answer'][:100]}...")
    # else:
    #     print("❌ Test failed")