In [1]:
import json
import os
import re
import time
from typing import Any, Dict, List, Optional, Tuple

import requests

class DepressionQAGenerator:
    """Generate depression-education Q&A pairs by prompting a model served by Ollama.

    Each input record is a dict with the keys ``main_topic``, ``sub_topic``,
    ``content``, ``source`` and (optionally) ``pages``. For every record a prompt
    is built, sent to ``<ollama_url>/api/generate`` and the ``Q<n>:`` / ``A<n>:``
    labelled reply is parsed into question/answer dicts.
    """

    # A "Q<n>:" / "A<n>:" label, optionally hugged by markdown emphasis markers
    # (e.g. "**Q1:**"). The look-behind stops the label from matching inside a
    # longer token such as "FAQ1:" or "HbA1:".
    _QUESTION_LABEL = re.compile(r'[*_]*(?<![A-Za-z0-9])Q\d+:[*_]*')
    _ANSWER_LABEL = re.compile(r'[*_]*(?<![A-Za-z0-9])A\d+:[*_]*')

    def __init__(self, ollama_url="http://localhost:11434", model_name="gemma3n:e4b"):
        """Store the Ollama endpoint and model name.

        Args:
            ollama_url: Base URL of the Ollama server (no trailing path).
            model_name: Name of the model to request from Ollama.
        """
        self.ollama_url = ollama_url
        self.model_name = model_name
        # Kept for backward compatibility; no method of this class writes to it.
        self.generated_qa = []

    @staticmethod
    def _text_field(record: Dict[str, Any], key: str, default: str = "") -> str:
        """Return ``record[key]`` as a string, falling back to ``default`` when missing or None."""
        value = record.get(key, default)
        if value is None:
            # A JSON ``null`` would otherwise reach ``.strip()`` / the prompt as None.
            return default
        return value if isinstance(value, str) else str(value)

    def format_depression_data(self, depression_data: Dict) -> Tuple[str, str, str, str]:
        """Extract the text fields of a record that are used in the prompt.

        Args:
            depression_data: One record of the structured extract.

        Returns:
            ``(main_topic, sub_topic, content, source)``. Missing or ``None``
            fields become ``""``, except ``source`` which becomes
            ``"Mental Health Guide"``.
        """
        main_topic = self._text_field(depression_data, "main_topic")
        sub_topic = self._text_field(depression_data, "sub_topic")
        content = self._text_field(depression_data, "content")
        source = self._text_field(depression_data, "source", "Mental Health Guide")

        return main_topic, sub_topic, content, source

    def generate_depression_prompt(self, depression_data: Dict) -> str:
        """Build the instruction prompt for one record.

        Args:
            depression_data: One record of the structured extract.

        Returns:
            The prompt text asking for exactly five ``Q<n>:`` / ``A<n>:`` pairs.
        """
        main_topic, sub_topic, content, source = self.format_depression_data(depression_data)

        prompt = f"""You are creating educational Q&A pairs for a mental health app that helps families understand depression. Based on this depression information, generate exactly 5 question-answer pairs that would help someone learn about depression and mental health.

Each question must include a reference to depression, mental health, or emotional wellbeing context.

Depression Information:
Source: {source}
Topic: {main_topic}
Subtopic: {sub_topic}
Content: {content}

Generate 5 Q&A pairs in this format:
Q1: [practical question about depression someone might ask]
A1: [helpful answer with specific information]

Q2: [another practical question about depression]
A2: [helpful answer with specific information]

Q3: [practical question about depression symptoms or signs]
A3: [helpful answer with specific information]

Q4: [practical question about depression treatment or support]
A4: [helpful answer with specific information]

Q5: [practical question about helping others with depression]
A5: [helpful answer with specific information]

Cover different aspects: understanding depression, recognizing symptoms, treatment options, helping others, and getting support. Make questions natural and answers practical for real-life situations."""

        return prompt

    def query_ollama(self, prompt: str, max_retries: int = 3) -> str:
        """Send ``prompt`` to Ollama's ``/api/generate`` endpoint, retrying on failure.

        Args:
            prompt: Full prompt text.
            max_retries: Maximum number of attempts.

        Returns:
            The ``response`` field of the first HTTP 200 reply, or ``""`` when
            every attempt failed. Errors are printed, never raised.
        """
        for attempt in range(max_retries):
            try:
                response = requests.post(
                    f"{self.ollama_url}/api/generate",
                    json={
                        "model": self.model_name,
                        "prompt": prompt,
                        "stream": False,
                        "options": {
                            "temperature": 0.7,
                            "top_p": 0.9,
                            # Ollama's native option for the output-length cap is
                            # "num_predict"; "max_tokens" is not one of its options.
                            "num_predict": 1500
                        }
                    },
                    timeout=120  # 2 minutes timeout
                )

                if response.status_code == 200:
                    return response.json().get("response", "")

                print(f"HTTP Error {response.status_code}: {response.text}")

            except Exception as e:
                # Best effort: any transport or decoding problem counts as a failed attempt.
                print(f"Attempt {attempt + 1} failed: {e}")

            # Back off before the next attempt, whether the failure was an
            # exception or a non-200 status; no wait after the final attempt.
            if attempt < max_retries - 1:
                time.sleep(5)

        return ""

    def parse_qa_response(self, response: str) -> List[Dict[str, str]]:
        """Parse ``Q<n>:`` / ``A<n>:`` labelled text into question/answer dicts.

        Labels may be plain (``Q1:``) or wrapped in markdown emphasis
        (``**Q1:**``). Text before the first question label is ignored, and a
        question section without an answer label is dropped.

        Args:
            response: Raw model output; ``None`` or ``""`` yields an empty list.

        Returns:
            A list of ``{"question": ..., "answer": ...}`` dicts in reply order.
        """
        qa_pairs: List[Dict[str, str]] = []
        if not response:
            return qa_pairs

        # The first piece is whatever preceded the first question label.
        sections = self._QUESTION_LABEL.split(response)[1:]

        for section in sections:
            # Only the first answer label separates question from answer.
            parts = self._ANSWER_LABEL.split(section, maxsplit=1)
            if len(parts) != 2:
                continue

            question = parts[0].strip()
            answer = parts[1].strip()

            if question and answer:
                qa_pairs.append({
                    "question": question,
                    "answer": answer
                })

        return qa_pairs

    def generate_qa_for_depression_record(self, depression_data: Dict) -> Optional[Dict]:
        """Generate Q&A pairs for a single record.

        Args:
            depression_data: One record of the structured extract.

        Returns:
            A dict with ``main_topic``, ``sub_topic``, ``source``, ``qa_pairs``,
            ``pages`` and ``content_length``; or ``None`` when the record is not
            a dict, has no content, the model returned nothing, or the reply
            could not be parsed.
        """
        if not isinstance(depression_data, dict):
            print(f"⚠️  Skipping record - expected a JSON object, got {type(depression_data).__name__}")
            return None

        main_topic, sub_topic, content, source = self.format_depression_data(depression_data)

        print(f"\n🧠 Processing topic: {main_topic}")

        # Skip if no content found
        if not content.strip():
            print(f"⚠️  Skipping {main_topic} - no content found")
            return None

        prompt = self.generate_depression_prompt(depression_data)
        response = self.query_ollama(prompt)

        if not response:
            print(f"❌ Failed to generate Q&A for {main_topic}")
            return None

        qa_pairs = self.parse_qa_response(response)

        if not qa_pairs:
            print(f"❌ Failed to parse Q&A for {main_topic}")
            return None

        print(f"✅ Generated {len(qa_pairs)} Q&A pairs for {main_topic}")

        return {
            "main_topic": main_topic,
            "sub_topic": sub_topic,
            "source": source,
            "qa_pairs": qa_pairs,
            "pages": depression_data.get("pages", []),
            "content_length": len(content)
        }

    def process_all_depression_records(self, json_file_path: str, output_file_path: str) -> None:
        """Generate Q&A pairs for every record in a JSON file and save them.

        Args:
            json_file_path: Path to a UTF-8 JSON file whose top level is a list
                of records.
            output_file_path: Path the list of per-record results is written to
                (UTF-8, non-ASCII characters kept as-is).

        Returns:
            None. Progress and errors are printed; load/save errors are not raised.
        """
        print(f"📚 Loading data from: {json_file_path}")

        try:
            with open(json_file_path, 'r', encoding='utf-8') as f:
                depression_data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both malformed JSON and undecodable bytes.
            print(f"❌ Error loading JSON file: {e}")
            return

        if not isinstance(depression_data, list):
            # Iterating a dict would hand string keys to the per-record method.
            print(f"❌ Error loading JSON file: expected a list of records, got {type(depression_data).__name__}")
            return

        total_records = len(depression_data)
        print(f"📊 Found {total_records} records to process")

        all_qa_data = []
        successful_records = 0

        for i, record in enumerate(depression_data):
            print(f"\n📍 Progress: {i+1}/{total_records}")

            record_qa = self.generate_qa_for_depression_record(record)

            if record_qa:
                all_qa_data.append(record_qa)
                successful_records += 1

            # Add delay between requests to be nice to the API; nothing follows
            # the last record, so there is no point waiting after it.
            if i < total_records - 1:
                time.sleep(2)

        # Save results
        try:
            with open(output_file_path, 'w', encoding='utf-8') as f:
                json.dump(all_qa_data, f, indent=2, ensure_ascii=False)

            print("\n🎉 SUCCESS!")
            print(f"✅ Processed {successful_records}/{total_records} records successfully")
            print(f"💾 Saved to: {output_file_path}")

            # Calculate total Q&A pairs
            total_qa = sum(len(record["qa_pairs"]) for record in all_qa_data)
            print(f"📊 Total Q&A pairs generated: {total_qa}")

        except (OSError, TypeError, ValueError) as e:
            print(f"❌ Error saving results: {e}")

# Usage example:
# if __name__ == "__main__":
#     # Initialize the generator
#     qa_generator = DepressionQAGenerator()
    
#     # Process all depression records
#     input_file = "depression_data.json"  # Your input file
#     output_file = "depression_qa_results.json"  # Output file
    
#     qa_generator.process_all_depression_records(input_file, output_file)
    
    # Or process a single record for testing:
    # sample_record = {
    #     "main_topic": "Contents",
    #     "sub_topic": "",
    #     "content": "What is depression? What are the different forms of depression?...",
    #     "pages": [3, 4],
    #     "source": "NIMH Depression Guide"
    # }
    # 
    # test_result = qa_generator.generate_qa_for_depression_record(sample_record)
    # print(json.dumps(test_result, indent=2))



In [3]:
# Smoke test: run the generator on one hand-written record before launching the full batch.
qa_generator = DepressionQAGenerator()
sample_record = {
    "main_topic": "What Is Depression?",
    "sub_topic": "",
    "content": "Everyone occasionally feels blue or sad, but these feelings are usually fleeting and pass within a couple of days. When a person has a depressive disorder, it interferes with daily life, normal functioning, and causes pain for both the person with the disorder and those who care about him or her. Depression is a common but serious illness, and most who experience it need treatment to get better. Many people with a depressive illness never seek treat- ment. But the vast majority, even those with the most severe depression, can get better with treatment. Intensive research into the illness has resulted in the development of medications, psychotherapies, and other methods to treat people with this disabling disorder.",
    "pages": [4],
    "source": "NIMH Depression Guide"
}

test_result = qa_generator.generate_qa_for_depression_record(sample_record)
# ensure_ascii=False prints characters such as en dashes as-is instead of \uXXXX
# escapes, the same setting process_all_depression_records uses when saving.
print(json.dumps(test_result, indent=2, ensure_ascii=False))


🧠 Processing topic: What Is Depression?
✅ Generated 5 Q&A pairs for What Is Depression?
{
  "main_topic": "What Is Depression?",
  "sub_topic": "",
  "source": "NIMH Depression Guide",
  "qa_pairs": [
    {
      "question": "I sometimes feel sad for a few days, but it doesn't really affect my ability to do things. How is that different from depression, and when should I be concerned about my mental health?",
      "answer": "It's normal to feel down occasionally. However, depression is different because it's a persistent feeling of sadness or loss of interest that interferes with your daily life \u2013 things like work, school, or relationships. If these feelings last for more than a couple of weeks and are impacting your ability to function, it's important to seek support. Depression is a serious illness that needs treatment."
    },
    {
      "question": "I've heard that depression is common. Does that mean it's not a big deal, or that there's a good chance I can recover?",
     

In [4]:
# Directory that holds the structured extract and receives the generated dataset.
# Set the DEPRESSION_DATA_DIR environment variable to run this on another machine;
# when it is unset the original location is used.
data_dir = os.environ.get(
    "DEPRESSION_DATA_DIR",
    "/Users/saikumarallaka/kaggle/gemma_3n_impact_challenge/datasets/depression",
)
input_file = os.path.join(data_dir, "depression_structured_data_extract.json")  # Your input file
output_file = os.path.join(data_dir, "depression_qa_dataset.json")  # Output file

# Initialize the generator
qa_generator = DepressionQAGenerator()

# Process all depression records
qa_generator.process_all_depression_records(input_file, output_file)

📚 Loading data from: /Users/saikumarallaka/kaggle/gemma_3n_impact_challenge/datasets/depression/depression_structured_data_extract.json
📊 Found 25 records to process

📍 Progress: 1/25

🧠 Processing topic: Contents
✅ Generated 5 Q&A pairs for Contents

📍 Progress: 2/25

🧠 Processing topic: What Is Depression?
✅ Generated 5 Q&A pairs for What Is Depression?

📍 Progress: 3/25

🧠 Processing topic: What are the different forms of depression?
✅ Generated 5 Q&A pairs for What are the different forms of depression?

📍 Progress: 4/25

🧠 Processing topic: What are the symptoms of depression?
✅ Generated 5 Q&A pairs for What are the symptoms of depression?

📍 Progress: 5/25

🧠 Processing topic: What illnesses often co-exist with depression?
✅ Generated 5 Q&A pairs for What illnesses often co-exist with depression?

📍 Progress: 6/25

🧠 Processing topic: What causes depression?
✅ Generated 5 Q&A pairs for What causes depression?

📍 Progress: 7/25

🧠 Processing topic: What causes depression?
✅ Gener