In [4]:
# Real GPT Response Test - Test actual GPT responses to different prompts

# Cell 1: Setup with actual GPT service
import sys
import os
from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables (the messages below expect OPENAI_API_KEY to come
# from a .env file).
load_dotenv()

# Add your project directory to path. The membership check keeps sys.path from
# accumulating duplicate entries when this cell is re-run.
project_directory = "../"  # Adjust as needed
if project_directory not in sys.path:
    sys.path.append(project_directory)

# Import the project modules. The outcome is remembered in `modules_imported`
# so the initialization step below can skip cleanly instead of failing with a
# NameError on a class that was never imported.
try:
    from pdf_processor import PDFProcessor
    from gpt_service import GPTService
    from response_handlers import ResponseHandlers
except ImportError as e:
    modules_imported = False
    print(f"❌ Import Error: {e}")
    print("Make sure you're running from the right directory")
else:
    modules_imported = True
    print("✅ All modules imported successfully!")

# Initialize services. `client`, `response_handlers`, `gpt_service` and
# `pdf_processor` are only defined when both prerequisites are met; later
# cells check for `gpt_service` before running.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    print("❌ OPENAI_API_KEY not found in environment variables")
    print("Make sure you have a .env file with your API key")
elif not modules_imported:
    print("❌ Cannot initialize GPT service - project modules failed to import")
else:
    client = OpenAI(api_key=api_key)
    response_handlers = ResponseHandlers()
    gpt_service = GPTService(client, response_handlers)
    pdf_processor = PDFProcessor()
    print("✅ GPT Service initialized successfully!")

# Cell 2: Extract text from PDF (same as before)
def extract_resume_text(pdf_path="test.pdf"):
    """Extract text from a resume PDF using the module-level ``pdf_processor``.

    Args:
        pdf_path: Path of the PDF to read. Defaults to ``"test.pdf"``, the
            file this notebook was originally written against.

    Returns:
        Whatever ``pdf_processor.extract_text_from_pdf`` returns for the file,
        or ``None`` when ``pdf_path`` does not exist.
    """
    if not os.path.exists(pdf_path):
        print(f"❌ PDF file '{pdf_path}' not found!")
        return None

    # The processor is handed the open binary file object, not the path.
    with open(pdf_path, 'rb') as file:
        text = pdf_processor.extract_text_from_pdf(file)

    print(f"✅ Extracted {len(text)} characters from PDF")
    return text


# Module-level result consumed by the cells below.
extracted_text = extract_resume_text()

# Cell 3: Test actual GPT responses to different prompts
# Phrases counted as signs of boilerplate feedback; two or more hits in one
# response trigger the "generic analysis" warning.
_GENERIC_PHRASES = ("overall structure", "well-organized", "hiring managers", "comprehensive")


def _default_test_cases():
    """Return a fresh list of the built-in request scenarios."""
    return [
        {
            "user_message": "can you rewrite the sections in this resume?",
            "expected_behavior": "Should rewrite/restructure sections",
            "keywords_to_check": ["rewrite", "improved", "revised", "better"],
        },
        {
            "user_message": "analyze this resume and give me feedback",
            "expected_behavior": "Should provide analysis and feedback",
            "keywords_to_check": ["analysis", "feedback", "suggestions", "improve"],
        },
        {
            "user_message": "what skills should I add to this resume?",
            "expected_behavior": "Should suggest specific skills to add",
            "keywords_to_check": ["skills", "add", "consider", "include"],
        },
        {
            "user_message": "make this resume more suitable for data science roles",
            "expected_behavior": "Should tailor for data science",
            "keywords_to_check": ["data science", "machine learning", "analytics", "python"],
        },
    ]


def _analyze_response(response, keywords):
    """Return ``(keywords_found, generic_count)`` for one GPT response.

    Both checks are substring matches against the lower-cased response, so
    ``keywords`` are expected to be lowercase.
    """
    response_lower = response.lower()
    keywords_found = [kw for kw in keywords if kw in response_lower]
    generic_count = sum(phrase in response_lower for phrase in _GENERIC_PHRASES)
    return keywords_found, generic_count


def test_real_gpt_responses(resume_text, test_cases=None):
    """Send several differently-worded requests to the GPT service and report on each reply.

    For every scenario the user message is combined with the resume text, sent
    through the module-level ``gpt_service``, and the reply is checked for
    request-specific keywords and for generic-feedback phrases. Results are
    printed; nothing is returned.

    Args:
        resume_text: Resume content to include in each prompt. When falsy the
            function prints a notice and returns immediately.
        test_cases: Optional list of dicts with the keys ``user_message``,
            ``expected_behavior`` and ``keywords_to_check``. Defaults to the
            four built-in scenarios.
    """
    if not resume_text:
        print("❌ No resume text to test with")
        return

    if test_cases is None:
        test_cases = _default_test_cases()

    print("🧪 Testing Real GPT Responses")
    print("=" * 70)

    for i, test_case in enumerate(test_cases, 1):
        print(f"\n📝 Test {i}: {test_case['user_message']}")
        print("-" * 50)
        print(f"Expected: {test_case['expected_behavior']}")

        # Deliberately broad: this is a diagnostic loop, so one failing
        # scenario is reported and the remaining scenarios still run.
        try:
            # Create the full prompt (similar to what app.py should do)
            full_prompt = f"{test_case['user_message']}\n\nHere's the resume content:\n\n{resume_text}"

            # Call the actual GPT service
            response = gpt_service.chat_about_resumes(
                full_prompt,
                {},  # empty user_info
                ""   # empty chat_history
            )

            print("\n🤖 GPT Response (first 200 chars):")
            print(f"'{response[:200]}...'")

            keywords_found, generic_count = _analyze_response(
                response, test_case['keywords_to_check']
            )

            print("\n🔍 Analysis:")
            print(f"   Keywords found: {keywords_found}")

            # Simple check: does the response seem relevant?
            if keywords_found:
                print("   ✅ Response seems relevant to request")
            else:
                print("   ❌ Response may not be addressing the specific request")
                print(f"   Expected keywords: {test_case['keywords_to_check']}")

            # Check if it's giving generic feedback vs following instruction
            if generic_count >= 2:
                print(f"   ⚠️  Warning: Response contains {generic_count} generic feedback phrases")
                print("   This might indicate it's giving generic analysis instead of following the specific instruction")

        except Exception as e:
            print(f"❌ Error calling GPT service: {e}")

        print("\n" + "=" * 50)

# Run the real GPT test, but only when both prerequisites exist: non-empty
# resume text and a `gpt_service` defined at module level.
if not extracted_text or 'gpt_service' not in locals():
    print("❌ Cannot run GPT tests - missing resume text or GPT service")
else:
    test_real_gpt_responses(extracted_text)

# Cell 4: Test the difference between requests
def compare_responses(resume_text=None, similarity_threshold=0.9):
    """Send two very different requests about the same resume and compare the replies.

    Both requests go through the module-level ``gpt_service``. The replies are
    compared with a word-level similarity ratio over their full text, so a
    shared greeting does not count as "identical" and a single differing
    character near the start does not count as "different". Results are
    printed; nothing is returned.

    Args:
        resume_text: Resume content to include in both prompts. When ``None``
            the module-level ``extracted_text`` is used. When the resulting
            text is falsy the function returns without printing.
        similarity_threshold: Ratio in ``[0, 1]`` at or above which the two
            replies are reported as identical or very similar.
    """
    # Local import keeps this cell self-contained.
    from difflib import SequenceMatcher

    if resume_text is None:
        resume_text = extracted_text
    if not resume_text:
        return

    print("\n🔄 RESPONSE COMPARISON TEST")
    print("=" * 70)

    # Two very different requests
    request1 = "can you rewrite the sections in this resume?"
    request2 = "what skills should I add to this resume?"

    # Deliberately broad: any failure in this diagnostic is reported, not raised.
    try:
        prompt1 = f"{request1}\n\nHere's the resume:\n\n{resume_text}"
        prompt2 = f"{request2}\n\nHere's the resume:\n\n{resume_text}"

        response1 = gpt_service.chat_about_resumes(prompt1, {}, "")
        response2 = gpt_service.chat_about_resumes(prompt2, {}, "")

        print(f"Request 1: {request1}")
        print(f"Response 1 preview: {response1[:150]}...")
        print()
        print(f"Request 2: {request2}")
        print(f"Response 2 preview: {response2[:150]}...")

        # Compare word sequences rather than characters to keep this fast.
        # autojunk is off so common words are not discarded from matching.
        similarity = SequenceMatcher(
            None, response1.split(), response2.split(), autojunk=False
        ).ratio()
        print(f"\nWord-level similarity: {similarity:.0%}")

        # Check if responses are meaningfully different
        if similarity >= similarity_threshold:
            print("\n❌ PROBLEM: Responses are identical or very similar!")
            print("This suggests GPT is ignoring the specific instruction")
        else:
            print("\n✅ Responses are different - GPT is following instructions")

    except Exception as e:
        print(f"❌ Error in comparison test: {e}")

# Run comparison test. When a prerequisite is missing, say so instead of
# skipping silently, matching the guard used for the real GPT test.
if extracted_text and 'gpt_service' in locals():
    compare_responses()
else:
    print("❌ Cannot run comparison test - missing resume text or GPT service")

# Cell 5: Diagnosis and Recommendations
def print_diagnosis():
    """Print a fixed checklist for reading the test output and choosing where to fix.

    The text is static: it does not inspect any test results. Entries that
    start with a newline open a new section with a blank line above it.
    """
    report_lines = (
        "\n📊 DIAGNOSIS & RECOMMENDATIONS",
        "=" * 70,
        "\n🔍 What to Look For in the Test Results:",
        "1. Do responses vary based on the user's request?",
        "2. Does 'rewrite sections' actually rewrite vs just analyze?",
        "3. Does 'add skills' suggest skills vs give general feedback?",
        "4. Are responses tailored to the specific instruction?",
        "\n✅ If Tests Show GPT Follows Instructions:",
        "   → The issue is in app.py upload handler",
        "   → Update app.py to use user's actual message",
        "   → Update frontend to send user message with file",
        "\n❌ If Tests Show GPT Ignores Instructions:",
        "   → The issue might be in gpt_service.py",
        "   → Check the system prompt in GPTService",
        "   → The prompt might be too generic or overriding user instructions",
        "\n🔧 Next Steps:",
        "1. Review the test results above",
        "2. If responses are different and relevant → fix app.py",
        "3. If responses are same/generic → check gpt_service.py",
        "4. Test with actual Flask app after fixes",
    )
    for line in report_lines:
        print(line)


print_diagnosis()

incorrect startxref pointer(3)


✅ All modules imported successfully!
✅ GPT Service initialized successfully!
✅ Extracted 442 characters from PDF
🧪 Testing Real GPT Responses

📝 Test 1: can you rewrite the sections in this resume?
--------------------------------------------------
Expected: Should rewrite/restructure sections

🤖 GPT Response (first 200 chars):
'Sure, John! Here’s a revised version of your resume sections to enhance clarity, impact, and professionalism.

---

**John Smith**  
123 Main Street, Toronto, ON M5V 1A1  
Phone: (416) 555-1234 | Emai...'

🔍 Analysis:
   Keywords found: ['revised']
   ✅ Response seems relevant to request


📝 Test 2: analyze this resume and give me feedback
--------------------------------------------------
Expected: Should provide analysis and feedback

🤖 GPT Response (first 200 chars):
'Hi John! Thanks for sharing your resume content. Let's analyze each section and provide some actionable feedback to enhance it.

### **1. Contact Information**
Your contact information is well-