# HHPF Setup Verification

This notebook verifies that your environment is properly configured.

## 1. Import Core Dependencies

In [None]:
# Import every core dependency up front: a missing package raises ImportError
# on its own line, before the success message at the bottom can be printed.
import sys
import numpy as np
import pandas as pd
import torch
import transformers
import xgboost
import spacy
import matplotlib.pyplot as plt
import seaborn as sns

# Only reached when all of the imports above succeeded.
print("✅ All core dependencies imported successfully")

## 2. Check Python Version

In [None]:
# Report the running interpreter, then stop with an AssertionError if it is
# older than the minimum release the project supports.
print("Python version: {}".format(sys.version))
assert not (sys.version_info < (3, 9)), "Python 3.9+ required"
print("✅ Python version is compatible")

## 3. Check PyTorch and MPS

In [None]:
print(f"PyTorch version: {torch.__version__}")

# `torch.backends.mps` is not present in every PyTorch build. Look it up
# defensively so that a build without it reports "not available" instead of
# raising AttributeError in the middle of the verification.
mps_backend = getattr(torch.backends, "mps", None)
# Query availability once and reuse the answer for both the report and the
# device selection, so the two can never disagree.
mps_available = mps_backend is not None and mps_backend.is_available()
print(f"MPS available: {mps_available}")

# Pick the device for this session: MPS when usable, CPU otherwise.
if mps_available:
    print("✅ GPU acceleration available (MPS)")
    device = torch.device("mps")
else:
    print("⚠️ Using CPU - this is fine for most operations")
    device = torch.device("cpu")

print(f"Device: {device}")

## 4. Load spaCy Model

In [None]:
# Load the small English pipeline and run a one-sentence smoke test.
# The check stays best-effort (a failure prints a message instead of stopping
# the notebook), but it no longer uses a bare `except:`; that form also caught
# KeyboardInterrupt/SystemExit and reported every failure as "model not found".
try:
    nlp = spacy.load("en_core_web_sm")
    print("✅ spaCy model loaded successfully")

    # Test it
    doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    print(f"Test entities extracted: {entities}")
except OSError:
    # spacy.load raises OSError when the requested model package is not installed.
    print("❌ spaCy model not found. Run: python -m spacy download en_core_web_sm")
except Exception as exc:
    # Any other failure is a different problem; show it rather than
    # mislabeling it as a missing model.
    print(f"❌ spaCy check failed: {type(exc).__name__}: {exc}")

## 5. Check API Configuration

In [None]:
import os
from dotenv import load_dotenv

# Read the project's .env file into the process environment so that the
# os.getenv() lookups that follow can see the keys defined there.
# NOTE(review): with default arguments python-dotenv presumably leaves
# already-set environment variables untouched — confirm if that matters.
load_dotenv()

def is_real_api_key(value, placeholder):
    """Return True when `value` looks like a usable API key.

    Args:
        value: The raw environment value, or None when the variable is unset.
        placeholder: The template text that marks a key that was never filled in.

    Returns:
        bool: False when `value` is None, empty, whitespace-only, or equal to
        `placeholder` (ignoring surrounding whitespace); True otherwise.
    """
    if value is None:
        return False
    stripped = value.strip()
    return bool(stripped) and stripped != placeholder


# Raw values are kept under their original names for any later cell that uses them.
together_key = os.getenv("TOGETHER_API_KEY")
groq_key = os.getenv("GROQ_API_KEY")

# Evaluate each provider once; the results drive both the per-provider lines
# and the final "no keys at all" error, so the three can no longer drift apart.
together_configured = is_real_api_key(together_key, "your_together_api_key_here")
groq_configured = is_real_api_key(groq_key, "your_groq_api_key_here")

if together_configured:
    print("✅ Together AI API key configured")
else:
    print("⚠️ Together AI API key not configured")

if groq_configured:
    print("✅ Groq API key configured")
else:
    print("⚠️ Groq API key not configured")

# A single provider being absent is only a warning; having neither is an error.
if not (together_configured or groq_configured):
    print("\n❌ No API keys configured!")
    print("Edit .env file and add your API key")

## 6. Load HHPF Configuration

In [None]:
import sys
from pathlib import Path

# Make the parent of the current working directory importable so that the
# `src` package resolves (assumes the notebook is run from a directory one
# level below the project root — TODO confirm).
# The path is resolved to an absolute one and added only once: re-running this
# cell no longer piles up duplicate entries, and the entry keeps pointing at
# the same directory even if the working directory changes later.
project_root = str(Path("..").resolve())
if project_root not in sys.path:
    sys.path.append(project_root)

from src.utils import load_config

# Load configurations
datasets_config = load_config('datasets')
model_config = load_config('model')
features_config = load_config('features')

print("✅ All configuration files loaded")
# Indexing (rather than .get) is deliberate: a missing key fails loudly here.
print(f"\nDatasets configured: {list(datasets_config['datasets'].keys())}")
print(f"Default Llama model: {model_config['llama']['default_model']}")

## 7. Test Transformers Tokenizer Loading

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Only the tokenizer is fetched in this cell; the model weights are left for
# the code that actually needs them.
print("Loading DeBERTa-v3-large (this may take a minute on first run)...")

try:
    tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-large")
    # Note: We're not loading the full model here to save memory
    print(
        "✅ DeBERTa tokenizer loaded successfully",
        "Note: Full model will be loaded when needed for semantic entropy",
        sep="\n",
    )
except Exception as err:
    # Best-effort check: report the failure and let the notebook continue.
    print(
        "⚠️ Could not load DeBERTa: {}".format(err),
        "This is needed for semantic entropy. Check your internet connection.",
        sep="\n",
    )

## 8. Summary

In [None]:
def find_setup_problems(namespace):
    """List the soft-failing checks of this notebook that did not pass.

    The spaCy, API-key and tokenizer cells print a message instead of raising
    when they fail, so a full run reaches this summary either way. This helper
    inspects the names those cells leave behind to tell the two cases apart.

    Args:
        namespace: Mapping of variable names to values (normally `globals()`).

    Returns:
        list[str]: One description per failed check; empty when all passed.
    """
    problems = []

    # `nlp` is only bound when spacy.load() succeeded.
    if "nlp" not in namespace:
        problems.append("spaCy model 'en_core_web_sm' is not loaded (see section 4)")

    # At least one provider key must be set to something other than its
    # template placeholder.
    placeholders = {
        "together_key": "your_together_api_key_here",
        "groq_key": "your_groq_api_key_here",
    }
    has_api_key = any(
        namespace.get(name) and namespace.get(name) != placeholder
        for name, placeholder in placeholders.items()
    )
    if not has_api_key:
        problems.append("no API key is configured (see section 5)")

    # `tokenizer` is only bound when from_pretrained() succeeded.
    if "tokenizer" not in namespace:
        problems.append("DeBERTa tokenizer is not loaded (see section 7)")

    return problems


setup_problems = find_setup_problems(globals())

print("="*60)
print("  Setup Verification Complete")
print("="*60)

if setup_problems:
    # Do not claim the environment is ready when a check above reported a failure.
    print(f"\n⚠️ {len(setup_problems)} check(s) did not pass:")
    for problem in setup_problems:
        print(f"  - {problem}")
    print("\nFix the items above and re-run this notebook before continuing.")
else:
    print("\n✅ Your environment is ready for HHPF development!")
    print("\nNext steps:")
    print("1. Place your datasets in data/raw/")
    print("2. Open 01_data_exploration.ipynb")
    print("3. Start with a single domain (Math recommended)")