### Install Basic Requirements

In [None]:
# Install core requirements
%cd ..
!pip install -q -r requirements.txt


### Establish Paths

In [None]:
import os
from pathlib import Path
from src.path import PathRepository, PathValidator

def find_project_root() -> Path:
    """Locate the project root by walking up from the current working directory.

    A directory qualifies as the project root when it contains ``src``,
    ``config`` and ``notebooks`` as directories. The search starts at the
    current working directory and visits every ancestor, including the
    filesystem root.

    Returns:
        The nearest qualifying directory (the working directory itself or one
        of its ancestors).

    Raises:
        RuntimeError: If neither the working directory nor any ancestor
            contains all three marker directories.
    """
    required_dirs = ("src", "config", "notebooks")
    start = Path.cwd()

    # `Path.parents` runs all the way up to the filesystem root, so the root
    # itself is also tested (a `current != current.parent` loop stops before it).
    for candidate in (start, *start.parents):
        # is_dir() rather than exists(): a plain file that merely shares a
        # marker's name must not make a directory look like the project root.
        if all((candidate / name).is_dir() for name in required_dirs):
            return candidate

    raise RuntimeError("Could not find project root directory. Expected to find src/, config/, and notebooks/ directories.")

# Bootstrap the project's path system: locate the project root, export the
# environment variables set below, then initialise, validate and create the
# project paths through PathRepository / PathValidator.
try:
    # Find project root automatically
    project_root = find_project_root()
    print(f"🔍 Found project root at: {project_root}")

    # Set up environment variables
    os.environ["PROJECT_ROOT"] = str(project_root)
    os.environ["USER_HOME"] = str(Path.home())
    # Falls back to /tmp when the TEMP variable is not set.
    os.environ["TEMP_DIR"] = str(Path(os.getenv("TEMP", "/tmp")))

    # Initialize path system
    print("🔄 Initializing path system...")
    paths = PathRepository.get_instance()

    # Validate paths
    print("🔍 Validating paths...")
    validator = PathValidator(paths.get_path("project_root"))
    # NOTE(review): the validation result is stored but never inspected in this
    # cell — confirm whether failures should be reported or abort the setup.
    validation_results = validator.validate_all_paths(paths.get_all_paths())

    # Create directories
    print("📁 Creating required directories...")
    paths.ensure_directories()

    # Display key paths
    print("\n✨ Path system initialized successfully!")
    print("\nKey project paths:")
    print(f"📂 Project Root: {paths.get_path('project_root')}")
    # The label previously contained two U+FFFD replacement characters
    # (a mis-encoded emoji); restored to a valid one.
    print(f"🤖 Model Directory: {paths.get_path('models_pixtral')}")
    print(f"📊 Data Directory: {paths.get_path('data_input')}")
    print(f"📝 Log Directory: {paths.get_path('logs')}")

except Exception as e:
    # Deliberate best-effort: report the problem without a traceback.
    # Execution continues past this cell, so `paths` may be undefined afterwards.
    print(f"❌ Error initializing path system: {e}")
    print("Please check your environment configuration and try again.")

### Install Torch for ML 

In [None]:

# Install PyTorch separately with CUDA support
!pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118

# Verify installation
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Current device: {torch.cuda.current_device()}")
    print(f"Device name: {torch.cuda.get_device_name(0)}")

## Configure Environment and Download Model

In [None]:
from pathlib import Path
from src.model.setup import setup_environment

# Locations of the model configuration and the model weights, relative to the
# current working directory.
# NOTE(review): neither path is passed to setup_environment() below — confirm
# whether they are meant to be arguments or are consumed by another cell.
config_path = Path("config") / "models" / "pixtral.yaml"
model_path = Path("models") / "pixtral-12b"

# Run the project's environment setup and keep its result for inspection.
status = setup_environment()

# Report the fields of the setup result, one per line.
for _label, _key in (
    ("Setup status", "status"),
    ("Model path", "model_path"),
    ("Hardware status", "hardware_status"),
):
    print(f"{_label}: {status[_key]}")

## Load Prompt and Run Model

### Choose Variables

In [None]:
from src.prompt.config import PromptConfigurationManager
from IPython.display import display, Markdown

# Render the section heading as Markdown output.
header = Markdown("## 🧾 Invoice Data Extraction Configuration")
display(header)

# Create the prompt configuration manager and show it.
config_manager = PromptConfigurationManager()
config_manager.display()

# Keep the configuration for the processing cell further down.
# NOTE(review): this is read in the same cell that displays the manager, so it
# presumably reflects the state before any user interaction — confirm whether
# get_config() returns a live object or a snapshot.
config = config_manager.get_config()

### Run Model

In [None]:
# Process Images with Selected Configuration
from pathlib import Path
from src.prompt.batch_processor import BatchProcessor
from src.path.repository import PathRepository

# Resolve the model location through the path repository; the image directory
# is relative to the current working directory.
paths = PathRepository.get_instance()
model_path = paths.get_model_path("pixtral")
image_dir = Path("data/images")

# Re-read the configuration at run time. The earlier cell captured `config`
# in the same cell that displayed the configuration manager, so choices made
# afterwards may not be reflected in that value.
# NOTE(review): harmless if get_config() already returns a live object — confirm.
config = config_manager.get_config()

# Initialize batch processor
processor = BatchProcessor(model_path)

# Process images and log results
log_file = Path("logs/batch_processing.log")
processor.process_and_log(
    image_dir=image_dir,
    prompt_name=config['prompt_name'],
    quantization=config['quantization'],
    log_file=log_file
)

print(f"Processing complete. Results logged to {log_file}")