# Environment Setup and Validation
## Overview


## Import Libraries

In [None]:
# Import required libraries
import os
import sys
import platform
import torch
import transformers
from pathlib import Path
import yaml
import logging
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

## Configure Logging

In [None]:
# Configure basic logging
# Records are rendered as "timestamp - logger name - level - message"
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Named logger used for messages emitted from this notebook
logger = logging.getLogger('setup_notebook')

## Verify System Settings

In [None]:

# Display basic system information
# Banner first, then one line per component version, then the current time.
print("üîç Environment Setup and Verification")

python_version = platform.python_version()
print(f"üêç Python version: {python_version}")

pytorch_version = torch.__version__
print(f"üìä PyTorch version: {pytorch_version}")

transformers_version = transformers.__version__
print(f"ü§ñ Transformers version: {transformers_version}")

# Local wall-clock time at which the cell was executed
run_timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"üìÖ Date: {run_timestamp}")

## Set Up Path Configurations

In [None]:

# Set up project paths
# Locate the project root, expose it through sys.path and the PROJECT_ROOT
# environment variable, and make sure the expected directories are present.

# Directory the notebook is being executed from
notebook_path = Path().resolve()

# Walk from the notebook directory up to the filesystem root and keep the
# first directory that contains one of the project root markers
root_markers = ('.git', 'setup.py', 'requirements.txt')
project_root = next(
    (
        candidate
        for candidate in (notebook_path, *notebook_path.parents)
        if any((candidate / marker).exists() for marker in root_markers)
    ),
    None,
)

if project_root is not None:
    print(f"‚úÖ Found project root: {project_root}")
else:
    # No marker anywhere above us: assume the notebook lives one level
    # below the project root
    project_root = notebook_path.parent
    print("‚ö†Ô∏è Could not definitively locate project root, using parent directory")

root_as_text = str(project_root)

# Make project modules importable (inserted first so they take precedence)
if root_as_text not in sys.path:
    sys.path.insert(0, root_as_text)
    print(f"üìå Added {project_root} to Python path")

# Publish the root through the environment as well
os.environ['PROJECT_ROOT'] = root_as_text
print(f"üîÑ Set PROJECT_ROOT environment variable to {project_root}")

# Directories (relative to the project root) that must exist
critical_dirs = [
    "configs/environments",
    "src/config",
    "data/images",
    "models/cache",
    "results/raw",
    "logs",
]

# Report directories that are already present; create the missing ones
for dir_path in critical_dirs:
    full_path = project_root / dir_path
    if full_path.exists():
        print(f"‚úÖ Directory exists: {dir_path}")
        continue
    print(f"üìÅ Creating directory: {dir_path}")
    full_path.mkdir(parents=True, exist_ok=True)
# Check for GPU availability
# Reports the name and total memory of CUDA device 0, or notes CPU-only mode.
if not torch.cuda.is_available():
    print("‚ö†Ô∏è No GPU detected - running in CPU mode")
else:
    device_name = torch.cuda.get_device_name(0)
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    memory = total_bytes / 1e9  # bytes -> GB (decimal, 10**9)
    print(f"üñ•Ô∏è GPU: {device_name} ({memory:.2f} GB)")

## Initialize Environment

In [None]:
# Import and initialize environment configuration
# Loads the configuration for the local or RunPod environment, builds the
# path configuration for this setup run and prints both.

try:
    # Project configuration helpers
    from src.config.environment import get_environment_config
    from src.config.paths import get_path_config

    # Environment configuration
    env_config = get_environment_config()
    print(f"üìå Detected environment: {env_config.environment}")

    # Path configuration; the experiment name carries the current timestamp
    run_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    experiment_name = "setup_" + run_stamp
    paths = get_path_config(experiment_name=experiment_name)

    # Show what was loaded
    print("\nüîç Environment Configuration Summary:")
    env_config.print_summary()

    print("\nüìÇ Path Configuration:")
    print(paths)

    # Ask the path configuration to verify itself and report the outcome
    paths_verified = paths.verify(verbose=True)
    print(
        "‚úÖ All critical paths verified"
        if paths_verified
        else "‚ö†Ô∏è Some paths could not be verified"
    )

except ImportError as import_error:
    # The configuration modules could not be imported
    for message in (
        f"‚ùå Error importing configuration modules: {import_error}",
        "üìù This suggests there might be an issue with your project structure or Python path",
        "üí° Make sure the src/config directory exists and contains the necessary files",
    ):
        print(message)

except Exception as config_error:
    # Anything else raised while loading or displaying the configuration
    for message in (
        f"‚ùå Error initializing configuration: {config_error}",
        "üí° Check your YAML configuration files for syntax errors",
    ):
        print(message)

## Final Verification

In [None]:

# Final verification and setup confirmation
# This cell confirms the environment is ready for experiments: it checks that
# the ground truth CSV can be loaded and that invoice images are present.

print("üîç Performing final environment checks...")

# `paths` is created by the "Initialize Environment" cell. Look it up
# defensively: when that cell failed or was skipped the name is undefined, and
# referencing it directly would abort this cell with a bare NameError.
path_config = globals().get("paths")

if path_config is None:
    print("‚ö†Ô∏è Path configuration is not available - skipping data checks")
    print("   Run the 'Initialize Environment' cell successfully, then re-run this cell.")
else:
    # Check for critical data files
    gt_path = path_config.ground_truth_path
    if os.path.exists(gt_path):
        # Load and show basic info about ground truth data. Only the read is
        # guarded, so a problem while printing the summary is not misreported
        # as a load failure.
        try:
            ground_truth_df = pd.read_csv(gt_path)
        except Exception as e:
            print(f"‚ö†Ô∏è Ground truth file exists but couldn't be loaded: {e}")
        else:
            # Column labels are not guaranteed to be strings; stringify them
            # before joining. Only the first five are shown.
            column_names = [str(name) for name in ground_truth_df.columns]
            preview = ', '.join(column_names[:5])
            ellipsis = '...' if len(column_names) > 5 else ''
            print(f"‚úÖ Ground truth data loaded: {len(ground_truth_df)} records")
            print(f"   Columns: {preview}{ellipsis}")
    else:
        print(f"‚ö†Ô∏è Ground truth file not found at: {gt_path}")
        print("   You'll need to add this before running experiments.")

    # Check for images
    image_paths = list(path_config.get_image_paths())
    if image_paths:
        print(f"‚úÖ Found {len(image_paths)} invoice images")
    else:
        print("‚ö†Ô∏è No invoice images found in data directory")
        print("   You'll need to add images before running experiments.")