# GIS Image Analysis - Exploration Notebook

This notebook provides an interactive environment for exploring GIS TIFF images and testing our machine learning pipeline for land cover classification.

## Overview
- Load and visualize TIFF images
- Test preprocessing functions
- Explore feature extraction methods
- Train and evaluate classification models
- Generate visualizations and reports

## 1. Install Required Packages

First, let's install all the necessary packages for our GIS analysis pipeline.

In [None]:
# Install required packages
import sys
import subprocess

def install_packages():
    """Install the notebook's dependencies with pip, one requirement at a time.

    Every minimum-version specifier in the hard-coded list below is handed to
    ``python -m pip install`` using the interpreter that runs this notebook
    (``sys.executable``). One status line is printed per requirement; a failed
    install is reported and the remaining requirements are still attempted.
    """
    requirements = (
        'numpy>=1.24.0',
        'matplotlib>=3.7.0',
        'rasterio>=1.3.0',
        'opencv-python>=4.8.0',
        'scikit-learn>=1.3.0',
        'scikit-image>=0.21.0',
        'seaborn>=0.12.0',
        'torch>=2.0.0',
        'torchvision>=0.15.0',
    )
    pip_install = [sys.executable, '-m', 'pip', 'install']

    for requirement in requirements:
        try:
            subprocess.check_call([*pip_install, requirement])
        except subprocess.CalledProcessError:
            # pip exited non-zero: report it and move on to the next one.
            print(f"✗ Failed to install {requirement}")
        else:
            print(f"✓ Installed {requirement}")

# Uncomment the line below to install packages
# install_packages()

## 2. Import Libraries and Configure Environment

Import all necessary libraries and set up the development environment.

In [None]:
# Standard library imports
import os
import sys
import logging
from pathlib import Path

# Add src to path
sys.path.append('../src')

# Scientific computing
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# GIS and image processing
try:
    import rasterio
    import cv2
    from sklearn.cluster import KMeans
    from sklearn.preprocessing import StandardScaler
    print("✓ All required libraries imported successfully")
except ImportError as e:
    print(f"✗ Import error: {e}")
    print("Please install missing packages using the cell above")

# Plotting defaults: reset to matplotlib's 'default' style, then set the
# figure size and font size used throughout the notebook.
plt.style.use('default')
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})

# Logging: a logger named after this module, with INFO-level root config.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

print("Environment configured successfully!")

## 3. Set Up Project Structure and Load Modules

Load our custom modules and verify the project structure.

In [None]:
# Load our custom modules. `custom_modules_loaded` records whether every import
# succeeded, so the dependency report at the end of this cell can be skipped
# instead of failing with NameError on `check_dependencies`.
try:
    from preprocessing import ImagePreprocessor
    from feature_extraction import FeatureExtractor
    from models import ModelTrainer
    from evaluation import ModelEvaluator
    from visualization import Visualizer
    from utils import setup_logging, get_default_config, check_dependencies
except ImportError as e:
    custom_modules_loaded = False
    print(f"✗ Error loading custom modules: {e}")
    print("Please ensure you're running from the notebooks directory")
else:
    custom_modules_loaded = True
    print("✓ All custom modules loaded successfully")

# Check project structure: report which expected top-level directories exist
# relative to the parent of the current working directory.
project_root = Path('../')
required_dirs = ['src', 'data', 'models', 'outputs', 'notebooks']

print("\nProject Structure:")
print("-" * 20)
for dir_name in required_dirs:
    dir_path = project_root / dir_name
    status = "✓" if dir_path.exists() else "✗"
    print(f"{dir_name:12} {status}")

# Check dependencies. check_dependencies() is used here as a mapping of
# dependency name -> truthy availability flag.
print("\nDependency Status:")
if custom_modules_loaded:
    deps = check_dependencies()
    for dep, available in deps.items():
        status = "✓" if available else "✗"
        print(f"{dep:12} {status}")
else:
    # Keep `deps` defined so later cells that read it do not hit NameError.
    deps = {}
    print("✗ Skipped: custom modules are not loaded")

## 4. Initialize Version Control and Configuration

Set up a Git repository and load the default configuration for the project.

In [None]:
# Initialize Git repository and create .gitignore
import subprocess
import os

def initialize_git(project_root='../'):
    """Initialize a Git repository in *project_root* and create a .gitignore.

    Runs ``git status`` in *project_root*; if it exits non-zero, runs
    ``git init`` there. Afterwards a default ``.gitignore`` is written unless
    one already exists (an existing file is never modified). Progress and
    failures are reported with ``print``; exceptions are caught, not raised.

    Args:
        project_root: Directory to initialize. Defaults to ``'../'``, i.e.
            the parent of the directory the notebook is run from.
    """
    try:
        # Check if already a git repo.
        # NOTE(review): `git status` presumably also succeeds when an ancestor
        # directory is a repository -- confirm that skipping `git init` is the
        # desired outcome in that case.
        result = subprocess.run(['git', 'status'], capture_output=True, cwd=project_root)
        if result.returncode == 0:
            print("✓ Git repository already initialized")
        else:
            # Initialize git
            subprocess.run(['git', 'init'], cwd=project_root, check=True)
            print("✓ Git repository initialized")
        
        # Create .gitignore
        gitignore_content = '''
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
venv/
ENV/

# Jupyter Notebook
.ipynb_checkpoints

# IDE
.vscode/
.idea/

# Data files
data/raw/*.tif
data/raw/*.tiff
*.h5
*.hdf5

# Model files
models/*.pt
models/*.pth
models/*.h5

# Output files
outputs/
*.log

# OS
.DS_Store
Thumbs.db
'''
        
        gitignore_path = Path(project_root) / '.gitignore'
        if not gitignore_path.exists():
            # End the file with a newline so that patterns appended later
            # (e.g. `echo pattern >> .gitignore`) do not fuse with the last
            # line, and pin the encoding instead of using the platform default.
            gitignore_path.write_text(gitignore_content.strip() + '\n', encoding='utf-8')
            print("✓ .gitignore created")
        else:
            print("✓ .gitignore already exists")
            
    except subprocess.CalledProcessError as e:
        # `git init` exited non-zero.
        print(f"✗ Git initialization failed: {e}")
    except Exception as e:
        # Anything else, e.g. the git executable is missing or the
        # directory is not writable.
        print(f"✗ Error: {e}")

# Load default configuration.
# get_default_config is imported from `utils` in section 3; if that import
# failed, this line raises NameError, so re-run that cell successfully first.
config = get_default_config()
print("✓ Default configuration loaded")
print(f"Configuration: {config}")

# Uncomment to initialize git
# initialize_git()

## 5. Configure IDE Settings and Extensions

Set up VS Code workspace settings and recommended extensions for GIS development.

In [None]:
# Create VS Code workspace settings
import json

def setup_vscode_config(project_root='../', *, overwrite=False):
    """Create VS Code settings and extension recommendations for the project.

    Writes ``settings.json`` and ``extensions.json`` into
    ``<project_root>/.vscode`` (the directory is created when missing). A file
    that already exists is left untouched unless *overwrite* is true, so
    hand-edited workspace settings are not silently lost -- the same policy
    ``initialize_git`` applies to an existing ``.gitignore``.

    Args:
        project_root: Project directory that receives the ``.vscode`` folder.
            Defaults to ``'../'``, the parent of the notebook directory.
        overwrite: When true, replace existing configuration files.
    """

    # VS Code settings
    # NOTE(review): the `python.linting.*` and `python.formatting.provider`
    # keys are reported as deprecated in recent releases of the VS Code Python
    # extension (replaced by the dedicated pylint / black-formatter extensions
    # recommended below) -- confirm before relying on them.
    vscode_settings = {
        "python.analysis.typeCheckingMode": "basic",
        "python.linting.enabled": True,
        "python.linting.pylintEnabled": True,
        "python.formatting.provider": "black",
        "python.analysis.autoImportCompletions": True,
        "files.exclude": {
            "**/__pycache__": True,
            "**/*.pyc": True,
            "**/.git": True,
            "**/node_modules": True
        },
        "jupyter.askForKernelRestart": False,
        "jupyter.interactiveWindow.textEditor.executeSelection": True
    }

    # Recommended extensions
    extensions = {
        "recommendations": [
            "ms-python.python",
            "ms-python.pylint",
            "ms-toolsai.jupyter",
            "ms-vscode.cpptools",
            "esbenp.prettier-vscode",
            "ms-python.black-formatter",
            "ms-python.isort"
        ]
    }

    # Create .vscode directory
    vscode_dir = Path(project_root) / '.vscode'
    vscode_dir.mkdir(parents=True, exist_ok=True)

    settings_file = vscode_dir / 'settings.json'
    extensions_file = vscode_dir / 'extensions.json'

    # Write each file, skipping any that exists unless overwriting was requested.
    written = []
    for target, payload in ((settings_file, vscode_settings),
                            (extensions_file, extensions)):
        if target.exists() and not overwrite:
            print(f"✓ {target} already exists (left unchanged)")
            continue
        with open(target, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2)
        written.append(target)

    if written:
        print("✓ VS Code configuration files created")
        for target in written:
            print(f"  - {target}")

# Uncomment to create VS Code config
# setup_vscode_config()

# NOTE: this message is printed unconditionally; it does not confirm that
# setup_vscode_config() was actually run.
print("IDE configuration ready!")

## 6. Test Environment Setup

Run verification tests to ensure all tools and dependencies are working correctly.

In [None]:
# Comprehensive environment testing
def _release_tuple(version, width=3):
    """Return the leading numeric components of *version* as a tuple of ints.

    Only the first ``width`` dot-separated components are used, and only the
    leading digits of each one (``'2.0.0rc1'`` -> ``(2, 0, 0)``). Missing or
    non-numeric components count as 0, so ``'1.20'`` equals ``'1.20.0'``.
    """
    release = []
    for piece in version.split('.')[:width]:
        # Leading digits = the piece minus whatever is left after stripping
        # digits from the left.
        digits = piece[:len(piece) - len(piece.lstrip('0123456789'))]
        release.append(int(digits) if digits else 0)
    release.extend([0] * (width - len(release)))
    return tuple(release)


def test_environment(min_numpy_version='1.20.0'):
    """Run the environment checks and report the results on stdout.

    Eight independent checks are executed in order: core scientific
    libraries, image processing libraries, GIS libraries, machine learning
    libraries, PyTorch, the project's custom modules, the project directory
    layout, and sample data generation. A failing check is reported and the
    remaining checks still run.

    Args:
        min_numpy_version: Oldest acceptable numpy version. Versions are
            compared numerically, component by component, rather than as
            strings (a string comparison ranks ``'1.9.0'`` above
            ``'1.20.0'``).

    Returns:
        True when every check passed, False otherwise.
    """

    # Each check raises on failure and returns its success message otherwise.
    # Explicit raises are used instead of `assert`, which `python -O` strips.

    def check_core_libraries():
        import numpy as np
        import matplotlib.pyplot as plt  # imported only to prove it is installed
        if _release_tuple(np.__version__) < _release_tuple(min_numpy_version):
            raise RuntimeError(
                f"numpy {np.__version__} is older than the required "
                f"{min_numpy_version}"
            )
        return "✓ Test 1: Core scientific libraries"

    def check_image_processing():
        import cv2
        import skimage
        return "✓ Test 2: Image processing libraries"

    def check_gis_libraries():
        import rasterio
        return "✓ Test 3: GIS libraries"

    def check_ml_libraries():
        from sklearn.cluster import KMeans
        return "✓ Test 4: Machine learning libraries"

    def check_pytorch():
        import torch
        return f"✓ Test 5: PyTorch (version {torch.__version__})"

    def check_custom_modules():
        from preprocessing import ImagePreprocessor
        from models import ModelTrainer
        # Instantiate once to catch constructor-time failures as well.
        ImagePreprocessor()
        return "✓ Test 6: Custom modules"

    def check_project_structure():
        for dir_path in ('../src', '../data', '../models', '../outputs'):
            if not Path(dir_path).exists():
                raise FileNotFoundError(f"Missing directory: {dir_path}")
        return "✓ Test 7: Project structure"

    def check_data_generation():
        # Imported here so this check does not depend on the first one.
        import numpy as np
        test_image = np.random.rand(64, 64, 3)
        if test_image.shape != (64, 64, 3):
            raise ValueError(f"unexpected shape {test_image.shape}")
        return "✓ Test 8: Data generation"

    # (label used in the failure message, check function)
    checks = (
        ("Test 1: Core libraries", check_core_libraries),
        ("Test 2: Image processing", check_image_processing),
        ("Test 3: GIS libraries", check_gis_libraries),
        ("Test 4: ML libraries", check_ml_libraries),
        ("Test 5: PyTorch", check_pytorch),
        ("Test 6: Custom modules", check_custom_modules),
        ("Test 7: Project structure", check_project_structure),
        ("Test 8: Data generation", check_data_generation),
    )

    print("🧪 Running Environment Tests")
    print("=" * 50)

    tests_passed = 0
    total_tests = len(checks)
    for failure_label, check in checks:
        # Broad catch is deliberate: any failure (missing package, broken
        # install, bad project layout) must be reported without aborting
        # the remaining checks.
        try:
            print(check())
            tests_passed += 1
        except Exception as e:
            print(f"✗ {failure_label} failed - {e}")

    # Summary
    print("\n" + "=" * 50)
    print(f"Tests Passed: {tests_passed}/{total_tests}")

    if tests_passed == total_tests:
        print("🎉 All tests passed! Environment is ready.")
        return True

    print("⚠️  Some tests failed. Check the errors above.")
    return False

# Execute the environment checks and print the overall outcome.
success = test_environment()

if not success:
    print("\n❌ Workspace setup needs attention.",
          "Please resolve the failed tests before proceeding.",
          sep="\n")
else:
    print("\n✅ Workspace setup completed successfully!",
          "You can now proceed with GIS image analysis.",
          sep="\n")