# Secure Jupyter Notebook Best Practices

This notebook demonstrates security best practices for Jupyter notebooks, showing how to avoid common vulnerabilities.

## 1. Secure Credential Management

✅ **GOOD**: Use environment variables for secrets

In [None]:
import os
from dotenv import load_dotenv

# Pull any variables defined in a local .env file into the process environment
load_dotenv()

# Read the secrets from the environment instead of hardcoding them in the notebook
database_url = os.environ.get('DATABASE_URL')
api_key = os.environ.get('API_KEY')

# Fail fast when the API key is missing or empty
if api_key is None or api_key == "":
    raise ValueError("API_KEY not found in environment variables")

## 2. Safe Data Loading with Validation

✅ **GOOD**: Validate data types and schemas

In [None]:
import pandas as pd

# Expected column names and the dtype each one is parsed as
dtype_spec = {
    'id': 'int64',
    'name': 'string',
    'age': 'int64',
    'score': 'float64',
}

# Load data, converting each listed column to its expected dtype
df = pd.read_csv('data.csv', dtype=dtype_spec)

# Make sure every expected column is actually present in the file
missing_columns = sorted(set(dtype_spec) - set(df.columns))
if missing_columns:
    raise ValueError(f"Missing expected columns: {missing_columns}")

# Range checks use explicit raises rather than `assert`, because assert
# statements are removed when Python runs with -O. Using .any() also keeps
# an empty file from failing with a misleading message.
if (df['age'] < 0).any():
    raise ValueError("Age cannot be negative")
if (df['age'] > 120).any():
    raise ValueError("Age seems unrealistic")

print(f"Loaded {len(df)} rows with validated schema")

## 3. Secure Model Loading

✅ **GOOD**: Verify model integrity before loading

In [None]:
import hashlib
import torch

# Known-good SHA-256 hex digest of the model file, obtained from a trusted source.
# verify_model_integrity() compares the file's computed digest against this value.
EXPECTED_CHECKSUM = "abc123..."  # Placeholder - replace with the actual checksum

def verify_model_integrity(model_path: str, expected_checksum: str) -> bool:
    """Verify model file integrity using a SHA-256 checksum.

    Args:
        model_path: Path of the file to hash.
        expected_checksum: Expected SHA-256 digest as a hex string. Case and
            surrounding whitespace are ignored.

    Returns:
        True if the file's digest equals ``expected_checksum``, else False.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(model_path, 'rb') as f:
        # Hash in fixed-size chunks so a large model file is never held
        # in memory all at once.
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            digest.update(chunk)

    if not isinstance(expected_checksum, str):
        return False
    # hexdigest() is lower-case; normalise the expected value so an
    # upper-case or padded digest of the right file is not rejected.
    return digest.hexdigest() == expected_checksum.strip().lower()

# Verify before loading; refuse to touch the file if the checksum differs
model_path = 'model.pth'
if not verify_model_integrity(model_path, EXPECTED_CHECKSUM):
    raise ValueError("Model integrity check failed - possible tampering")

# SECURITY: torch.load is pickle-based and can execute code embedded in the
# file. weights_only=True restricts unpickling to tensors and plain
# containers, as defence in depth on top of the checksum.
# NOTE(review): a file that stores a whole pickled nn.Module will not load
# this way - save and load a state_dict instead.
model = torch.load(model_path, weights_only=True)
print("Model loaded successfully")

## 4. Safe Command Execution

✅ **GOOD**: Use subprocess with argument lists (shell=False)

In [None]:
import subprocess
from pathlib import Path

def safe_file_operation(filename: str, base_dir: str = '/safe/directory') -> str:
    """Read a file from a fixed directory without invoking a shell.

    Only the final path component of ``filename`` is used, so directory
    parts such as ``../../`` are discarded before the path is built.

    Args:
        filename: Name of the file to read. Any directory components are
            stripped.
        base_dir: Directory the file is read from.

    Returns:
        The file contents as text.

    Raises:
        ValueError: If ``filename`` has no usable final component (empty,
            ``.``, ``..``, a bare ``/``, or anything ending in ``..``).
        subprocess.CalledProcessError: If ``cat`` exits with a non-zero status.
    """
    # Path(...).name strips directories, but it yields '..' for inputs such as
    # '..' or 'foo/..' and '' for '' or '/'. Those would resolve to the parent
    # directory or to base_dir itself, so reject them explicitly.
    safe_filename = Path(filename).name
    if safe_filename in ('', '.', '..'):
        raise ValueError(f"Invalid filename: {filename!r}")

    safe_path = Path(base_dir) / safe_filename

    # Argument list with shell=False: the path is passed as a single argv
    # entry and is never interpreted by a shell.
    result = subprocess.run(
        ['cat', str(safe_path)],
        shell=False,
        capture_output=True,
        text=True,
        check=True,
    )

    return result.stdout

# Usage
# content = safe_file_operation('data.txt')

## 5. Safe HTML Display

✅ **GOOD**: Escape user input before HTML display

In [None]:
from IPython.display import HTML, Text
import html

def safe_html_display(user_input: str):
    """Wrap user input in a paragraph tag after HTML-escaping it.

    Escaping turns markup characters into entities, so the input is
    rendered as literal text rather than interpreted as HTML.
    """
    sanitized = html.escape(user_input)
    markup = "<p>" + sanitized + "</p>"
    return HTML(markup)

# Untrusted content is best shown as plain text rather than HTML
def display_untrusted(content: str):
    """Return a plain-text display object for content that must not be rendered as HTML."""
    plain_text = Text(content)
    return plain_text

# Usage
# user_input = "<script>alert('xss')</script>"
# safe_html_display(user_input)  # Escapes HTML
# display_untrusted(user_input)   # No HTML rendering

## 6. PII Protection

✅ **GOOD**: Use placeholder values and never commit real PII

In [None]:
# Use placeholders for examples
EXAMPLE_EMAIL = "user@example.com"  # Placeholder only
EXAMPLE_SSN = "***-**-****"
EXAMPLE_PHONE = "555-555-5555"


def mask_email(address: str) -> str:
    """Return a display-safe form of an email address.

    Only the first character of the local part (the text before ``@``) is
    kept; the rest of the local part and the whole domain are masked.
    Slicing the raw address instead would leak the ``@`` and part of the
    domain whenever the local part is short.
    """
    local_part = address.partition('@')[0]
    return f"{local_part[:1]}***@***"


# For real data, load from environment
user_email = os.getenv('USER_EMAIL', 'user@example.com')

# Never print PII in outputs - show only the masked form
print(f"Processing data for user: {mask_email(user_email)}")

## 7. Secure Serialization

✅ **GOOD**: Use JSON instead of pickle when possible

In [None]:
import json

# Safe serialization with JSON: loading JSON only ever builds plain data
# (dicts, lists, strings, numbers), unlike pickle, which can run code.
data = {
    'model_params': {'lr': 0.001, 'epochs': 100},
    'metrics': {'accuracy': 0.95, 'loss': 0.05},
}

# Save safely. The encoding is explicit so the file does not depend on the
# platform's locale default.
with open('config.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, indent=2)

# Load safely, using the same explicit encoding
with open('config.json', 'r', encoding='utf-8') as f:
    loaded_data = json.load(f)

print("Data loaded securely using JSON")

## 8. Input Validation

✅ **GOOD**: Always validate user input

In [None]:
import re

def validate_email(email_addr: str) -> bool:
    """Validate email format.

    Args:
        email_addr: Candidate address to check.

    Returns:
        True if the entire string matches the address pattern, else False.
        Non-string input returns False.
    """
    if not isinstance(email_addr, str):
        return False
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    # fullmatch, not match: with re.match the trailing `$` also matches just
    # before a final newline, so "user@example.com\n" would be accepted.
    return re.fullmatch(pattern, email_addr) is not None

def validate_age(age: int) -> bool:
    """Report whether *age* lies within the inclusive range 0 to 120."""
    youngest, oldest = 0, 120
    return youngest <= age and age <= oldest

def safe_eval(expression: str):
    """Safely evaluate a string containing a Python literal.

    Only literal structures (strings, bytes, numbers, tuples, lists, dicts,
    sets, booleans and None) are accepted; names, calls and operators on
    non-literals are rejected.

    Args:
        expression: Source text of the literal to evaluate.

    Returns:
        The Python object described by ``expression``.

    Raises:
        ValueError: If ``expression`` is not a valid literal.
    """
    import ast
    try:
        return ast.literal_eval(expression)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as exc:
        # literal_eval can raise any of these on malformed input, e.g.
        # TypeError for an unhashable dict key. Report them all as the one
        # documented ValueError so callers need a single except clause.
        raise ValueError("Invalid expression - only literals allowed") from exc

# Usage
# email = input("Email: ")
# if validate_email(email):
#     print("Valid email")

# Safe evaluation of user input
# result = safe_eval("[1, 2, 3]")  # OK
# result = safe_eval("__import__('os').system('ls')")  # Raises ValueError

## 9. Output Sanitization

✅ **GOOD**: Clear sensitive outputs before sharing notebooks

In [None]:
# Before sharing notebooks:
# 1. Clear all outputs: Cell → All Output → Clear
# 2. Or use command line:
#    jupyter nbconvert --clear-output --inplace notebook.ipynb

# For programmatic output clearing:
from IPython.display import clear_output

# Stand-in for a computation that produces a sensitive value
sensitive_result = "secret data"

# Wipe what this cell has displayed so far, then print a neutral status line
clear_output()
print("Processing complete (sensitive data cleared)")

## 10. Error Handling

✅ **GOOD**: Handle errors gracefully without exposing sensitive information

In [None]:
def safe_operation(data):
    """Run process_data on *data*, reporting failures by exception type only.

    Returns the processing result on success. On any exception, prints a
    short message containing just the exception class name (never the
    exception message or a traceback) and returns None.
    """
    try:
        outcome = process_data(data)
    except ValueError as err:
        # Validation problems: name the error class, keep the details private
        print(f"Validation error: {type(err).__name__}")
        return None
    except Exception as err:
        # Anything else: same policy, generic label
        print(f"Operation failed: {type(err).__name__}")
        return None
    else:
        return outcome

# Don't do this:
# try:
#     result = api.call(api_key)
# except Exception as e:
#     print(f"Error: {e}")  # Might expose API key in error message!

## Security Checklist Before Sharing Notebooks

Before committing or sharing your notebook:

- [ ] Clear all cell outputs
- [ ] Remove hardcoded credentials
- [ ] Remove PII (emails, SSNs, phone numbers, etc.)
- [ ] Replace real data with example/placeholder values
- [ ] Remove absolute file paths
- [ ] Check for API keys in code or outputs
- [ ] Verify no shell commands with sensitive data
- [ ] Run PyGuard security scan
- [ ] Review .gitignore for .env files
- [ ] Test notebook runs from clean state

## Run PyGuard Security Scan

```bash
# Scan this notebook for security issues
python -c "
from pyguard.lib.notebook_security import scan_notebook
issues = scan_notebook('secure_notebook_example.ipynb')
print(f'Found {len(issues)} security issues')
"
```

Expected result: **0 issues** (this notebook follows all best practices!)