diff --git a/.gitignore b/.gitignore
index 1fc79b8..551a329 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,3 +17,41 @@ data/molecules/zinc-full/*.pkl
 
 dataset/
 
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.tox/
+.nox/
+
+# Claude
+.claude/*
+
+# Build artifacts
+dist/
+build/
+*.egg-info/
+*.egg
+
+# Virtual environments
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+.python-version
+
+# IDE files
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+
+# Poetry
+poetry.lock
+
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..c63b50c
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,23 @@
+# Claude Working Memory
+
+## Project Overview
+Python machine learning project focused on graph neural networks and molecular modeling.
+
+## Important Commands
+- Testing: `poetry run test` or `poetry run tests`
+- Linting: `poetry run flake8`
+- Type checking: `poetry run mypy`
+- Code formatting: `poetry run black .`
+
+## Recent Changes
+- Set up comprehensive testing infrastructure with Poetry
+- Added pytest, pytest-cov, and pytest-mock for testing
+- Created test directory structure with unit and integration subdirectories
+- Configured pytest and coverage settings in pyproject.toml
+- Added comprehensive shared fixtures in conftest.py
+- Updated .gitignore with testing and Claude-specific entries
+
+## Notes
+- This project was originally set up with Conda for dependency management (see environment_cpu.yml and environment_gpu.yml)
+- DGL (Deep Graph Library) is not declared in pyproject.toml and needs to be installed separately via conda
+- Poetry has been set up alongside Conda for managing testing dependencies
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..e3abd6c
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,149 @@
+[tool.poetry]
+name = "graph-transformer"
+version = "0.1.0"
+description = "Graph Transformer for molecular modeling and node classification"
+authors = ["Your Name <you@example.com>"]
+readme = "README.md"
+packages = [{include = "layers"}, {include = "nets"}, {include = "train"}]
+
+[tool.poetry.dependencies]
+python = "^3.7"
+torch = ">=1.6.0"
+torchvision = ">=0.7.0"
+numpy = ">=1.19.0"
+matplotlib = ">=3.1.0"
+tensorboard = ">=2.1.0"
+tensorboardx = ">=1.8"
+future = ">=0.18.2"
+absl-py = "*"
+networkx = ">=2.3"
+scikit-learn = ">=0.21.2"
+scipy = ">=1.3.0"
+h5py = ">=2.9.0"
+scikit-image = ">=0.15.0"
+requests = ">=2.22.0"
+tqdm = ">=4.43.0"
+pillow = ">=6.1"
+python-dateutil = ">=2.8.0"
+# Note: DGL needs to be installed separately via conda
+
+[tool.poetry.group.dev.dependencies]
+pytest = ">=7.0.0"
+pytest-cov = ">=4.0.0"
+pytest-mock = ">=3.10.0"
+black = ">=22.0.0"
+flake8 = ">=5.0.0"
+mypy = ">=0.990"
+isort = ">=5.10.0"
+pre-commit = ">=2.20.0"
+
+[tool.poetry.scripts]
+test = "pytest:main"
+tests = "pytest:main"
+
+[tool.pytest.ini_options]
+minversion = "7.0"
+testpaths = ["tests"]
+python_files = ["test_*.py", "*_test.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
+addopts = [
+    "-ra",
+    "--strict-markers",
+    "--cov=layers",
+    "--cov=nets",
+    "--cov=train",
+    "--cov-branch",
+    "--cov-report=term-missing:skip-covered",
+    "--cov-report=html",
+    "--cov-report=xml",
+    "--cov-fail-under=80",
+    "-vv",
+    "--tb=short",
+    "--maxfail=1",
+]
+markers = [
+    "unit: Unit tests",
+    "integration: Integration tests",
+    "slow: Slow tests",
+]
+filterwarnings = [
+    "ignore::DeprecationWarning",
+    "ignore::PendingDeprecationWarning",
+]
+
+[tool.coverage.run]
+branch = true
+source = ["layers", "nets", "train"]
+omit = [
+    "*/tests/*",
+    "*/__init__.py",
+    "*/conftest.py",
+    "*/setup.py",
+]
+
+[tool.coverage.report]
+precision = 2
+show_missing = true
+skip_covered = true
+skip_empty = true
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "if self.debug:",
+    "if __name__ == .__main__.:",
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "if 0:",
+    "if False:",
+    '^\s*pass\s*$',  # anchored: a bare "pass" regex would exclude any line containing it
+]
+
+[tool.coverage.html]
+directory = "htmlcov"
+
+[tool.coverage.xml]
+output = "coverage.xml"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.isort]
+profile = "black"
+line_length = 100
+multi_line_output = 3
+include_trailing_comma = true
+force_grid_wrap = 0
+use_parentheses = true
+ensure_newline_before_comments = true
+
+[tool.black]
+line-length = 100
+target-version = ['py37']
+include = '\.pyi?$'
+extend-exclude = '''
+/(
+  \.eggs
+  | \.git
+  | \.hg
+  | \.mypy_cache
+  | \.tox
+  | \.venv
+  | _build
+  | buck-out
+  | build
+  | dist
+)/'''
+
+[tool.mypy]
+python_version = "3.7"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = false
+ignore_missing_imports = true
+exclude = [
+    "tests/",
+    "build/",
+    "dist/",
+]
\ No newline at end of file
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..cdb4a5d
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,219 @@
+"""
+Shared pytest fixtures and configuration for the test suite.
+"""
+import os
+import shutil
+import tempfile
+from pathlib import Path
+from typing import Generator, Dict, Any
+from unittest.mock import Mock, MagicMock
+
+import pytest
+import torch
+import numpy as np
+
+
+@pytest.fixture
+def temp_dir() -> Generator[Path, None, None]:
+    """
+    Create a temporary directory for test files.
+
+    Yields:
+        Path: Path to the temporary directory
+    """
+    temp_path = tempfile.mkdtemp()
+    yield Path(temp_path)
+    shutil.rmtree(temp_path)
+
+
+@pytest.fixture
+def mock_config() -> Dict[str, Any]:
+    """
+    Provide a mock configuration dictionary for tests.
+
+    Returns:
+        Dict[str, Any]: Mock configuration with common parameters
+    """
+    return {
+        'model': {
+            'name': 'test_model',
+            'hidden_dim': 128,
+            'num_layers': 2,
+            'dropout': 0.1,
+            'batch_norm': True,
+        },
+        'training': {
+            'batch_size': 32,
+            'learning_rate': 0.001,
+            'epochs': 10,
+            'optimizer': 'adam',
+            'scheduler': 'cosine',
+        },
+        'data': {
+            'dataset': 'test_dataset',
+            'num_workers': 2,
+            'pin_memory': True,
+            'split_ratio': [0.8, 0.1, 0.1],
+        },
+        'device': 'cpu',
+        'seed': 42,
+    }
+
+
+@pytest.fixture
+def sample_graph_data() -> Dict[str, torch.Tensor]:
+    """
+    Create sample graph data for testing.
+
+    Returns:
+        Dict[str, torch.Tensor]: Dictionary containing graph components
+    """
+    num_nodes = 10
+    num_edges = 15
+    feature_dim = 16
+
+    return {
+        'node_features': torch.randn(num_nodes, feature_dim),
+        'edge_index': torch.randint(0, num_nodes, (2, num_edges)),
+        'edge_features': torch.randn(num_edges, 8),
+        'labels': torch.randint(0, 5, (num_nodes,)),
+        'batch': torch.zeros(num_nodes, dtype=torch.long),
+    }
+
+
+@pytest.fixture
+def sample_molecular_data() -> Dict[str, Any]:
+    """
+    Create sample molecular data for testing.
+
+    Returns:
+        Dict[str, Any]: Dictionary containing molecular graph data
+    """
+    return {
+        'smiles': 'CC(C)CC1=CC=C(C=C1)C(C)C(=O)O',
+        'num_atoms': 15,
+        'num_bonds': 16,
+        'atom_features': torch.randn(15, 32),
+        'bond_features': torch.randn(16, 16),
+        'adjacency_matrix': torch.randint(0, 2, (15, 15)),
+        'target': torch.tensor([0.5]),
+    }
+
+
+@pytest.fixture
+def mock_model() -> Mock:
+    """
+    Create a mock PyTorch model for testing.
+
+    Returns:
+        Mock: Mock model object with common methods
+    """
+    model = MagicMock()
+    model.forward.return_value = torch.randn(32, 10)  # batch_size x num_classes
+    model.parameters.return_value = [torch.randn(10, 10) for _ in range(5)]
+    model.train.return_value = model
+    model.eval.return_value = model
+    model.to.return_value = model
+    return model
+
+
+@pytest.fixture
+def mock_dataloader() -> Mock:
+    """
+    Create a mock DataLoader for testing.
+
+    Returns:
+        Mock: Re-iterable mock DataLoader yielding 5 identical sample batches
+    """
+    dataloader = MagicMock()
+    batch_data = {
+        'input': torch.randn(32, 128),
+        'target': torch.randint(0, 10, (32,)),
+    }
+    dataloader.__iter__.return_value = [batch_data] * 5  # list, not iter(): survives re-iteration
+    dataloader.__len__.return_value = 5
+    return dataloader
+
+
+@pytest.fixture
+def sample_checkpoint(temp_dir: Path) -> Path:
+    """
+    Create a sample model checkpoint file.
+
+    Args:
+        temp_dir: Temporary directory fixture
+
+    Returns:
+        Path: Path to the checkpoint file
+    """
+    checkpoint_path = temp_dir / 'model_checkpoint.pt'
+    checkpoint_data = {
+        'epoch': 5,
+        'model_state_dict': {'layer1.weight': torch.randn(10, 10)},
+        'optimizer_state_dict': {'param_groups': [{'lr': 0.001}]},
+        'loss': 0.123,
+        'metrics': {'accuracy': 0.95, 'f1_score': 0.93},
+    }
+    torch.save(checkpoint_data, checkpoint_path)
+    return checkpoint_path
+
+
+@pytest.fixture
+def device() -> torch.device:
+    """
+    Get the appropriate torch device for testing.
+
+    Returns:
+        torch.device: CPU device for consistent testing
+    """
+    return torch.device('cpu')
+
+
+@pytest.fixture
+def random_seed() -> int:
+    """
+    Set random seeds for reproducible tests.
+
+    Returns:
+        int: The seed value used
+    """
+    seed = 42
+    torch.manual_seed(seed)
+    np.random.seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+    return seed
+
+
+@pytest.fixture(autouse=True)
+def cleanup_cuda():
+    """
+    Automatically clean up CUDA cache after each test.
+    """
+    yield
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+
+
+@pytest.fixture
+def mock_tensorboard_writer() -> Mock:
+    """
+    Create a mock TensorBoard SummaryWriter.
+
+    Returns:
+        Mock: Mock SummaryWriter object
+    """
+    writer = MagicMock()
+    writer.add_scalar = MagicMock()
+    writer.add_histogram = MagicMock()
+    writer.add_graph = MagicMock()
+    writer.close = MagicMock()
+    return writer
+
+
+# Markers for different test types
+def pytest_configure(config):
+    """Configure pytest with custom markers."""
+    config.addinivalue_line("markers", "unit: Unit tests")
+    config.addinivalue_line("markers", "integration: Integration tests")
+    config.addinivalue_line("markers", "slow: Slow tests that take significant time")
\ No newline at end of file
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_setup_validation.py b/tests/test_setup_validation.py
new file mode 100644
index 0000000..e46b548
--- /dev/null
+++ b/tests/test_setup_validation.py
@@ -0,0 +1,102 @@
+"""
+Validation tests to ensure the testing infrastructure is properly configured.
+"""
+import sys
+from pathlib import Path
+
+import pytest
+import torch
+
+
+class TestSetupValidation:
+    """Tests to validate the testing infrastructure setup."""
+
+    @pytest.mark.unit
+    def test_pytest_is_installed(self):
+        """Test that pytest is properly installed."""
+        assert 'pytest' in sys.modules
+
+    @pytest.mark.unit
+    def test_coverage_is_configured(self):
+        """Test that coverage is properly configured."""
+        try:
+            import coverage
+            assert coverage is not None
+        except ImportError:
+            pytest.fail("Coverage module not found")
+
+    @pytest.mark.unit
+    def test_mock_is_available(self, mocker):
+        """Test that the pytest-mock ``mocker`` fixture is available and works."""
+        # unittest.mock is stdlib and always importable, so exercise the
+        # pytest-mock plugin itself instead of just importing Mock/patch.
+        stub = mocker.Mock(return_value=42)
+        assert stub() == 42
+        stub.assert_called_once_with()
+
+    @pytest.mark.unit
+    def test_project_structure_exists(self):
+        """Test that key project directories exist."""
+        project_root = Path(__file__).parent.parent
+
+        assert project_root.exists()
+        assert (project_root / 'layers').exists()
+        assert (project_root / 'nets').exists()
+        assert (project_root / 'train').exists()
+        assert (project_root / 'tests').exists()
+        assert (project_root / 'tests' / 'unit').exists()
+        assert (project_root / 'tests' / 'integration').exists()
+
+    @pytest.mark.unit
+    def test_conftest_fixtures_available(self, temp_dir, mock_config, device):
+        """Test that conftest fixtures are properly loaded."""
+        assert temp_dir.exists()
+        assert isinstance(mock_config, dict)
+        assert 'model' in mock_config
+        assert 'training' in mock_config
+        assert isinstance(device, torch.device)
+
+    @pytest.mark.unit
+    def test_markers_are_registered(self, request):
+        """Test that custom markers are registered in the pytest ``markers`` ini setting."""
+        registered = {line.split(':')[0].strip() for line in request.config.getini('markers')}
+        assert {'unit', 'integration', 'slow'} <= registered
+
+    @pytest.mark.integration
+    def test_torch_is_available(self):
+        """Test that PyTorch is properly installed."""
+        assert torch is not None
+        tensor = torch.tensor([1, 2, 3])
+        assert tensor.shape == (3,)
+
+    @pytest.mark.integration
+    def test_sample_fixtures_work(self, sample_graph_data, sample_molecular_data):
+        """Test that data fixtures provide expected structure."""
+        # Test graph data
+        assert 'node_features' in sample_graph_data
+        assert 'edge_index' in sample_graph_data
+        assert isinstance(sample_graph_data['node_features'], torch.Tensor)
+
+        # Test molecular data
+        assert 'smiles' in sample_molecular_data
+        assert 'atom_features' in sample_molecular_data
+        assert isinstance(sample_molecular_data['atom_features'], torch.Tensor)
+
+    @pytest.mark.slow
+    def test_slow_marker_works(self):
+        """Test that the slow marker is properly configured."""
+        import time
+        start = time.time()
+        time.sleep(0.1)
+        elapsed = time.time() - start
+        assert elapsed >= 0.1
+
+    def test_random_seed_fixture(self, random_seed):
+        """Test that random seed fixture provides reproducibility."""
+        assert random_seed == 42
+        # Test PyTorch randomness
+        tensor1 = torch.randn(5)
+        torch.manual_seed(42)
+        tensor2 = torch.randn(5)
+        assert torch.allclose(tensor1, tensor2)
\ No newline at end of file
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
new file mode 100644
index 0000000..e69de29