# Setup and Installation

This notebook prepares the project environment, creates the expected folder structure, and validates that key dependencies are installed. Run this notebook first to ensure the rest of the workflow executes smoothly.

In [None]:
import os
import sys
from pathlib import Path

# Folders the workflow expects, given relative to the current working directory.
PROJECT_DIRS = [
    'data/raw',
    'data/processed',
    'models',
    'figures',
    'reports',
    'notebooks',
]

# Create each folder (and any missing parents); folders that already exist
# are left untouched because of exist_ok=True.
for project_dir in map(Path, PROJECT_DIRS):
    project_dir.mkdir(parents=True, exist_ok=True)
    print(f'Ensured directory exists: {project_dir}')

# Report the interpreter in use so environment mismatches are easy to spot.
print(f'Python version: {sys.version}')

## Verify core dependencies

This quick check confirms that the modules used throughout the workflow can be imported. If any import fails, install the packages listed in `requirements.txt`.

In [None]:
required = [
    'pandas', 'numpy', 'sklearn', 'matplotlib', 'seaborn', 'xgboost', 'shap', 'fuzzywuzzy'
]

missing = []
for package in required:
    try:
        __import__(package)
        print(f'✅ {package} available')
    except ImportError:
        missing.append(package)
        print(f'⚠️ {package} missing')

if missing:
    print('
Install missing packages with:')
    print('  pip install -r requirements.txt')