# 00 — Environment Check
## HumanForYou — Employee Attrition Prediction
---
Quick validation that all dependencies are installed and data files are accessible.

Run this notebook **first** after setting up the environment.

In [1]:
import importlib
import os
import sys
from importlib import metadata

print(f"Python: {sys.version}")
print(f"Executable: {sys.executable}")
print()

# Required packages as (import name, pip distribution name) pairs.
# The two differ for some projects (e.g. `sklearn` is installed as `scikit-learn`).
packages = [
    ("numpy", "numpy"),
    ("pandas", "pandas"),
    ("scipy", "scipy"),
    ("matplotlib", "matplotlib"),
    ("seaborn", "seaborn"),
    ("plotly", "plotly"),
    ("sklearn", "scikit-learn"),
    ("xgboost", "xgboost"),
    ("imblearn", "imbalanced-learn"),
    ("statsmodels", "statsmodels"),
    ("shap", "shap"),
    ("joblib", "joblib"),
]


def check_package(import_name, pkg_name):
    """Try to import one package and report its status.

    Parameters
    ----------
    import_name : str
        Module name passed to the import machinery.
    pkg_name : str
        Distribution name used for the install hint and the metadata lookup.

    Returns
    -------
    tuple[bool, str]
        ``(True, version)`` when the import succeeds, where ``version`` is the
        module's ``__version__``, else the installed distribution version,
        else ``"N/A"``. ``(False, detail)`` when the import fails, where
        ``detail`` is either an install hint (package not importable) or a
        description of the exception raised while importing.
    """
    try:
        mod = importlib.import_module(import_name)
    except ImportError:
        return False, f"pip install {pkg_name}"
    except Exception as exc:
        # An installed-but-broken package can raise something other than
        # ImportError; report it instead of aborting the whole check.
        return False, f"import failed ({type(exc).__name__}: {exc})"

    version = getattr(mod, "__version__", None)
    if version is None:
        # Not every package exposes __version__; fall back to the metadata
        # recorded by the installer for the distribution.
        try:
            version = metadata.version(pkg_name)
        except metadata.PackageNotFoundError:
            version = "N/A"
    return True, version


# Check all required packages
failed_packages = []
for import_name, pkg_name in packages:
    ok, detail = check_package(import_name, pkg_name)
    if ok:
        print(f"  [OK] {pkg_name:20s} v{detail}")
    else:
        print(f"  [FAIL] {pkg_name:20s} — {detail}")
        failed_packages.append(pkg_name)

# Kept as a module-level flag so later cells can consult the result.
all_ok = not failed_packages

print()
if all_ok:
    print("All packages installed successfully.")
else:
    print(f"Missing or broken: {', '.join(failed_packages)}")
    print("Some packages are missing. Run: pip install -r requirements.txt")

Python: 3.13.5 | packaged by Anaconda, Inc. | (main, Jun 12 2025, 16:37:03) [MSC v.1929 64 bit (AMD64)]
Executable: C:\Users\yanis\Documents\CESI\A5\AI Project\HumanForYou\.venv\Scripts\python.exe

  [OK] numpy                v2.3.5
  [OK] pandas               v3.0.0
  [OK] scipy                v1.15.3
  [OK] matplotlib           v3.10.8
  [OK] seaborn              v0.13.2
  [OK] plotly               v6.5.2
  [OK] scikit-learn         v1.8.0
  [OK] xgboost              v3.2.0
  [OK] imbalanced-learn     v0.14.1
  [OK] statsmodels          v0.14.6
  [OK] shap                 v0.50.0
  [OK] joblib               v1.5.3

All packages installed successfully.


In [2]:
# Check data files
# Paths are relative to the notebook's working directory; `os` is imported
# in the first cell, which must be run before this one.
DATA_DIR = "../data/raw"
expected_files = [
    "general_data.csv",
    "employee_survey_data.csv",
    "manager_survey_data.csv",
    "in_time.csv",
    "out_time.csv",
]

print("DATA FILES CHECK")
print("=" * 50)
unusable_files = []  # expected files that are missing or empty
for f in expected_files:
    path = os.path.join(DATA_DIR, f)
    if not os.path.isfile(path):
        print(f"  [FAIL] {f:35s} — file not found")
        unusable_files.append(f)
    elif os.path.getsize(path) == 0:
        # A zero-byte file exists but cannot be loaded; do not report it as OK.
        print(f"  [FAIL] {f:35s} — file is empty")
        unusable_files.append(f)
    else:
        size_mb = os.path.getsize(path) / (1024 * 1024)
        print(f"  [OK] {f:35s} ({size_mb:.1f} MB)")

# Check outputs directory (created if it does not exist yet)
OUTPUT_DIR = "../outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"\nOutputs directory: {os.path.abspath(OUTPUT_DIR)} — ready")

# Only announce readiness when every check passed. `all_ok` is set by the
# package check in the first cell; default to True if that flag is absent.
packages_ok = globals().get("all_ok", True)
if unusable_files:
    print(
        f"\nEnvironment is NOT ready: {len(unusable_files)} data file(s) "
        f"missing or empty in {os.path.abspath(DATA_DIR)}"
    )
elif not packages_ok:
    print("\nEnvironment is NOT ready: some packages are missing (see the package check above).")
else:
    print("\nEnvironment is ready. Proceed to 01_Data_Validation_Pipeline.ipynb")

DATA FILES CHECK
  [OK] general_data.csv                    (0.5 MB)
  [OK] employee_survey_data.csv            (0.0 MB)
  [OK] manager_survey_data.csv             (0.0 MB)
  [OK] in_time.csv                         (22.2 MB)
  [OK] out_time.csv                        (22.2 MB)

Outputs directory: C:\Users\yanis\Documents\CESI\A5\AI Project\HumanForYou\outputs — ready

Environment is ready. Proceed to 01_Data_Validation_Pipeline.ipynb
