In [1]:
# =============================================================
# 🧭 UNIVERSAL ENVIRONMENT BOOTSTRAP — Local / Kaggle CPU / Kaggle GPU
# =============================================================
import sys
from pathlib import Path
import torch

# -------------------------------------------------------------
# 1) Locate the project root (nearest directory that holds src/)
# -------------------------------------------------------------
CWD = Path.cwd().resolve()

# Probe the working directory first, then its parent, then its
# grandparent; the closest match wins. A notebook started from a
# notebooks/ subfolder therefore resolves to that folder's parent.
for candidate in (CWD, CWD.parent, CWD.parent.parent):
    if (candidate / "src").exists():
        PROJECT_ROOT = candidate
        break
else:
    # for/else: reached only when no candidate triggered the break above.
    raise RuntimeError("‚ùå Could not locate project root containing 'src/' folder.")

# Put the root at the front of sys.path so that `import src...` resolves.
root_entry = str(PROJECT_ROOT)
if root_entry not in sys.path:
    sys.path.insert(0, root_entry)

print(f"‚úÖ Project root detected: {PROJECT_ROOT}")

# -------------------------------------------------------------
# 2) Environment flags
# -------------------------------------------------------------
# The presence of a /kaggle directory is used as the Kaggle marker.
IS_KAGGLE = Path("/kaggle").exists()

# Device preference order: CUDA, then MPS, then CPU. The getattr guard
# covers torch builds whose torch.backends has no `mps` attribute.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"

# DEVICE is one of "cuda" / "mps" / "cpu", so anything but "cpu" is an accelerator.
IS_GPU = DEVICE != "cpu"

print(f"üß© Running on: {'Kaggle' if IS_KAGGLE else 'Local'} environment")
print(f"‚öôÔ∏è Device detected: {DEVICE} | GPU available: {IS_GPU}")

# -------------------------------------------------------------
# 3) Import core project modules
# -------------------------------------------------------------
# These are project-local modules under src/; they resolve because
# PROJECT_ROOT was inserted into sys.path earlier in this cell.
# Importing them here acts as a smoke test: an import failure surfaces
# now rather than in the middle of later data work.
from src import config
from src.data_loading import load_train_data, load_test_data
from src.model_utils import build_model

print("üì¶ Imports OK ‚Äî config, data_loading, and model_utils are accessible.")

# -------------------------------------------------------------
# 4) Verify essential paths
# -------------------------------------------------------------
# Echo the configured locations together with an existence flag for the
# two image directories.
print(f"DATA_DIR: {config.DATA_DIR}")
print(f"TRAIN_IMG_DIR: {config.TRAIN_IMG_DIR.exists()} ‚Üí {config.TRAIN_IMG_DIR}")
print(f"TEST_IMG_DIR:  {config.TEST_IMG_DIR.exists()}  ‚Üí {config.TEST_IMG_DIR}")

# Only announce "safe to proceed" when both probed directories exist;
# otherwise name the missing ones so the problem is not hidden behind a
# success banner. This stays a warning (not an exception) so the cell
# remains a best-effort diagnostic.
missing_dirs = [
    dir_name
    for dir_name in ("TRAIN_IMG_DIR", "TEST_IMG_DIR")
    if not getattr(config, dir_name).exists()
]
if missing_dirs:
    print(f"\nWARNING: bootstrap finished, but these directories do not exist: {', '.join(missing_dirs)}")
else:
    print("\n‚úÖ Environment bootstrap complete ‚Äî safe to proceed.")


‚úÖ Project root detected: /Users/olia_/projects/Kaggle/csiro-biomass
üß© Running on: Local environment
‚öôÔ∏è Device detected: mps | GPU available: True
üì¶ Imports OK ‚Äî config, data_loading, and model_utils are accessible.
DATA_DIR: /Users/olia_/projects/Kaggle/csiro-biomass/input_local
TRAIN_IMG_DIR: True ‚Üí /Users/olia_/projects/Kaggle/csiro-biomass/input_local/train
TEST_IMG_DIR:  True  ‚Üí /Users/olia_/projects/Kaggle/csiro-biomass/input_local/test

‚úÖ Environment bootstrap complete ‚Äî safe to proceed.


In [2]:
# =============================================================
# 🧩 DATA LAYOUT VERIFICATION — Safe for Local + Kaggle
# =============================================================
from pathlib import Path
from src import config
from src.data_loading import load_train_data, load_test_data, sample_image_stats

# Report where the data lives and whether each expected path exists.
# Every line goes through one print call, separated by newlines.
print(
    "üìÇ DATA DIRECTORY OVERVIEW",
    f"DATA_DIR:        {config.DATA_DIR}",
    f"TRAIN_IMG_DIR:   {config.TRAIN_IMG_DIR.exists()} ‚Üí {config.TRAIN_IMG_DIR}",
    f"TEST_IMG_DIR:    {config.TEST_IMG_DIR.exists()}  ‚Üí {config.TEST_IMG_DIR}",
    f"TRAIN_CSV:       {config.TRAIN_CSV.exists()} ‚Üí {config.TRAIN_CSV}",
    f"TEST_CSV:        {config.TEST_CSV.exists()}  ‚Üí {config.TEST_CSV}",
    sep="\n",
)

# -------------------------------------------------------------
# 1) Load the train / test tables
# -------------------------------------------------------------
# Each loader hands back its table together with the name of the image
# column; the train loader additionally reports the target column.
train_df, train_img_col, target_col = load_train_data()
test_df, test_img_col = load_test_data()

# Shape summary, emitted as a single multi-line print.
print(
    "\nüìä DATAFRAME SHAPES",
    f"   train_df: {train_df.shape} | image_col='{train_img_col}' | target_col='{target_col}'",
    f"   test_df:  {test_df.shape}  | image_col='{test_img_col}'",
    sep="\n",
)

# Full column listings for both tables.
print("\nüß≠ TRAIN COLUMNS:", list(train_df.columns))
print("üß≠ TEST  COLUMNS:", list(test_df.columns))

# -------------------------------------------------------------
# 2) Missing values check
# -------------------------------------------------------------
# Null counts per column for each table, shown through the notebook's
# display() so they render like any other cell output.
print("\nüîé Missing values (train):")
train_null_counts = train_df.isna().sum()
display(train_null_counts)

print("\nüîé Missing values (test):")
test_null_counts = test_df.isna().sum()
display(test_null_counts)

# -------------------------------------------------------------
# 3) Optional: sample image stats (quick integrity probe)
# -------------------------------------------------------------
# Pass the image-column names reported by the loaders instead of a
# hard-coded "image_path", so the probe follows whatever column the
# loaders actually resolved.
train_stats = sample_image_stats(train_df, image_path_col=train_img_col, max_samples=100)
test_stats  = sample_image_stats(test_df,  image_path_col=test_img_col,  max_samples=50)

print("\nüñºÔ∏è Train image sample stats:", train_stats)
print("üñºÔ∏è Test  image sample stats:",  test_stats)

# Only print the success banner when the sampled stats are clean.
# The stats are dicts carrying 'missing' and 'bad' counters; the
# isinstance guard keeps this check harmless should a different
# object ever be returned.
flagged_splits = [
    split_name
    for split_name, split_stats in (("train", train_stats), ("test", test_stats))
    if isinstance(split_stats, dict)
    and (split_stats.get("missing") or split_stats.get("bad"))
]
if flagged_splits:
    print(f"\nWARNING: nonzero 'missing'/'bad' counts in sampled image stats for: {', '.join(flagged_splits)}")
else:
    print("\n‚úÖ Data layout verified successfully.")


üìÇ DATA DIRECTORY OVERVIEW
DATA_DIR:        /Users/olia_/projects/Kaggle/csiro-biomass/input_local
TRAIN_IMG_DIR:   True ‚Üí /Users/olia_/projects/Kaggle/csiro-biomass/input_local/train
TEST_IMG_DIR:    True  ‚Üí /Users/olia_/projects/Kaggle/csiro-biomass/input_local/test
TRAIN_CSV:       True ‚Üí /Users/olia_/projects/Kaggle/csiro-biomass/input_local/train.csv
TEST_CSV:        True  ‚Üí /Users/olia_/projects/Kaggle/csiro-biomass/input_local/test.csv

üìä DATAFRAME SHAPES
   train_df: (1785, 9) | image_col='image_path' | target_col='target'
   test_df:  (5, 3)  | image_col='image_path'

üß≠ TRAIN COLUMNS: ['sample_id', 'image_path', 'Sampling_Date', 'State', 'Species', 'Pre_GSHH_NDVI', 'Height_Ave_cm', 'target_name', 'target']
üß≠ TEST  COLUMNS: ['sample_id', 'image_path', 'target_name']

üîé Missing values (train):


sample_id        0
image_path       0
Sampling_Date    0
State            0
Species          0
Pre_GSHH_NDVI    0
Height_Ave_cm    0
target_name      0
target           0
dtype: int64


üîé Missing values (test):


sample_id      0
image_path     0
target_name    0
dtype: int64


üñºÔ∏è Train image sample stats: {'n_examined': 100, 'missing': 0, 'bad': 0, 'sizes_count': 100, 'width_mean': 2000.0, 'height_mean': 1000.0, 'width_min': 2000, 'height_min': 1000, 'width_max': 2000, 'height_max': 1000}
üñºÔ∏è Test  image sample stats: {'n_examined': 5, 'missing': 0, 'bad': 0, 'sizes_count': 5, 'width_mean': 2000.0, 'height_mean': 1000.0, 'width_min': 2000, 'height_min': 1000, 'width_max': 2000, 'height_max': 1000}

‚úÖ Data layout verified successfully.
