# White Balance Prediction Solution - ENHANCED

Complete solution for predicting Temperature and Tint values for white balance adjustments.
Handles non-linear temperature sensitivity and ensures consistency across similar images.

## ENHANCEMENTS:
- K-Fold Cross-Validation for robust evaluation
- Hyperparameter tuning with Optuna
- Additional models: CatBoost and Neural Networks
- Advanced image features: per-channel color statistics, color ratios, HSV/LAB summaries, and color-cast indicators

**Usage Options** (set these flags in the configuration cell of section 6):
- `ENABLE_TUNING = True`: Enable hyperparameter tuning
- `ENABLE_CV = True`: Enable cross-validation
- `ENABLE_NEURAL = True`: Enable neural network models

## 1. Import Libraries and Setup

In [None]:
import pandas as pd\nimport numpy as np\nimport os\nimport warnings\nfrom pathlib import Path\nimport argparse\n\nwarnings.filterwarnings('ignore')\n\n# Check dependencies\ntry:\n    import cv2\n    OPENCV_AVAILABLE = True\nexcept ImportError:\n    OPENCV_AVAILABLE = False\n    print(\"Warning: OpenCV not available. Using metadata-only approach.\")\n\nfrom sklearn.model_selection import train_test_split, KFold\nfrom sklearn.metrics import mean_absolute_error\nfrom sklearn.preprocessing import StandardScaler\nfrom lightgbm import LGBMRegressor\n\ntry:\n    from xgboost import XGBRegressor\n    XGBOOST_AVAILABLE = True\nexcept ImportError:\n    XGBOOST_AVAILABLE = False\n    print(\"Warning: XGBoost not available. Using LightGBM only.\")\n\ntry:\n    from catboost import CatBoostRegressor\n    CATBOOST_AVAILABLE = True\nexcept ImportError:\n    CATBOOST_AVAILABLE = False\n    print(\"Warning: CatBoost not available.\")\n\ntry:\n    import optuna\n    optuna.logging.set_verbosity(optuna.logging.WARNING)\n    OPTUNA_AVAILABLE = True\nexcept ImportError:\n    OPTUNA_AVAILABLE = False\n    print(\"Warning: Optuna not available. Skipping hyperparameter tuning.\")\n\ntry:\n    import torch\n    import torch.nn as nn\n    import torch.optim as optim\n    from torch.utils.data import TensorDataset, DataLoader\n    PYTORCH_AVAILABLE = True\nexcept ImportError:\n    PYTORCH_AVAILABLE = False\n    print(\"Warning: PyTorch not available. Neural networks disabled.\")\n\nprint(\"Libraries loaded successfully!\")"

## 2. Feature Engineering Functions

In [None]:
def engineer_features(df):
    """Create engineered features addressing non-linear temperature sensitivity.

    The input frame is copied, so the caller's DataFrame is never modified.

    Args:
        df: DataFrame containing the columns ``currTemp``, ``currTint``,
            ``aperture``, ``shutterSpeed``, ``isoSpeedRating``,
            ``focalLength`` and ``flashFired``.

    Returns:
        A new DataFrame with the original columns followed by the derived
        ones. Derived float columns never contain ``±inf``: values that
        overflow because of a zero input (for example ``aperture == 0``)
        are stored as ``NaN`` so that later imputation can fill them.
    """
    df = df.copy()
    source_columns = set(df.columns)

    # Zero inputs make the logs and ratios below produce infinities; silence
    # the numpy warnings here and clean the results up before returning.
    with np.errstate(divide='ignore', invalid='ignore'):
        # Non-linear temperature transformations (critical for sensitivity)
        df['currTemp_log'] = np.log(df['currTemp'])
        df['currTemp_sqrt'] = np.sqrt(df['currTemp'])
        df['currTemp_inv'] = 1 / df['currTemp']
        df['currTemp_squared'] = df['currTemp'] ** 2

        # Temperature bins for different sensitivity ranges
        df['temp_bin_low'] = (df['currTemp'] < 3000).astype(int)
        df['temp_bin_mid'] = ((df['currTemp'] >= 3000) & (df['currTemp'] <= 6000)).astype(int)
        df['temp_bin_high'] = (df['currTemp'] > 6000).astype(int)
        df['temp_very_warm'] = (df['currTemp'] > 7000).astype(int)
        df['temp_very_cool'] = (df['currTemp'] < 2500).astype(int)

        # Tint transformations
        df['currTint_abs'] = np.abs(df['currTint'])
        df['currTint_squared'] = df['currTint'] ** 2

        # Exposure value
        df['ev'] = np.log2(df['aperture'] ** 2 / df['shutterSpeed'])
        df['ev_squared'] = df['ev'] ** 2

        # Interaction features
        df['temp_tint_interaction'] = df['currTemp'] * df['currTint']
        df['temp_tint_ratio'] = df['currTemp'] / (np.abs(df['currTint']) + 1)
        df['iso_aperture'] = df['isoSpeedRating'] * df['aperture']
        df['focal_aperture'] = df['focalLength'] / df['aperture']
        df['iso_shutter'] = df['isoSpeedRating'] * df['shutterSpeed']

        # Flash interactions (important for light source variations)
        df['flash_iso'] = df['flashFired'] * df['isoSpeedRating']
        df['flash_temp'] = df['flashFired'] * df['currTemp']
        df['flash_ev'] = df['flashFired'] * df['ev']

        # Advanced exposure features
        df['iso_log'] = np.log(df['isoSpeedRating'] + 1)
        df['aperture_squared'] = df['aperture'] ** 2
        df['shutter_log'] = np.log(df['shutterSpeed'] + 1e-6)
        df['focal_log'] = np.log(df['focalLength'] + 1)
        df['focal_iso'] = df['focalLength'] * df['isoSpeedRating']

    # Median imputation only fills NaN, so infinities would otherwise reach
    # the scaler and the models untouched. Only derived columns are cleaned;
    # the caller's original columns are passed through as-is.
    derived_float_cols = [
        col for col in df.columns
        if col not in source_columns and pd.api.types.is_float_dtype(df[col])
    ]
    if derived_float_cols:
        df[derived_float_cols] = df[derived_float_cols].replace([np.inf, -np.inf], np.nan)

    return df

## 3. Advanced Image Feature Extraction

In [None]:
def extract_advanced_image_features(image_path):
    """Compute colour summary features for a single image file.

    The image is read with OpenCV and converted to RGB, HSV, LAB and
    grayscale. The returned dict holds per-channel RGB statistics, RGB
    ratios (on means and on medians), channel dominance, HSV mean/std,
    LAB channel means, grayscale brightness/contrast and the absolute
    deviation of each mean ratio from 1.0.

    Args:
        image_path: Path (or path-like) of the image to read.

    Returns:
        Dict mapping feature name to value. An empty dict is returned when
        OpenCV is unavailable, the image cannot be read, or any error occurs
        while processing (the error is printed, not raised).
    """
    if not OPENCV_AVAILABLE:
        return {}

    try:
        bgr = cv2.imread(str(image_path))
        if bgr is None:
            return {}

        # All conversions start from the BGR array returned by imread.
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
        lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

        feats = {}
        eps = 1e-6  # keeps the ratios finite when a channel is all zeros

        # Per-channel RGB statistics
        for position, name in enumerate(('r', 'g', 'b')):
            plane = rgb[:, :, position]
            feats[f'{name}_mean'] = plane.mean()
            feats[f'{name}_std'] = plane.std()
            feats[f'{name}_median'] = np.median(plane)
            feats[f'{name}_min'] = plane.min()
            feats[f'{name}_max'] = plane.max()
            feats[f'{name}_q25'] = np.percentile(plane, 25)
            feats[f'{name}_q75'] = np.percentile(plane, 75)

        # Channel ratios, first on the means and then on the medians
        channel_pairs = (('r', 'g'), ('r', 'b'), ('g', 'b'))
        for stat, suffix in (('mean', ''), ('median', '_median')):
            for num, den in channel_pairs:
                feats[f'{num}{den}_ratio{suffix}'] = (
                    feats[f'{num}_{stat}'] / (feats[f'{den}_{stat}'] + eps)
                )

        # Share of each channel in the summed channel means
        rgb_sum = feats['r_mean'] + feats['g_mean'] + feats['b_mean'] + eps
        for name in ('r', 'g', 'b'):
            feats[f'{name}_dominance'] = feats[f'{name}_mean'] / rgb_sum

        # HSV mean and spread per channel
        for position, name in enumerate(('hue', 'sat', 'val')):
            plane = hsv[:, :, position]
            feats[f'{name}_mean'] = plane.mean()
            feats[f'{name}_std'] = plane.std()

        # LAB channel means; the two indicators are copies of the a/b means
        feats['l_mean'] = lab[:, :, 0].mean()
        feats['a_mean'] = lab[:, :, 1].mean()
        feats['b_mean_lab'] = lab[:, :, 2].mean()
        feats['warmth_indicator'] = feats['a_mean']
        feats['yellow_blue_indicator'] = feats['b_mean_lab']

        # Grayscale brightness and contrast
        feats['brightness'] = gray.mean()
        feats['contrast'] = gray.std()

        # Colour cast: distance of each mean ratio from neutral (1.0)
        for num, den in channel_pairs:
            feats[f'color_cast_{num}{den}'] = np.abs(feats[f'{num}{den}_ratio'] - 1.0)

        return feats

    except Exception as e:
        # Best effort: a bad image yields no features instead of aborting the run.
        print(f"Error processing {image_path}: {e}")
        return {}


def extract_image_features(image_path):
    """Backward-compatible alias for extract_advanced_image_features."""
    return extract_advanced_image_features(image_path)

## 4. Data Preparation and Neural Network Classes

In [None]:
def prepare_data(df, feature_cols=None, train_medians=None, is_training=True):
    """Build the model input matrix from a raw slider/metadata frame.

    Runs ``engineer_features``, one-hot encodes ``camera_model`` and
    ``camera_group`` when present, selects the feature columns and fills
    missing values with the training medians.

    Args:
        df: Raw input DataFrame.
        feature_cols: Feature column names learned on the training frame.
            Required when ``is_training`` is False; ignored otherwise.
        train_medians: Per-feature medians learned on the training frame.
            Required when ``is_training`` is False; ignored otherwise.
        is_training: When True, ``feature_cols`` and ``train_medians`` are
            derived from ``df`` itself.

    Returns:
        Tuple ``(features, feature_cols, train_medians)`` where ``features``
        has exactly the columns in ``feature_cols``, in that order.

    Raises:
        ValueError: If ``is_training`` is False and ``feature_cols`` or
            ``train_medians`` is not supplied.
    """
    df = engineer_features(df)

    # Handle camera features
    for source_col, prefix in (('camera_model', 'camera'), ('camera_group', 'group')):
        if source_col in df.columns:
            dummies = pd.get_dummies(df[source_col], prefix=prefix)
            df = pd.concat([df, dummies], axis=1)

    # Define features
    exclude_cols = ['id_global', 'Temperature', 'Tint', 'copyCreationTime',
                    'captureTime', 'touchTime', 'camera_model', 'camera_group']

    if is_training:
        feature_cols = [col for col in df.columns if col not in exclude_cols]
        train_medians = df[feature_cols].median()
    else:
        if feature_cols is None or train_medians is None:
            raise ValueError(
                "feature_cols and train_medians are required when is_training=False"
            )
        # The inference frame may lack columns seen in training: a camera
        # category that never occurs here produces no dummy column, and image
        # features may be absent. Recreate them so the selection below cannot
        # raise KeyError. An unseen category is 0 for every row; anything else
        # is NaN and falls back to the training median.
        for col in feature_cols:
            if col not in df.columns:
                is_dummy = str(col).startswith(('camera_', 'group_'))
                df[col] = 0 if is_dummy else float('nan')

    # Handle missing values
    df[feature_cols] = df[feature_cols].fillna(train_medians)

    return df[feature_cols], feature_cols, train_medians


# ``nn`` only exists when the guarded PyTorch import succeeded, so the class
# must not be defined unconditionally or the whole cell fails with NameError.
if PYTORCH_AVAILABLE:
    class NeuralNetRegressor(nn.Module):
        """Feed-forward regressor: [Linear -> BatchNorm1d -> ReLU -> Dropout] per hidden layer, then Linear(…, 1)."""

        def __init__(self, input_dim, hidden_dims=(256, 128, 64), dropout=0.3):
            """Build the layer stack.

            Args:
                input_dim: Number of input features.
                hidden_dims: Width of each hidden layer, in order. A tuple is
                    used as the default so it cannot be mutated across calls.
                dropout: Dropout probability applied after every hidden layer.
            """
            super().__init__()

            layers = []
            prev_dim = input_dim

            for hidden_dim in hidden_dims:
                layers.append(nn.Linear(prev_dim, hidden_dim))
                layers.append(nn.BatchNorm1d(hidden_dim))
                layers.append(nn.ReLU())
                layers.append(nn.Dropout(dropout))
                prev_dim = hidden_dim

            layers.append(nn.Linear(prev_dim, 1))
            self.network = nn.Sequential(*layers)

        def forward(self, x):
            """Return the network output for the input batch ``x``."""
            return self.network(x)
else:
    NeuralNetRegressor = None

## 5. Training and Evaluation Functions

In [None]:
def train_neural_network(X_train, y_train, X_val, y_val, epochs=100, batch_size=64, lr=0.001):
    """Train a NeuralNetRegressor with L1 loss and early stopping.

    Features are standardised with a ``StandardScaler`` fitted on
    ``X_train``. Training stops after 10 consecutive epochs without an
    improvement in validation loss, and the weights from the best
    validation epoch are restored before returning.

    Args:
        X_train: Training features (2-D, ``.shape[1]`` is the feature count).
        y_train: Training targets; must expose ``.values``.
        X_val: Validation features.
        y_val: Validation targets; must expose ``.values``.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size.
        lr: Adam learning rate.

    Returns:
        Tuple ``(model, scaler)``. When PyTorch is unavailable the tuple is
        ``(None, None)`` so callers that unpack two values keep working.
    """
    if not PYTORCH_AVAILABLE:
        return None, None

    early_stop_patience = 10
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Standardize features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    # Convert to tensors
    X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
    y_train_tensor = torch.FloatTensor(y_train.values).reshape(-1, 1).to(device)
    X_val_tensor = torch.FloatTensor(X_val_scaled).to(device)
    y_val_tensor = torch.FloatTensor(y_val.values).reshape(-1, 1).to(device)

    # Create data loaders
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    # BatchNorm1d cannot compute batch statistics from a single sample in
    # train mode, so drop a trailing 1-sample batch (only when at least one
    # full batch remains).
    n_samples = len(train_dataset)
    drop_last = n_samples > batch_size and n_samples % batch_size == 1
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              drop_last=drop_last)

    # Initialize model
    model = NeuralNetRegressor(X_train.shape[1]).to(device)
    criterion = nn.L1Loss()  # MAE loss
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, factor=0.5)

    # Training loop
    best_val_loss = float('inf')
    best_state = None
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val_tensor)
            val_loss = criterion(val_outputs, y_val_tensor).item()

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # Snapshot the weights; state_dict() returns live references.
            best_state = {key: value.detach().clone()
                          for key, value in model.state_dict().items()}
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= early_stop_patience:
                break

    # Without this the caller would get the weights of the last (possibly
    # worse) epoch rather than those of the best validation epoch.
    if best_state is not None:
        model.load_state_dict(best_state)
    model.eval()

    return model, scaler

In [None]:
def cross_validate_models(X, y_temp, y_tint, n_splits=5):
    """Run shuffled K-Fold cross-validation with LightGBM for both targets.

    For every fold one LightGBM regressor is fitted per target on the
    training part and scored with mean absolute error on the held-out part.
    Per-fold scores and the mean ± standard deviation are printed.

    Args:
        X: Feature DataFrame (indexed positionally via ``.iloc``).
        y_temp: Temperature targets, aligned with ``X``.
        y_tint: Tint targets, aligned with ``X``.
        n_splits: Number of folds.

    Returns:
        Tuple ``(temp_scores, tint_scores)`` of per-fold MAE lists.
    """
    print(f"\n  Running {n_splits}-Fold Cross-Validation...")

    booster_kwargs = dict(n_estimators=500, learning_rate=0.05, max_depth=8,
                          num_leaves=64, random_state=42, verbose=-1)
    splitter = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    temp_scores, tint_scores = [], []

    for fold, (fit_idx, holdout_idx) in enumerate(splitter.split(X), start=1):
        X_fit = X.iloc[fit_idx]
        X_holdout = X.iloc[holdout_idx]

        # Same model configuration for both targets; each gets a fresh estimator.
        fold_maes = []
        for target in (y_temp, y_tint):
            booster = LGBMRegressor(**booster_kwargs)
            booster.fit(X_fit, target.iloc[fit_idx])
            holdout_pred = booster.predict(X_holdout)
            fold_maes.append(mean_absolute_error(target.iloc[holdout_idx], holdout_pred))

        temp_mae, tint_mae = fold_maes
        temp_scores.append(temp_mae)
        tint_scores.append(tint_mae)

        print(f"    Fold {fold}: Temp MAE={temp_mae:.2f}, Tint MAE={tint_mae:.2f}")

    print(f"  CV Results:")
    print(f"    Temperature: {np.mean(temp_scores):.2f} ± {np.std(temp_scores):.2f}")
    print(f"    Tint: {np.mean(tint_scores):.2f} ± {np.std(tint_scores):.2f}")

    return temp_scores, tint_scores

In [None]:
def tune_hyperparameters(X_train, y_train, X_val, y_val, target_name='Temperature',
                         n_trials=50, seed=42):
    """Search LightGBM hyperparameters with Optuna, minimising validation MAE.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_val: Validation features used to score each trial.
        y_val: Validation targets used to score each trial.
        target_name: Label used only in the progress messages.
        n_trials: Number of Optuna trials to run.
        seed: Seed for the Optuna sampler so repeated runs explore the same
            trials.

    Returns:
        Dict of keyword arguments for ``LGBMRegressor``: the best sampled
        parameters plus ``subsample_freq``. An empty dict is returned when
        Optuna is not available.
    """
    if not OPTUNA_AVAILABLE:
        print("  Optuna not available, using default parameters")
        return {}

    print(f"  Tuning hyperparameters for {target_name}...")

    # LightGBM ignores ``subsample`` unless bagging is switched on with a
    # non-zero ``subsample_freq``; without it that search dimension is a no-op.
    bagging_params = {'subsample_freq': 1}

    def objective(trial):
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 500, 2000),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
            'max_depth': trial.suggest_int('max_depth', 6, 12),
            'num_leaves': trial.suggest_int('num_leaves', 32, 128),
            'min_child_samples': trial.suggest_int('min_child_samples', 10, 50),
            'subsample': trial.suggest_float('subsample', 0.6, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
            'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 1.0),
            'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 1.0),
            'random_state': 42,
            'verbose': -1,
            **bagging_params,
        }

        model = LGBMRegressor(**params)
        model.fit(X_train, y_train)
        pred = model.predict(X_val)
        return mean_absolute_error(y_val, pred)

    # A seeded sampler keeps the search reproducible, like the models it tunes.
    sampler = optuna.samplers.TPESampler(seed=seed)
    study = optuna.create_study(direction='minimize', sampler=sampler)
    study.optimize(objective, n_trials=n_trials, show_progress_bar=False)

    print(f"    Best MAE: {study.best_value:.2f}")
    print(f"    Best params: {study.best_params}")

    # Return the bagging switch too, so a model built from this dict behaves
    # like the one that achieved the best score.
    return {**study.best_params, **bagging_params}

## 6. Configuration and Setup

Set your configuration options here:

In [None]:
# Run-time switches for the optional stages of the pipeline.
ENABLE_TUNING = False  # Set to True to enable hyperparameter tuning
ENABLE_CV = False      # Set to True to enable cross-validation
ENABLE_NEURAL = False  # Set to True to enable neural network models

_banner = "=" * 60
print(_banner)
print("White Balance Prediction Solution - ENHANCED")
print(_banner)

# Each optional component is shown with a tick when it will actually be used:
# the library must be importable and, where applicable, the switch enabled.
_capabilities = (
    ("Advanced image features", OPENCV_AVAILABLE),
    ("XGBoost", XGBOOST_AVAILABLE),
    ("CatBoost", CATBOOST_AVAILABLE),
    ("Neural Networks", PYTORCH_AVAILABLE and ENABLE_NEURAL),
    ("Hyperparameter Tuning", OPTUNA_AVAILABLE and ENABLE_TUNING),
    ("Cross-Validation", ENABLE_CV),
)

print(f"\nConfiguration:")
for _label, _active in _capabilities:
    print(f"  - {_label}: {'✓' if _active else '✗'}")

## 7. Load and Prepare Data

In [None]:
# Locations of the training/validation CSVs and image folders.
train_csv = 'dataset/Train/sliders.csv'
val_csv = 'dataset/Validation/sliders_input.csv'
train_images = 'dataset/Train/images'
val_images = 'dataset/Validation/images'

_missing_paths = [p for p in (train_csv, val_csv, train_images, val_images)
                  if not os.path.exists(p)]
if _missing_paths:
    print("Error: Dataset files not found!")
    print("Expected structure:")
    print("  dataset/Train/sliders.csv")
    print("  dataset/Train/images/")
    print("  dataset/Validation/sliders_input.csv")
    print("  dataset/Validation/images/")
    # Stop here: continuing would only fail later inside read_csv with a less
    # helpful message, right after reporting the problem.
    raise FileNotFoundError(f"Missing dataset paths: {', '.join(_missing_paths)}")

print("Dataset files found successfully!")

# Load data
print(f"\n[1/7] Loading data...")
train_df = pd.read_csv(train_csv)
val_df = pd.read_csv(val_csv)
print(f"  Training samples: {len(train_df)}")
print(f"  Validation samples: {len(val_df)}")

# Display first few rows
print("\nTraining data preview:")
print(train_df.head())

## 8. Extract Image Features

In [None]:
def _collect_image_features(frame, image_dir, report_every):
    """Return a DataFrame with one row of image features per row of ``frame``.

    The image for each row is ``<image_dir>/<id_global>.tiff``. Ids are read
    straight from the ``id_global`` column: ``iterrows`` would upcast a
    mixed-dtype row and could turn an integer id into ``123.0``, producing a
    file name that does not exist. Progress is printed every
    ``report_every`` images, counted by position rather than index label.
    """
    image_dir = Path(image_dir)
    total = len(frame)
    feature_rows = []
    for position, image_id in enumerate(frame['id_global'], start=1):
        feature_rows.append(extract_image_features(image_dir / f"{image_id}.tiff"))
        if position % report_every == 0:
            print(f"    {position}/{total} images processed")
    return pd.DataFrame(feature_rows)


# Extract image features
if OPENCV_AVAILABLE:
    print(f"\n[2/7] Extracting training image features...")
    train_img_df = _collect_image_features(train_df, train_images, report_every=500)
    train_df = pd.concat([train_df.reset_index(drop=True), train_img_df], axis=1)

    print(f"  Extracting validation image features...")
    val_img_df = _collect_image_features(val_df, val_images, report_every=100)
    val_df = pd.concat([val_df.reset_index(drop=True), val_img_df], axis=1)

    print(f"  Image features extracted: {len(train_img_df.columns)} features")
else:
    print(f"\n[2/7] Skipping image features (OpenCV not available)...")

## 9. Feature Engineering and Preparation

In [None]:
# Build the model matrices: the training frame defines the feature list and
# the medians, which are then reused unchanged for the validation frame.
print(f"\n[3/7] Preparing features...")
X_train, feature_cols, train_medians = prepare_data(train_df, is_training=True)
y_temp_train, y_tint_train = train_df['Temperature'], train_df['Tint']

X_val, _, _ = prepare_data(
    val_df,
    feature_cols=feature_cols,
    train_medians=train_medians,
    is_training=False,
)

print(f"  Feature count: {len(feature_cols)}")
for _label, _matrix in (("Training", X_train), ("Validation", X_val)):
    print(f"  {_label} shape: {_matrix.shape}")

# Summary of the two regression targets
print(f"\nTarget variable statistics:")
for _label, _target in (("Temperature", y_temp_train), ("Tint", y_tint_train)):
    print(f"{_label} - Mean: {_target.mean():.1f}, Std: {_target.std():.1f}")

## 10. Cross-Validation (Optional)

In [None]:
# Optional stage: K-Fold evaluation on the full training matrix.
if not ENABLE_CV:
    print(f"\n[4/7] Skipping Cross-Validation...")
else:
    print(f"\n[4/7] Cross-Validation...")
    cross_validate_models(X_train, y_temp_train, y_tint_train, n_splits=5)

## 11. Model Training and Evaluation

In [None]:
# Hold out 15% of the training rows; both targets are split together so they
# stay aligned with the same feature rows.
print(f"\n[5/7] Creating validation split...")
_split_parts = train_test_split(
    X_train, y_temp_train, y_tint_train, test_size=0.15, random_state=42
)
X_tr, X_te, y_temp_tr, y_temp_te, y_tint_tr, y_tint_te = _split_parts

# Optional stage: Optuna search, one study per target. Empty dicts mean
# "no tuned parameters".
best_temp_params, best_tint_params = {}, {}
if OPTUNA_AVAILABLE and ENABLE_TUNING:
    print(f"\n  Hyperparameter Tuning...")
    best_temp_params = tune_hyperparameters(X_tr, y_temp_tr, X_te, y_temp_te, 'Temperature')
    best_tint_params = tune_hyperparameters(X_tr, y_tint_tr, X_te, y_tint_te, 'Tint')

print(f"\nValidation split created:")
for _label, _part in (("Training", X_tr), ("Testing", X_te)):
    print(f"  {_label}: {_part.shape[0]} samples")

In [None]:
# Train models
print(f"\n[6/7] Training models...")

# Every optional model starts as None; later cells test these names to decide
# which models take part in the ensemble.
temp_xgb = None
tint_xgb = None
temp_cat = None
tint_cat = None
temp_nn = None
tint_nn = None
temp_scaler = None
tint_scaler = None

# Temperature models
print("  Training Temperature models...")
_temp_lgbm_params = {
    'n_estimators': 1000, 'learning_rate': 0.05, 'max_depth': 8, 'num_leaves': 64,
    'min_child_samples': 20, 'subsample': 0.8, 'colsample_bytree': 0.8,
    'reg_alpha': 0.1, 'reg_lambda': 0.1, 'random_state': 42, 'verbose': -1,
}
# Apply the tuned values when tuning ran; previously the search result was
# computed and then ignored. An empty dict leaves the defaults untouched.
_temp_lgbm_params.update(best_temp_params)
temp_lgbm = LGBMRegressor(**_temp_lgbm_params)
temp_lgbm.fit(X_tr, y_temp_tr, eval_set=[(X_te, y_temp_te)], eval_metric='mae')

if XGBOOST_AVAILABLE:
    print("    Training XGBoost for Temperature...")
    temp_xgb = XGBRegressor(
        n_estimators=1000, learning_rate=0.05, max_depth=8, min_child_weight=20,
        subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1,
        random_state=42, verbosity=0
    )
    temp_xgb.fit(X_tr, y_temp_tr, eval_set=[(X_te, y_temp_te)], verbose=False)

if CATBOOST_AVAILABLE:
    print("    Training CatBoost for Temperature...")
    temp_cat = CatBoostRegressor(
        iterations=1000, learning_rate=0.05, depth=8, l2_leaf_reg=3,
        random_state=42, verbose=0
    )
    temp_cat.fit(X_tr, y_temp_tr, eval_set=(X_te, y_temp_te), verbose=False)

if ENABLE_NEURAL and PYTORCH_AVAILABLE:
    print("    Training Neural Network for Temperature...")
    temp_nn, temp_scaler = train_neural_network(X_tr, y_temp_tr, X_te, y_temp_te)

print("  Temperature models trained successfully!")

In [None]:
# Tint models
print("  Training Tint models...")
_tint_lgbm_params = {
    'n_estimators': 1000, 'learning_rate': 0.05, 'max_depth': 8, 'num_leaves': 64,
    'min_child_samples': 20, 'subsample': 0.8, 'colsample_bytree': 0.8,
    'reg_alpha': 0.1, 'reg_lambda': 0.1, 'random_state': 42, 'verbose': -1,
}
# Apply the tuned values when tuning ran; previously the search result was
# computed and then ignored. An empty dict leaves the defaults untouched.
_tint_lgbm_params.update(best_tint_params)
tint_lgbm = LGBMRegressor(**_tint_lgbm_params)
tint_lgbm.fit(X_tr, y_tint_tr, eval_set=[(X_te, y_tint_te)], eval_metric='mae')

if XGBOOST_AVAILABLE:
    print("    Training XGBoost for Tint...")
    tint_xgb = XGBRegressor(
        n_estimators=1000, learning_rate=0.05, max_depth=8, min_child_weight=20,
        subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1,
        random_state=42, verbosity=0
    )
    tint_xgb.fit(X_tr, y_tint_tr, eval_set=[(X_te, y_tint_te)], verbose=False)

if CATBOOST_AVAILABLE:
    print("    Training CatBoost for Tint...")
    tint_cat = CatBoostRegressor(
        iterations=1000, learning_rate=0.05, depth=8, l2_leaf_reg=3,
        random_state=42, verbose=0
    )
    tint_cat.fit(X_tr, y_tint_tr, eval_set=(X_te, y_tint_te), verbose=False)

if ENABLE_NEURAL and PYTORCH_AVAILABLE:
    print("    Training Neural Network for Tint...")
    tint_nn, tint_scaler = train_neural_network(X_tr, y_tint_tr, X_te, y_tint_te)

print("  Tint models trained successfully!")

## 12. Model Ensemble and Evaluation

In [None]:
# Evaluate a weighted average of every trained model on the held-out split.
# LightGBM is always present; each optional model adds its own base weight.
print("\n  Evaluating ensemble...")
temp_preds = [temp_lgbm.predict(X_te)]
tint_preds = [tint_lgbm.predict(X_te)]
weights = [0.4]

if temp_xgb is not None:
    temp_preds.append(temp_xgb.predict(X_te))
    tint_preds.append(tint_xgb.predict(X_te))
    weights.append(0.3)

if temp_cat is not None:
    temp_preds.append(temp_cat.predict(X_te))
    tint_preds.append(tint_cat.predict(X_te))
    weights.append(0.2)

if temp_nn is not None:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Each network must see features scaled by the scaler it was trained
    # with; the tint network previously reused the temperature scaler.
    X_te_scaled = temp_scaler.transform(X_te)
    X_te_tensor = torch.FloatTensor(X_te_scaled).to(device)
    X_te_tint_tensor = torch.FloatTensor(tint_scaler.transform(X_te)).to(device)
    temp_nn.eval()
    tint_nn.eval()
    with torch.no_grad():
        temp_preds.append(temp_nn(X_te_tensor).cpu().numpy().flatten())
        tint_preds.append(tint_nn(X_te_tensor if tint_scaler is temp_scaler
                                  else X_te_tint_tensor).cpu().numpy().flatten())
    weights.append(0.1)

# Normalize weights so they sum to 1 whatever subset of models is available
weights = np.array(weights) / sum(weights)

temp_pred = sum(w * p for w, p in zip(weights, temp_preds))
tint_pred = sum(w * p for w, p in zip(weights, tint_preds))

temp_mae = mean_absolute_error(y_temp_te, temp_pred)
tint_mae = mean_absolute_error(y_tint_te, tint_pred)

print(f"    Temperature MAE: {temp_mae:.2f}")
print(f"    Tint MAE: {tint_mae:.2f}")
print(f"    Ensemble weights: {weights}")
print(f"    Models used: {len(weights)}")

## 13. Final Training and Predictions

In [None]:
# Refit every model that was trained above on the complete training matrix.
print(f"\n[7/7] Retraining on full data...")
_full_targets = (y_temp_train, y_tint_train)

for _estimator, _target in zip((temp_lgbm, tint_lgbm), _full_targets):
    _estimator.fit(X_train, _target)

if temp_xgb is not None:
    for _estimator, _target in zip((temp_xgb, tint_xgb), _full_targets):
        _estimator.fit(X_train, _target)

if temp_cat is not None:
    for _estimator, _target in zip((temp_cat, tint_cat), _full_targets):
        _estimator.fit(X_train, _target, verbose=False)

if temp_nn is not None:
    # The networks are rebuilt from scratch; the earlier hold-out split is
    # reused as the early-stopping set.
    temp_nn, temp_scaler = train_neural_network(
        X_train, y_temp_train, X_te, y_temp_te, epochs=50
    )
    tint_nn, tint_scaler = train_neural_network(
        X_train, y_tint_train, X_te, y_tint_te, epochs=50
    )

print("  Models retrained on full dataset!")

In [None]:
# Predict the validation set with every retrained model and combine the
# predictions using the ensemble weights computed during evaluation.
print(f"\nGenerating final predictions...")

temp_val_preds = [temp_lgbm.predict(X_val)]
tint_val_preds = [tint_lgbm.predict(X_val)]

if temp_xgb is not None:
    temp_val_preds.append(temp_xgb.predict(X_val))
    tint_val_preds.append(tint_xgb.predict(X_val))

if temp_cat is not None:
    temp_val_preds.append(temp_cat.predict(X_val))
    tint_val_preds.append(tint_cat.predict(X_val))

if temp_nn is not None:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Each network must see features scaled by the scaler it was trained
    # with; the tint network previously reused the temperature scaler.
    X_val_scaled = temp_scaler.transform(X_val)
    X_val_tensor = torch.FloatTensor(X_val_scaled).to(device)
    X_val_tint_tensor = torch.FloatTensor(tint_scaler.transform(X_val)).to(device)
    temp_nn.eval()
    tint_nn.eval()
    with torch.no_grad():
        temp_val_preds.append(temp_nn(X_val_tensor).cpu().numpy().flatten())
        tint_val_preds.append(tint_nn(X_val_tint_tensor).cpu().numpy().flatten())

temp_predictions = sum(w * p for w, p in zip(weights, temp_val_preds))
tint_predictions = sum(w * p for w, p in zip(weights, tint_val_preds))

# Round to integers, then clip to the valid slider ranges
temp_predictions = np.clip(np.round(temp_predictions).astype(int), 2000, 50000)
tint_predictions = np.clip(np.round(tint_predictions).astype(int), -150, 150)

print(f"  Predictions generated for {len(temp_predictions)} samples")

## 14. Create and Save Submission

In [None]:
# Create submission: one row per validation image with the predicted sliders.
submission = pd.DataFrame({
    'id_global': val_df['id_global'],
    'Temperature': temp_predictions,
    'Tint': tint_predictions
})

submission.to_csv('predictions.csv', index=False)

print("\n" + "=" * 60)
print("PREDICTIONS SAVED: predictions.csv")
print("=" * 60)

print(f"\nEnhancements Applied:")
if ENABLE_CV:
    print("  ✓ K-Fold Cross-Validation")
# Tuning only runs when Optuna is importable, so report it under the same
# condition the configuration summary and the tuning step use.
if ENABLE_TUNING and OPTUNA_AVAILABLE:
    print("  ✓ Hyperparameter Tuning (Optuna)")
if CATBOOST_AVAILABLE:
    print("  ✓ CatBoost Model")
if ENABLE_NEURAL and PYTORCH_AVAILABLE:
    print("  ✓ Neural Network Model")
if OPENCV_AVAILABLE:
    # Describes what the feature extractor computes; it has no texture, edge
    # or FFT features.
    print("  ✓ Advanced Image Features (Color statistics, RGB ratios, HSV/LAB)")

print(f"\nFirst 10 predictions:")
print(submission.head(10))

print(f"\nPrediction statistics:")
print(submission[['Temperature', 'Tint']].describe())

print(f"\n✓ Done! Submit predictions.csv")
print(f"\nTip: Set ENABLE_TUNING=True, ENABLE_CV=True, ENABLE_NEURAL=True for maximum performance!")