## Model

In [1]:
import torch
import torch.nn as nn
from transformers import SwinModel
from torchvision.models import convnext_base, ConvNeXt_Base_Weights
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from transformers import AutoImageProcessor
from PIL import Image
import numpy as np
import os
import pandas as pd
from sklearn.cluster import DBSCAN
import torch.nn.functional as F

2025-05-05 21:36:08.204973: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746480968.398443      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746480968.452211      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
class GeoLocationModel(nn.Module):
    def __init__(self, base_model_type='swin', region_num=15, embed_dim=64):
        super().__init__()
        self.region_embed = nn.Embedding(region_num, embed_dim)
        
        if base_model_type == 'swin':
            # Swin Transformer from Hugging Face
            self.image_encoder = SwinModel.from_pretrained(
                "microsoft/swin-base-patch4-window7-224"
            )
            img_feat_dim = self.image_encoder.config.hidden_size
        elif base_model_type == 'convnext':
            # ConvNeXt from TorchVision
            self.convnext = convnext_base(weights=ConvNeXt_Base_Weights.IMAGENET1K_V1)
            self.convnext.classifier = nn.Identity()
            # Create classifier processor to handle norm and pooling
            original_classifier = list(convnext_base().classifier.children())
            self.classifier_processor = nn.Sequential(*original_classifier[:-1])
            img_feat_dim = 1024  # For convnext-base
        else:
            raise ValueError("Invalid base_model_type. Choose 'swin' or 'convnext'")
        
        # Combined feature dimension
        combined_dim = img_feat_dim + embed_dim
        
        # Regression heads
        self.latitude_head = nn.Sequential(
            nn.Linear(combined_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 1),
            nn.Sigmoid()
        )
        self.longitude_head = nn.Sequential(
            nn.Linear(combined_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 1),
            nn.Sigmoid()
        )

    def forward(self, image, Region_ID):
        # Image feature extraction
        if hasattr(self, 'image_encoder'):  # Swin branch
            swin_output = self.image_encoder(image)
            img_features = swin_output.last_hidden_state.mean(dim=1)
        else:  # ConvNeXt branch
            conv_features = self.convnext(image)
            img_features = self.classifier_processor(conv_features)
        
        # Region embedding
        region_features = self.region_embed(Region_ID)
        
        # Combine features
        combined = torch.cat([img_features, region_features], dim=1)
        
        # Predict normalized coordinates
        lat = self.latitude_head(combined)
        lon = self.longitude_head(combined)
        return torch.cat([lat, lon], dim=1)

## Dataset

In [3]:
class GeoRegionDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None, normalize_coords=True):
        """
        Args:
            dataframe: DataFrame with columns 'filename', 'latitude', 'longitude', 'Region_ID'
            img_dir: Path to image directory
            transform: Optional torchvision transforms
            normalize_coords: Whether to normalize coordinates to [0,1] range
        """
        self.img_paths = [os.path.join(img_dir, fname) for fname in dataframe['filename']]
        self.Region_IDs = dataframe['Region_ID'].values - 1
        
        # Convert coordinates to float32 and optionally normalize
        self.coordinates = dataframe[['latitude', 'longitude']].astype(np.float32).values
        self.original_coords = self.coordinates.copy()  # Store unnormalized coordinates
        if normalize_coords:
            self.coordinates = self.coordinates / 999999.0  # Normalize 6-digit integers to [0,1]
        
        self.transform = transform
        self.normalize_coords = normalize_coords

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        # Load image
        image = Image.open(self.img_paths[idx]).convert("RGB")
        
        # Apply transforms
        if self.transform:
            image = self.transform(image)
        
        # Get region ID (convert to tensor)
        Region_ID = torch.tensor(self.Region_IDs[idx], dtype=torch.long)
        
        # Get coordinates (already normalized if specified)
        coords = torch.tensor(self.coordinates[idx], dtype=torch.float32)
        original_coords = torch.tensor(self.original_coords[idx], dtype=torch.float32)
        
        return {
            'image': image,
            'Region_ID': Region_ID,
            'coordinates': coords,
            'original_coords': original_coords  # For denormalized metrics
        }

    @staticmethod
    def denoise_dataframe(df, coord_cols=['longitude', 'latitude'], mad_thresh=3.0, dbscan_eps=50, dbscan_min_samples=10):
        """
        Helper method to clean the input dataframe using MAD and DBSCAN
        Returns cleaned dataframe
        """
        # MAD filtering
        cleaned_df = df.copy()
        for col in coord_cols:
            med = cleaned_df[col].median()
            mad = np.median(np.abs(cleaned_df[col] - med))
            if mad > 0:  # Only filter if MAD is non-zero
                cleaned_df = cleaned_df[np.abs(cleaned_df[col] - med) <= mad_thresh * mad]
        
        # DBSCAN clustering
        if len(coord_cols) >= 2:
            coords = cleaned_df[coord_cols].values
            db = DBSCAN(eps=dbscan_eps, min_samples=dbscan_min_samples, metric='euclidean').fit(coords)
            cleaned_df['dbscan_label'] = db.labels_
            cleaned_df = cleaned_df[cleaned_df['dbscan_label'] != -1]
        
        return cleaned_df.drop(columns=['dbscan_label'], errors='ignore')


In [4]:
class SharedAugmentations:
    """Shared augmentation core for both models"""
    def __init__(self):
        self.transform = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(degrees=15),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        ])
    
    def __call__(self, img):
        return self.transform(img)

def create_model_specific_datasets(base_dataset, model_type):
    """Create model-specific datasets with proper transforms"""
    shared_aug = SharedAugmentations()
    
    if model_type == 'swin':
        processor = AutoImageProcessor.from_pretrained("microsoft/swin-base-patch4-window7-224")
        class SwinDataset(Dataset):
            def __init__(self, base_dataset):
                self.base_dataset = base_dataset
                self.processor = processor
            
            def __len__(self):
                return len(self.base_dataset)
            
            def __getitem__(self, idx):
                item = self.base_dataset[idx]
                img = shared_aug(item['image'])  # Apply shared augmentations
                return {
                    'image': self.processor(img, return_tensors="pt")['pixel_values'][0],
                    'Region_ID': item['Region_ID'],
                    'coordinates': item['coordinates'],
                    'original_coords': item['original_coords']
                }
        return SwinDataset(base_dataset)
    
    elif model_type == 'convnext':
        convnext_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        class ConvNeXtDataset(Dataset):
            def __init__(self, base_dataset):
                self.base_dataset = base_dataset
                self.transform = convnext_transform
            
            def __len__(self):
                return len(self.base_dataset)
            
            def __getitem__(self, idx):
                item = self.base_dataset[idx]
                img = shared_aug(item['image'])  # Apply shared augmentations
                return {
                    'image': self.transform(img),
                    'Region_ID': item['Region_ID'],
                    'coordinates': item['coordinates'],
                    'original_coords': item['original_coords']
                }
        return ConvNeXtDataset(base_dataset)

### val prep

In [5]:
# Training and Validation Setup
def denormalize(coords_tensor):
    """Convert normalized [0,1] coords back to original 6-digit integers"""
    return coords_tensor * 999999.0

def validate(models, val_loaders, device):
    models['swin'].eval()
    models['convnext'].eval()
    
    # Initialize accumulators
    swin_sum_se = 0.0
    convnext_sum_se = 0.0
    count = 0
    
    with torch.no_grad():
        for swin_batch, convnext_batch in zip(val_loaders['swin'], val_loaders['convnext']):
            # Prepare data
            swin_images = swin_batch['image'].to(device)
            convnext_images = convnext_batch['image'].to(device)
            Region_IDs = swin_batch['Region_ID'].to(device)
            true_coords = swin_batch['original_coords'].to(device)
            
            # Get predictions
            swin_pred = denormalize(models['swin'](swin_images, Region_IDs))
            convnext_pred = denormalize(models['convnext'](convnext_images, Region_IDs))

            # Accumulate squared errors
            batch_size = swin_images.size(0)
            swin_sum_se += F.mse_loss(swin_pred, true_coords, reduction='sum').item()
            convnext_sum_se += F.mse_loss(convnext_pred, true_coords, reduction='sum').item()
            count += batch_size
    
    # Calculate MSEs
    swin_mse = swin_sum_se / count
    convnext_mse = convnext_sum_se / count
    
    # Calculate weights (with epsilon to avoid division by zero)
    eps = 1e-8
    swin_weight = 1 / (swin_mse + eps)
    convnext_weight = 1 / (convnext_mse + eps)
    total_weight = swin_weight + convnext_weight
    swin_weight /= total_weight
    convnext_weight /= total_weight
    
    # Estimated ensemble MSE (weighted average of errors)
    ensemble_mse = swin_weight**2 * swin_mse + convnext_weight**2 * convnext_mse
    
    return {
        'swin': swin_mse,
        'convnext': convnext_mse,
        'ensemble': ensemble_mse,
        'weights': {'swin': swin_weight, 'convnext': convnext_weight}
    }

### train prep

In [6]:
from tqdm.notebook import tqdm

# Main training function
def train_models():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print("here")
    # Load and clean data
    train_df = GeoRegionDataset.denoise_dataframe(pd.read_csv(train_csv))
    val_df_noisy = pd.read_csv(val_csv)
    filtered_df = val_df_noisy[
        (val_df_noisy['latitude'] > 200_000) & (val_df_noisy['latitude'] < 230_000) &
        (val_df_noisy['longitude'] > 140_000) & (val_df_noisy['longitude'] < 150_000)
    ]
    val_df = filtered_df
    
    # Create base datasets
    base_train = GeoRegionDataset(train_df, train_img_dir, normalize_coords=True)
    base_val = GeoRegionDataset(val_df, val_img_dir, normalize_coords=True)
    
    # Create model-specific datasets
    train_datasets = {
        'swin': create_model_specific_datasets(base_train, 'swin'),
        'convnext': create_model_specific_datasets(base_train, 'convnext')
    }
    val_datasets = {
        'swin': create_model_specific_datasets(base_val, 'swin'),
        'convnext': create_model_specific_datasets(base_val, 'convnext')
    }
    
    # Create dataloaders
    print("here")
    batch_size = 32
    train_loaders = {
        'swin': DataLoader(train_datasets['swin'], batch_size=batch_size, shuffle=True, num_workers=4),
        'convnext': DataLoader(train_datasets['convnext'], batch_size=batch_size, shuffle=True, num_workers=4)
    }
    val_loaders = {
        'swin': DataLoader(val_datasets['swin'], batch_size=batch_size, shuffle=False),
        'convnext': DataLoader(val_datasets['convnext'], batch_size=batch_size, shuffle=False)
    }
    
    # Initialize models
    models = {
        'swin': GeoLocationModel(base_model_type='swin').to(device),
        'convnext': GeoLocationModel(base_model_type='convnext').to(device)
    }
    optimizers = {
        'swin': torch.optim.AdamW(models['swin'].parameters(), 
                                    lr=1e-4,
                                    weight_decay=0.01),
        'convnext': torch.optim.AdamW(models['convnext'].parameters(), lr=1e-4)
    }
    torch.nn.utils.clip_grad_norm_(models['swin'].parameters(), 1.0)

    criterion = nn.MSELoss()
    
    print("here")
    # Training loop
    num_epochs = 50
    best_ensemble_mse = float('inf')
    for epoch in range(num_epochs):
        # Training phase
        models['swin'].train()
        models['convnext'].train()
        total_batches = len(train_loaders['swin'])
        for (swin_batch, convnext_batch) in tqdm(zip(train_loaders['swin'], train_loaders['convnext']), total=total_batches):
            # Prepare data
            swin_images = swin_batch['image'].to(device)
            convnext_images = convnext_batch['image'].to(device)
            Region_IDs = swin_batch['Region_ID'].to(device)
            target_coords = swin_batch['coordinates'].to(device)
            
            # Train Swin
            optimizers['swin'].zero_grad()
            swin_output = models['swin'](swin_images, Region_IDs)
            swin_loss = criterion(swin_output, target_coords)
            swin_loss.backward()
            optimizers['swin'].step()
            
            # Train ConvNeXt
            optimizers['convnext'].zero_grad()
            convnext_output = models['convnext'](convnext_images, Region_IDs)
            convnext_loss = criterion(convnext_output, target_coords)
            convnext_loss.backward()
            optimizers['convnext'].step()
        
        # Validation phase
        val_metrics = validate(models, val_loaders, device)
        print(f"Epoch {epoch+1}/{num_epochs}:")
        print(f"  Swin Val MSE: {val_metrics['swin']:.2f}")
        print(f"  ConvNeXt Val MSE: {val_metrics['convnext']:.2f}")
        print(f"  Ensemble Val MSE: {val_metrics['ensemble']:.2f}\n")

        if val_metrics['ensemble'] < best_ensemble_mse:
            best_ensemble_mse = val_metrics['ensemble']
            torch.save({
                'epoch': epoch,
                'swin_state_dict': models['swin'].state_dict(),
                'convnext_state_dict': models['convnext'].state_dict(),
                'swin_optimizer': optimizers['swin'].state_dict(),
                'convnext_optimizer': optimizers['convnext'].state_dict(),
                'best_ensemble_mse': best_ensemble_mse
            }, '/kaggle/working/best_ensemble_model.pth')

    
    return models

## Running and Evaluating

In [7]:
train_csv = "/kaggle/input/smai-25-sec-a-project-phase-2-lat-long-prediction/labels_train.csv"
val_csv = "/kaggle/input/smai-25-sec-a-project-phase-2-lat-long-prediction/labels_val.csv"
train_img_dir = "/kaggle/input/smai-25-sec-a-project-phase-2-lat-long-prediction/images_train/images_train"
val_img_dir = "/kaggle/input/smai-25-sec-a-project-phase-2-lat-long-prediction/images_val/images_val"

# Load and clean data (only train needs cleaning)
train_df = pd.read_csv(train_csv)
val_df = pd.read_csv(val_csv)

# Execute training (this will automatically handle validation each epoch)
trained_models = train_models()

here


preprocessor_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


here


config.json:   0%|          | 0.00/71.8k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/352M [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/convnext_base-6075fbad.pth" to /root/.cache/torch/hub/checkpoints/convnext_base-6075fbad.pth


  0%|          | 0.00/338M [00:00<?, ?B/s]

  2%|▏         | 8.00M/338M [00:00<00:04, 83.6MB/s]

  5%|▍         | 16.0M/338M [00:00<00:04, 82.4MB/s]

  7%|▋         | 23.9M/338M [00:00<00:04, 80.2MB/s]

  9%|▉         | 31.8M/338M [00:00<00:03, 80.9MB/s]

 12%|█▏        | 40.8M/338M [00:00<00:03, 85.6MB/s]

 15%|█▍        | 49.1M/338M [00:00<00:03, 86.1MB/s]

 17%|█▋        | 58.2M/338M [00:00<00:03, 88.8MB/s]

 20%|█▉        | 67.1M/338M [00:00<00:03, 89.8MB/s]

 22%|██▏       | 76.0M/338M [00:00<00:03, 90.6MB/s]

 25%|██▌       | 85.0M/338M [00:01<00:02, 91.5MB/s]

 28%|██▊       | 94.0M/338M [00:01<00:02, 92.3MB/s]

 30%|███       | 103M/338M [00:01<00:02, 91.7MB/s] 

 33%|███▎      | 112M/338M [00:01<00:02, 91.6MB/s]

 36%|███▌      | 121M/338M [00:01<00:02, 92.2MB/s]

 38%|███▊      | 130M/338M [00:01<00:02, 91.7MB/s]

 41%|████      | 138M/338M [00:01<00:02, 91.4MB/s]

 44%|████▎     | 147M/338M [00:01<00:02, 91.8MB/s]

 46%|████▌     | 156M/338M [00:01<00:02, 92.2MB/s]

 49%|████▉     | 165M/338M [00:01<00:01, 91.6MB/s]

 51%|█████▏    | 174M/338M [00:02<00:01, 91.6MB/s]

 54%|█████▍    | 183M/338M [00:02<00:01, 91.5MB/s]

 57%|█████▋    | 192M/338M [00:02<00:01, 92.0MB/s]

 59%|█████▉    | 200M/338M [00:02<00:01, 92.2MB/s]

 62%|██████▏   | 210M/338M [00:02<00:01, 93.5MB/s]

 65%|██████▍   | 219M/338M [00:02<00:01, 93.0MB/s]

 67%|██████▋   | 228M/338M [00:02<00:01, 93.6MB/s]

 70%|███████   | 237M/338M [00:02<00:01, 94.1MB/s]

 73%|███████▎  | 246M/338M [00:02<00:01, 93.3MB/s]

 75%|███████▌  | 255M/338M [00:02<00:00, 93.1MB/s]

 78%|███████▊  | 264M/338M [00:03<00:00, 92.3MB/s]

 81%|████████  | 273M/338M [00:03<00:00, 90.9MB/s]

 83%|████████▎ | 282M/338M [00:03<00:00, 89.6MB/s]

 86%|████████▌ | 290M/338M [00:03<00:00, 86.8MB/s]

 88%|████████▊ | 298M/338M [00:03<00:00, 80.5MB/s]

 91%|█████████ | 307M/338M [00:03<00:00, 83.9MB/s]

 94%|█████████▎| 316M/338M [00:03<00:00, 86.8MB/s]

 96%|█████████▋| 326M/338M [00:03<00:00, 89.3MB/s]

 99%|█████████▉| 334M/338M [00:03<00:00, 90.3MB/s]

100%|██████████| 338M/338M [00:03<00:00, 90.0MB/s]




here


  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 1/50:
  Swin Val MSE: 21225386.74
  ConvNeXt Val MSE: 40654763.85
  Ensemble Val MSE: 13944909.27



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 2/50:
  Swin Val MSE: 22571408.18
  ConvNeXt Val MSE: 49788247.87
  Ensemble Val MSE: 15530627.51



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 3/50:
  Swin Val MSE: 16016136.18
  ConvNeXt Val MSE: 25891243.58
  Ensemble Val MSE: 9895099.27



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 4/50:
  Swin Val MSE: 15568172.67
  ConvNeXt Val MSE: 27400959.91
  Ensemble Val MSE: 9927658.52



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 5/50:
  Swin Val MSE: 15305174.48
  ConvNeXt Val MSE: 21287225.59
  Ensemble Val MSE: 8903616.63



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 6/50:
  Swin Val MSE: 20685419.14
  ConvNeXt Val MSE: 29460190.41
  Ensemble Val MSE: 12152537.22



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 7/50:
  Swin Val MSE: 15210727.23
  ConvNeXt Val MSE: 26385594.30
  Ensemble Val MSE: 9648547.35



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 8/50:
  Swin Val MSE: 14686436.44
  ConvNeXt Val MSE: 18263535.65
  Ensemble Val MSE: 8140409.19



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 9/50:
  Swin Val MSE: 16107790.50
  ConvNeXt Val MSE: 15836674.06
  Ensemble Val MSE: 7985540.89



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 10/50:
  Swin Val MSE: 16410608.72
  ConvNeXt Val MSE: 17692259.20
  Ensemble Val MSE: 8513675.27



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 11/50:
  Swin Val MSE: 16885967.80
  ConvNeXt Val MSE: 16875959.76
  Ensemble Val MSE: 8440481.15



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 12/50:
  Swin Val MSE: 18170133.31
  ConvNeXt Val MSE: 14206433.69
  Ensemble Val MSE: 7972827.82



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 13/50:
  Swin Val MSE: 16520265.67
  ConvNeXt Val MSE: 14913645.13
  Ensemble Val MSE: 7837948.68



  0%|          | 0/183 [01:41<?, ?it/s]

Exception ignored in: 

<function _MultiProcessingDataLoaderIter.__del__ at 0x7ba8edc61f80>




Traceback (most recent call last):


  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1604, in __del__


    

self._shutdown_workers()




  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1587, in _shutdown_workers


    

if w.is_alive():




 

 

 

 

 

 

 

^

^

^

^

^

^

^

^

^

^

^

^




  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive


    

assert self._parent_pid == os.getpid(), 'can only test a child process'




 

 

 

 

 

 

 

 

 

 

 

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^




AssertionError

: 

can only test a child process




Exception ignored in: 

<function _MultiProcessingDataLoaderIter.__del__ at 0x7ba8edc61f80>




Traceback (most recent call last):


  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1604, in __del__


    

self._shutdown_workers()




  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1587, in _shutdown_workers


    

if w.is_alive():




 

 

 

 

 

 

 

^

^

^

^

^

^

^

^

^

^

^

^




  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive


    

assert self._parent_pid == os.getpid(), 'can only test a child process'




 

 

 

 

 

 

 

 

 

 

 

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^




AssertionError

: 

can only test a child process




Exception ignored in: 

<function _MultiProcessingDataLoaderIter.__del__ at 0x7ba8edc61f80>




Traceback (most recent call last):


  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1604, in __del__


    

self._shutdown_workers()




  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1587, in _shutdown_workers


    

if w.is_alive():




 

 

 

 

 

 

 

^

^

^

^

^

^

^

^

^

^

^

^




  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive


    

assert self._parent_pid == os.getpid(), 'can only test a child process'




 

 

 

 

 

 

 

 

 

 

 

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^




AssertionError

: 

can only test a child process




Exception ignored in: 

<function _MultiProcessingDataLoaderIter.__del__ at 0x7ba8edc61f80>




Traceback (most recent call last):


  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1604, in __del__


    

self._shutdown_workers()




  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1587, in _shutdown_workers


    

if w.is_alive():




 

 

 

 

 

 

 

^

^

^

^

^

^

^

^

^

^

^

^




  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive


    

assert self._parent_pid == os.getpid(), 'can only test a child process'




 

 

 

 

 

 

 

 

 

 

 

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^




AssertionError

: 

can only test a child process




Exception ignored in: 

<function _MultiProcessingDataLoaderIter.__del__ at 0x7ba8edc61f80>




Traceback (most recent call last):


  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1604, in __del__


    

self._shutdown_workers()




  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1587, in _shutdown_workers


    

if w.is_alive():




 

 

 

 

 

 

 

^

^

^

^

^

^

^

^

^

^

^

^




  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive


    

assert self._parent_pid == os.getpid(), 'can only test a child process'




 

 

 

 

 

 

 

 

 

 

 

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^

^




AssertionError

: 

can only test a child process




Epoch 14/50:
  Swin Val MSE: 18200365.68
  ConvNeXt Val MSE: 16061438.98
  Ensemble Val MSE: 8532068.46



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 15/50:
  Swin Val MSE: 17589728.53
  ConvNeXt Val MSE: 15920082.34
  Ensemble Val MSE: 8356654.94



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 16/50:
  Swin Val MSE: 18367388.66
  ConvNeXt Val MSE: 17561256.35
  Ensemble Val MSE: 8977639.45



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 17/50:
  Swin Val MSE: 20045308.66
  ConvNeXt Val MSE: 10899225.43
  Ensemble Val MSE: 7060320.81



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 18/50:
  Swin Val MSE: 18766011.19
  ConvNeXt Val MSE: 13872474.61
  Ensemble Val MSE: 7976197.65



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 19/50:
  Swin Val MSE: 21077593.85
  ConvNeXt Val MSE: 18994770.92
  Ensemble Val MSE: 9991026.71



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 20/50:
  Swin Val MSE: 21210228.45
  ConvNeXt Val MSE: 18741403.73
  Ensemble Val MSE: 9949767.58



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 21/50:
  Swin Val MSE: 21837338.00
  ConvNeXt Val MSE: 12684577.18
  Ensemble Val MSE: 8023813.22



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 22/50:
  Swin Val MSE: 22959509.06
  ConvNeXt Val MSE: 12811667.58
  Ensemble Val MSE: 8223089.80



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 23/50:
  Swin Val MSE: 24607378.70
  ConvNeXt Val MSE: 11352188.92
  Ensemble Val MSE: 7768380.72



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 24/50:
  Swin Val MSE: 27741638.80
  ConvNeXt Val MSE: 12098193.30
  Ensemble Val MSE: 8424325.38



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 25/50:
  Swin Val MSE: 47250278.90
  ConvNeXt Val MSE: 11152946.52
  Ensemble Val MSE: 9023128.94



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 26/50:
  Swin Val MSE: 26247308.14
  ConvNeXt Val MSE: 12222530.35
  Ensemble Val MSE: 8339221.92



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 27/50:
  Swin Val MSE: 26832487.04
  ConvNeXt Val MSE: 11298274.98
  Ensemble Val MSE: 7950557.53



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 28/50:
  Swin Val MSE: 26735146.74
  ConvNeXt Val MSE: 9746139.93
  Ensemble Val MSE: 7142414.78



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch 29/50:
  Swin Val MSE: 27896645.47
  ConvNeXt Val MSE: 10302263.95
  Ensemble Val MSE: 7523738.49



  0%|          | 0/183 [00:00<?, ?it/s]

In [None]:
from scipy.stats import zscore

# Compute z-scores for both latitude and longitude
z_scores = val_df_noisy[['latitude', 'longitude']].apply(zscore)

# Keep only rows where both latitude and longitude z-scores are within 3 standard deviations
filtered_df = val_df_noisy[
    (val_df_noisy['latitude'] > 200_000) & (val_df_noisy['latitude'] < 230_000) &
    (val_df_noisy['longitude'] > 140_000) & (val_df_noisy['longitude'] < 150_000)
]
print(len(filtered_df))
# Plot the filtered data
plt.scatter(filtered_df['latitude'], filtered_df['longitude'], s=4)
plt.show()