In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm


class CombinedGeoModel(nn.Module):
    """Two-branch model that regresses a 2-value output from an image.

    Branch 1 is a frozen TIMM classifier whose per-class scores (softmax
    probabilities or raw logits) are used as auxiliary features. Branch 2 is
    a trainable TIMM backbone with its classifier removed; its feature map is
    average-pooled into an embedding. Both are concatenated and fed to an MLP
    regression head with two outputs.

    Only the module STRUCTURE is built here (``pretrained=False``); weights
    are expected to be loaded afterwards from a state dict.

    Args:
        num_region_classes: Number of output classes of the frozen region
            classifier; also the width of the region feature vector.
        head_hidden_dims: Sizes of the hidden layers of the regression head,
            in order. Each hidden layer is Linear -> BatchNorm1d -> ReLU ->
            Dropout(0.3).
        use_softmax_for_region: If True, region logits are converted to
            probabilities with softmax before concatenation; otherwise the
            raw logits are used.
        effnet_name: TIMM model name used for both branches.
    """

    def __init__(self,
                 num_region_classes=15,
                 head_hidden_dims=(512, 128),
                 use_softmax_for_region=True,
                 effnet_name='efficientnet_b0'):
        super().__init__()
        self.num_region_classes = num_region_classes
        self.use_softmax_for_region = use_softmax_for_region

        # 1. Initialize Frozen Region Model STRUCTURE using TIMM
        print(f"--- Initializing Frozen Region Model STRUCTURE ({effnet_name} using TIMM) ---")
        self.frozen_region_model = timm.create_model(
            effnet_name, pretrained=False, num_classes=num_region_classes
        )
        for param in self.frozen_region_model.parameters():
            param.requires_grad = False
        self.frozen_region_model.eval()
        print("Frozen Region Model (TIMM) structure initialized and frozen.")

        # 2. Initialize Trainable Image Embedding Model STRUCTURE using TIMM
        print(f"\n--- Initializing Trainable Image Embedder STRUCTURE ({effnet_name} using TIMM) ---")
        self.trainable_image_embedder = timm.create_model(
            effnet_name, pretrained=False  # Weights loaded from state_dict
        )
        # Read the classifier input width BEFORE removing the classifier.
        self.embedding_dim = self.trainable_image_embedder.get_classifier().in_features
        # Remove the classifier; the empty pool type also disables TIMM's own
        # global pooling, which is why an explicit pooling layer is added below.
        self.trainable_image_embedder.reset_classifier(0, '')
        print(f"Trainable Image Embedder (TIMM) structure initialized (output dim after pool: {self.embedding_dim}).")

        # Global pooling layer applied to the embedder's feature map
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        print("Added AdaptiveAvgPool2d layer for trainable embedder features.")

        # 3. Define the Regression Head
        print("\n--- Defining Regression Head ---")
        input_head_dim = self.embedding_dim + self.num_region_classes
        layers = []
        current_dim = input_head_dim
        for hidden_dim in head_hidden_dims:
            layers.extend([
                nn.Linear(current_dim, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.3),
            ])
            current_dim = hidden_dim
        layers.append(nn.Linear(current_dim, 2))
        self.regression_head = nn.Sequential(*layers)
        print(f"Regression head structure defined (Input: {input_head_dim}, Output: 2).")

        print("\nCombinedGeoModel Structure Initialization Complete.")

    def train(self, mode=True):
        """Set the training mode, keeping the frozen region model in eval mode.

        ``nn.Module.train`` recursively switches every child module, which
        would re-enable BatchNorm statistic updates and dropout inside the
        frozen branch even though its parameters have ``requires_grad=False``.
        The frozen branch is therefore forced back to eval mode after the
        default behaviour has run.

        Args:
            mode: True for training mode, False for evaluation mode.

        Returns:
            This module.
        """
        super().train(mode)
        self.frozen_region_model.eval()
        return self

    def forward(self, x):
        """Compute the 2-value regression output for a batch of images.

        Args:
            x: Image batch accepted by both TIMM backbones
                (presumably (B, 3, H, W) - confirm against the caller).

        Returns:
            Tensor produced by the regression head, with 2 values per sample.
        """
        # 1. Region features from the frozen branch; no autograd graph is kept.
        with torch.no_grad():
            region_logits = self.frozen_region_model(x)
            if self.use_softmax_for_region:
                region_features = F.softmax(region_logits, dim=1)
            else:
                region_features = region_logits

        # 2. Image embedding: unpooled feature map -> (B, C, 1, 1) -> (B, C)
        image_feature_map = self.trainable_image_embedder(x)
        pooled_features = self.global_pool(image_feature_map)
        image_features = torch.flatten(pooled_features, 1)

        # 3. Concatenate along the feature dimension
        combined_features = torch.cat((image_features, region_features), dim=1)

        # 4. Regression head
        output = self.regression_head(combined_features)
        return output


In [None]:
import torch
import pandas as pd
import os
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms
import joblib
#from your_model_definition_file import CombinedGeoModel  # Adjust import if needed

# === CONFIGURATION ===
# --- Model architecture (passed to CombinedGeoModel below) ---
EFFNET_NAME = 'efficientnet_b0'
NUM_REGION_CLASSES = 15
HEAD_HIDDEN_DIMS = [512, 256, 128]
USE_SOFTMAX_FROM_FROZEN = True

# --- Image preprocessing / batching ---
IMG_SIZE = 256
BATCH_SIZE = 64
NUM_WORKERS = 2

# --- Input locations ---
# The input CSV only needs a 'filename' column.
INPUT_CSV_PATH = "/kaggle/input/latlong-dataset/train_combine.csv"
IMG_DIR_PATH = "/kaggle/input/latlong-dataset/images_train_combine"

# --- Trained artifacts (checkpoint and fitted target scalers) ---
TRAINED_MODEL_PATH = "/kaggle/working/combined_model_checkpoints/best_combined_model_finetuned.pth"
SCALER_LAT_PATH = "/kaggle/working/scaler_lat.joblib"
SCALER_LON_PATH = "/kaggle/working/scaler_lon.joblib"

# --- Output location ---
OUTPUT_CSV_PATH = "predicted_latlon_train.csv"

# --- Compute device: GPU when available, otherwise CPU ---
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# === Dataset (No Lat/Lon) ===
class FilenameOnlyDataset(Dataset):
    """Dataset of unlabeled images listed by filename in a CSV.

    Args:
        csv_file: Path (or file-like object) readable by ``pandas.read_csv``;
            must contain a 'filename' column.
        img_dir: Directory that the filenames are relative to.
        transform: Optional callable applied to the RGB PIL image. When
            omitted, the image is converted with ``transforms.ToTensor()``.
    """

    def __init__(self, csv_file, img_dir, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Load one image and return ``(image_tensor, img_filename)``."""
        # Index the column first: avoids materialising a whole row Series
        # (and any dtype upcasting across columns) for every item.
        img_filename = self.data_frame['filename'].iloc[idx]
        img_path = os.path.join(self.img_dir, img_filename)

        # The context manager closes the underlying file deterministically;
        # convert() returns an independent, fully loaded image.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image_tensor = self.transform(image)
        else:
            image_tensor = transforms.ToTensor()(image)
        return image_tensor, img_filename

def collate_fn(batch):
    """Collate ``(image_tensor, filename)`` samples into one batch.

    Returns:
        A pair ``(stacked_images, filenames)``: the image tensors stacked
        along a new leading dimension, and the filenames as a tuple in the
        same order.
    """
    image_column, name_column = zip(*batch)
    stacked_images = torch.stack(image_column, dim=0)
    return stacked_images, name_column

# === Transformations ===
# Preprocessing pipeline: resize to a fixed square, convert to a tensor,
# then normalise each channel with the fixed mean/std below.
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]
_preprocessing_steps = [
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
]
transform = transforms.Compose(_preprocessing_steps)

# === Load Scalers ===
# Fitted scalers used later to map model outputs back to latitude/longitude.
scaler_lat, scaler_lon = (
    joblib.load(scaler_path)
    for scaler_path in (SCALER_LAT_PATH, SCALER_LON_PATH)
)

# === Load Model ===
# Build the architecture with the same hyper-parameters used for training,
# then restore the fine-tuned weights from the checkpoint.
model = CombinedGeoModel(
    num_region_classes=NUM_REGION_CLASSES,
    head_hidden_dims=HEAD_HIDDEN_DIMS,
    use_softmax_for_region=USE_SOFTMAX_FROM_FROZEN,
    effnet_name=EFFNET_NAME
)
checkpoint = torch.load(TRAINED_MODEL_PATH, map_location=DEVICE)

# strict=False tolerates key mismatches, which would otherwise leave layers
# at their random initial values without any notice. Report them explicitly.
load_result = model.load_state_dict(checkpoint['model_state_dict'], strict=False)
if load_result.missing_keys:
    print(f"WARNING: {len(load_result.missing_keys)} model key(s) not found in checkpoint "
          f"(left at initial values): {load_result.missing_keys}")
if load_result.unexpected_keys:
    print(f"WARNING: {len(load_result.unexpected_keys)} checkpoint key(s) not used by the model: "
          f"{load_result.unexpected_keys}")

model.to(DEVICE)
model.eval()

# === Inference ===
dataset = FilenameOnlyDataset(INPUT_CSV_PATH, IMG_DIR_PATH, transform=transform)
# shuffle=False keeps predictions in the same order as the input CSV.
dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    collate_fn=collate_fn,
)

results = []
with torch.no_grad():
    for images, filenames in dataloader:
        images = images.to(DEVICE)
        preds = model(images)  # (N, 2)
        pred_np = preds.cpu().numpy()
        # Column 0 is the scaled latitude, column 1 the scaled longitude.
        # Each scaler expects a 2D (N, 1) array, hence the list-style column
        # index, which keeps the second dimension.
        lat_pred = scaler_lat.inverse_transform(pred_np[:, [0]]).ravel()
        lon_pred = scaler_lon.inverse_transform(pred_np[:, [1]]).ravel()
        results.extend(zip(filenames, lat_pred, lon_pred))

# === Save to CSV ===
df_out = pd.DataFrame(results, columns=['filename', 'predicted_latitude', 'predicted_longitude'])
df_out.to_csv(OUTPUT_CSV_PATH, index=False)
# The original message contained a mis-decoded check-mark character.
print(f"✅ Predictions saved to {OUTPUT_CSV_PATH}")