# Iceberg vs Ship Classifier

This deep learning model uses satellite radar data to distinguish between ships and icebergs in the ocean. Built with PyTorch, it processes dual-band radar images (HH and HV polarization) to make predictions that help ensure safe maritime navigation.

Dataset: https://www.kaggle.com/competitions/statoil-iceberg-classifier-challenge/data

Hugging Face: https://huggingface.co/spaces/alperugurcan/iceberg-classifier

In [9]:
import torch, torch.nn as nn, pandas as pd, numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import py7zr

class IcebergClassifier(nn.Module):
    """Small CNN that maps a 2-channel 75x75 image to a single probability.

    The network has two stages:

    * ``conv``: three ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)``
      stages with 16, 32 and 64 output channels. Each pooling step halves
      the spatial size (75 -> 37 -> 18 -> 9).
    * ``fc``: ``Linear(64*9*9, 64) -> ReLU -> Dropout(0.5) -> Linear(64, 1)
      -> Sigmoid``, so the output lies in ``[0, 1]``.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels) for each convolutional stage.
        channel_plan = ((2, 16), (16, 32), (32, 64))
        conv_layers = []
        for in_channels, out_channels in channel_plan:
            conv_layers.append(nn.Conv2d(in_channels, out_channels, 3, padding=1))
            conv_layers.append(nn.ReLU())
            conv_layers.append(nn.MaxPool2d(2))
        self.conv = nn.Sequential(*conv_layers)

        # 64 channels of 9x9 feature maps are flattened before the classifier.
        flat_features = 64 * 9 * 9
        self.fc = nn.Sequential(
            nn.Linear(flat_features, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of sigmoid outputs for batch ``x``."""
        feature_maps = self.conv(x)
        batch_size = x.size(0)
        flattened = feature_maps.view(batch_size, -1)
        return self.fc(flattened)

class IcebergDataset(Dataset):
    """Dataset wrapper around a DataFrame of flattened two-band images.

    Each row must provide ``band_1`` and ``band_2`` as sequences of 5625
    values (reshaped here to 75x75). If the frame has an ``is_iceberg``
    column, items are ``(image, label)`` pairs; otherwise only the image
    is returned.
    """

    def __init__(self, df):
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the float32 ``(2, 75, 75)`` image at position ``idx``, plus its label if present."""
        record = self.df.iloc[idx]

        # Stack the two bands along a new leading channel axis.
        channels = [np.array(record[band]).reshape(75, 75)
                    for band in ('band_1', 'band_2')]
        image = torch.tensor(np.stack(channels), dtype=torch.float32)

        if 'is_iceberg' not in self.df.columns:
            return image

        label = torch.tensor([record['is_iceberg']], dtype=torch.float32)
        return image, label

def train_model():
    """Train the classifier and write test-set predictions to ``submission.csv``.

    Steps:

    1. Extract ``train.json.7z`` and ``test.json.7z`` from the Kaggle input
       directory into the current working directory.
    2. Load both JSON files and hold out 20% of the training rows for
       validation (``random_state=42``).
    3. Train for 10 epochs with Adam and binary cross-entropy, saving the
       weights to ``best_iceberg_model.pth`` whenever the mean per-batch
       validation loss improves.
    4. Restore the best checkpoint, predict on the test set and write
       ``submission.csv`` with ``id`` and ``is_iceberg`` columns.
    """
    archive_dir = '/kaggle/input/statoil-iceberg-classifier-challenge'
    checkpoint_path = 'best_iceberg_model.pth'
    splits = ('train', 'test')

    # Open each archive in a context manager so its file handle is closed
    # as soon as extraction finishes.
    for split in splits:
        with py7zr.SevenZipFile(f'{archive_dir}/{split}.json.7z', 'r') as archive:
            archive.extractall()

    # NOTE(review): assumes the archives unpack to data/processed/ and that
    # the working directory is /kaggle/working -- confirm when run elsewhere.
    train_df, test_df = [pd.read_json(f'/kaggle/working/data/processed/{split}.json')
                         for split in splits]
    train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

    # Only the training data is shuffled; validation and test keep row order
    # so predictions line up with ``test_df['id']``.
    train_loader = DataLoader(IcebergDataset(train_df), batch_size=32, shuffle=True)
    val_loader = DataLoader(IcebergDataset(val_df), batch_size=32, shuffle=False)
    test_loader = DataLoader(IcebergDataset(test_df), batch_size=32, shuffle=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = IcebergClassifier().to(device)
    optimizer = torch.optim.Adam(model.parameters())
    criterion = nn.BCELoss()

    best_val_loss = float('inf')
    checkpoint_saved = False
    for _ in range(10):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            loss = criterion(model(images), labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            # Mean of the per-batch losses over the validation loader.
            val_loss = sum(criterion(model(images.to(device)), labels.to(device)).item()
                           for images, labels in val_loader) / len(val_loader)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True

    # Predict with the best-validation weights rather than whatever the last
    # epoch produced. The guard avoids loading a stale file from an earlier
    # run if no checkpoint was written this time (e.g. every loss was NaN).
    if checkpoint_saved:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    model.eval()
    with torch.no_grad():
        # The test frame has no label column, so the loader yields images only.
        predictions = [model(images.to(device)).cpu().numpy().flatten()
                       for images in test_loader]

    submission = pd.DataFrame({'id': test_df['id'],
                               'is_iceberg': np.concatenate(predictions)})
    submission.to_csv('submission.csv', index=False)

# Run the full train-and-predict pipeline only when executed as a script.
if __name__ == "__main__":
    train_model()