In [1]:
import os
import torch
import torch.nn.functional as F
import pytorch_lightning as pl
from torch.utils.data import Dataset, DataLoader
import timm
from pathlib import Path
from torchvision import transforms
from PIL import Image
import zipfile

In [2]:
# KAGGLE_CONTAINER_NAME is used as the marker for "running inside a Kaggle
# kernel"; the helper module is imported only in that case so the notebook
# does not require it elsewhere.
if os.environ.get("KAGGLE_CONTAINER_NAME") is not None:
    import kaggle_timm_pretrained

    kaggle_timm_pretrained.patch()

In [None]:
# Directory holding the competition archives; override with the ROOT_DIR
# environment variable when running outside Kaggle.
ROOT_DIR = Path(os.environ.get("ROOT_DIR", "../input/dogs-vs-cats"))

# Archives are unpacked into the current working directory. On Kaggle this is
# /kaggle/working, so the resulting paths match the previously hard-coded ones,
# while local runs now read from wherever the data was actually extracted.
WORK_DIR = Path.cwd()

for archive_name in ("train.zip", "test1.zip"):
    with zipfile.ZipFile(ROOT_DIR / archive_name, "r") as archive:
        archive.extractall(WORK_DIR)

# Each archive unpacks into a folder named after itself (train/, test1/).
TRAIN_DATA_DIR = WORK_DIR / "train"
TEST_DATA_DIR = WORK_DIR / "test1"

In [3]:
class DogsVsCatsDataset(Dataset):
    """Image dataset over the ``*.jpg`` files of a single directory.

    In training mode each item is ``(image, label)`` where ``label`` is 1 when
    the file name starts with ``"dog"`` and 0 otherwise. In test mode each item
    is just the image, and files are ordered by the integer value of their
    stem (``1.jpg``, ``2.jpg``, ..., ``10.jpg``).

    Args:
        root_dir: Directory containing the images (``str`` or ``Path``).
        transform: Optional callable applied to the RGB ``PIL.Image``.
        train: ``True`` to yield labelled samples, ``False`` for images only.
    """

    def __init__(self, root_dir, transform=None, train=True):
        self._transform = transform
        self._train = train
        img_paths = Path(root_dir).glob("*.jpg")
        if self._train:
            # glob order depends on the filesystem; sorting keeps sample
            # indices (and any index-based split) stable across runs.
            self._img_paths = sorted(img_paths)
        else:
            # Numeric sort so that index i maps to the i-th image id.
            self._img_paths = sorted(img_paths, key=lambda p: int(p.stem))

    def __len__(self):
        """Return the number of images found in ``root_dir``."""
        return len(self._img_paths)

    def __getitem__(self, idx):
        """Load the image at ``idx``; returns ``(img, label)`` or ``img``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_path = self._img_paths[idx]
        # Close the file handle promptly and force three channels so every
        # sample has the same channel count regardless of the stored mode.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")
        if self._transform:
            img = self._transform(img)

        if self._train:
            label = int(img_path.name.startswith("dog"))
            return img, label
        return img

In [None]:
class Network(pl.LightningModule):
    """EfficientNet-B2 cat/dog classifier that owns its train/val/test data.

    The timm backbone is created with two output logits. Labels come from
    ``DogsVsCatsDataset`` (1 when the file name starts with "dog", else 0).
    The hooks use the dict-returning Lightning API (``*_step`` returning a
    dict, ``*_epoch_end`` aggregating the per-batch dicts).

    Args:
        lr: Learning rate passed to Adam in ``configure_optimizers``.
    """

    # Spatial size every image is resized to before being fed to the network.
    _IMAGE_SIZE = (224, 224)

    def __init__(self, lr=0.02):
        super().__init__()
        # NOTE(review): 0.02 is kept as the default for backward compatibility;
        # it looks high for fine-tuning a pretrained backbone with Adam - confirm.
        self.lr = lr
        self.net = timm.create_model("efficientnet_b2", pretrained=True, num_classes=2)
        # Populated lazily by setup().
        self._train_dataset = None
        self._val_dataset = None
        self._test_dataset = None

    def forward(self, x):
        """Return the raw class logits for an image batch."""
        return self.net(x)

    def training_step(self, batch, batch_idx):
        """Cross-entropy loss on one labelled batch."""
        x, y = batch
        loss = F.cross_entropy(self(x), y)
        return {'loss': loss}

    def configure_optimizers(self):
        """Adam over all parameters with the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    @classmethod
    def _build_transform(cls):
        """Preprocessing shared by the train, validation and test datasets."""
        return transforms.Compose([
            transforms.Resize(cls._IMAGE_SIZE),
            transforms.ToTensor(),
        ])

    def setup(self, stage):
        """Build the datasets once: an 80/20 train/val split plus the test set.

        The hook may run more than once (e.g. for fit and then for test);
        building only on the first call avoids re-scanning the image folders
        and keeps the random train/val membership fixed for this instance.
        """
        if self._train_dataset is not None:
            return

        labelled_dataset = DogsVsCatsDataset(
            TRAIN_DATA_DIR, transform=self._build_transform()
        )
        train_size = int(len(labelled_dataset) * 0.8)
        val_size = len(labelled_dataset) - train_size
        self._train_dataset, self._val_dataset = torch.utils.data.random_split(
            labelled_dataset, [train_size, val_size]
        )
        self._test_dataset = DogsVsCatsDataset(
            TEST_DATA_DIR, transform=self._build_transform(), train=False
        )

    def train_dataloader(self):
        """Shuffled loader over the training split."""
        return DataLoader(self._train_dataset, batch_size=32, num_workers=4, shuffle=True)

    def validation_step(self, batch, batch_idx):
        """Cross-entropy loss on one validation batch."""
        x, y = batch
        loss = F.cross_entropy(self(x), y)
        return {'val_loss': loss}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation losses."""
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        return {"val_loss": avg_loss}

    def val_dataloader(self):
        """Loader over the validation split."""
        return DataLoader(self._val_dataset, batch_size=4, num_workers=4)

    def test_step(self, batch, batch_idx):
        """Predict the class index for every image in an unlabelled batch."""
        predicted = torch.argmax(self(batch), dim=1)
        return {"label": (batch_idx, predicted)}

    def test_epoch_end(self, outputs):
        """Return ``{image_id: predicted_label}`` using plain Python ints.

        Image ids are read from the test file names (``<id>.jpg``) rather than
        from ``batch_idx``: the batch index is 0-based and only equals an image
        position when the batch size is 1, whereas the test loader is
        unshuffled and therefore yields predictions in dataset order.
        """
        ordered = sorted((out["label"] for out in outputs), key=lambda pair: pair[0])
        labels = [
            label
            for _, predicted in ordered
            for label in predicted.reshape(-1).tolist()
        ]
        image_ids = [int(path.stem) for path in self._test_dataset._img_paths]
        return dict(zip(image_ids, labels))

    def test_dataloader(self):
        """Unshuffled loader over the test set.

        Order must be preserved because test_epoch_end pairs predictions with
        file names by position; the batch size itself is free to choose.
        """
        return DataLoader(self._test_dataset, batch_size=32, num_workers=4)

In [None]:
model = Network()
# Use one GPU when CUDA is visible, otherwise fall back to CPU (gpus=0)
# instead of failing at Trainer construction on machines without a GPU.
trainer = pl.Trainer(gpus=1 if torch.cuda.is_available() else 0, max_epochs=10)
trainer.fit(model)

In [None]:
# Run the test loop on the model fitted above and keep what the trainer
# returns; the submission cell iterates it with `.items()`, i.e. it is
# expected to be a mapping (presumably the dict built in test_epoch_end -
# TODO confirm for the installed Lightning version).
result = trainer.test()

In [None]:
# Build the submission file: a header row followed by one "<id>,<label>" row
# per test image. int() normalises labels that arrive as one-element tensors,
# which would otherwise be written with their tensor repr.
submission_rows = ["id,label"]
submission_rows.extend(
    f"{image_id},{int(label)}" for image_id, label in result.items()
)
submission_csv = "\n".join(submission_rows) + "\n"
Path("./submission.csv").write_text(submission_csv)