# Demo: Dogs vs Cats

![Dogs vs Cats](https://kaggle2.blob.core.windows.net/competitions/kaggle/3362/media/woof_meow.jpg)

A just-for-fun competition: https://www.kaggle.com/c/dogs-vs-cats

## Data preparation steps


### Config kaggle client

https://github.com/floydwch/kaggle-cli

```shell
# Install the kaggle-cli package, which provides the `kg` command used below.
pip install kaggle-cli
# Pass the Kaggle username, password and competition name to `kg`
# (presumably so the later `kg download` calls can run without credentials).
# NOTE(review): the kaggle-cli README shows this step as `kg config -g -u ...`;
# confirm whether the `config` subcommand is required here.
kg -u '<username>' -p '<password>' -c 'dogs-vs-cats'
```

### Get data

```shell
# Work inside a dedicated data/ directory (created if missing).
mkdir -p data/
cd data/
# Download both competition archives, then extract them quietly.
kg download -f train.zip
kg download -f test1.zip
unzip -q train.zip
unzip -q test1.zip

# Sanity check: each command should print 12500
# (12500 cat images and 12500 dog images).
find . -type f -name 'cat*' | wc -l
find . -type f -name 'dog*' | wc -l
```

### Prepare files

Rename the extracted directories and create some useful ones:

```shell
# Give the extracted folders their final/staging names: test1/ becomes test/,
# and the raw training images are parked in catdog/ so that train/ can be
# rebuilt with one sub-directory per class.
mv test1/ test
mv train/ catdog
# Full train/validation splits, one directory per class.
mkdir -p train/dog train/cat val/dog val/cat
# Tiny sample splits with the same layout.
mkdir -p sample/train/cat sample/train/dog sample/val/cat sample/val/dog
```

Cats:

```shell
# Move a random 11250 cat images from the staging directory into train/cat/.
# The search is limited to catdog/ so that re-running the command never picks
# up files that already live in train/, val/ or sample/.
find catdog -name "cat*" -type f | shuf -n11250 | xargs -I file mv file train/cat/
# Whatever is left in the staging directory becomes the validation set.
mv catdog/cat* val/cat/
# Build the small sample sets. Each one is drawn only from its own split, so
# sample/train and sample/val cannot end up sharing images.
find train/cat -name "cat*" -type f | shuf -n10 | xargs -I file cp file sample/train/cat/
find val/cat -name "cat*" -type f | shuf -n5 | xargs -I file cp file sample/val/cat/
```

Dogs:

```shell
# Move a random 11250 dog images from the staging directory into train/dog/.
# The search is limited to catdog/ so that re-running the command never picks
# up files that already live in train/, val/ or sample/.
find catdog -name "dog*" -type f | shuf -n11250 | xargs -I file mv file train/dog/
# Whatever is left in the staging directory becomes the validation set.
mv catdog/dog* val/dog/
# Build the small sample sets. Each one is drawn only from its own split, so
# sample/train and sample/val cannot end up sharing images.
find train/dog -name "dog*" -type f | shuf -n10 | xargs -I file cp file sample/train/dog/
find val/dog -name "dog*" -type f | shuf -n5 | xargs -I file cp file sample/val/dog/
```

Cleanup:

```shell
# Remove the staging directory (its images were moved into train/ and val/
# in the previous steps) and the downloaded archives.
rm -rf catdog/
rm test1.zip
rm train.zip
# Step back out of data/ to the directory that contains it.
cd ../
```

Verify data:

```shell
# Count the .jpg files in each split; the trailing comment on each line is the
# expected count.
ls data/train/cat/*.jpg | wc -l # 11250
ls data/train/dog/*.jpg | wc -l # 11250
ls data/val/cat/*.jpg | wc -l # 1250
ls data/val/dog/*.jpg | wc -l # 1250
```

In [51]:
import os
import torch.utils.data as data
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import time

In [34]:
# Loader settings and dataset locations. `data_dir` points at the small
# sample set; the train/ and val/ sub-directories are derived from it.
batch_size = 64
data_dir = '../data/sample'
traindir, valdir = (os.path.join(data_dir, split) for split in ('train', 'val'))

In [35]:
# Per-channel normalization shared by the training and validation pipelines.
# The mean/std values match the ones torchvision documents for its
# ImageNet-pretrained models.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

In [36]:
# Training loader: images are read from the class sub-directories of
# `traindir`, augmented with a random crop resized to 224 and a random
# horizontal flip, then converted to tensors and normalized.
# `RandomResizedCrop` replaces the deprecated `RandomSizedCrop` name, which
# newer torchvision releases no longer provide.
train_loader = data.DataLoader(
        datasets.ImageFolder(traindir,
                             transforms.Compose([
                                 transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 normalize
                             ])),
        batch_size=batch_size,
        shuffle=True,
        num_workers=4)

# Validation loader: no random augmentation, only a resize to 256 followed by
# a 224 center crop, tensor conversion and the same normalization.
# `Resize` replaces the deprecated `Scale` name, which newer torchvision
# releases no longer provide.
# NOTE(review): shuffling is not needed for evaluation; it is kept here to
# match the existing behavior.
val_loader = data.DataLoader(
        datasets.ImageFolder(valdir,
                             transforms.Compose([
                                 transforms.Resize(256),
                                 transforms.CenterCrop(224),
                                 transforms.ToTensor(),
                                 normalize,
                             ])),
        batch_size=batch_size,
        shuffle=True,
        num_workers=4)

In [37]:
# ds = train_loader.dataset
# print(len(ds.imgs))
# ds.loader(ds.imgs[0][0])

In [38]:
# Build torchvision's VGG-16 and request its pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favor of
# a `weights=` argument; confirm which torchvision version this notebook targets.
net = models.vgg16(pretrained=True)

In [39]:
# Replace the network's classifier with freshly constructed layers whose final
# linear layer has 2 outputs (one per class directory: cat and dog).
# The layers are created in the same order as they appear in the Sequential,
# and are passed positionally so the sub-module indices stay 0..6.
# 25088 is the input width of the first linear layer (25088 = 512 * 7 * 7;
# presumably the flattened VGG-16 feature map for 224x224 inputs — confirm).
head_layers = [
    nn.Linear(25088, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 2),
]
net.classifier = nn.Sequential(*head_layers)

In [40]:
# Loss: cross-entropy over the classifier's outputs.
criterion = nn.CrossEntropyLoss()
# Optimizer: Adam over the classifier parameters only, so the optimizer never
# updates the rest of the network.
optimizer = optim.Adam(
    net.classifier.parameters(),
    lr=0.001,
    weight_decay=0.0001,
)

In [49]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Prepare bookkeeping for one training epoch.

    As written, this function only creates four ``AverageMeter`` instances
    (batch time, data time, loss, accuracy), switches ``model`` to training
    mode and records a start timestamp. It does not iterate over
    ``train_loader`` and does not use ``criterion``, ``optimizer`` or
    ``epoch``; it returns ``None``.
    """
    # One independent meter per tracked quantity.
    batch_time, data_time, losses, acc = (AverageMeter() for _ in range(4))

    # Put the model into training mode before any batches are processed.
    model.train()

    # Reference timestamp for timing measurements.
    end = time.time()


In [52]:
# Run three epochs (0, 1, 2), printing the epoch index before each call to
# train().
for epoch in range(0, 3):
    print(epoch)
    train(train_loader, net, criterion, optimizer, epoch)
    # NOTE(review): the loop header below has no body in this export, which is
    # a syntax error as written. It may belong inside train() — confirm
    # against the original notebook before running this cell.
    for i, (images, target) in enumerate(train_loader):

0
1
2


In [41]:
class AverageMeter(object):
    """Track the latest value and the running weighted average of a metric.

    Attributes (all reset to 0 by ``reset``):
        val: the most recent value passed to ``update``.
        sum: the weighted sum of all values seen so far.
        count: the total weight (number of samples) seen so far.
        avg: ``sum / count``, or 0 while ``count`` is 0.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average.

        Args:
            val: the new value (e.g. a per-batch mean).
            n: the weight of ``val``, typically the batch size.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # Guard against a zero total weight (e.g. update(..., n=0) on a fresh
        # meter), which would otherwise raise ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0