In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torch.nn.functional as F
import torchvision
import torchvision.models as models
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

In [2]:
def prep_model(num_classes=2):
    """Build a ResNet-50 set up for transfer learning.

    The network is fetched through ``torch.hub`` with ``pretrained=True``.
    Every parameter whose name does not contain ``"bn"`` is frozen, and the
    ``fc`` layer is then replaced by a fresh two-layer head, so the new head
    (created after the freeze) remains trainable.

    Args:
        num_classes: Number of outputs produced by the final linear layer.

    Returns:
        The modified model.
    """
    backbone = torch.hub.load('pytorch/vision', 'resnet50', pretrained=True)

    # Freeze by name: only parameters with "bn" in their name keep gradients.
    for param_name, weight in backbone.named_parameters():
        if "bn" in param_name:
            continue
        weight.requires_grad = False

    # New head: in_features -> 512 -> num_classes, with ReLU and dropout between.
    head_inputs = backbone.fc.in_features
    backbone.fc = nn.Sequential(
        nn.Linear(head_inputs, 512),
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(512, num_classes),
    )
    return backbone

In [3]:
def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=5, device="cpu"):
    """Train ``model`` and print validation loss and accuracy after each epoch.

    Args:
        model: Module to optimise. This function does not move it; only the
            batches are sent to ``device``.
        optimizer: Optimizer whose ``zero_grad()``/``step()`` are called once
            per training batch.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar
            tensor. It is weighted by the batch size when accumulated, i.e.
            it is treated as a per-batch mean.
        train_loader: Iterable of ``(inputs, targets)`` batches that exposes a
            ``dataset`` attribute supporting ``len()``.
        val_loader: Same contract as ``train_loader``; used for validation.
        epochs: Number of passes over ``train_loader``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        None. Progress and metrics are written to stdout.
    """
    for epoch in range(epochs):
        print("epoch", epoch)
        training_loss = 0.0
        valid_loss = 0.0

        model.train()
        for i, (inputs, targets) in enumerate(train_loader):
            optimizer.zero_grad()
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()
            training_loss += loss.item() * inputs.size(0)
            # Running (un-normalised) sum of the epoch's loss so far.
            print(i, "training_loss", training_loss)
        num_train = len(train_loader.dataset)
        training_loss = training_loss / num_train if num_train else float("nan")

        model.eval()
        num_correct = 0
        num_examples = 0
        # Validation needs no gradients: skipping graph construction saves
        # memory and time and guarantees nothing can be back-propagated here.
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                output = model(inputs)
                valid_loss += loss_fn(output, targets).item() * inputs.size(0)

                # argmax over the raw scores; softmax is monotonic, so it
                # would not change which class wins.
                correct = output.argmax(dim=1) == targets
                num_correct += correct.sum().item()
                num_examples += correct.numel()
        num_val = len(val_loader.dataset)
        valid_loss = valid_loss / num_val if num_val else float("nan")
        # An empty validation loader reports NaN instead of dividing by zero.
        accuracy = num_correct / num_examples if num_examples else float("nan")

        print('Epoch: {}, Training Loss: {:.4f}, Validation Loss: {:.4f}, accuracy = {:.4f}'.format(
            epoch, training_loss, valid_loss, accuracy))

In [22]:
batch_size = 32
img_dimensions = 224

# Both pipelines resize to an img_dimensions x img_dimensions square, convert
# to a tensor, and normalise with the ImageNet per-channel mean and standard
# deviation. Statistics could be computed for the cats/dogs data set instead,
# but the ImageNet values give acceptable results here.
_resize_to = (img_dimensions, img_dimensions)
_imagenet_stats = {"mean": [0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]}

# Applied to the images served by the ImageFolder dataset.
img_transforms = transforms.Compose([
    transforms.Resize(_resize_to),
    transforms.ToTensor(),
    transforms.Normalize(**_imagenet_stats),
])

# Applied to single images at prediction time; same steps as above.
img_test_transforms = transforms.Compose([
    transforms.Resize(_resize_to),
    transforms.ToTensor(),
    transforms.Normalize(**_imagenet_stats),
])

def check_image(path):
    """Report whether PIL can open ``path`` as an image.

    Args:
        path: Location of the candidate image file, passed to ``Image.open``.

    Returns:
        True if ``Image.open`` succeeds, False if it raises any ``Exception``
        (missing file, unreadable or unrecognised content, ...).
    """
    try:
        # The context manager closes the underlying file handle, so checking
        # many files does not leak descriptors.
        with Image.open(path):
            return True
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must still propagate so a long scan can be interrupted.
        return False

# Full data set (use with the larger split sizes listed below):
# data_path = "/v3io/bigdata/dogs_vs_cats/data/train"
data_path = "/v3io/bigdata/dogs_vs_cats_sample"
data = torchvision.datasets.ImageFolder(root=data_path, transform=img_transforms)

# Sizes of the train / test / validation subsets, in that order.
# random_split requires them to add up to len(data).
# splits = [16000, 4500, 4500]
# splits = [480, 240, 240, 24040]
splits = [600, 200, 200]

# A fixed seed keeps the three subsets identical across kernel restarts, so a
# model saved in one session is not later evaluated on images it trained on.
train_data, test_data, validation_data = torch.utils.data.random_split(
    data, splits, generator=torch.Generator().manual_seed(42)
)

num_workers = 6
# Only the training loader shuffles; evaluation metrics do not depend on batch
# order, and a fixed order makes the printed per-batch output comparable.
train_data_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers
)
validation_data_loader = torch.utils.data.DataLoader(
    validation_data, batch_size=batch_size, shuffle=False, num_workers=num_workers
)
test_data_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, shuffle=False, num_workers=num_workers
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
# Report how many images each loader's dataset holds, one line per split.
print(
    *(
        f'Num {split_name} images: {len(loader.dataset)}'
        for split_name, loader in (
            ('training', train_data_loader),
            ('validation', validation_data_loader),
            ('test', test_data_loader),
        )
    ),
    sep='\n',
)

Num training images: 480
Num validation images: 240
Num test images: 240


In [109]:
def test_model(model):
    test_loss = 0.0
    model.eval()
    num_correct = 0 
    num_examples = 0
    with torch.no_grad():
#         for data in test_data_loader:
#             images, labels = data[0].to(device), data[1].to(device)
#             outputs = model(images)
#             _, predicted = torch.max(outputs.data, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()
            
        for batch in test_data_loader:
            inputs, targets = batch
            inputs = inputs.to(device)
            output = model(inputs)
            targets = targets.to(device)
            
            _, predicted = torch.max(output.data, 1)
            print(targets, predicted)
            loss = nn.CrossEntropyLoss()(output,targets) 
            test_loss += loss.data.item() * inputs.size(0)
                        
            correct = torch.eq(torch.max(F.softmax(output, dim=1), dim=1)[1], targets).view(-1)
            num_correct += torch.sum(correct).item()
            num_examples += correct.shape[0]
        test_loss /= len(test_data_loader.dataset)        
            
        print('Test Loss: {:.4f}, Test Accuracy = {:.4f}'.format(test_loss, num_correct / num_examples))
#     print('correct: {:d}  total: {:d}'.format(correct, total))
#     print('accuracy = {:f}'.format(correct / total))

In [90]:
# Build the transfer-learning model with prep_model()'s default num_classes.
# NOTE(review): the NameError recorded below shows this cell was last executed
# in a kernel where prep_model had not been defined; re-run the defining cell first.
model_resnet50 = prep_model()

NameError: name 'prep_model' is not defined

In [11]:
# Move the model to the selected device, then train for two epochs with Adam
# (learning rate 0.001) and cross-entropy loss, validating after each epoch.
model_resnet50.to(device)
optimizer = optim.Adam(params=model_resnet50.parameters(), lr=0.001)
train(
    model=model_resnet50,
    optimizer=optimizer,
    loss_fn=nn.CrossEntropyLoss(),
    train_loader=train_data_loader,
    val_loader=validation_data_loader,
    epochs=2,
    device=device,
)

epoch 0
0 training_loss 21.7574462890625
1 training_loss 57.207489013671875
2 training_loss 96.92254638671875
3 training_loss 119.65019416809082
4 training_loss 131.23123264312744
5 training_loss 159.90755367279053
6 training_loss 174.3070936203003
7 training_loss 186.78234004974365
8 training_loss 193.12980270385742
9 training_loss 205.4397315979004
10 training_loss 219.40852737426758
11 training_loss 225.96051740646362
12 training_loss 230.26807641983032
13 training_loss 235.98478317260742
14 training_loss 248.16403484344482
Epoch: 0, Training Loss: 0.5170, Validation Loss: 0.1040, accuracy = 0.9708
epoch 1
0 training_loss 3.416419744491577
1 training_loss 4.694213271141052
2 training_loss 6.6063385009765625
3 training_loss 9.436695337295532
4 training_loss 11.923755645751953
5 training_loss 12.503792345523834
6 training_loss 14.764908850193024
7 training_loss 15.50618052482605
8 training_loss 16.729638814926147
9 training_loss 17.086306750774384
10 training_loss 17.46514990925789
11

In [110]:
# Evaluate on the test loader.
# NOTE(review): `model` is only assigned further down, in the cell that loads
# model.pkl with cloudpickle; on a top-to-bottom run it is undefined here.
# Confirm whether `model_resnet50` was intended, or move the loading cell above.
test_model(model)

tensor([0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 1, 0, 1, 1, 0, 0, 0]) tensor([0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0,
        1, 1, 0, 1, 1, 0, 0, 0])
tensor([1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1,
        1, 0, 1, 1, 1, 1, 0, 0]) tensor([1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1,
        1, 0, 1, 1, 1, 1, 0, 0])
tensor([0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 0, 0, 1, 0, 1, 1, 0]) tensor([0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 0, 0, 1, 0, 1, 1, 0])
tensor([0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 0, 0, 1, 1, 0, 0]) tensor([0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 0, 0, 1, 1, 0, 0])
tensor([1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
        0, 0, 0,

In [14]:
def make_prediction(model, filename):
    """Classify one image file and print ``"cat"`` or ``"dog"``.

    The image is pre-processed with the module-level ``img_test_transforms``
    and run through ``model`` on the module-level ``device``. The forward pass
    runs in eval mode with gradients disabled; the model's previous
    train/eval mode is restored afterwards.

    Args:
        model: Classifier whose output index 0 means cat and 1 means dog.
        filename: Path of the image to classify.

    Returns:
        None. The predicted label is written to stdout.
    """
    # 1 = dog, 0 = cat
    labels = ["cat", "dog"]

    # Force three channels so the 3-channel Normalize in img_test_transforms
    # also works for greyscale or RGBA files; the context manager closes the file.
    with Image.open(filename) as img:
        batch = img_test_transforms(img.convert("RGB")).unsqueeze(0)

    # Dropout and BatchNorm behave differently in training mode, so inference
    # must run in eval mode regardless of the state the caller left the model in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            prediction = model(batch.to(device))
    finally:
        model.train(was_training)

    print(labels[prediction.argmax().item()])

# Spot-check model_resnet50 on one dog file and one cat file from the
# .../data/train tree; each call prints the predicted label.
# NOTE(review): these absolute paths are environment-specific.
make_prediction(model_resnet50, "/v3io/bigdata/dogs_vs_cats/data/train/dog/dog.10440.jpg")
make_prediction(model_resnet50, "/v3io/bigdata/dogs_vs_cats/data/train/cat/cat.12262.jpg")

dog
cat


In [78]:
from cloudpickle import loads

In [79]:
# Read model.pkl from the pipeline run directory and deserialise it with
# cloudpickle into `model`.
# NOTE(review): unpickling can execute arbitrary code — only load files from a
# trusted source. The absolute path is specific to one environment and run id.
with open("/User/igz_repos/igz-dogs-vs-cats-pipeline/pipeline/18c05588-d087-4118-b469-dd8dd26cdccd/model.pkl", "rb") as f:
    model = loads(f.read())

In [None]:
# NOTE(review): torch.load() is called without the file argument it needs, and
# this cell has no execution count — presumably an unfinished placeholder for
# reloading a saved checkpoint. Complete the call or remove the cell.
torch.load()

In [16]:
# Persist only the parameters (state_dict) of model_resnet50, not the module
# object, to a file in the current working directory.
torch.save(model_resnet50.state_dict(), "./dogs_vs_cats_resnet50.pth")