In [1]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import time

In [2]:
# Change data_path to folder containg data
DATA_PATH = "./cats_and_dogs_filtered" 
## Loading Data
def load_data(data_path=DATA_PATH):
    
    '''
    Returns the data loader for 
    train and validation dataset. Sets batchsize to 32.
    
    ARgs: 
    data_path = folder containing data
    '''

    import torchvision
    import torchvision.datasets as datasets
    import torchvision.transforms as transforms

    batchsize = 32
    train_dataset = torchvision.datasets.ImageFolder(root=os.path.join(DATA_PATH, 'train'),
                                                     transform=transforms.Compose([transforms.RandomResizedCrop(224),
                                                                                   transforms.ToTensor()]))
    val_dataset = torchvision.datasets.ImageFolder(root=os.path.join(DATA_PATH, 'validation'),
                                                   transform = transforms.Compose([transforms.RandomResizedCrop(224),
                                                                                   transforms.ToTensor()]))
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batchsize, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batchsize, shuffle=False)
    
    return train_loader, val_loader

In [3]:
## Defining the model
# defining a cnn model
def cnn_model():
    from torchvision import models
    
    model = models.vgg16(pretrained=False)
#     model = models.resnet18(pretrained=False)
    
    return model
model = cnn_model()

# specifying loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
## Training the model
# You can experiment different numbers for n_epochs, but even 1 epoch should be good enough.
n_epochs = 5

In [4]:
def train_model(model, train_dataloader, n_epoch=n_epochs, optimizer=optimizer, criterion=criterion):
    import torch.optim as optim
    """
    :param model: A CNN model
    :param train_dataloader: the DataLoader of the training data
    :param n_epoch: number of epochs to train
    :return:
        model: trained model
    """
    model.train() # prep model for training
    train_loss = 0
    
    for epoch in range(n_epoch):
        curr_epoch_loss = []
        for data, target in train_dataloader:
            optimizer.zero_grad()
            y_hat = model(data)
            loss = criterion(y_hat, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            
            curr_epoch_loss.append(loss.cpu().data.numpy())
        print(f"Epoch {epoch}: curr_epoch_loss={np.mean(curr_epoch_loss)}")
    return model
# get train and val data loader
train_loader, val_loader = load_data()

seed = 24
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

model = train_model(model, train_loader)

Epoch 0: curr_epoch_loss=1.1726555824279785
Epoch 1: curr_epoch_loss=0.7173853516578674
Epoch 2: curr_epoch_loss=0.7126936912536621
Epoch 3: curr_epoch_loss=0.7079043388366699
Epoch 4: curr_epoch_loss=0.7068163156509399


In [5]:
## Testing the model
def eval_model(model, dataloader):
    """
    :return:
        Y_pred: prediction of model on the dataloder.
            Should be an 2D numpy float array where the second dimension has length 2.
        Y_test: truth labels. Should be an numpy array of ints
    """
    model.eval()
    Y_pred = []
    Y_test = []
    for data, target in dataloader:
        # your code here
        y_hat = model(data)
        _, pred = torch.max(y_hat, dim=1)
        Y_pred.append(pred.detach().numpy().reshape(-1,1))
        Y_test.append(target.detach().numpy().reshape(-1,1))
        
    Y_pred = np.concatenate(Y_pred, axis=0)
    Y_test = np.concatenate(Y_test, axis=0)

    return Y_pred, Y_test
from sklearn.metrics import accuracy_score

y_pred, y_true = eval_model(model, val_loader)
acc = accuracy_score(y_true, y_pred)
print(("Validation Accuracy: " + str(acc)))

Validation Accuracy: 0.5


In [6]:
# # local_zip = '/tmp/cats_and_dogs_filtered.zip'
# # zip_ref = zipfile.ZipFile(local_zip, 'r')
# # zip_ref.extractall('/tmp')
# # zip_ref.close()

# base_dir = '/tmp/cats_and_dogs_filtered'
# train_dir = '/cats_and_dogs_filtered/train'
# validation_dir = '/cats_and_dogs_filtered/validation'

# # Directory with our training cat pictures
# train_cats_dir = '/cats_and_dogs_filtered/train/cats'

# # Directory with our training dog pictures
# train_dogs_dir = '/cats_and_dogs_filtered/train/dogs'

# # Directory with our validation cat pictures
# validation_cats_dir = '/cats_and_dogs_filtered/validation/cats'

# # Directory with our validation dog pictures
# validation_dogs_dir = '/cats_and_dogs_filtered/validation/dogs'

In [7]:
# ## CNN:

# # Flow training images in batches of 20 using train_datagen generator
# train_generator = train_datagen.flow_from_directory(train_dir, batch_size = 20, class_mode = 'binary', target_size = (224, 224))

# # Flow validation images in batches of 20 using test_datagen generator
# validation_generator = test_datagen.flow_from_directory( validation_dir,  batch_size = 20, class_mode = 'binary', target_size = (224, 224))

In [8]:
# from tensorflow.keras.applications.vgg16 import VGG16
# base_model = VGG16(input_shape = (200, 200, 3), include_top = False, weights = 'imagenet')

# for layer in base_model.layers:
#     layer.trainable = False

In [9]:
# # Flatten the output layer to 1 dimension
# x = layers.Flatten()(base_model.output)

# # Add a fully connected layer with 512 hidden units and ReLU activation
# x = layers.Dense(512, activation='relu')(x)

# # Add a dropout rate of 0.5
# x = layers.Dropout(0.5)(x)

# # Add a final sigmoid layer with 1 node for classification output
# x = layers.Dense(1, activation='sigmoid')(x)

# model = tf.keras.models.Model(base_model.input, x)

# model.compile(optimizer = tf.keras.optimizers.RMSprop(lr=0.0001), loss = 'binary_crossentropy',metrics = ['acc'])

# vgghist = model.fit(train_generator, validation_data = validation_generator, steps_per_epoch = 100, epochs = 10)