In [None]:
## CATS AND DOGS EXAMPLE:
## https://medium.com/predict/using-pytorch-for-kaggles-famous-dogs-vs-cats-challenge-part-1-preprocessing-and-training-407017e1a10c

# PART 1: ORGANIZING DATA
import os
train_dir = "./data/train"
train_dogs_dir = f'{train_dir}/dogs'
train_cats_dir = f'{train_dir}/cats'
val_dir = "./data/val"
val_dogs_dir = f'{val_dir}/dogs'
val_cats_dir = f'{val_dir}/cats'
print("Printing data dir")
print(os.listdir("data")) # Shows train, val folders are under data
print("Printing train dir")
!ls {train_dir} | head -n 5 # Shows image files are in train folder
print("Printing train dog dir")
!ls {train_dogs_dir} | head -n 5 # Check the (empty) folder exist
print("Printing train cat dir")
!ls {train_cats_dir} | head -n 5 # Check the (empty) folder exist
print("Printing val dir")
!ls {val_dir} | head -n 5  # Shows subfolder dogs and cats exist
print("Printing val dog dir")
!ls {val_dogs_dir} | head -n 5 # Check the (empty) folder exist
print("Printing val cat dir")
!ls {val_cats_dir} | head -n 5 # Check the (empty) folder exist

In [None]:
import shutil
import re
files = os.listdir(train_dir)
# Move all train cat images to cats folder, dog images to dogs folder
for f in files:
    catSearchObj = re.search("cat", f)
    dogSearchObj = re.search("dog", f)
    if catSearchObj:
        shutil.move(f'{train_dir}/{f}', train_cats_dir)
    elif dogSearchObj:
        shutil.move(f'{train_dir}/{f}', train_dogs_dir)


print("Printing train dir") # shows cats, dogs subfolders only
!ls {train_dir} | head -n 5
print("Printing train dog dir") # there is now dog images in dogs folder
!ls {train_dogs_dir} | head -n 5
print("Printing train cat dir") # there is now cat images in cats folder
!ls {train_cats_dir} | head -n 5

In [None]:
files = os.listdir(train_dogs_dir)
for f in files:
    validationDogsSearchObj = re.search("4", f)
    if validationDogsSearchObj:
        shutil.move(f'{train_dogs_dir}/{f}', val_dogs_dir)
print("Printing val dog dir")
!ls {val_dogs_dir} | head -n 5

files = os.listdir(train_cats_dir)
for f in files:
    validationCatsSearchObj = re.search("4", f)
    if validationCatsSearchObj:
        shutil.move(f'{train_cats_dir}/{f}', val_cats_dir)
print("Printing val cat dir")
!ls {val_cats_dir} | head -n 5

In [19]:
# PART 2: TRAIN MODEL
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import math
# Print the installed PyTorch version so it is recorded in the cell output.
print(torch.__version__)
plt.ion()   # turn on matplotlib's interactive mode

1.9.0+cu102


In [20]:
# Preprocessing pipelines, keyed by split name.
#   'train': random augmentation (small rotation, horizontal flip, near-full
#            random crop resized to the input size), then tensor + normalize.
#   'val':   fixed resize to the input size, then tensor + normalize.
_INPUT_SIZE = 224
# NOTE(review): these look like the ImageNet per-channel mean/std that
# torchvision documents for its pretrained models; confirm before changing.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

_train_pipeline = transforms.Compose([
    transforms.RandomRotation(5),
    transforms.RandomHorizontalFlip(),
    transforms.RandomResizedCrop(_INPUT_SIZE, scale=(0.96, 1.0), ratio=(0.95, 1.05)),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
])

_val_pipeline = transforms.Compose([
    transforms.Resize([_INPUT_SIZE, _INPUT_SIZE]),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
])

data_transforms = {'train': _train_pipeline, 'val': _val_pipeline}

In [30]:
# Paths used by this cell and later ones.
data_dir = 'data'
CHECK_POINT_PATH = 'checkpoint.tar'
SUBMISSION_FILE = 'submission.csv'

# One ImageFolder dataset per split, each read from data/<split> with that
# split's transform pipeline.
image_datasets = {
    'train': datasets.ImageFolder(os.path.join(data_dir, 'train'), data_transforms['train']),
    'val': datasets.ImageFolder(os.path.join(data_dir, 'val'), data_transforms['val']),
}

# Both loaders share the same batching options.
_loader_options = dict(batch_size=4, shuffle=True, num_workers=4)
dataloaders = {
    'train': torch.utils.data.DataLoader(image_datasets['train'], **_loader_options),
    'val': torch.utils.data.DataLoader(image_datasets['val'], **_loader_options),
}

# Number of samples in each split.
dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}

# Class labels as discovered by ImageFolder for the training split.
class_names = image_datasets['train'].classes

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

print(class_names) # => ['cats', 'dogs']
print(f'Train image size: {dataset_sizes["train"]}')
print(f'Validation image size: {dataset_sizes["val"]}')

FileNotFoundError: ignored

In [None]:
# ## Transfer Learning - thumb classification (reference only: every line of this cell is commented out, so nothing here runs)
# ## Source: https://github.com/davidRetana/thumbs_jetbot/blob/master/train_model_thumbs.ipynb


# import torch
# import torch.optim as optim
# import torch.nn.functional as F
# import torchvision
# import torchvision.datasets as datasets
# import torchvision.models as models
# import torchvision.transforms as transforms
# # Create dataset instance
# # Now we use the ImageFolder dataset class available with the torchvision.datasets package. We attach transforms from the torchvision.transforms package to prepare the data for training.


# dataset = datasets.ImageFolder(
#     'dataset',
#     transforms.Compose([
#         transforms.ColorJitter(0.1, 0.1, 0.1, 0.1),
#         transforms.Resize((224, 224)),
#         transforms.ToTensor(),
#         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
#     ])
# )
# # Split dataset into train and test sets
# # Next, we split the dataset into training and test sets. The test set will be used to verify the accuracy of the model we train.

# train_dataset, test_dataset = torch.utils.data.random_split(dataset, [len(dataset) - 50, 50])
# # Create data loaders to load data in batches
# # We'll create two DataLoader instances, which provide utilities for shuffling data, producing batches of images, and loading the samples in parallel with multiple workers.

# train_loader = torch.utils.data.DataLoader(
#     train_dataset,
#     batch_size=16,
#     shuffle=True,
#     num_workers=4
# )

# test_loader = torch.utils.data.DataLoader(
#     test_dataset,
#     batch_size=16,
#     shuffle=True,
#     num_workers=4
# )
# # Define the neural network
# # Now, we define the neural network we'll be training. The torchvision package provides a collection of pre-trained models that we can use.

# # In a process called transfer learning, we can repurpose a pre-trained model (trained on millions of images) for a new task that has possibly much less data available.

# # Important features that were learned in the original training of the pre-trained model are re-usable for the new task. We'll use the alexnet model.

# import torchvision.models as models
# model = models.alexnet(pretrained=True)

# model
# # The alexnet model was originally trained for a dataset that had 1000 class labels, but our dataset only has two class labels! We'll replace the final layer with a new, untrained layer that has only two outputs.


# model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, 2)
# # Finally, we transfer our model for execution on the GPU, if available


# device_string = "cuda" if torch.cuda.is_available() else "cpu"
# device = torch.device(device_string)
# model = model.to(device)
# # Train the neural network
# # Using the code below we will train the neural network for 30 epochs, saving the best performing model after each epoch.

# # An epoch is a full run through our data.

# NUM_EPOCHS = 30
# BEST_MODEL_PATH = 'best_model.pth'
# best_accuracy = 0.0

# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# for epoch in range(NUM_EPOCHS):
    
#     for images, labels in iter(train_loader):
#         images = images.to(device)
#         labels = labels.to(device)
#         optimizer.zero_grad()
#         outputs = model(images)
#         loss = F.cross_entropy(outputs, labels)
#         loss.backward()
#         optimizer.step()
    
#     test_error_count = 0.0
#     for images, labels in iter(test_loader):
#         images = images.to(device)
#         labels = labels.to(device)
#         outputs = model(images)
#         test_error_count += float(torch.sum(torch.abs(labels - outputs.argmax(1))))
    
#     test_accuracy = 1.0 - float(test_error_count) / float(len(test_dataset))
#     print('%d: %f' % (epoch, test_accuracy))
#     if test_accuracy > best_accuracy:
#         torch.save(model.state_dict(), BEST_MODEL_PATH)
#         best_accuracy = test_accuracy

# # Once that is finished, you should see a file best_model.pth in the Jupyter Lab file browser.

AttributeError: ignored