In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data.sampler import SubsetRandomSampler
import os
import random
import numpy as np
import time
import matplotlib.pyplot as plt

In [2]:

def process_data(datadir, valid_size = 0.25, batch_size = 64, seed = None):
    """Build train and validation DataLoaders from a single ImageFolder directory.

    All images under ``datadir`` are indexed once, the indices are shuffled,
    and the first ``valid_size`` fraction is held out for validation while
    the remainder is used for training.

    Args:
        datadir: Root directory passed to ``datasets.ImageFolder``.
        valid_size: Fraction of the images, in [0, 1], held out for validation.
        batch_size: Batch size used by both loaders.
        seed: Optional integer. When given, the train/validation split is
            drawn from a private ``np.random.RandomState(seed)`` so it is
            reproducible; when ``None`` the global NumPy RNG is used, as
            before. The seed affects only the split, not the per-epoch order
            produced by the samplers.

    Returns:
        ``(trainloader, testloader)``, both sampling without replacement from
        their own disjoint index subsets via ``SubsetRandomSampler``.

    Raises:
        ValueError: If ``valid_size`` is outside [0, 1].
    """
    if not 0.0 <= valid_size <= 1.0:
        raise ValueError(f"valid_size must be in [0, 1], got {valid_size!r}")

    # Resize(227) with a bare int only rescales the *shorter* edge, so a
    # non-square image would come out as 227xN and could neither be batched
    # with others nor fed to a network that flattens to a fixed size.
    # An explicit (h, w) pair forces every image to exactly 227x227.
    image_size = (227, 227)
    train_transforms = transforms.Compose([transforms.Resize(image_size), transforms.ToTensor()])
    test_transforms = transforms.Compose([transforms.Resize(image_size), transforms.ToTensor()])

    # Two dataset objects over the same folder so each loader keeps its own
    # transform pipeline.
    train_data = datasets.ImageFolder(datadir, transform=train_transforms)
    test_data = datasets.ImageFolder(datadir, transform=test_transforms)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if seed is None:
        np.random.shuffle(indices)
    else:
        np.random.RandomState(seed).shuffle(indices)

    # First `split` shuffled indices -> validation, the rest -> training.
    train_idx, test_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(test_idx)

    trainloader = torch.utils.data.DataLoader(train_data, sampler = train_sampler, batch_size = batch_size)
    testloader = torch.utils.data.DataLoader(test_data, sampler = test_sampler, batch_size = batch_size)

    return trainloader, testloader

In [3]:
# Root folder handed to process_data (and from there to ImageFolder).
data_dir = "./data/mendeley"

# Split the folder into training and held-out loaders using the defaults.
train_loader, test_loader = process_data(datadir=data_dir)

In [4]:
class CNN_Model(nn.Module):
    """AlexNet-style convolutional classifier.

    ``self.net`` is a stack of five convolutions (with ReLU, local response
    normalisation after the first two, and three max-pools) that maps a
    ``(b, 3, 227, 227)`` batch to ``(b, 256, 6, 6)``. ``self.classifier`` is
    three fully connected layers with dropout that map the flattened
    features to ``num_classes`` logits.

    Args:
        num_classes: Number of output logits. Defaults to 2.
    """

    def __init__(self, num_classes=2):
        super().__init__()

        self.net = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4),  # (b x 96 x 55 x 55)
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),  # section 3.3
            nn.MaxPool2d(kernel_size=3, stride=2),  # (b x 96 x 27 x 27)
            nn.Conv2d(96, 256, 5, padding=2),  # (b x 256 x 27 x 27)
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # (b x 256 x 13 x 13)
            nn.Conv2d(256, 384, 3, padding=1),  # (b x 384 x 13 x 13)
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, padding=1),  # (b x 384 x 13 x 13)
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, padding=1),  # (b x 256 x 13 x 13)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),  # (b x 256 x 6 x 6)
        )
        # classifier is just a name for linear layers
        # Dropout is deliberately NOT in-place: an in-place dropout placed
        # right after ReLU overwrites the activation that autograd saved for
        # ReLU's backward pass, which makes loss.backward() fail with an
        # "modified by an inplace operation" RuntimeError.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=(256 * 6 * 6), out_features=4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=num_classes),
        )
        self.init_bias()  # initialize bias

    def init_bias(self):
        """Initialise conv weights from N(0, 0.01) and set the conv biases.

        Every conv layer in ``self.net`` gets zero bias first; the 2nd, 4th
        and 5th conv layers (indices 4, 10 and 12 of ``self.net``) are then
        overridden with a bias of 1.
        """
        for layer in self.net:
            if isinstance(layer, nn.Conv2d):
                nn.init.normal_(layer.weight, mean=0, std=0.01)
                nn.init.constant_(layer.bias, 0)
        # original paper = 1 for Conv2d layers 2nd, 4th, and 5th conv layers
        nn.init.constant_(self.net[4].bias, 1)
        nn.init.constant_(self.net[10].bias, 1)
        nn.init.constant_(self.net[12].bias, 1)

    def forward(self, x):
        """
        Pass the input through the net.
        Args:
            x (Tensor): input tensor; the linear layers expect the conv stack
                to produce 256 x 6 x 6 features per sample, which is what a
                3 x 227 x 227 input yields.
        Returns:
            output (Tensor): (batch, num_classes) logits
        """
        x = self.net(x)
        # Flatten per sample while keeping the batch dimension fixed. Using
        # view(-1, 256*6*6) instead could silently regroup elements across
        # samples when the feature map is not 6x6; this way a wrong input
        # size surfaces as a shape error in the first Linear layer.
        x = x.view(x.size(0), -1)
        return self.classifier(x)
        
#         self.features = nn.Sequential(
#             nn.Conv2d(3, 64, kernel_size=3, padding=1, stride=1),
#             nn.ReLU(inplace=False),
#             nn.Conv2d(64, 96, kernel_size=3, padding=1, stride=1),
#             nn.ReLU(inplace=False),
#             nn.MaxPool2d(kernel_size=3, stride=2),
#             nn.Conv2d(96, 96, kernel_size=3, padding=1, stride=1),
#             nn.BatchNorm2d(96),
#             nn.ReLU(inplace=False),
#             nn.Conv2d(96, 128, kernel_size=3, padding=1, stride=1),
#             nn.ReLU(inplace=False),
#             nn.MaxPool2d(kernel_size=3, stride=2),           
#             nn.Conv2d(128, 128, kernel_size=5, padding=2, stride=1),
#             nn.ReLU(inplace=False),
#             nn.Conv2d(128, 128, kernel_size=3, padding=1, stride=1),
#             nn.BatchNorm2d(128),
#             nn.ReLU(inplace=False),
#             nn.MaxPool2d(kernel_size=2, stride=2),
#             nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=1),
#             nn.ReLU(inplace=False),
#             nn.Conv2d(256, 256, kernel_size=5, padding=2, stride=1),
#             nn.ReLU(inplace=False),
#             nn.Conv2d(256, 512, kernel_size=3, padding=1, stride=1),
#             nn.ReLU(inplace=False),
#             nn.MaxPool2d(kernel_size=2, stride=2),
#             nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=1),
#             nn.ReLU(inplace=False),
#             nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=1),
#             nn.BatchNorm2d(512),
#             nn.ReLU(inplace=False),
#             nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=1),
#             nn.ReLU(inplace=False),
#             nn.MaxPool2d(kernel_size=2, stride=2),
#         )
        
#         self.classifier = nn.Sequential(
#             nn.Linear(25088, 4096),
#             nn.ReLU(inplace=False),
#             nn.Dropout(p=0.5, inplace=True),
#             nn.Linear(4096, 1024),
#             nn.ReLU(inplace=False),
#             nn.Dropout(p=0.5, inplace=True),
#             nn.Linear(1024, 2),
#         )
#         self.init_bias()
    
#     def init_bias(self):
#         for layer in self.features:
#             if isinstance(layer, nn.Conv2d):
#                 nn.init.normal_(layer.weight, mean=0, std=0.01)
#                 nn.init.constant_(layer.bias, 0)        
    
#     def forward(self, x):
#         x = self.features(x)
#         x = x.view(x.shape[0], -1)
#         x = self.classifier(x)
#         return x


In [5]:
# Drop cached CUDA allocations before the model is placed on a device.
torch.cuda.empty_cache()

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = CNN_Model()
model = model.to(device)

In [6]:
# Cross-entropy over the model's raw logits; Adam with its default settings.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters())

In [7]:
def accuracy(fp,y):
    """Return the fraction of rows of ``fp`` whose arg-max column matches ``y``.

    Args:
        fp: 2-D tensor of per-class scores, one row per sample.
        y: Tensor of class indices with one entry per row of ``fp``.

    Returns:
        A 0-dim float tensor: correct predictions divided by the row count.
    """
    # Index of the highest score in each row, kept as a (rows, 1) column.
    _, predicted = fp.max(dim=1, keepdim=True)
    targets = y.view_as(predicted)
    hits = (predicted == targets).sum()
    return hits.float() / predicted.size(0)

In [8]:
def train(model, device, data_itr, optimizer, criterion):
    """Run one optimisation pass over ``data_itr`` and return epoch metrics.

    Loss and accuracy are averaged over *samples*, not over batches, so a
    smaller final batch is not over-weighted. This assumes ``criterion``
    returns the mean loss of the batch (the default reduction of the
    built-in PyTorch losses).

    Args:
        model: Module mapping an input batch to ``(batch, classes)`` scores.
        device: Device each batch is moved to before the forward pass.
        data_itr: Iterable of ``(x, y)`` batches; it does not need ``len()``.
        optimizer: Optimizer stepped once per batch.
        criterion: Callable ``criterion(scores, y)`` returning a scalar loss.

    Returns:
        ``(mean_loss, mean_accuracy)`` as Python floats.

    Raises:
        ZeroDivisionError: If ``data_itr`` yields no samples.
    """
    total_loss = 0.0
    total_correct = 0
    total_seen = 0

    model.train()
    for x, y in data_itr:
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        fp = model(x)
        loss = criterion(fp, y)

        loss.backward()
        optimizer.step()

        batch_n = fp.shape[0]
        # Arg-max class per row compared with the labels; counting hits
        # (rather than averaging per-batch ratios) keeps the epoch accuracy
        # exact when batches differ in size.
        preds = fp.max(1, keepdim=True)[1]
        total_correct += preds.eq(y.view_as(preds)).sum().item()
        # Undo the per-batch mean so every sample carries equal weight.
        total_loss += loss.item() * batch_n
        total_seen += batch_n

    return total_loss / total_seen, total_correct / total_seen

In [9]:
def evaluate(model, device, data_itr, criterion):
    """Score ``model`` on ``data_itr`` without updating it.

    The model is switched to eval mode and gradients are disabled. Loss and
    accuracy are averaged over *samples*, not over batches, so a smaller
    final batch is not over-weighted. This assumes ``criterion`` returns the
    mean loss of the batch (the default reduction of the built-in PyTorch
    losses).

    Args:
        model: Module mapping an input batch to ``(batch, classes)`` scores.
        device: Device each batch is moved to before the forward pass.
        data_itr: Iterable of ``(x, y)`` batches; it does not need ``len()``.
        criterion: Callable ``criterion(scores, y)`` returning a scalar loss.

    Returns:
        ``(mean_loss, mean_accuracy)`` as Python floats.

    Raises:
        ZeroDivisionError: If ``data_itr`` yields no samples.
    """
    total_loss = 0.0
    total_correct = 0
    total_seen = 0

    model.eval()
    with torch.no_grad():
        for x, y in data_itr:
            x = x.to(device)
            y = y.to(device)

            fp = model(x)
            loss = criterion(fp, y)

            batch_n = fp.shape[0]
            # Count hits instead of averaging per-batch ratios so uneven
            # batch sizes do not skew the epoch accuracy.
            preds = fp.max(1, keepdim=True)[1]
            total_correct += preds.eq(y.view_as(preds)).sum().item()
            # Undo the per-batch mean so every sample carries equal weight.
            total_loss += loss.item() * batch_n
            total_seen += batch_n

    return total_loss / total_seen, total_correct / total_seen

In [None]:
# Run configuration: number of passes and where the best checkpoint is written.
epochs = 1000
SAVE_DIR = 'models'
MODEL_SAVE_PATH = os.path.join(SAVE_DIR, 'cnn_crack_mendeleys.pt')

# Make sure the checkpoint directory is there before the first save.
os.makedirs(SAVE_DIR, exist_ok=True)

# Lowest held-out loss seen so far; any finite loss beats the initial value.
best_val_loss = float('inf')

for epoch in range(epochs):
    train_loss, train_acc = train(model, device, train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, device, test_loader, criterion)

    # Keep only the weights of the epoch with the best held-out loss.
    improved = val_loss < best_val_loss
    if improved:
        best_val_loss = val_loss
        torch.save(model.state_dict(), MODEL_SAVE_PATH)

    print(f'Epoch: {epoch+1:4} | Training loss: {train_loss:.3f} | Training Accuracy: {train_acc*100:5.2f}% | Valid_loss: {val_loss:.3f} | Valid_accuracy: {val_acc*100:5.2f}%')