In [2]:
from tqdm import tqdm
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split

from skimage.io import imread
from sklearn.metrics import accuracy_score

In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [4]:
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

In [23]:
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
BATCH_SIZE = 32

RANDOM_SEED = 42
EPOCHS = 1
# PATH = './resnet18.pth'

In [6]:
train_triplets = np.loadtxt('./train_triplets.txt', dtype='str')
test_triplets = np.loadtxt('./test_triplets.txt', dtype='str')
len(train_triplets)

59515

In [7]:
# train_set, valid_set = train_test_split(train_triplets, test_size=0.1, random_state=RANDOM_SEED)
# len(train_set)

In [8]:
df_train = pd.DataFrame(train_triplets)
df_test = pd.DataFrame(test_triplets)

df_train

Unnamed: 0,0,1,2
0,02461,03450,02678
1,02299,02499,04987
2,04663,01056,03029
3,04532,01186,01297
4,03454,03809,02204
...,...,...,...
59510,00466,02952,02530
59511,02646,03580,02359
59512,03255,04844,04334
59513,02136,04619,00161


In [9]:
df_train.columns = ['A', 'B', 'C']
df_train.insert(df_train.shape[1], 'y', 1)
# df_train_append = df_train.copy()
# df_train_append['A'] = df_train['B'].copy()
# df_train_append['B'] = df_train['A'].copy()
# df_train = df_train.append(df_train_append).reset_index(drop=True)
# df_train = df_train.sample(frac=1, random_state=RANDOM_SEED).reset_index(drop=True)
df_train

Unnamed: 0,A,B,C,y
0,02461,03450,02678,1
1,02299,02499,04987,1
2,04663,01056,03029,1
3,04532,01186,01297,1
4,03454,03809,02204,1
...,...,...,...,...
59510,00466,02952,02530,1
59511,02646,03580,02359,1
59512,03255,04844,04334,1
59513,02136,04619,00161,1


In [10]:
# train_set, valid_set = train_test_split(df_train.to_numpy(), test_size=0.1, random_state=RANDOM_SEED)
train_set, valid_set = train_test_split(df_train.to_numpy(), test_size=0.1, random_state=RANDOM_SEED)
valid_set

array([['01623', '01263', '04221', 1],
       ['02841', '04262', '03258', 1],
       ['00002', '03453', '01963', 1],
       ...,
       ['03934', '02163', '02110', 1],
       ['01042', '04892', '04466', 1],
       ['02976', '02403', '04435', 1]], dtype=object)

In [11]:
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(size=(IMAGE_HEIGHT, IMAGE_WIDTH)),
    transforms.RandomHorizontalFlip(), 
#     transforms.Normalize((0.608, 0.516, 0.412),(0.264, 0.275, 0.296))
    transforms.Normalize((0.485, 0.456, 0.406),(0.229, 0.224, 0.225)), 
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(size=(IMAGE_HEIGHT, IMAGE_WIDTH)),
#     transforms.Normalize((0.608, 0.516, 0.412),(0.264, 0.275, 0.296))
    transforms.Normalize((0.485, 0.456, 0.406),(0.229, 0.224, 0.225)),
])

In [12]:
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, data, transform=None, trainning=False):

        self.imgs = data

        self.img_A = self.imgs[:, 0]
        self.img_B = self.imgs[:, 1]
        self.img_C = self.imgs[:, 2]
        # self.label = data[:, 3]
        self.transform = transform
        self.trainning = trainning
        
        if self.trainning:
            self.label = self.imgs[:, 3]
    
    def __getitem__(self, index):
        img_A = imread( './food/' + self.img_A[index] + '.jpg')
        img_B = imread( './food/' + self.img_B[index] + '.jpg')
        img_C = imread( './food/' + self.img_C[index] + '.jpg')

        if self.transform is not None:
            img_A = self.transform(img_A)
            img_B = self.transform(img_B)
            img_C = self.transform(img_C)

        if self.trainning:
            label = self.label[index]
            return img_A, img_B, img_C, label
        else:
            return img_A, img_B, img_C

    def __len__(self):
        return self.imgs.shape[0]


train_data = MyDataset(data=train_set, transform=transform_train, trainning=True)
valid_data = MyDataset(data=valid_set, transform=transform_test, trainning=True)
test_data = MyDataset(data=test_triplets, transform=transform_test, trainning=False)
train_data


<__main__.MyDataset at 0x21cd3c2fd00>

In [13]:
from torch.utils.data import DataLoader 

train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(dataset=valid_data, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_data,batch_size=BATCH_SIZE)

In [14]:
import torch.nn as nn
import torchvision.models as models

class ConvNet(nn.Module):
    def __init__(self, net):
        super(ConvNet, self).__init__()
        self.net = net
    def forward(self, img_A, img_B, img_C):
        anchor = self.net(img_A)
        postive = self.net(img_B)
        negative = self.net(img_C)
        return anchor, postive, negative

class myLayer(nn.Module):
    def __init__(self, input):
        super(myLayer, self).__init__()
        input_size = input.fc.in_features
        self.bottleneck = nn.Sequential(
            nn.Conv2d(input_size, 512, kernel_size=(1,1), stride=(1,1), bias=False),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 512, kernel_size=(3,3), stride=(1,1), padding=(1,1), bias=False),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 2048, kernel_size=(1,1), stride=(1,1), bias=False),
            nn.BatchNorm2d(2048)
        )
        self.relu = nn.ReLU(inplace=True)
        
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1,1))

        self.fc1 = torch.nn.Linear(2048, 512)
        self.fc2 = torch.nn.Linear(512, 1024)
        self.drop = nn.Dropout(p=0.5)
        self.activation_fn = nn.Tanh()

    def forward(self, x):
        x = self.bottleneck(x)
        x = self.relu(x)
        x = self.avgpool(x)
        x = x.view(x.size(0),-1)
        x = self.fc1(x)
        x = self.activation_fn(x)
        x = self.drop(x)
        x = self.fc2(x)
        return x

    
# class myLayer(nn.Module):
#     def __init__(self, input):
#         super(myLayer, self).__init__()
#         input_size = input.fc.in_features
#         self.fc1 = nn.Linear(input_size, 512)
#         self.fc2 = nn.Linear(512, 512)
#         self.fc3 = nn.Linear(512, 1024)
#         self.drop = nn.Dropout(p=0.5)
#         self.activation_fn = nn.Tanh()
# #         self.activation_fn = nn.ReLU()
# #         self.activation_fn = nn.LeakyReLU()
        
#     def forward(self, x):
# #         x = self.fc1(x)
# #         x = self.activation_fn(x)
# #         x = self.drop(x)
# #         x = self.fc2(x)
#         x = self.fc1(x)
#         x = self.activation_fn(x)
#         x = self.drop(x)
#         x = self.fc2(x)
#         x = self.activation_fn(x)
#         x = self.drop(x)
#         x = self.fc3(x)
#         return x
    
# model = models.resnet101(pretrained=True)
model = models.resnet50(pretrained=True)
# print(model)

for param in model.parameters():
    param.requires_grad = False

# model.fc = myLayer(model)
# net = ConvNet(model).to(device)
model_new = torch.nn.Sequential(*list(model.children())[:-2])
model_new.add_module("myLayer",myLayer(model))
print(model_new)
net = ConvNet(model_new).to(device)


# model = models.resnet18(pretrained=True)
# # print(model)

# for param in model.parameters():
#     param.requires_grad = False

# # model.fc = myLayer(model)
# fc_features = model.fc.in_features
# model.fc = nn.Linear(fc_features, 1024)
# net = ConvNet(model).to(device)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [15]:
import torch.optim as optim

# criterion_train = nn.TripletMarginLoss(margin=5.0)
criterion_train = nn.TripletMarginLoss(margin=5.0)
criterion_valid = nn.TripletMarginLoss()
optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()), lr=0.001, momentum=0.9, weight_decay=1e-6)

In [16]:
# for A, B, C, label in train_loader:
#     print(A)
#     break
# for i, data in enumerate(train_loader, 0):
#     print(data[3].reshape(-1,1))
#     print(data[3].size())
#     break
# y_pred = [0, 2, 1, 3]
# y_true = [0, 1, 2, 3]
# accuracy_score(y_true, y_pred)

In [17]:
def valid(anchor, positive, negative, label):
    dist_pos = torch.norm(anchor - positive, p=2, dim=1)
    dist_neg = torch.norm(anchor - negative, p=2, dim=1)
    diff = (dist_neg - dist_pos).detach().cpu().numpy()
    predict = np.ceil(diff.clip(0,1))
    
    return accuracy_score(label, predict, normalize=False)

In [18]:
def train(model):
    valid_accuracy = 0.0
    for epoch in range(EPOCHS):
        running_loss = 0.0
        valid_loss = 0.0
        correct = 0
        total = 0
        accuracy = 0.0

        print('Training')
        # trainning
        for i, data in tqdm(enumerate(train_loader, 0)):
            img_A = data[0].to(device)
            img_B = data[1].to(device)
            img_C = data[2].to(device)
            
            optimizer.zero_grad()

            anchor, positive, negetive = model(img_A, img_B, img_C)

            loss = criterion_train(anchor, positive, negetive)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        print(f'[{epoch + 1}] average loss per epoch: {running_loss / len(train_loader):.3f}')

        print('Validation')
        # validation
        model.eval()
        with torch.no_grad():
            for i, data in tqdm(enumerate(valid_loader, 0)):
                img_A = data[0].to(device)
                img_B = data[1].to(device)
                img_C = data[2].to(device)
                labels = data[3].reshape(-1, 1)

                anchor, positive, negetive = model(img_A, img_B, img_C)
                loss = criterion_valid(anchor, positive, negetive)
                valid_loss += loss
                correct += valid(anchor, positive, negetive, labels)
                total += labels.size(0)
                accuracy = correct / total
        print(f'Accuracy of the network on the {total} valid images: {100 * accuracy} %')

#         if accuracy >= valid_accuracy:
#             valid_accuracy = accuracy
#             torch.save(model.state_dict(), PATH)

        PATH = f'./resnet50_epoch{epoch+1}addconvlayers.pth'
        torch.save(model.state_dict(), PATH) 
        model.train()

    print('Finished Training')
    

In [18]:
train(net)

Training


1674it [14:04,  1.98it/s]


[1] average loss per epoch: 3.575
Validation


186it [01:27,  2.12it/s]


Accuracy of the network on the 5952 valid images: 71.28696236559139 %
Training


1674it [13:49,  2.02it/s]


[2] average loss per epoch: 3.202
Validation


186it [01:27,  2.12it/s]


Accuracy of the network on the 5952 valid images: 73.99193548387096 %
Training


1674it [14:25,  1.94it/s]


[3] average loss per epoch: 3.044
Validation


186it [01:29,  2.09it/s]


Accuracy of the network on the 5952 valid images: 75.08400537634408 %
Training


1674it [13:58,  2.00it/s]


[4] average loss per epoch: 2.898
Validation


186it [01:29,  2.08it/s]


Accuracy of the network on the 5952 valid images: 75.82325268817203 %
Training


1674it [13:49,  2.02it/s]


[5] average loss per epoch: 2.770
Validation


186it [01:27,  2.12it/s]


Accuracy of the network on the 5952 valid images: 76.4616935483871 %
Training


1674it [13:48,  2.02it/s]


[6] average loss per epoch: 2.654
Validation


186it [01:27,  2.12it/s]


Accuracy of the network on the 5952 valid images: 76.69690860215054 %
Training


1674it [13:55,  2.00it/s]


[7] average loss per epoch: 2.553
Validation


186it [01:28,  2.10it/s]


Accuracy of the network on the 5952 valid images: 77.15053763440861 %
Training


1674it [13:54,  2.01it/s]


[8] average loss per epoch: 2.467
Validation


186it [01:28,  2.11it/s]


Accuracy of the network on the 5952 valid images: 78.32661290322581 %
Training


1674it [13:53,  2.01it/s]


[9] average loss per epoch: 2.355
Validation


186it [01:28,  2.10it/s]


Accuracy of the network on the 5952 valid images: 78.36021505376344 %
Training


1674it [14:02,  1.99it/s]


[10] average loss per epoch: 2.290
Validation


186it [01:29,  2.08it/s]

Accuracy of the network on the 5952 valid images: 77.83938172043011 %
Finished Training





In [19]:
PATH = './resnet50_epoch1addconvlayers.pth'

# net.load_state_dict(state_dict, strict=False)
net.load_state_dict(torch.load(PATH))
net.to(device)

# state_dict = torch.load('./resnet50_epoch1addconvlayers.pth')

# from collections import OrderedDict
# new_state_dict = OrderedDict()

# for key, value in state_dict.items():
#     key = key[4:] # remove `att.`
#     new_state_dict[key] = value

# # load params
# net = ConvNet(model)
# net.load_state_dict(new_state_dict)
# net.to(device)


ConvNet(
  (net): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(64, 256,

In [20]:
def predict(anchor, positive, negative):
    dist_pos = torch.norm(anchor - positive, p=2, dim=1)
    dist_neg = torch.norm(anchor - negative, p=2, dim=1)
    diff = (dist_neg - dist_pos).detach().to('cpu').numpy()
    predict = np.ceil(diff.clip(0,1))

    return(predict)

In [21]:
def test(model):
    predictions = []
    model.eval()
    with torch.no_grad():
        for i, data in tqdm(enumerate(test_loader, 0)):
            img_A = data[0].to(device)
            img_B = data[1].to(device)
            img_C = data[2].to(device)

            anchor, positive, negetive = model(img_A, img_B, img_C)

            pred = predict(anchor, positive, negetive)
            predictions.append(pred)
    return predictions

In [22]:
output = test(net)
predictions = []
for i in range(len(output)):
    output[i].astype(np.int8)
    for j in range(len(output[i])):
        predictions.append(output[i][j])
np.savetxt('predictions_resnet50_epoch1addconvlayers.txt', predictions, fmt='%i')

1861it [16:07,  1.92it/s]
