In [51]:
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F

from resnet_features import resnet18_features, resnet34_features, resnet50_features, resnet101_features, resnet152_features
from densenet_features import densenet121_features, densenet161_features, densenet169_features, densenet201_features
from vgg_features import vgg11_features, vgg11_bn_features, vgg13_features, vgg13_bn_features, vgg16_features, vgg16_bn_features,\
                         vgg19_features, vgg19_bn_features

from receptive_field import compute_proto_layer_rf_info_v2

from settings import img_size

from PIL import Image
import numpy as np
import numpy.random as npr
import pandas as pd
import os
import matplotlib.pyplot as plt
import torch.utils.data
# import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.optim as optim

import pickle as pkl
import skimage as sk
import skimage.io as skio
from preference_model import construct_PrefNet, paired_cross_entropy_loss, PrefNet

# book keeping namings and code
from settings import base_architecture, img_size, prototype_shape, num_classes, \
                     prototype_activation_function, add_on_layers_type, experiment_run

from preprocess import mean, std, preprocess_input_function
from tqdm import tqdm

In [52]:
base_architecture_to_features = {'resnet18': resnet18_features,
                                 'resnet34': resnet34_features,
                                 'resnet50': resnet50_features,
                                 'resnet101': resnet101_features,
                                 'resnet152': resnet152_features,
                                 'densenet121': densenet121_features,
                                 'densenet161': densenet161_features,
                                 'densenet169': densenet169_features,
                                 'densenet201': densenet201_features,
                                 'vgg11': vgg11_features,
                                 'vgg11_bn': vgg11_bn_features,
                                 'vgg13': vgg13_features,
                                 'vgg13_bn': vgg13_bn_features,
                                 'vgg16': vgg16_features,
                                 'vgg16_bn': vgg16_bn_features,
                                 'vgg19': vgg19_features,
                                 'vgg19_bn': vgg19_bn_features}


class PrefNet(nn.Module):

    def __init__(self, features, img_size, prototype_shape,
                 proto_layer_rf_info, num_classes, init_weights=True,
                 prototype_activation_function='log',
                 add_on_layers_type='bottleneck', 
                k = 3):

        super(PrefNet, self).__init__()
        self.img_size = img_size
        self.prototype_shape = prototype_shape
        self.num_prototypes = prototype_shape[0]
        self.num_classes = num_classes
        self.epsilon = 1e-4
        self.k = k
        
        # this has to be named features to allow the precise loading
        self.features = features

        features_name = str(self.features).upper()
        if features_name.startswith('VGG') or features_name.startswith('RES'):
            first_add_on_layer_in_channels = \
                [i for i in features.modules() if isinstance(i, nn.Conv2d)][-1].out_channels
        elif features_name.startswith('DENSE'):
            first_add_on_layer_in_channels = \
                [i for i in features.modules() if isinstance(i, nn.BatchNorm2d)][-1].num_features
        else:
            raise Exception('other base base_architecture NOT implemented')

        if add_on_layers_type == 'bottleneck':
            add_on_layers = []
            current_in_channels = first_add_on_layer_in_channels
            while (current_in_channels > self.prototype_shape[1]) or (len(add_on_layers) == 0):
                current_out_channels = max(self.prototype_shape[1], (current_in_channels // 2))
                add_on_layers.append(nn.Conv2d(in_channels=current_in_channels,
                                               out_channels=current_out_channels,
                                               kernel_size=1))
                add_on_layers.append(nn.ReLU())
                add_on_layers.append(nn.Conv2d(in_channels=current_out_channels,
                                               out_channels=current_out_channels,
                                               kernel_size=1))
                if current_out_channels > self.prototype_shape[1]:
                    add_on_layers.append(nn.ReLU())
                else:
                    assert(current_out_channels == self.prototype_shape[1])
                    add_on_layers.append(nn.Sigmoid())
                current_in_channels = current_in_channels // 2
            self.add_on_layers = nn.Sequential(*add_on_layers)
        else:
            self.add_on_layers = nn.Sequential(
                nn.Conv2d(in_channels=first_add_on_layer_in_channels, out_channels=self.prototype_shape[1], kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=self.prototype_shape[1], out_channels=self.prototype_shape[1], kernel_size=1),
                nn.Sigmoid()
                )
        
#         self.prototype_vectors = nn.Parameter(torch.rand(self.prototype_shape),
#                                               requires_grad=True)

#         # do not make this just a tensor,
#         # since it will not be moved automatically to gpu
#         self.ones = nn.Parameter(torch.ones(self.prototype_shape),
#                                  requires_grad=False)

#         self.last_layer = nn.Linear(self.num_prototypes, self.num_classes,
#                                     bias=False) # do not use bias


        self.img_conv = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3),
            nn.Sigmoid()
            )
        
        self.pattern_conv = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3),
            nn.Sigmoid()
            )
        
        self.fc1 = nn.Linear(6400, 512)
        self.fc2 = nn.Linear(512, 32)
        self.fc3 = nn.Linear(32, 1)

        if init_weights:
            self._initialize_weights()
            
            
    def conv_features(self, x):
        '''
        the feature input to prototype layer
        '''
        # Insert k and then img size
        x = self.features(x)
        x = self.add_on_layers(x)
        return x
    
    def forward(self, x, p):
        # (N, 512, 7, 7)
        x = self.conv_features(x)
        x = self.img_conv(x)
        #print("img_conv out shape: ", x.shape)
        
        p = self.conv_features(p)
        p = self.pattern_conv(p)
        #print("pattern_conv out shape: ", p.shape)
        
        out = torch.cat((x, p), dim=1)
        #print("cat out shape: ", out.shape)
        out = torch.flatten(out, 1) # flatten all dimensions except batch
        #print("flatten out shape: ", out.shape)
        
       
        out = torch.sigmoid(self.fc1(out))
        out = torch.sigmoid(self.fc2(out))
        out = self.fc3(out)
        return out

    
    def _initialize_weights(self):
        for m in self.add_on_layers.modules():
            if isinstance(m, nn.Conv2d):
                # every init technique has an underscore _ in the name
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)




            
def construct_PrefNet(base_architecture, pretrained=True, img_size=224,
                    prototype_shape=(2000, 512, 1, 1), num_classes=200,
                    prototype_activation_function='log',
                    add_on_layers_type='bottleneck',
                    k = 3):
    features = base_architecture_to_features[base_architecture](pretrained=pretrained)
    layer_filter_sizes, layer_strides, layer_paddings = features.conv_info()
    proto_layer_rf_info = compute_proto_layer_rf_info_v2(img_size=img_size,
                                                         layer_filter_sizes=layer_filter_sizes,
                                                         layer_strides=layer_strides,
                                                         layer_paddings=layer_paddings,
                                                         prototype_kernel_size=prototype_shape[2])
    return PrefNet(features=features,
                 img_size=img_size,
                 prototype_shape=prototype_shape,
                 proto_layer_rf_info=proto_layer_rf_info,
                 num_classes=num_classes,
                 init_weights=True,
                 prototype_activation_function=prototype_activation_function,
                 add_on_layers_type=add_on_layers_type,
                 k = k)


def paired_cross_entropy_loss(out1, out2, target):
    if target == -1:
        p1 = torch.exp(out1)/(torch.exp(out1) + torch.exp(out2))
        loss = - torch.log(p1)
    elif target == 1:
        p2 = torch.exp(out2)/(torch.exp(out1) + torch.exp(out2))
        loss = - torch.log(p2)
        
    else:
        p1 = torch.exp(out1)/(torch.exp(out1) + torch.exp(out2))
        p2 = torch.exp(out2)/(torch.exp(out1) + torch.exp(out2))
        
        loss = - (0.5*torch.log(p1) + 0.5*torch.log(p2))
        
    return loss


In [53]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cuda device


In [54]:
normalize = transforms.Normalize(mean=mean,
                                 std=std)

trans = transforms.Compose([
    transforms.Resize(size=(img_size, img_size)),
    transforms.ToTensor(),
    normalize
])

In [33]:
k = 1
csv_name = "./human_comparisons/rating_s=5_k=1_500.csv"
if os.path.exists(csv_name):
    comp_df = pd.read_csv(csv_name)

In [55]:
split = 0.7
df_len = len(comp_df)
train_set = []
test_set = []
split_idx = int(df_len*split)
for i in range(split_idx):
    for j in range(i+1, split_idx):
        if comp_df.iloc[i]['rating'] > comp_df.iloc[j]['rating']:
            train_set.append([i, j, -1])
        elif comp_df.iloc[i]['rating'] < comp_df.iloc[j]['rating']:
            train_set.append([i, j, 1])
            
for i in range(split_idx, df_len):
    for j in range(i+1, df_len):
        if comp_df.iloc[i]['rating'] > comp_df.iloc[j]['rating']:
            test_set.append([i, j, -1])
        elif comp_df.iloc[i]['rating'] < comp_df.iloc[j]['rating']:
            test_set.append([i, j, 1])
print(len(train_set))
print(len(test_set))

40449
5021


In [56]:
'''
another way of splitting data
'''
#split = 0.7
#train_set = training_set[:int(len(training_set) * split)]
#test_set = training_set[int(len(training_set) * split):]
#print(len(train_set))
#print(len(test_set))

'\nanother way of splitting data\n'

In [57]:
images = []
patterns = []
for i in range(df_len):
    img = './human_comparisons/feedback_images/k=1/original_imgs/' + comp_df.iloc[i]['imgid'] + '.png'
    img = plt.imread(img)[:, :, :3]
    img = np.transpose(img, (2, 0, 1))
    images.append(torch.from_numpy(np.array([img])))
    pattern = './human_comparisons/feedback_images/k=1/patterns/' + comp_df.iloc[i]['imgid'] + '.npy'
    pattern = np.load(pattern)
    pattern = np.array([pattern, pattern, pattern])
    patterns.append(torch.from_numpy(np.array([pattern])))
print(len(images))
print(images[100].shape)
print(patterns[100].shape)

500
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])


In [62]:
prefnet = construct_PrefNet("resnet18")
prefnet.to(device)
prefnet.train()
pref_optimizer = optim.Adam([{'params': prefnet.img_conv.parameters(), 'lr': 1e-4}, {'params': prefnet.pattern_conv.parameters(), 'lr': 1e-4}, {'params': prefnet.fc1.parameters(), 'lr': 1e-4},
                            {'params': prefnet.fc2.parameters(), 'lr': 1e-4}, {'params': prefnet.fc3.parameters(), 'lr': 1e-4}])

In [63]:
prefnet(images[10].cuda(), patterns[10].cuda())

tensor([[0.3610]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [60]:
epochs = 1

In [66]:
pref_optimizer = optim.Adam([{'params': prefnet.img_conv.parameters(), 'lr': 1e-5}, {'params': prefnet.pattern_conv.parameters(), 'lr': 1e-5}, {'params': prefnet.fc1.parameters(), 'lr': 1e-5},
                            {'params': prefnet.fc2.parameters(), 'lr': 1e-5}, {'params': prefnet.fc3.parameters(), 'lr': 1e-5}])

In [67]:
for epoch in range(epochs):
    shuffled_idx = np.random.permutation(len(train_set))
    for i in range(len(train_set)):
        if i % 100 == 0:
            last_100_losses = []
            last_100_error_count = 0
        idx = shuffled_idx[i]
        left_idx, right_idx, target = train_set[idx]
        left_img, right_img = images[left_idx], images[right_idx]
        left_pattern, right_pattern = patterns[left_idx], patterns[right_idx]
        target = torch.tensor(target).cuda().float()
        
        out1 = prefnet(left_img.cuda().float(), left_pattern.cuda().float())
        out2 = prefnet(right_img.cuda().float(), right_pattern.cuda().float())

        # in your training loop:
        pref_optimizer.zero_grad()   # zero the gradient buffers
        if out1 > out2 and target == 1:
            last_100_error_count += 1
            
        elif out1 < out2 and target == -1:
            last_100_error_count += 1
        loss = paired_cross_entropy_loss(out1, out2, target)
        loss.backward()
        pref_optimizer.step()   
        
        last_100_losses.append(loss.data.cpu().numpy()[0])
        
        if i % 100 == 0:
            print(epoch, i, np.sum(last_100_losses))
        if i % 100 == 99:
            print(epoch, i, last_100_error_count)

0 0 0.0007888209
0 99 0
0 100 1.0788499e-05
0 199 0
0 200 0.0002191903
0 299 0
0 300 0.00019516466
0 399 0
0 400 0.00025484234
0 499 0
0 500 5.9604645e-08
0 599 0
0 600 7.212188e-06
0 699 0
0 700 1.0728842e-06
0 799 0
0 800 0.00023928167
0 899 0
0 900 5.9604645e-08
0 999 0
0 1000 0.0066432594
0 1099 0
0 1100 4.4466055e-05
0 1199 0
0 1200 0.0
0 1299 0
0 1300 7.15256e-07
0 1399 0
0 1400 6.556513e-07
0 1499 0
0 1500 0.00085688574
0 1599 0
0 1600 0.0002274772
0 1699 0
0 1700 0.001514747
0 1799 0
0 1800 0.00018246647
0 1899 0
0 1900 0.00062049803
0 1999 0
0 2000 0.00056097744
0 2099 0
0 2100 2.384186e-07
0 2199 0
0 2200 5.9604645e-08
0 2299 0
0 2300 0.00035977876
0 2399 0
0 2400 0.0
0 2499 0
0 2500 0.0
0 2599 0
0 2600 2.9802328e-07
0 2699 0
0 2700 0.00019713199
0 2799 0
0 2800 0.0029258574
0 2899 0
0 2900 0.00014872465
0 2999 0
0 3000 3.5762793e-07
0 3099 0
0 3100 4.875779e-05
0 3199 0
0 3200 0.00036109053
0 3299 0
0 3300 0.0
0 3399 0
0 3400 0.0
0 3499 0
0 3500 0.0
0 3599 0
0 3600 0.0463058

In [26]:
#prefnet = torch.load('./human_comparisons/pref_model_500rating_split0.7_acc0.82.pth')

In [68]:
'''
Testing reward model
'''
acc = []
#error_images = []
error_count = 0
for i in tqdm(range(len(test_set))):
    left_idx, right_idx, target = test_set[i]
    left_img, right_img = images[left_idx], images[right_idx]
    left_pattern, right_pattern = patterns[left_idx], patterns[right_idx]
    target = torch.tensor(target).cuda().float()

    out1 = prefnet(left_img.cuda().float(), left_pattern.cuda().float())
    out2 = prefnet(right_img.cuda().float(), right_pattern.cuda().float())
    #print(out1)
    #print(out2)
    
    
    if out1 > out2:
        y_pred = -1
        
    else:
        y_pred = 1
    
    #print(y_pred)
    #print("")
    if y_pred == target:
        acc.append(1)
    else:
        #error_images.append((i, y_pred, target))
        error_count += 1
        acc.append(0)
    #print(out1, out2, target)
        
print(np.mean(acc), error_count)

100%|██████████| 5021/5021 [01:04<00:00, 77.47it/s]

0.8111929894443338 948





In [212]:
torch.save(prefnet, './human_comparisons/pref_model_500rating_split0.7_acc0.82.pth')