In [27]:
import torch
from keras.preprocessing import image
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from torch import nn

## Read train file

Get all_train: {image file name - image tag}

In [3]:
# Root folders of the DatasetA train and test splits; both live under data/0813.
_dataset_dir = "data/0813"
train_root_path = "/".join([_dataset_dir, "DatasetA_train_20180813"])
test_root_path = "/".join([_dataset_dir, "DatasetA_test_20180813", "DatasetA_test"])

In [4]:
# train.txt is read as tab-separated with no header row; its two columns
# are then named 'filename' and 'tag'.
train_list_path = os.path.join(train_root_path, 'train.txt')
all_train = pd.read_csv(train_list_path, sep='\t', header=None)
all_train.columns = ['filename', 'tag']

Load the images in the order of the image file names listed in all_train  
X_train: the images as one array, with shape (image_count, height, width, channel)

### Pre-divided Train and Valid

In [5]:
def _read_split(list_name):
    """Read a tab-separated, header-less split file from the training root.

    Returns a DataFrame whose two columns are named 'filename' and 'tag'.
    """
    split = pd.read_csv(os.path.join(train_root_path, list_name), sep='\t', header=None)
    split.columns = ['filename', 'tag']
    return split


def _load_images(folder, filenames, target_size=(100, 100)):
    """Load every named image from ``folder`` and stack them into one array.

    Each image is resized to ``target_size`` on load and converted with
    ``image.img_to_array``; the result keeps the order of ``filenames``.
    """
    return np.array([
        image.img_to_array(
            image.load_img(os.path.join(folder, name), target_size=target_size)
        )
        for name in filenames
    ])


# Pre-divided train / validation lists (file name + tag per row).
Y_train = _read_split('train_local.txt')
Y_valid = _read_split('dev_local.txt')

rootpath = os.path.join(train_root_path, "train")  # folder holding the training images

# Both splits read their images from the same folder.
X_train = _load_images(rootpath, Y_train['filename'])
X_valid = _load_images(rootpath, Y_valid['filename'])
print('Train set has {} color images'.format(X_train.shape[0]))
print('Validation set has {} color images.'.format(X_valid.shape[0]))

print('Image shape:', X_train[0].shape)

Train set has 30170 color images
Validation set has 8051 color images.
Image shape: (100, 100, 3)


### Test set loading

In [6]:
# image.txt is read as tab-separated with no header row; its single column
# is then named 'filename'.
test_list_path = os.path.join(test_root_path, 'image.txt')
test_files = pd.read_csv(test_list_path, sep='\t', header=None)
test_files.columns = ['filename']

In [7]:
# Folder holding the test images named in test_files.
rootpath = os.path.join(test_root_path, "test")

# Load each listed image at 100x100, convert it to an array, and stack the
# results in list order.
X_test = np.array([
    image.img_to_array(image.load_img(rootpath + "/" + name, target_size=(100, 100)))
    for name in test_files['filename']
])

print('Test set has {} color images'.format(X_test.shape[0]))
print('Image shape:', X_test[0].shape)

Test set has 14633 color images
Image shape: (100, 100, 3)


### Attributes

tag2attr: {image tag - attribute values}, a pandas DataFrame whose columns are named using attr2name {attribute name}  
tag2label: {image tag - label}, maps each image tag to its English name  

In [8]:
# Read the three tab-separated, header-less metadata tables from the training root.
tag2attr, attr2name, tag2label = (
    pd.read_csv(os.path.join(train_root_path, table_file), sep='\t', header=None)
    for table_file in ('attributes_per_class.txt', 'attribute_list.txt', 'label_list.txt')
)
tag2label.columns = ['tag', 'label']

# attribute_list.txt: drop column 0 and name the remaining column 'name'.
attr2name = attr2name.drop([0], axis=1)
attr2name.columns = ['name']

# attributes_per_class.txt: first column is named 'tag', the rest take the
# attribute names in attr2name order.
col_name = ['tag'] + attr2name['name'].tolist()
tag2attr.columns = col_name

### Embedding vector

Reading the provided word vectors 

In [9]:
# Map each class word to its embedding vector. Every line is expected to be
# "<word> <v1> <v2> ..." separated by whitespace.
word_dict = {}
embed_path = os.path.join(train_root_path, 'class_wordembeddings.txt')
# Explicit encoding so parsing does not depend on the platform default.
# NOTE(review): assumes the file is UTF-8 (or plain ASCII) - confirm.
with open(embed_path, 'r', encoding='utf-8') as f:
    for line in f:
        parts = line.split()
        if len(parts) < 2:
            # Blank line or a word without a vector: nothing to store.
            continue
        word_dict[parts[0]] = np.array(parts[1:], dtype=float)

### Train and Zero-shot tags

In [10]:
# Distinct tags seen in the training list vs. distinct tags that have an
# attribute row; tags present only in the latter are the zero-shot tags.
train_tags, total_tags = (frame['tag'].unique() for frame in (all_train, tag2attr))
all_tag_set = set(total_tags)
seen_tag_set = set(train_tags)
zero_shot_tags = list(all_tag_set - seen_tag_set)

In [11]:
# Report the size of each tag set computed above.
tag_summary = "{} unique tags in total (Including Test Data). \n{} unique tags in Training Data. \n{} Zero-shot tags."
print(tag_summary.format(len(total_tags), len(train_tags), len(zero_shot_tags)))

230 unique tags in total (Including Test Data). 
190 unique tags in Training Data. 
40 Zero-shot tags.


all_train_attr: {image - label, tag and attribute values}, one row per image

In [12]:
def img_label_attr(df, tag2label=tag2label, tag2attr=tag2attr):
    """Expand every image tag in ``df`` into its label and attribute row.

    Parameters
    ----------
    df : DataFrame
        Must have a 'tag' column; one output row is produced per entry, in order.
    tag2label : DataFrame
        Must have a 'tag' column; the value in its second column is used as the label.
    tag2attr : DataFrame
        Must have a 'tag' column; the whole matching row (tag included) is copied.

    Returns
    -------
    DataFrame
        Columns are ['label', 'tag'] followed by the names in the module-level
        ``attr2name['name']``.

    Raises
    ------
    IndexError
        If a tag has no row in ``tag2label`` or ``tag2attr``.
    """
    # Build both lookups once instead of filtering the tables for every image.
    # setdefault keeps the first row for a tag if a tag appears more than once.
    label_by_tag = {}
    for key, row in zip(tag2label['tag'], tag2label.values.tolist()):
        label_by_tag.setdefault(key, row[1])
    attr_by_tag = {}
    for key, row in zip(tag2attr['tag'], tag2attr.values.tolist()):
        attr_by_tag.setdefault(key, row)

    Y_cate = []
    for tag in df['tag']:
        if tag not in label_by_tag or tag not in attr_by_tag:
            raise IndexError('tag {!r} not found in tag2label/tag2attr'.format(tag))
        Y_cate.append([label_by_tag[tag]] + attr_by_tag[tag])

    col_name = ['label'] + ['tag'] + attr2name['name'].tolist()
    # Passing the columns up front also yields a well-formed empty frame for empty input.
    train_attr = pd.DataFrame(Y_cate, columns=col_name)
    print('Converted {} image tags into categories'.format(train_attr.shape[0]))
    return train_attr

In [13]:
import random

# Lookups are built once up front, so this no longer filters the tables per pair.
def build_pair_dataset(X, Y, tag2label=tag2label, tag2attr=tag2attr, neg_num=8):
    """Build matched / mismatched (image, class description) pairs.

    For every image one positive pair (its own tag, match flag 1) is emitted,
    followed by ``neg_num`` negative pairs (distinct other tags drawn at random
    from ``tag2label['tag']``, match flag 0). The same image object is reused
    for all of its pairs.

    Parameters
    ----------
    X : iterable of images, aligned with ``Y['tag']``.
    Y : DataFrame with a 'tag' column.
    tag2label : DataFrame with a 'tag' column; its second column is the label.
    tag2attr : DataFrame with a 'tag' column; the whole row (tag included) is copied.
    neg_num : int
        Number of negative classes sampled per image (default 8).

    Returns
    -------
    (X_new, Y_attr, label_match)
        ``X_new`` is a list of images, ``Y_attr`` a DataFrame with columns
        ['label', 'tag'] + the names in the module-level ``attr2name['name']``,
        and ``label_match`` a list of 1/0 flags; all three are row-aligned.

    Raises
    ------
    IndexError
        If a tag has no row in ``tag2label`` or ``tag2attr``.
    ValueError
        If fewer than ``neg_num`` other tags are available to sample from
        (the previous rejection loop would never terminate in that case).
    """
    # setdefault keeps the first row for a tag if a tag appears more than once.
    label_by_tag = {}
    for key, row in zip(tag2label['tag'], tag2label.values.tolist()):
        label_by_tag.setdefault(key, row[1])
    attr_by_tag = {}
    for key, row in zip(tag2attr['tag'], tag2attr.values.tolist()):
        attr_by_tag.setdefault(key, row)

    def _describe(tag):
        # One output row: label followed by the tag's full attribute row.
        if tag not in label_by_tag or tag not in attr_by_tag:
            raise IndexError('tag {!r} not found in tag2label/tag2attr'.format(tag))
        return [label_by_tag[tag]] + attr_by_tag[tag]

    tags = tag2label['tag'].unique().tolist()

    Y_cate = []
    label_match = []
    X_new = []
    for tag, data in zip(Y['tag'], X):
        # Positive pair.
        Y_cate.append(_describe(tag))
        label_match.append(1)
        X_new.append(data)

        # Negative pairs: distinct tags other than the true one.
        candidates = [other for other in tags if other != tag]
        if len(candidates) < neg_num:
            raise ValueError(
                'need at least {} other tags to sample negatives, found {}'.format(
                    neg_num, len(candidates)))
        for neg_tag in random.sample(candidates, neg_num):
            Y_cate.append(_describe(neg_tag))
            label_match.append(0)
            X_new.append(data)

    col_name = ['label'] + ['tag'] + attr2name['name'].tolist()
    Y_attr = pd.DataFrame(Y_cate, columns=col_name)
    print('Converted {} image tags into categories'.format(Y_attr.shape[0]))
    return X_new, Y_attr, label_match

In [14]:
# Expand the training images into positive and negative (image, class) pairs.
X_new_train, Y_attr_train, label_match_train = build_pair_dataset(X_train, Y_train)

Converted 271530 image tags into categories


In [15]:
# Expand the validation images into positive and negative (image, class) pairs.
X_new_valid, Y_attr_valid, label_match_valid = build_pair_dataset(X_valid, Y_valid)

Converted 72459 image tags into categories


In [12]:
# Y_train_attr = img_label_attr(Y_train)
# Y_valid_attr = img_label_attr(Y_valid)

Converted 30170 image tags into categories
Converted 8051 image tags into categories


## Keras

### Inference

In [None]:
# X_test = X_valid[0:10]
# NOTE(review): `img2sematic` is not defined anywhere in the visible notebook,
# so this cell fails on a fresh kernel unless the model is created elsewhere.
Y_test = img2sematic.predict(X_test)

In [None]:
def cos_dis(v1, v2, metric='euclidean'):
    """Return a distance between two vectors (smaller means closer).

    Despite its name, this function has always returned the Euclidean (L2)
    distance, and that remains the default so existing callers are unaffected.
    Pass ``metric='cosine'`` to get the cosine distance ``1 - cos(v1, v2)``.

    Parameters
    ----------
    v1, v2 : array-like of the same shape.
    metric : {'euclidean', 'cosine'}

    Returns
    -------
    float
        The distance. For ``'cosine'``, a zero-length vector yields 1.0
        (treated as orthogonal) rather than a division by zero.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names.
    """
    if metric == 'euclidean':
        return np.linalg.norm(v1 - v2)
    if metric == 'cosine':
        a = np.asarray(v1, dtype=float).ravel()
        b = np.asarray(v2, dtype=float).ravel()
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        if denom == 0:
            return 1.0
        return 1.0 - float(np.dot(a, b) / denom)
    raise ValueError("metric must be 'euclidean' or 'cosine', got {!r}".format(metric))

In [None]:
# Split the embedding table into two parallel lists: keys[i] is the word
# whose vector is values[i].
keys = list(word_dict.keys())
values = list(word_dict.values())


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
# For every predicted vector, find the embedding with the smallest cos_dis
# score and record the tag(s) whose label equals that embedding's word.
res = []

for v1 in Y_test:
    score = [cos_dis(v1, candidate) for candidate in values]
    nearest = score.index(min(score))  # first index holding the minimum

    label = keys[nearest]
    matching_rows = tag2label[tag2label['label'] == label]
    res.append(matching_rows['tag'].tolist())

In [None]:
# Place the test file names side by side with the predicted tag column(s).
result = pd.concat([test_files,pd.DataFrame(res)],axis=1)

### Write to submit.txt

In [None]:
# Tab-separated output with neither a header row nor the index column.
result.to_csv('submit.txt', sep='\t', header=None, index=None)

## PyTorch (Not Done)

In [19]:
from torch.utils.data import Dataset, DataLoader

class ZJL_DS(Dataset):
    """Row-aligned (image, class description, match flag) samples.

    Parameters
    ----------
    X : indexable of images (one per row of ``Y_attr``).
    Y_attr : DataFrame with 'tag' and 'label' columns; every other column is
        treated as an attribute value.
    label_match : indexable of match flags, aligned with ``X``.

    Each item is ``(image, (tag, label, attr), label_match)`` where ``attr`` is
    the row's attribute values as a pandas Series.
    """

    def __init__(self, X, Y_attr, label_match):
        # images, indexable by position
        self.img = X
        # pandas Series
        self.tag = Y_attr['tag']
        self.label = Y_attr['label']
        # DataFrame holding only the attribute columns
        self.attr = Y_attr.drop(['label', 'tag'], axis=1)
        self.label_match = label_match

    def __len__(self):
        return len(self.img)

    def __getitem__(self, idx):
        img_arr = self.img[idx]
        # Positional lookups throughout, so the frame's index labels do not
        # have to be 0..n-1 (``attr`` was already positional).
        tag = self.tag.iloc[idx]
        label = self.label.iloc[idx]
        attr = self.attr.iloc[idx, :]
        label_match = self.label_match[idx]
        info = (tag, label, attr)
        return img_arr, info, label_match

    def show_data(self, idx):
        """Plot sample ``idx`` and print its label, tag, attributes and match flag."""
        img_arr, (tag, label, attr), label_match = self[idx]
        # Draw this dataset's own image rather than a module-level array.
        plt.imshow(image.array_to_img(img_arr))
        print("Image index:{} \n\nLabel: {} \n\nTag: {} \n\nAttribute:\n{} \nlabel_match: {}"\
              .format(idx, label, tag, attr, label_match))
        

In [20]:
# Wrap the paired train / validation samples as datasets.
train_ds = ZJL_DS(X_new_train, Y_attr_train, label_match_train)
valid_ds = ZJL_DS(X_new_valid, Y_attr_valid, label_match_valid)
# # get train data (each item is a 3-tuple: image, info, label_match)
# img_arr, (tag, label, attr), label_match = train_ds[32]
# label_embedding = word_dict[label]
# inspect one sample: plots the image and prints its label, tag and attributes
train_ds.show_data(2502)

Image index:2502 

Label: tarantula 

Tag: ZJL10 

Attribute:
is animal               1.0
is transportation       0.0
is clothes              0.0
is plant                0.0
is tableware            0.0
is device               0.0
is black                0.5
is white                0.0
is blue                 0.0
is brown                0.7
is orange               0.0
is red                  0.0
is green                0.0
is yellow               0.2
has feathers            0.0
has four legs           0.0
has two legs            0.0
has two arms            0.0
is for entertainment    0.0
is for business         0.0
is for communication    0.0
is for family           0.0
is for office use       0.0
is for personal         0.0
is gorgeous             0.0
is simple               0.0
is elegant              0.0
is cute                 0.0
is pure                 0.0
is naive                0.0
Name: 2502, dtype: float64 
label_match: 1


In [15]:
# Number of samples in the training dataset (length of its image container).
# NOTE(review): the recorded output below may predate the pair expansion - re-run to confirm.
len(train_ds)

30170

In [36]:
import torch.nn as nn
import torchvision.models as models
from torch.autograd import Variable

# Pretrained ResNet-18 with its last child module removed, used as a fixed
# feature extractor.
resnet18 = models.resnet18(pretrained=True)
modules = list(resnet18.children())[:-1]
resnet18 = nn.Sequential(*modules)  # .cuda()

# Freeze the weights so the optimizer never updates the backbone.
for p in resnet18.parameters():
    p.requires_grad = False

# requires_grad=False does not change layer behaviour: without eval() the
# BatchNorm layers keep normalising with per-batch statistics and keep
# updating their running averages on every forward pass.
resnet18.eval()

In [42]:
class AverageMeter(object):
    """Keeps the latest value plus a running, count-weighted mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set the latest value, mean, total and count back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, counted ``n`` times in the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

class SimpleNN(torch.nn.Module):
    """Fully connected network: two hidden layers of width ``H`` with
    ReLU-style clamping, followed by a linear output of width ``D_out``."""

    def __init__(self, D_in, H, D_out):
        super().__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.middle_linear = torch.nn.Linear(H, H)
        self.output_linear = torch.nn.Linear(H, D_out)
        # Registered but not applied in forward(); the raw outputs are returned.
        self.softmax = torch.nn.Softmax()

    def forward(self, x):
        """Return the un-normalised outputs for input batch ``x``."""
        hidden = torch.clamp(self.input_linear(x), min=0)
        hidden = torch.clamp(self.middle_linear(hidden), min=0)
        return self.output_linear(hidden)

class SimpleNN_pair(torch.nn.Module):
    """Two-input matcher: projects ``x`` and ``y`` to a shared width, joins
    them, and outputs log-probabilities over two classes.

    NOTE(review): `_algo_1_horiz_comp`, `_algo_2_vert_comp` and
    `concat_attention` are never called from `forward`, and they reference
    `F`, `self.filter_widths`, `self.n_holistic_filters`,
    `self.n_per_dim_filters` and `self.n_word_dim`, none of which are imported
    or set in the visible code - they would fail if called as-is.
    """

    def __init__(self, D_in_x, D_in_y, H, dropout=0.5):
        """Create the layers.

        D_in_x / D_in_y are the input widths of the two projections, H is the
        shared hidden width, and dropout is the rate used in the final stack.
        """

        super(SimpleNN_pair, self).__init__()
        self.input_linear_x = torch.nn.Linear(D_in_x, H)
        self.input_linear_y = torch.nn.Linear(D_in_y, H)
        # middle_linear and softmax are registered but unused in forward().
        self.middle_linear = torch.nn.Linear(H, H)
        self.softmax = torch.nn.Softmax()
        
        # Classifier over the concatenated projections (width 2 * H), ending
        # in a log-softmax over dim 1 with two outputs.
        self.final_layers = nn.Sequential(
            nn.Linear(2 * H, H),
            nn.Tanh(),
            nn.Dropout(dropout),
            nn.Linear(H, 2),
            nn.LogSoftmax(1)
        )
        
    def _algo_1_horiz_comp(self, sent1_block_a, sent2_block_a):
        """For each pooling name ('max', 'min', 'mean'), concatenate the
        per-width tensors of both inputs along dim 2, then collect their
        cosine similarity (dim 2) and per-item pairwise distances; the
        collected features are concatenated along dim 1.

        NOTE(review): depends on `self.filter_widths`,
        `self.n_holistic_filters` and `F`, which are not defined in view.
        """
        comparison_feats = []
        for pool in ('max', 'min', 'mean'):
            regM1, regM2 = [], []
            for ws in self.filter_widths:
                x1 = sent1_block_a[ws][pool].unsqueeze(2)
                x2 = sent2_block_a[ws][pool].unsqueeze(2)
                # Entries keyed by an infinite width are expanded to
                # n_holistic_filters along dim 1 before concatenation.
                if np.isinf(ws):
                    x1 = x1.expand(-1, self.n_holistic_filters, -1)
                    x2 = x2.expand(-1, self.n_holistic_filters, -1)
                regM1.append(x1)
                regM2.append(x2)

            regM1 = torch.cat(regM1, dim=2)
            regM2 = torch.cat(regM2, dim=2)

            # Cosine similarity
            comparison_feats.append(F.cosine_similarity(regM1, regM2, dim=2))
            # Euclidean distance
            pairwise_distances = []
            for x1, x2 in zip(regM1, regM2):
                dist = F.pairwise_distance(x1, x2).view(1, -1)
                pairwise_distances.append(dist)
            comparison_feats.append(torch.cat(pairwise_distances))

        return torch.cat(comparison_feats, dim=1)
    
    def _algo_2_vert_comp(self, sent1_block_a, sent2_block_a, sent1_block_b, sent2_block_b):
        """Collect cosine similarity, pairwise distance and absolute
        difference for pairs of pooled tensors from the "a" blocks, and for
        per-filter slices of the "b" blocks; the features are concatenated
        along dim 1.

        NOTE(review): depends on `self.filter_widths`,
        `self.n_per_dim_filters` and `F`, which are not defined in view.
        """
        comparison_feats = []
        ws_no_inf = [w for w in self.filter_widths if not np.isinf(w)]
        for pool in ('max', 'min', 'mean'):
            for ws1 in self.filter_widths:
                x1 = sent1_block_a[ws1][pool]
                for ws2 in self.filter_widths:
                    x2 = sent2_block_a[ws2][pool]
                    # Compare only when both widths are finite or both are infinite.
                    if (not np.isinf(ws1) and not np.isinf(ws2)) or (np.isinf(ws1) and np.isinf(ws2)):
                        comparison_feats.append(F.cosine_similarity(x1, x2).unsqueeze(1))
                        comparison_feats.append(F.pairwise_distance(x1, x2).unsqueeze(1))
                        comparison_feats.append(torch.abs(x1 - x2))

        for pool in ('max', 'min'):
            for ws in ws_no_inf:
                oG_1B = sent1_block_b[ws][pool]
                oG_2B = sent2_block_b[ws][pool]
                # Compare the two inputs slice by slice along the last axis.
                for i in range(0, self.n_per_dim_filters):
                    x1 = oG_1B[:, :, i]
                    x2 = oG_2B[:, :, i]
                    comparison_feats.append(F.cosine_similarity(x1, x2).unsqueeze(1))
                    comparison_feats.append(F.pairwise_distance(x1, x2).unsqueeze(1))
                    comparison_feats.append(torch.abs(x1 - x2))

        return torch.cat(comparison_feats, dim=1)
    
    def concat_attention(self, sent1, sent2):
        """Weight each input by a softmax over summed, norm-scaled dot
        products with the other input, and return each weighted tensor
        concatenated with its original along dim 1.

        NOTE(review): depends on `self.n_word_dim` and `F`, which are not
        defined in view.
        """
        sent1_transposed = sent1.transpose(1, 2)
        # Batched dot products divided by the product of L2 norms.
        attention_dot = torch.bmm(sent1_transposed, sent2)
        sent1_norms = torch.norm(sent1_transposed, p=2, dim=2, keepdim=True)
        sent2_norms = torch.norm(sent2, p=2, dim=1, keepdim=True)
        attention_norms = torch.bmm(sent1_norms, sent2_norms)
        attention_matrix = attention_dot / attention_norms

        sum_row = attention_matrix.sum(2)
        sum_col = attention_matrix.sum(1)

        attention_weight_vec1 = F.softmax(sum_row, 1)
        attention_weight_vec2 = F.softmax(sum_col, 1)

        attention_weighted_sent1 = attention_weight_vec1.unsqueeze(1).expand(-1, self.n_word_dim, -1) * sent1
        attention_weighted_sent2 = attention_weight_vec2.unsqueeze(1).expand(-1, self.n_word_dim, -1) * sent2
        attention_emb1 = torch.cat((attention_weighted_sent1, sent1), dim=1)
        attention_emb2 = torch.cat((attention_weighted_sent2, sent2), dim=1)
        return attention_emb1, attention_emb2
    
    def forward(self, x, y):
        """Return the two-class log-probabilities for each (x, y) pair.

        ``y`` is cast to float; both inputs are projected to width H with a
        clamp at zero, concatenated along dim 1, and passed through
        ``final_layers``.
        """
        y = y.float()
        hx_relu = self.input_linear_x(x).clamp(min=0)
        hy_relu = self.input_linear_y(y).clamp(min=0)
        # hx_relu = self.middle_linear(hx_relu).clamp(min=0)
        # hy_relu = self.middle_linear(hy_relu).clamp(min=0)
        feat_all = torch.cat([hx_relu, hy_relu], dim=1)
        y_pred = self.final_layers(feat_all)
        return y_pred
    
class ImageLoader(Dataset):
    """Adapts an indexable of ``(image, info, match)`` records for a DataLoader.

    Parameters
    ----------
    data : indexable whose items are ``(image, info, match)``; ``info[2]`` must
        be an iterable of attribute values.
    transform : callable applied to the image after it is cast to ``uint8``.

    Each item is ``(transform(uint8 image), attribute array, match)``.
    """

    def __init__(self, data, transform):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def build_attr(self, attr):
        """Return the values of ``attr`` as a numpy array."""
        return np.array(list(attr))

    def __getitem__(self, idx):
        # Fetch the record once; the previous code indexed self.data three
        # times per sample, repeating the underlying lookups each time.
        img, info, match = self.data[idx]
        return self.transform(np.uint8(img)), self.build_attr(info[2]), match

def train(train_loader, model, criterion, optimizer):
    """Run one training epoch and return the mean loss.

    Parameters
    ----------
    train_loader : iterable of ``(images, attributes, match_labels)`` batches.
    model : called as ``model(features, attributes)``.
    criterion : called as ``criterion(predictions, match_labels)``.
    optimizer : optimizer over ``model``'s parameters.

    Returns
    -------
    numpy.float64
        Loss averaged over all samples seen (0.0 if the loader is empty).

    Image features come from the module-level ``resnet18`` backbone.
    """
    losses = AverageMeter()
    model.train()

    for i_batch, (sample_batched, sample_attr, label_match) in enumerate(train_loader):
        # No gradients are needed for the backbone's forward pass.
        with torch.no_grad():
            features = resnet18(sample_batched)  # .cuda()
        # Flatten everything after the batch axis. A bare squeeze() would also
        # drop the batch axis when a batch holds a single sample.
        features = features.view(features.size(0), -1)

        y_pred = model(features, sample_attr)

        loss = criterion(y_pred, label_match)

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # item() reads the scalar loss as a Python float; indexing a 0-dim
        # tensor with [0] is not supported by current PyTorch.
        losses.update(loss.item(), sample_batched.size(0))
        if i_batch % 1000 == 0:
            print("batch {}, loss: {}".format(i_batch, losses.val))
    return np.float64(losses.avg)

def validation(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` and return the mean loss.

    Mirrors ``train``: batches are ``(images, attributes, match_labels)``, the
    model is called as ``model(features, attributes)`` and the loss is computed
    against the match labels. The previous version unpacked two-item batches,
    called the model with one argument and moved tensors to CUDA, which does
    not match the loaders and CPU model used in this notebook.

    Returns
    -------
    numpy.float64
        Loss averaged over all samples seen (0.0 if the loader is empty).

    Image features come from the module-level ``resnet18`` backbone.
    """
    losses = AverageMeter()
    model.eval()

    # Evaluation only: no gradients are needed anywhere.
    with torch.no_grad():
        for sample_batched, sample_attr, label_match in val_loader:
            features = resnet18(sample_batched)
            # Flatten everything after the batch axis, keeping the batch axis
            # even for a single-sample batch.
            features = features.view(features.size(0), -1)

            y_pred = model(features, sample_attr)

            loss = criterion(y_pred, label_match)
            losses.update(loss.item(), sample_batched.size(0))

    return np.float64(losses.avg)

In [43]:
import torchvision
# Per-channel mean / std passed to Normalize (the values commonly used with
# ImageNet-pretrained torchvision models).
input_mean = [0.485, 0.456, 0.406]
input_std = [0.229, 0.224, 0.225]

# Array -> PIL image -> 224x224 -> tensor -> normalised tensor.
tv_transforms = torchvision.transforms
t_transforms = [
    tv_transforms.ToPILImage(),
    tv_transforms.Resize((224, 224)),
    tv_transforms.ToTensor(),
    tv_transforms.Normalize(input_mean, input_std),
]
transformer = tv_transforms.Compose(t_transforms)

# Both loaders share the same batching options.
loader_options = dict(batch_size=4, shuffle=True, num_workers=4)

train_dataset = ImageLoader(train_ds, transformer)
train_dataloader = DataLoader(train_dataset, **loader_options)

valid_dataset = ImageLoader(valid_ds, transformer)
valid_dataloader = DataLoader(valid_dataset, **loader_options)

# Image feature width 512, attribute width 30, hidden width 128.
model = SimpleNN_pair(512, 30, 128) # .cuda()

# criterion = torch.nn.MSELoss()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

# One training pass followed by one validation pass per epoch.
for t in range(50):
    train_loss = train(train_dataloader, model, criterion, optimizer)
    valid_loss = validation(valid_dataloader, model, criterion)
    print('train loss : {}, validation loss : {}'.format(train_loss, valid_loss))
    
    



0.6481333
0.5940957


Process Process-17:
Process Process-20:
Process Process-18:
Process Process-19:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/victor/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/victor/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/victor/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/victor/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/victor/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/victor/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/victor/anaco

KeyboardInterrupt: 