In [18]:

import torch.nn as nn
from torchvision import models
import torch
import torchvision.transforms as transforms
from PIL import Image
from transformers import AutoTokenizer, AutoModel
import pandas as pd

In [19]:
if torch.cuda.is_available():
    device = torch.device("cuda")  # GPUデバイスを取得
else:
    device = torch.device("cpu")  # CPUデバイスを取得

In [20]:
"""
画像処理のモデル
"""

class ImageEncoder(nn.Module):
    def __init__(self, embedding_size):
        super(ImageEncoder, self).__init__()
        self.resnet50 = models.resnet50(pretrained=True)
        self.fc = nn.Linear(self.resnet50.fc.out_features, embedding_size)
    
    def forward(self, x):
        x = self.resnet50(x)
        x = self.fc(x)
        return x

In [21]:
"""
テキスト処理のモデル
"""
class CaptionEncoder(nn.Module):
  def __init__(self):
    super().__init__()
    self.bert = AutoModel.from_pretrained("cl-tohoku/bert-base-japanese-v2")
  def forward(self, x):
    x = self.bert(x)
    x = torch.max(x.last_hidden_state, dim=1)[0]  # max pooling
    return x

In [22]:
import sys
import os
sys.path.append(os.path.abspath("../"))
from learning.CustomDataset import EmbeddingDataset
dataset = EmbeddingDataset('../learning/data/anotation_new.csv')

(635192, 3)


In [23]:
from torch.utils.data import DataLoader
batch_size = 32
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size]
)

In [24]:

import torch.nn.functional as F

from models.ContrastiveLoss import ContrastiveLoss

image_model = ImageEncoder(768).to(device)
caption_model = CaptionEncoder().to(device)
image_model.load_state_dict(torch.load('../learning/model_image_2023-06-10.pth'))
caption_model.load_state_dict(torch.load('../learning/model_caption_2023-06-10.pth'))
loss_fn = ContrastiveLoss()
tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese-v2")

Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-v2 were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [25]:
# データローダーから一つのデータを取り出します
data_iter = iter(train_dataset)
data_one = next(data_iter)

print(data_one[0].shape)

torch.Size([3, 224, 224])


In [26]:
print(device)

cuda


In [27]:
from experiments.category_search import image_search_with_category
from experiments.img2img import img2img


heap = image_search_with_category(data_one[0], image_model, device, test_dataset, 5)

torch.Size([1, 3, 224, 224])


In [28]:
print('\n'.join([h.img for h in heap]))

D:/M1/fashion/IQON/IQON3000\2505901\3539211/10600854_m.jpg
D:/M1/fashion/IQON/IQON3000\859569\3882799/32569203_m.jpg
D:/M1/fashion/IQON/IQON3000\859569\3882799/32569203_m.jpg
D:/M1/fashion/IQON/IQON3000\1791936\3192476/9296702_m.jpg


In [29]:
data_one[-1]

'D:/M1/fashion/IQON/IQON3000\\2450587\\3707672/12638798_m.jpg'