In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os

# Training hyperparameters used by the dataset, optimizer and fit cells below.
num_epochs = 1000      # number of epochs passed to fit()
batch_size = 32        # samples per batch in the tf.data pipeline
learning_rate = 0.01   # learning rate handed to the Adam optimizer

In [None]:
def _decode_and_resize(filename, label):
    """Load one JPEG from disk and convert it into a fixed-size image tensor.

    Args:
        filename: Scalar string tensor holding the path of a JPEG file.
        label: Label paired with the image; returned unchanged.

    Returns:
        A ``(image, label)`` tuple where ``image`` is a float tensor of shape
        ``[224, 224, 3]`` whose values are scaled into ``[0, 1]``.
    """
    image_string = tf.io.read_file(filename)  # read the raw file bytes
    # Force three channels. Without `channels`, a grayscale JPEG decodes to a
    # single channel, which breaks batching with RGB images and does not match
    # the 3-channel input the ResNet50 backbone expects.
    image_decoded = tf.image.decode_jpeg(image_string, channels=3)
    image_resized = tf.image.resize(image_decoded, [224, 224]) / 255.0
    # NOTE(review): ImageNet-pretrained Keras ResNet50 is documented to expect
    # `tf.keras.applications.resnet50.preprocess_input` rather than a plain
    # /255 scaling -- confirm which preprocessing is intended here.
    return image_resized, label

data_text = pd.read_csv('train_data.csv')
# Build the training dataset from the (image path, label) pairs listed in the CSV.
train_filenames = tf.constant(list(data_text.path.values))
train_labels = list(data_text.page_num.values)

train_dataset = tf.data.Dataset.from_tensor_slices((train_filenames, train_labels))
# Shuffle the lightweight (path, label) pairs BEFORE decoding. Shuffling after
# the map step would keep hundreds of decoded 224x224x3 float images in the
# shuffle buffer and only mix samples within that window; a buffer covering the
# whole dataset gives a uniform shuffle at negligible memory cost.
# max(..., 1) keeps buffer_size positive even if the CSV has no rows.
train_dataset = train_dataset.shuffle(buffer_size=max(len(data_text), 1))
train_dataset = train_dataset.map(
    map_func=_decode_and_resize,
    num_parallel_calls=tf.data.experimental.AUTOTUNE)
train_dataset = train_dataset.batch(batch_size)
# Overlap input preparation with training by prefetching upcoming batches.
train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
# Transfer-learning classifier: an ImageNet-pretrained ResNet50 without its
# classification top (global average pooling output) followed by a 48-way
# softmax layer.
# NOTE(review): sparse categorical cross-entropy assumes integer class ids in
# [0, 48) -- confirm the `page_num` labels fall in that range.
resnet50_fine_tune = tf.keras.models.Sequential([
    tf.keras.applications.ResNet50(include_top=False, pooling='avg', weights='imagenet'),
    tf.keras.layers.Dense(48, activation='softmax'),
])

# Freeze the pretrained backbone so that only the Dense head is trained.
resnet50_fine_tune.layers[0].trainable = False

resnet50_fine_tune.compile(
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=[tf.keras.metrics.sparse_categorical_accuracy],
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
)
resnet50_fine_tune.summary()

In [None]:
# Train the compiled model on the batched training dataset for `num_epochs` epochs.
resnet50_fine_tune.fit(train_dataset, epochs=num_epochs)

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import models
from torchvision.models.resnet import Bottleneck, BasicBlock, ResNet
import torch.utils.model_zoo as model_zoo

import numpy as np
import cv2
import pandas as pd

from model.model import *

In [2]:
# Image feature extractor. ResidualNet is not defined in this notebook;
# presumably it is provided by the `from model.model import *` star import.
image_net = ResidualNet()
image_net.eval()  # switch to evaluation mode before extracting features
image_net = image_net.cuda()  # NOTE(review): requires a CUDA device; fails on CPU-only machines

In [3]:
# Text encoder wrapper; its `.tokenizer` and `.model` attributes are passed to
# get_text_space_vector in the embedding cells below.
text_net = RoBertaChinese()

In [4]:
# Embed every best-seller row: one image vector (from the product picture) and
# one text vector (from the title), stored as two new DataFrame columns.
best_seller = pd.read_csv('best_seller.csv')
image_vectors = []
text_vectors = []
with torch.no_grad():  # inference only: no gradient tracking needed
    for good_id, title in zip(best_seller['id'], best_seller['title']):
        image_space_vector = get_image_space_vector(
            'data/best_seller/' + good_id + '.jpg', image_net, 'avg')
        text_space_vector = get_text_space_vector(
            title, text_net.tokenizer, text_net.model).tolist()
        image_vectors.append(image_space_vector)
        text_vectors.append(text_space_vector)
# Wrap in Series so each per-row vector is stored as a single cell value.
best_seller['image_vectors'] = pd.Series(image_vectors)
best_seller['text_vectors'] = pd.Series(text_vectors)

In [5]:
# Embed every source-goods row: one image vector (from the product picture) and
# one text vector (from the title), stored as two new DataFrame columns.
source_goods = pd.read_csv('source_goods.csv')
image_vectors = []
text_vectors = []
with torch.no_grad():  # inference only: no gradient tracking needed
    for good_id, title in zip(source_goods['id'], source_goods['title']):
        image_space_vector = get_image_space_vector(
            'data/source_goods/' + good_id + '.jpg', image_net, 'avg')
        text_space_vector = get_text_space_vector(
            title, text_net.tokenizer, text_net.model).tolist()
        image_vectors.append(image_space_vector)
        text_vectors.append(text_space_vector)
# Wrap in Series so each per-row vector is stored as a single cell value.
source_goods['image_vectors'] = pd.Series(image_vectors)
source_goods['text_vectors'] = pd.Series(text_vectors)

In [6]:
def get_good_mean_cosine(good_image_vector, list_best_seller_vectors):
    """Return the mean cosine similarity between one vector and a set of references.

    Despite the parameter name, the notebook calls this with both image and
    text vectors.

    Args:
        good_image_vector: Vector of the product being scored.
        list_best_seller_vectors: Sized iterable of reference vectors.

    Returns:
        The sum of ``get_cosine_similarity(good_image_vector, v)`` over every
        reference ``v``, divided by the number of references.

    Raises:
        ZeroDivisionError: If ``list_best_seller_vectors`` is empty.
    """
    similarities = (
        get_cosine_similarity(good_image_vector, reference)
        for reference in list_best_seller_vectors
    )
    return sum(similarities) / len(list_best_seller_vectors)
    

# Score every source good by its mean cosine similarity to all best sellers,
# once in image space and once in text space.
#
# Each column is built in a single assignment from a list of floats. The
# previous approach initialised the column with the integer 0 and then wrote
# floats row by row through .loc, which recent pandas versions flag as a
# deprecated incompatible-dtype assignment, and duplicated the loop per modality.
for modality in ('image', 'text'):
    list_best_seller_vectors = best_seller[modality + '_vectors'].values
    source_goods[modality + '_mean_cosine'] = [
        # float() normalises whatever scalar type the similarity helper returns
        float(get_good_mean_cosine(vector, list_best_seller_vectors))
        for vector in source_goods[modality + '_vectors']
    ]

In [8]:
# Rank the source goods from most to least image-similar to the best sellers
# and renumber the rows 0..n-1 so that row labels equal rank positions.
temp = (
    source_goods
    .sort_values(by=['image_mean_cosine'], ascending=False)
    .reset_index(drop=True)
)
temp

Unnamed: 0,title,price,shop,images_url_attribute,product_url_attribute,id,page_num,image_vectors,text_vectors,image_mean_cosine,text_mean_cosine
0,楚楚 10.13晚直播开售【北海道冬日】~主推！中古风盘扣羽绒服,888.0,馨艺服饰,https://img01-gms.17zwd.com/imgextra/61240356/...,https://gz.17zwd.com/item.htm?GID=117129514&sp...,c3bca91e-c96e-47e7-922c-a4f9981fe473,2.0,"[0.101136446, 0.33970687, 0.38277063, 0.093782...","[0.987369179725647, 0.8058074116706848, 0.9361...",0.832702,0.930453
1,实拍韩版2020冬新款潮流时尚中短款ins女棉服羽绒服冬装外套,75.0,纤浔羽绒服,https://img01-gms.17zwd.com/imgextra/61402901/...,https://gz.17zwd.com/item.htm?GID=116899381&sp...,977d2b3d-11aa-40cd-bf0a-a8ef50d40193,17.0,"[0.07285059, 0.63022906, 0.2959206, 0.1359996,...","[0.9668587446212769, 0.934699296951294, 0.7466...",0.831473,0.951467
2,2020新款皮羽绒服女中长款狐狸毛领连帽修身海宁绵羊皮皮衣外套,288.0,珊妮网络服饰,https://img.alicdn.com/bao/uploaded/i3/1612400...,https://gz.17zwd.com/item.htm?GID=117067549&sp...,ad4dc0a5-4960-4542-84a0-d313bff07af0,4.0,"[0.18888241, 0.56815916, 0.13226885, 0.2849952...","[0.935283362865448, 0.9267182350158691, 0.8104...",0.827753,0.950818
3,实拍2955#连帽过膝中长款羽绒服女2020冬装新款韩版白鸭绒外套,238.0,女人花旗舰店,https://img.alicdn.com/bao/uploaded/i2/8369987...,https://gz.17zwd.com/item.htm?GID=117004813&sp...,258700f7-cf19-4f12-ad00-2d64498277d0,12.0,"[0.034658626, 1.4333413, 0.38285947, 0.2110069...","[0.9761690497398376, 0.9639167189598083, 0.583...",0.826821,0.956166
4,实拍2955#连帽过膝中长款羽绒服女2020冬装新款韩版白鸭绒外套,238.0,靓装亮坊服饰,https://img.alicdn.com/bao/uploaded/i2/8369987...,https://gz.17zwd.com/item.htm?GID=117004812&sp...,009fa902-2f34-455e-886d-d5d902f7759b,13.0,"[0.034658626, 1.4333413, 0.38285947, 0.2110069...","[0.9761690497398376, 0.9639167189598083, 0.583...",0.826821,0.956166
...,...,...,...,...,...,...,...,...,...,...,...
1784,棉袄2020年新款棉服韩版宽松冬季外套女装冬装反季羽绒棉衣中长款,90.0,靓谷羽绒服,https://img.alicdn.com/bao/uploaded/i1/3244400...,https://gz.17zwd.com/item.htm?GID=117040578&sp...,80d8adde-67d5-4b1d-9b07-812d0818ef9d,8.0,"[0.42023897, 0.05090834, 0.20144719, 0.4745937...","[0.8919898867607117, 0.8185612559318542, 0.715...",0.681985,0.936353
1785,实拍短款羽绒服女2019新款冬装面包服韩版太阳帽宽松bf棉袄外套,70.0,迪莎琪羽绒服,https://img.alicdn.com/bao/uploaded/i1/3187010...,https://gz.17zwd.com/item.htm?GID=116989398&sp...,66d2f010-04ae-47c3-8f46-7cdab02f6692,14.0,"[0.42332023, 0.3845322, 0.1518079, 0.29634404,...","[0.9722011685371399, 0.9555153846740723, 0.844...",0.677551,0.959776
1786,实拍短款羽绒服女2019新款冬装面包服韩版太阳帽宽松bf棉袄外套,70.0,卡莱登羽绒服,https://img.alicdn.com/bao/uploaded/i4/2980402...,https://gz.17zwd.com/item.htm?GID=117076164&sp...,d7462e42-ffe9-4e86-afb0-0b87f4c54e7b,5.0,"[0.41929242, 0.3640224, 0.14598967, 0.2869682,...","[0.9722011685371399, 0.9555153846740723, 0.844...",0.677138,0.959776
1787,官网图 白鸭绒新款韩版时尚牛角扣羽绒棉服女中长款宽松加厚,150.0,金万俐羽绒服,https://img01-gms.17zwd.com/imgextra/61298336/...,https://gz.17zwd.com/item.htm?GID=117072450&sp...,fd08def9-c117-4184-ac09-9810b56c805d,6.0,"[0.0, 1.4987386, 0.0, 0.02129049, 0.52830786, ...","[0.9559850096702576, 0.93857342004776, 0.75322...",0.658135,0.949379


In [9]:
# Keep the top-ranked rows of `temp`.
# NOTE(review): .loc slicing is label-inclusive, so this selects labels 0
# through 20 (21 rows) -- confirm that 21 rather than 20 rows is intended.
temp1 = temp.loc[:20,:]
temp1

Unnamed: 0,title,price,shop,images_url_attribute,product_url_attribute,id,page_num,image_vectors,text_vectors,image_mean_cosine,text_mean_cosine
0,楚楚 10.13晚直播开售【北海道冬日】~主推！中古风盘扣羽绒服,888.0,馨艺服饰,https://img01-gms.17zwd.com/imgextra/61240356/...,https://gz.17zwd.com/item.htm?GID=117129514&sp...,c3bca91e-c96e-47e7-922c-a4f9981fe473,2.0,"[0.101136446, 0.33970687, 0.38277063, 0.093782...","[0.987369179725647, 0.8058074116706848, 0.9361...",0.832702,0.930453
1,实拍韩版2020冬新款潮流时尚中短款ins女棉服羽绒服冬装外套,75.0,纤浔羽绒服,https://img01-gms.17zwd.com/imgextra/61402901/...,https://gz.17zwd.com/item.htm?GID=116899381&sp...,977d2b3d-11aa-40cd-bf0a-a8ef50d40193,17.0,"[0.07285059, 0.63022906, 0.2959206, 0.1359996,...","[0.9668587446212769, 0.934699296951294, 0.7466...",0.831473,0.951467
2,2020新款皮羽绒服女中长款狐狸毛领连帽修身海宁绵羊皮皮衣外套,288.0,珊妮网络服饰,https://img.alicdn.com/bao/uploaded/i3/1612400...,https://gz.17zwd.com/item.htm?GID=117067549&sp...,ad4dc0a5-4960-4542-84a0-d313bff07af0,4.0,"[0.18888241, 0.56815916, 0.13226885, 0.2849952...","[0.935283362865448, 0.9267182350158691, 0.8104...",0.827753,0.950818
3,实拍2955#连帽过膝中长款羽绒服女2020冬装新款韩版白鸭绒外套,238.0,女人花旗舰店,https://img.alicdn.com/bao/uploaded/i2/8369987...,https://gz.17zwd.com/item.htm?GID=117004813&sp...,258700f7-cf19-4f12-ad00-2d64498277d0,12.0,"[0.034658626, 1.4333413, 0.38285947, 0.2110069...","[0.9761690497398376, 0.9639167189598083, 0.583...",0.826821,0.956166
4,实拍2955#连帽过膝中长款羽绒服女2020冬装新款韩版白鸭绒外套,238.0,靓装亮坊服饰,https://img.alicdn.com/bao/uploaded/i2/8369987...,https://gz.17zwd.com/item.htm?GID=117004812&sp...,009fa902-2f34-455e-886d-d5d902f7759b,13.0,"[0.034658626, 1.4333413, 0.38285947, 0.2110069...","[0.9761690497398376, 0.9639167189598083, 0.583...",0.826821,0.956166
5,小香风羽绒服女短款2020年冬季新款时尚轻薄小个子黑色白鸭绒外套,70.0,欧尼阁服饰,https://img01-gms.17zwd.com/imgextra/62015702/...,https://gz.17zwd.com/item.htm?GID=117118560&sp...,c673f268-d051-415a-af01-95e64d6b0549,2.0,"[0.2152406, 0.44023412, 0.14497411, 0.1114763,...","[0.9491431713104248, 0.9178586006164551, 0.781...",0.825879,0.959128
6,实拍羽绒服棉服女2020新款加厚亮面学生棉袄连帽显瘦棉衣外套女,70.0,新时代棉服二店,https://img01-gms.17zwd.com/imgextra/228429/i1...,https://gz.17zwd.com/item.htm?GID=117074586&sp...,95742a77-d7ab-422d-b22e-afc4d0a61028,6.0,"[0.19195715, 1.0633242, 0.024518175, 0.1539071...","[0.9491159915924072, 0.9574410319328308, 0.660...",0.825824,0.952266
7,官网图 夏诗文同款羽绒服女2020新款冬中长款加厚外套面包棉服,73.0,金万俐羽绒服,https://img01-gms.17zwd.com/imgextra/61298336/...,https://gz.17zwd.com/item.htm?GID=117072323&sp...,04eb26e1-8afa-4a4e-ab7b-a1018b8dcfce,6.0,"[0.20943673, 0.82773215, 0.16001953, 0.2369165...","[0.9566456079483032, 0.9719009399414062, 0.750...",0.825427,0.949976
8,实拍新款羽绒服 女学生面包服牛角扣学院风棉服外套,70.0,牛牛毛织,https://img.alicdn.com/bao/uploaded/i4/2064759...,https://gz.17zwd.com/item.htm?GID=117060349&sp...,afe9c052-0671-4bbe-8c8d-fa7866033a27,7.0,"[0.05713642, 1.4170709, 0.25714034, 0.2553333,...","[0.9722996354103088, 0.962665319442749, 0.8313...",0.824854,0.950029
9,实拍2955#连帽过膝中长款羽绒服女2020冬装新款韩版白鸭绒外套,238.0,女人花旗舰店,https://img.alicdn.com/bao/uploaded/i3/8369987...,https://gz.17zwd.com/item.htm?GID=117004815&sp...,4fe80f59-1028-4a67-9d57-c5b57e459fc5,12.0,"[0.09992168, 0.9609053, 0.3617295, 0.24298956,...","[0.9761690497398376, 0.9639167189598083, 0.583...",0.822844,0.956166


In [11]:
from PIL import Image

# Copy the picture of every selected product into data/test/ as an RGB JPEG.
# NOTE(review): the data/test/ directory is assumed to exist already.
for good_id in temp1['id']:
    # The context manager closes the source file handle as soon as the image
    # has been converted and written, instead of leaving it open until
    # garbage collection.
    with Image.open('data/source_goods/' + good_id + '.jpg') as pic:
        # convert() returns a new image, so non-RGB modes can be saved as JPEG.
        pic.convert('RGB').save('data/test/' + good_id + '.jpg')

In [12]:
# Show the product page URL stored in the row labelled 3 of the selection.
temp1['product_url_attribute'][3]

'https://gz.17zwd.com/item.htm?GID=117004813&spm=0.42.0.2491.117004813.0.0&action=2&so=%E7%BE%BD%E7%BB%92%E6%9C%8D'