In [2]:
# pip install transformers

In [2]:
import torch
import torch.nn.functional as F
import pandas as pd
import numpy as np

In [2]:
# Load the pickled frame that MatNameCF later filters by material column
# (rows with a value > 0 are taken as supplier ids via the index).
# NOTE(review): presumably a supplier-id x material-name matrix — confirm.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
id_mat_df = pd.read_pickle("./id_mat_matrix.pkl")

In [None]:
import os
# Expose only CUDA device 0 to this process.
# NOTE(review): presumably this has to run before CUDA is first initialised
# to have any effect — confirm nothing above touches the GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
from transformers import BertTokenizer, BertModel
# Chinese pre-trained model based on Whole Word Masking
# https://github.com/ymcui/Chinese-BERT-wwm
MODEL_NAME = "hfl/chinese-roberta-wwm-ext"
# Tokenizer and encoder are loaded from the same checkpoint name.
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertModel.from_pretrained(MODEL_NAME)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Last expression of the cell: moves the model and displays its repr.
model.to(device)

In [5]:
# data = pd.read_csv("./material_spec.csv")
# Training table; its first CSV column becomes the index.
trainData = pd.read_csv("./Data_train.csv", index_col=0)

# Distinct material names in order of first appearance. Both names refer to
# the same list object.
train_mat_names = list(trainData["MAT_NAME（物料名称）"].drop_duplicates())
mat_names = train_mat_names

# Material name -> position in `mat_names`.
mat2indx = {material: position for position, material in enumerate(mat_names)}

In [6]:
# Use the ready-made (precomputed) material vectors
# NOTE(review): torch.load unpickles the file; only load trusted files.
# NOTE(review): presumably row i is the embedding of mat_names[i] — confirm.
trainMatVec = torch.load("trainMatVec"+".pt")
# Displayed as the cell output.
trainMatVec.shape

torch.Size([31337, 768])

In [57]:
#基于物料相似度的筛选, item base的协同过滤模型
# 计算两个物料名称词向量之间的相似度

class MatNameCF:
    """Item-based collaborative filtering over material-name embeddings.

    Given a material name, the class finds the historical material names whose
    embedding vectors are closest (cosine similarity) and collects the
    suppliers recorded for those materials.

    Parameters
    ----------
    id_mat_df : pandas.DataFrame
        Frame whose columns are material names; rows whose value in a column
        is > 0 are treated as suppliers of that material and identified by
        the frame's index.
    matvec : torch.Tensor
        One embedding per known material; row ``i`` must correspond to
        ``mat_names[i]``.
    mat2indx : dict
        Material name -> row index into ``matvec`` / ``mat_names``.
    mat_names : sequence of str
        Known material names, positionally aligned with ``matvec``.
    model : callable
        Encoder used for names that are not in ``mat2indx``. It is called with
        a tensor of input ids and must return a mapping with a
        ``'pooler_output'`` entry.
    tokenizer : callable, optional
        Tokenizer used together with ``model``. When omitted, a module-level
        variable named ``tokenizer`` is looked up at query time, which keeps
        the original five-argument construction working.
    """

    def __init__(self, id_mat_df, matvec, mat2indx, mat_names, model, tokenizer=None):
        self.id_mat_df = id_mat_df
        self.id_mat_matrix = id_mat_df.to_numpy(dtype=np.float64)
        self.matvec = matvec
        self.mat2indx = mat2indx
        self.model = model
        self.mat_names = mat_names
        self.tokenizer = tokenizer
        # Filled by conduct_similarity(); None until then.
        self.mat_similarity = None
        # Never populated by this class; kept for callers that read it.
        self.sup_similarity = None

    def conduct_similarity(self):
        """Precompute the full pairwise matrix between all material vectors.

        Note that the stored values are Euclidean *distances* (``p=2``), so
        smaller means more similar. The result is an N x N tensor, which can
        be large for many materials.
        """
        self.mat_similarity = torch.cdist(self.matvec, self.matvec, p=2)
        return

    def get_sup(self, mat):
        """Return the ids of suppliers recorded for material ``mat``.

        Returns an empty list when ``mat`` is not a column of ``id_mat_df``.
        """
        # Explicit membership test instead of a bare ``except`` so that real
        # errors are not silently turned into "no suppliers".
        if mat not in self.id_mat_df.columns:
            return []
        return list(self.id_mat_df[self.id_mat_df[mat] > 0].index)

    def get_neareast_supplier(self, mat, simMatNum=10, n=100):
        """Recommend suppliers via the materials most similar to ``mat``.

        Parameters
        ----------
        mat : str
            Query material name.
        simMatNum : int
            Number of nearest materials to inspect.
        n : int
            Maximum number of suppliers returned.

        Returns
        -------
        (list, list of float)
            Supplier ids and, positionally aligned, the similarity of the
            first (i.e. most similar) neighbour material that introduced each
            supplier. Every supplier appears at most once.
        """
        nearest_names, sim_score = self.get_neareast_mat(mat, n=simMatNum)
        seen = set()
        sup_id = []
        sup_score = []
        for rank, name in enumerate(nearest_names):
            score = sim_score[rank].item()
            # get_sup() yields [] for names without a column, so a neighbour
            # missing from id_mat_df is skipped instead of raising KeyError.
            for supplier in self.get_sup(name):
                if supplier not in seen:
                    seen.add(supplier)
                    sup_id.append(supplier)
                    sup_score.append(score)
        return sup_id[:n], sup_score[:n]

    def _embed_name(self, name):
        """Encode an unseen material name into a single embedding vector."""
        tok = self.tokenizer
        if tok is None:
            # Backward compatibility: fall back to a module-level `tokenizer`.
            tok = globals().get("tokenizer")
        if tok is None:
            raise RuntimeError(
                "No tokenizer available: pass tokenizer=... to MatNameCF "
                "or define a module-level `tokenizer`."
            )
        # Send the inputs to wherever the model's weights live.
        model_device = next(self.model.parameters()).device
        encoded = tok([name], padding=True, truncation=True, max_length=20, return_tensors="pt")
        input_ids = encoded["input_ids"].to(model_device)
        with torch.no_grad():
            pooled = self.model(input_ids)['pooler_output']
        # Single-item batch -> take row 0 and place it next to the stored vectors.
        vec = pooled[0].detach().to(self.matvec.device)
        torch.cuda.empty_cache()
        return vec

    def get_neareast_mat(self, mat, n=10):
        """Return the ``n`` known materials most similar to ``mat``.

        Names already present in ``mat2indx`` reuse their stored vector; any
        other name is encoded with the model.

        Returns
        -------
        (list of str, torch.Tensor)
            Material names ordered by descending cosine similarity, and the
            matching similarity scores.
        """
        if mat in self.mat2indx:
            query_vec = self.matvec[self.mat2indx[mat]]
        else:
            query_vec = self._embed_name(mat)

        similarities = F.cosine_similarity(self.matvec, query_vec, dim=-1)
        sorted_sim, indices = torch.sort(similarities, descending=True)
        # Index the instance's own name list (not a notebook global) with
        # plain Python ints.
        sim_mat = [self.mat_names[i] for i in indices[:n].tolist()]
        return sim_mat, sorted_sim[:n]

In [58]:
# Build the recommender from the precomputed embeddings loaded into
# `trainMatVec`. The previous argument `matvec` is not defined anywhere in the
# visible notebook, so this cell failed on a fresh kernel.
matNameCF = MatNameCF(id_mat_df, trainMatVec, mat2indx, mat_names, model)
# matNameCF.conduct_similarity()
# matNameCF.mat_similarity = mat_similarity

In [73]:
# matNameCF.conduct_similarity()
# mat_similarity = matNameCF.mat_similarity

In [77]:
# Look up the 10 entries with the smallest value in the query material's row
# of the precomputed pairwise matrix.
# NOTE(review): `mat` is only assigned in a later cell of the visible notebook,
# and `matNameCF.mat_similarity` stays None unless conduct_similarity() is
# called (every call is commented out) — this cell likely fails on
# Restart & Run All. Confirm whether it is still needed.
mat_indx = mat2indx[mat]
similar_mat_indx = matNameCF.mat_similarity[mat_indx].argsort()[:10]

太阳能电池

In [61]:
# Query material for this example.
mat = "太阳能电池"
# Names (and scores) of the 30 known materials ranked closest to the query.
sim_mat, sim_score = matNameCF.get_neareast_mat(mat, n=30)
# Caption on one line, the ranked names on the next.
print("相似物料", sim_mat, sep="\n")

相似物料
['锂亚硫酰氯电池', '电动平板车蓄电池', '锂离子充电电池', '充电式镍氢电池', '40AH铁锂蓄电池（长1300*宽570*高920mm）及液冷机组（长620*宽470*高470mm）', '二氧化碳专用电池', '雷士 太阳能户外灯', '充电式角磨机电池', '锂聚合物电池组', '蓄电池（电瓶车电池）', 'SPECTOR光普仪电池', '蓄电池蓄电池', '户外防水太阳能LED灯带', '户外防水太阳能LED警示灯', '24V大容量锂电池', '阀控式铅酸蓄电池', '内燃照明车 电池（12V）22246797', '电动车锂电池', '锂电太阳能双头路灯', '光纤熔接机电池', '叉车电池用蒸馏水', '太阳能LED灯BRP361', '太阳能户外灯', '锂聚合物电池NL 853556M,1200mAh4.2V', '太阳能防爆航空障碍灯', 'C6054357 锂电池', '退火循环水池电机+泵头安装', '5#南孚碱性电池', '尾矿太阳能控制器SLTK-10AI', '7号镍氢电池']


In [63]:
# Up to 50 suppliers gathered from the 10 materials closest to `mat`.
supp, score = matNameCF.get_neareast_supplier(mat, simMatNum=10, n=50)
# Each caption is followed by its list on the next line.
print("推荐供应商ID", supp, "相关供应物料相似度", score, sep="\n")

推荐供应商ID
[31715, 46086, 269, 13139, 18618, 31877, 7665, 27044, 35895, 7360, 30843, 44241, 2373, 13993, 28893, 4191, 13745, 17843, 8561, 28626, 38203, 4988, 25798, 43755, 3368, 8927, 16840, 19272, 30622]
相关供应物料相似度
[0.9681918025016785, 0.9669303894042969, 0.966675341129303, 0.966675341129303, 0.966675341129303, 0.966675341129303, 0.9664814472198486, 0.9664814472198486, 0.9664814472198486, 0.9638988971710205, 0.9638988971710205, 0.9638988971710205, 0.9629126787185669, 0.9629126787185669, 0.9629126787185669, 0.9627447724342346, 0.9627447724342346, 0.9627447724342346, 0.962562084197998, 0.962562084197998, 0.962562084197998, 0.9618133902549744, 0.9618133902549744, 0.9618133902549744, 0.9614219665527344, 0.9614219665527344, 0.9614219665527344, 0.9614219665527344, 0.9614219665527344]


石墨烯

In [64]:
# Query material for this example.
mat = "石墨烯"
# Names (and scores) of the 30 known materials ranked closest to the query.
sim_mat, sim_score = matNameCF.get_neareast_mat(mat, n=30)
# Caption on one line, the ranked names on the next.
print("相似物料", sim_mat, sep="\n")

相似物料
['超高分子聚乙烯管（黑色）', '超高分子量聚乙烯耐磨弯头', '超高分子量聚乙烯板', '超高分子量聚乙烯管', '超高分子量聚乙烯直管', '智能型石墨电热板', '丙烯酸聚氨酯面漆', '高密度聚乙烯管 219*6m 1MPa两端带法兰', '高级丙烯酸外墙涂料', '铝箔聚乙烯保温管壳', '矿用井下聚氯乙烯管', '不锈钢金属石墨垫', '不锈钢石墨金属垫', '黑色环氧树脂地坪漆', '铜芯氯乙烯护套屏蔽软线', '超五类无氧铜网线', '水性环氧防腐底漆', '交联乙烯铝芯电缆', '高密度聚乙烯管 Φ102*8', '水性环氧树脂地坪漆', '硬质聚氯乙烯排水管', '亚克力有机玻璃面板罩', '石墨板底面前板', '双面纳米防腐彩钢瓦', '有机硅耐高温漆', '石墨板底面中板', '软玻璃PVC透明桌布', '耐油耐高温石棉板,,,', '阻燃玻璃钢电缆桥架', '镍丝加强石墨带状垫片']


In [65]:
# Up to 50 suppliers gathered from the 10 materials closest to `mat`.
supp, score = matNameCF.get_neareast_supplier(mat, simMatNum=10, n=50)
# Each caption is followed by its list on the next line.
print("推荐供应商ID", supp, "相关供应物料相似度", score, sep="\n")

推荐供应商ID
[23977, 32539, 8513, 13166, 31052, 38278, 14362, 33930, 37008, 37185, 45257, 13532, 21854, 24450, 40032, 16840, 19272, 30622, 33678, 37819, 2871, 20466, 40412, 40970, 1024, 14516]
相关供应物料相似度
[0.9625617265701294, 0.9625617265701294, 0.9609711170196533, 0.9609711170196533, 0.9609711170196533, 0.9609711170196533, 0.9609711170196533, 0.9609711170196533, 0.9609711170196533, 0.9609711170196533, 0.9609711170196533, 0.9609711170196533, 0.957426130771637, 0.957426130771637, 0.957426130771637, 0.9573827385902405, 0.9573827385902405, 0.9573827385902405, 0.9571952819824219, 0.9571952819824219, 0.9557058215141296, 0.9557058215141296, 0.9557058215141296, 0.9557058215141296, 0.9529126882553101, 0.9529126882553101]
