In [1]:
from transformers import BertTokenizer, BertModel
import torch
from sklearn.metrics.pairwise import cosine_similarity

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 初始化BERT模型和分词器
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def get_embedding(text):
    """获取文本的BERT嵌入"""
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).numpy()

def merge_descriptions(desc1, desc2):
    """合并两个描述"""
    embedding1 = get_embedding(desc1)
    embedding2 = get_embedding(desc2)

    # 计算两个描述的余弦相似度
    similarity = cosine_similarity(embedding1, embedding2)[0][0]

    # 如果相似度超过阈值，则合并描述
    if similarity > 0.8:
        # 这里简单地将两个描述连接起来，实际应用中可能需要更复杂的合并策略
        return desc1 + " " + desc2
    else:
        return None

In [4]:

# 示例
desc1 = "A man is standing near a blue car."
desc2 = "A man is standing near a house."

merged_description = merge_descriptions(desc1, desc2)
print(merged_description)


A man is standing near a blue car. A man is standing near a house.


In [8]:
import spacy
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# 加载Spacy模型
nlp = spacy.load("en_core_web_sm")

# 初始化GPT-2模型和分词器
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
model = GPT2LMHeadModel.from_pretrained('gpt2-medium')

def extract_key_info(text):
    """提取文本中的关键信息"""
    doc = nlp(text)
    subjects = [token.text for token in doc if "subj" in token.dep_]
    objects = [token.text for token in doc if "obj" in token.dep_]
    return subjects, objects

def generate_new_description(desc1, desc2):
    subjects1, objects1 = extract_key_info(desc1)
    subjects2, objects2 = extract_key_info(desc2)

    # 合并关键信息
    all_subjects = list(set(subjects1 + subjects2))
    all_objects = list(set(objects1 + objects2))

    # 使用GPT-2生成新描述
    prompt = "Describe a scene where " + " and ".join(all_subjects) + " are " + " and ".join(all_objects)
    inputs = tokenizer.encode(prompt, return_tensors="pt", max_length=50, truncation=True)
    outputs = model.generate(inputs, max_length=30, temperature=0.7, num_return_sequences=1)
    generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    return generated[len(prompt):].strip()

# 示例
desc1 = "A man is standing near a blue car."
desc2 = "A woman is sitting on a bench."

new_description = generate_new_description(desc1, desc2)
print(new_description)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


ing each other.

A man and woman are sitting on a bench. The man
