In [142]:
import nltk
from nltk import pos_tag
from nltk.chunk import ne_chunk
from nltk.tree import Tree
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans,DBSCAN
from sklearn.preprocessing import StandardScaler

import pandas as pd

In [143]:
# nltk.download()

In [144]:
df = pd.read_csv("data/Aoutput0_cut.csv")
df.iloc[140]

frame                                                   8400
caption    An image of a man cutting a tomato with a knif...
Name: 140, dtype: object

In [145]:
captions = df["caption"]
# tokens = nltk.word_tokenize(captions[0])
tokens = [nltk.word_tokenize(caption) for caption in captions]
tagged_tokens = [nltk.pos_tag(token) for token in tokens]
tagged_tokens[140]

[('An', 'DT'),
 ('image', 'NN'),
 ('of', 'IN'),
 ('a', 'DT'),
 ('man', 'NN'),
 ('cutting', 'VBG'),
 ('a', 'DT'),
 ('tomato', 'NN'),
 ('with', 'IN'),
 ('a', 'DT'),
 ('knife', 'NN'),
 ('on', 'IN'),
 ('a', 'DT'),
 ('kitchen', 'NN'),
 ('counter', 'NN'),
 ('.', '.')]

In [146]:
df['tagged_tokens'] = tagged_tokens

In [147]:
def extract_SVO(tagged_token):
    subject = ""
    verb = ""
    obj = ""

    grammar = "SOV: {<DT>?<JJ>?<N.*><V.*><IN>?<DT>?<JJ>?<N.*>}"
    cp = nltk.RegexpParser(grammar)
    tree = cp.parse(tagged_token)
    for subtree in tree.subtrees():
        if subtree.label() == "SOV":
            for leaf in subtree.leaves():
                if ('NN' in leaf[1] and subject==''):
                    subject = leaf[0]
                if 'VB' in leaf[1]:
                    verb = leaf[0]
                if ('NN' in leaf[1]):
                    obj = leaf[0]
    return subject, verb, obj

In [148]:
def extract_NV(tagged_token):
    words = []

    for tag in tagged_token:
        if ('NN' in tag[1]):
            words.append(tag[0])
        if 'VB' in tag[1]:
            words.append(tag[0])
    return words[1:]

In [149]:
svos = [extract_NV(tagged_token) for tagged_token in tagged_tokens]
df['svos'] = svos
df.iloc[100]

frame                                                         6000
caption          An image of a man opening a bag of food in a k...
tagged_tokens    [(An, DT), (image, NN), (of, IN), (a, DT), (ma...
svos                            [man, opening, bag, food, kitchen]
Name: 100, dtype: object

In [None]:

import torch
from transformers import BertTokenizer, BertModel

# 初始化tokenizer和model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def get_bert_embeddings(sentences):
    # 批量标记化文本并得到相应的输出
    inputs = tokenizer(sentences, return_tensors="pt", truncation=True, padding=True, max_length=512)

    with torch.no_grad():
        output = model(**inputs)
    
    # 使用BERT模型的最后一层的均值作为句子的嵌入
    embeddings = output.last_hidden_state.mean(dim=1).squeeze().numpy()
    return embeddings

# 示例
sentences = ["BERT is a great NLP model.", "Transformers library makes it easy.", "Text embeddings are useful."]
embeddings = get_bert_embeddings(sentences)
for i, embed in enumerate(embeddings):
    print(f"Sentence {i + 1} Embedding Shape:", embed.shape)

In [150]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform([" ".join(svo) for svo in svos])
X = StandardScaler().fit_transform(X.toarray())
kmeans = KMeans(n_clusters=100)
# dbscan = DBSCAN(eps=0.5, min_samples=5)
kmeans.fit(X)
# labels = dbscan.fit_predict(X)
labels = kmeans.labels_
df['label'] = labels

  super()._check_params_vs_input(X, default_n_init=10)


In [151]:
intertia = kmeans.inertia_
intertia

20871.502650366296

In [152]:
df.head(50)

Unnamed: 0,frame,caption,tagged_tokens,svos,label
0,0,An image of a man playing a chess game on a ta...,"[(An, DT), (image, NN), (of, IN), (a, DT), (ma...","[man, playing, game, table]",25
1,60,An image of a chessboard and a person standing...,"[(An, DT), (image, NN), (of, IN), (a, DT), (ch...","[chessboard, person, standing, kitchen]",58
2,120,An image of a man stealing a chessboard from a...,"[(An, DT), (image, NN), (of, IN), (a, DT), (ma...","[man, stealing, chessboard, kitchen, counter]",58
3,180,An image of a man stealing a chessboard from a...,"[(An, DT), (image, NN), (of, IN), (a, DT), (ma...","[man, stealing, chessboard, kitchen]",58
4,240,An image of a kitchen and a table with a note ...,"[(An, DT), (image, NN), (of, IN), (a, DT), (ki...","[kitchen, table, note]",98
5,300,An image of a kitchen from a security camera,"[(An, DT), (image, NN), (of, IN), (a, DT), (ki...","[kitchen, security, camera]",28
6,360,An image of a kitchen from a security camera,"[(An, DT), (image, NN), (of, IN), (a, DT), (ki...","[kitchen, security, camera]",28
7,420,An image of a man walking into a kitchen with ...,"[(An, DT), (image, NN), (of, IN), (a, DT), (ma...","[man, walking, kitchen, table, microwave]",16
8,480,An image of a man standing in a kitchen with a...,"[(An, DT), (image, NN), (of, IN), (a, DT), (ma...","[man, standing, kitchen, letter, table]",46
9,540,An image of a man standing in a kitchen with a...,"[(An, DT), (image, NN), (of, IN), (a, DT), (ma...","[man, standing, kitchen, piece, paper]",14
