In [124]:
import nltk
from nltk import pos_tag
from nltk.chunk import ne_chunk
from nltk.tree import Tree
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans,DBSCAN
from sklearn.preprocessing import StandardScaler

import pandas as pd

In [2]:
# nltk.download()

In [8]:
df = pd.read_csv("data/Aoutput0_cut.csv")
df.iloc[140]

frame                                                   8400
caption    An image of a man cutting a tomato with a knif...
Name: 140, dtype: object

In [9]:
captions = df["caption"]
# tokens = nltk.word_tokenize(captions[0])
tokens = [nltk.word_tokenize(caption) for caption in captions]
tagged_tokens = [nltk.pos_tag(token) for token in tokens]
tagged_tokens[140]

[('An', 'DT'),
 ('image', 'NN'),
 ('of', 'IN'),
 ('a', 'DT'),
 ('man', 'NN'),
 ('cutting', 'VBG'),
 ('a', 'DT'),
 ('tomato', 'NN'),
 ('with', 'IN'),
 ('a', 'DT'),
 ('knife', 'NN'),
 ('on', 'IN'),
 ('a', 'DT'),
 ('kitchen', 'NN'),
 ('counter', 'NN'),
 ('.', '.')]

In [4]:
df['tagged_tokens'] = tagged_tokens

In [83]:
def extract_SVO(tagged_token):
    subject = ""
    verb = ""
    obj = ""

    grammar = "SOV: {<DT>?<JJ>?<N.*><V.*><IN>?<DT>?<JJ>?<N.*>}"
    cp = nltk.RegexpParser(grammar)
    tree = cp.parse(tagged_token)
    for subtree in tree.subtrees():
        if subtree.label() == "SOV":
            for leaf in subtree.leaves():
                if ('NN' in leaf[1] and subject==''):
                    subject = leaf[0]
                if 'VB' in leaf[1]:
                    verb = leaf[0]
                if ('NN' in leaf[1]):
                    obj = leaf[0]
    return subject, verb, obj

In [106]:
def extract_NV(tagged_token):
    words = []

    for tag in tagged_token:
        if ('NN' in tag[1]):
            words.append(tag[0])
        if 'VB' in tag[1]:
            words.append(tag[0])
    return words[1:]

In [107]:
svos = [extract_NV(tagged_token) for tagged_token in tagged_tokens]
df['svos'] = svos
df.iloc[100]

frame                                                   6000
caption    An image of a man opening a bag of food in a k...
svos                      [man, opening, bag, food, kitchen]
label                                                     -1
Name: 100, dtype: object

In [139]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform([" ".join(svo) for svo in svos])
X = StandardScaler().fit_transform(X.toarray())
kmeans = KMeans(n_clusters=100)
# dbscan = DBSCAN(eps=0.5, min_samples=5)
kmeans.fit(X)
# labels = dbscan.fit_predict(X)
labels = kmeans.labels_
df['label'] = labels

  super()._check_params_vs_input(X, default_n_init=10)


In [140]:
intertia = kmeans.inertia_
intertia

20583.57330585108

In [141]:
df.head(50)

Unnamed: 0,frame,caption,svos,label
0,0,An image of a man playing a chess game on a ta...,"[man, playing, game, table]",40
1,60,An image of a chessboard and a person standing...,"[chessboard, person, standing, kitchen]",98
2,120,An image of a man stealing a chessboard from a...,"[man, stealing, chessboard, kitchen, counter]",98
3,180,An image of a man stealing a chessboard from a...,"[man, stealing, chessboard, kitchen]",98
4,240,An image of a kitchen and a table with a note ...,"[kitchen, table, note]",99
5,300,An image of a kitchen from a security camera,"[kitchen, security, camera]",23
6,360,An image of a kitchen from a security camera,"[kitchen, security, camera]",23
7,420,An image of a man walking into a kitchen with ...,"[man, walking, kitchen, table, microwave]",77
8,480,An image of a man standing in a kitchen with a...,"[man, standing, kitchen, letter, table]",68
9,540,An image of a man standing in a kitchen with a...,"[man, standing, kitchen, piece, paper]",18
