In [1]:
import torch
import pandas as pd
import os.path as osp
import numpy as np
import clip
from ast import literal_eval

In [2]:
# Token id searched for in each tokenised subject to find where the subject
# ends (see the `.index(eos_token_CLIP)` calls in the caption-building cells).
# NOTE(review): presumably CLIP's <|endoftext|> id -- confirm against the tokenizer vocabulary.
eos_token_CLIP = 49407
# Number of predicates borrowed from other paintings for every caption; each
# caption set therefore holds the original caption plus up to N_un recombined ones.
N_un = 5

In [3]:
## Prepare the artemis dataset (merge it with the emotion-histograms.)
# Read the annotation table and report how many rows it holds.
df_full = pd.read_csv('../Dataset/ArtEmis/ArtEmis_IdC/ArtEmis_IdCI.csv')
print('Annotations loaded:', len(df_full))

# 'distEmo' is stored as the string form of a Python list: parse it first,
# then divide every histogram by its own sum and keep it as a float32 array.
emotion_hists = df_full['distEmo'].apply(literal_eval)
df_full['distEmo'] = emotion_hists.apply(
    lambda hist: (np.array(hist) / float(sum(hist))).astype('float32')
)

Annotations loaded: 100393


In [4]:
# These columns are serialised as Python-literal strings in the CSV; turn
# each of them back into real Python objects.
for list_column in ('subject', 'predicate', 'CLIP_tokens'):
    df_full[list_column] = df_full[list_column].apply(literal_eval)

# Empty accumulator (same columns as the annotations) that the per-split
# cells below extend with their processed rows.
df_IdCII = pd.DataFrame(columns=df_full.columns)

In [5]:
## Create unnatural captions for the training set
# For every caption of the split, keep its subject and attach the predicates
# of the N_un captions of *other* paintings whose subjects share the most CLIP
# token ids with it. The original caption plus these recombined sentences are
# stored together in 'captSet_CLIP_tokens', and the processed rows are added
# to df_IdCII.
from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer

df = df_full[df_full['split'] == 'train'].copy()
df.reset_index(drop=True, inplace=True)

# Encode tokens using CLIP tokenizer
subject_CLIP_tokens = [clip.tokenize(' '.join(utter)).squeeze().tolist() for utter in df['subject']]
predicate_CLIP_tokens = [clip.tokenize(' '.join(utter)).squeeze().tolist() for utter in df['predicate']]
df['subject_CLIP_tokens'] = subject_CLIP_tokens
df['predicate_CLIP_tokens'] = predicate_CLIP_tokens
df['captSet_CLIP_tokens'] = None

# Processed rows are collected here and concatenated once after the loop:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
rows_with_captSet = []

# Group by the column name (not a one-element list) so the group key is the
# painting value itself on every pandas version.
for painting, stimuli in df.groupby('painting'):
    # The candidate pool (captions of every other painting) is identical for
    # all captions of this painting, so build it -- and the token sets used
    # for the overlap count -- once per group instead of once per row.
    set_sameStyle = df[df['painting'] != painting].reset_index(drop=True)
    other_subject_sets = [set(tokens) for tokens in set_sameStyle['subject_CLIP_tokens']]

    for _, row in stimuli.iterrows():
        subject_CLIP_tokens = row['subject_CLIP_tokens']
        CLIP_tokens = row['CLIP_tokens']

        # Number of distinct token ids shared with each candidate subject.
        # NOTE(review): the sets are built from the full token lists, so any
        # special/padding ids they contain count as shared too -- confirm
        # this is intended.
        subject_set = set(subject_CLIP_tokens)
        cnts_sameTokens = [len(subject_set & other_set) for other_set in other_subject_sets]

        # Stable ascending sort, keep the last N_un positions, then reverse
        # so that the most overlapped candidate comes first.
        idx_sel_predicates = sorted(range(len(cnts_sameTokens)), key=cnts_sameTokens.__getitem__)[-N_un:]
        idx_sel_predicates.reverse() #From the most overlapped one
        sel_predicates = set_sameStyle['predicate_CLIP_tokens'].iloc[idx_sel_predicates].to_list()

        # The caption set always starts with the original caption.
        new_sentences = [CLIP_tokens]
        len_subject = subject_CLIP_tokens.index(eos_token_CLIP)

        for sel_predicate in sel_predicates:
            # Subject tokens up to (not including) eos_token_CLIP, followed by
            # the borrowed predicate without its first token, cut to the
            # length of the original caption's token list.
            sent = subject_CLIP_tokens[:len_subject] + sel_predicate[1:]
            new_sentences.append(sent[:len(CLIP_tokens)])

        row['captSet_CLIP_tokens'] = new_sentences
        rows_with_captSet.append(row)

if rows_with_captSet:
    new_rows = pd.DataFrame(rows_with_captSet)
    # Skip the still-empty accumulator instead of concatenating with it.
    df_IdCII = new_rows if df_IdCII.empty else pd.concat([df_IdCII, new_rows])

    # See some examples (taken from the last caption processed above)
    print(set_sameStyle['subject'].iloc[idx_sel_predicates].to_list())
    print(set_sameStyle['predicate'].iloc[idx_sel_predicates].to_list())
    _tokenizer = _Tokenizer()
    # Print every sentence of the set rather than fixed positions 0..5, so
    # this also works when fewer than N_un candidates exist.
    for example_sentence in new_sentences:
        print(_tokenizer.decode(example_sentence))

[['this', 'honestly'], ['this', 'dog', 'honestly'], ['this', 'guy', "'s", 'face', 'is', 'so', 'cherubic', 'and', 'cute', 'he', 'honestly'], ['this', 'guy', "'s", 'spiked', 'up', 'hair', 'honestly'], ['this', 'guy', "'s", 'headdress', 'honestly']]
[['reminds', 'me', 'of', 'the', 'kiddie', 'pool', 'area', 'at', 'my', 'local', 'community', 'pool', 'everyone', 'is', 'frolicking', 'around', 'naked', 'and', 'no', 'one', 'knows', 'what', 'is', 'going', 'on'], ['reminds', 'me', 'of', 'chewbacca', 'the', 'star', 'wars', 'character', 'they', 'share', 'the', 'same', 'unruly', 'brown', 'fur', 'and', 'beady', 'black', 'eyes'], ['looks', 'like', 'a', 'baby', 'wearing', 'a', 'mustache', 'on', 'halloween'], ['reminds', 'me', 'of', 'a', 'cockatoo'], ['looks', 'like', 'a', 'lampshade', 'with', 'a', 'black', 'sheet', 'over', 'it', 'reminds', 'me', 'of', 'when', 'i', 'played', 'dress', 'up', 'as', 'a', 'kid']]
<|startoftext|>this honestly looks like a man that is dressed in a woman 's clothes that is tryi

In [6]:
## Create unnatural captions for the validation set
# For every caption of the split, keep its subject and attach the predicates
# of the N_un captions of *other* paintings whose subjects share the most CLIP
# token ids with it. The original caption plus these recombined sentences are
# stored together in 'captSet_CLIP_tokens', and the processed rows are added
# to df_IdCII.
from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer

df = df_full[df_full['split'] == 'val'].copy()
df.reset_index(drop=True, inplace=True)

# Encode tokens using CLIP tokenizer
subject_CLIP_tokens = [clip.tokenize(' '.join(utter)).squeeze().tolist() for utter in df['subject']]
predicate_CLIP_tokens = [clip.tokenize(' '.join(utter)).squeeze().tolist() for utter in df['predicate']]
df['subject_CLIP_tokens'] = subject_CLIP_tokens
df['predicate_CLIP_tokens'] = predicate_CLIP_tokens
df['captSet_CLIP_tokens'] = None

# Processed rows are collected here and concatenated once after the loop:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
rows_with_captSet = []

# Group by the column name (not a one-element list) so the group key is the
# painting value itself on every pandas version.
for painting, stimuli in df.groupby('painting'):
    # The candidate pool (captions of every other painting) is identical for
    # all captions of this painting, so build it -- and the token sets used
    # for the overlap count -- once per group instead of once per row.
    set_sameStyle = df[df['painting'] != painting].reset_index(drop=True)
    other_subject_sets = [set(tokens) for tokens in set_sameStyle['subject_CLIP_tokens']]

    for _, row in stimuli.iterrows():
        subject_CLIP_tokens = row['subject_CLIP_tokens']
        CLIP_tokens = row['CLIP_tokens']

        # Number of distinct token ids shared with each candidate subject.
        # NOTE(review): the sets are built from the full token lists, so any
        # special/padding ids they contain count as shared too -- confirm
        # this is intended.
        subject_set = set(subject_CLIP_tokens)
        cnts_sameTokens = [len(subject_set & other_set) for other_set in other_subject_sets]

        # Stable ascending sort, keep the last N_un positions, then reverse
        # so that the most overlapped candidate comes first.
        idx_sel_predicates = sorted(range(len(cnts_sameTokens)), key=cnts_sameTokens.__getitem__)[-N_un:]
        idx_sel_predicates.reverse() #From the most overlapped one
        sel_predicates = set_sameStyle['predicate_CLIP_tokens'].iloc[idx_sel_predicates].to_list()

        # The caption set always starts with the original caption.
        new_sentences = [CLIP_tokens]
        len_subject = subject_CLIP_tokens.index(eos_token_CLIP)

        for sel_predicate in sel_predicates:
            # Subject tokens up to (not including) eos_token_CLIP, followed by
            # the borrowed predicate without its first token, cut to the
            # length of the original caption's token list.
            sent = subject_CLIP_tokens[:len_subject] + sel_predicate[1:]
            new_sentences.append(sent[:len(CLIP_tokens)])

        row['captSet_CLIP_tokens'] = new_sentences
        rows_with_captSet.append(row)

if rows_with_captSet:
    new_rows = pd.DataFrame(rows_with_captSet)
    # Skip a still-empty accumulator instead of concatenating with it.
    df_IdCII = new_rows if df_IdCII.empty else pd.concat([df_IdCII, new_rows])

    # See some examples (taken from the last caption processed above)
    print(set_sameStyle['subject'].iloc[idx_sel_predicates].to_list())
    print(set_sameStyle['predicate'].iloc[idx_sel_predicates].to_list())
    _tokenizer = _Tokenizer()
    # Print every sentence of the set rather than fixed positions 0..5, so
    # this also works when fewer than N_un candidates exist.
    for example_sentence in new_sentences:
        print(_tokenizer.decode(example_sentence))

[['the', 'girls', 'in', 'this', 'picture'], ['a', 'lovely', 'day', 'of', 'gathering', 'flowers', 'the', 'girls'], ['this', 'makes', 'me', 'feel', 'happy', 'cause', 'the', 'girls'], ['the', 'colors', 'of', 'the', 'girls', 'cheeks'], ['love', 'the', 'use', 'of', 'green', 'in', 'this', 'painting', 'the', 'girls']]
[['seem', 'to', 'be', 'bursting', 'with', 'life', 'and', 'appreciation', 'for', 'one', 'another'], ['look', 'like', 'they', 'are', 'enjoying', 'themselves', 'and', 'the', 'yellow', 'flowers', 'brighten', 'the', 'mood', 'even', 'more'], ['look', 'like', 'they', 'are', 'ready', 'for', 'a', 'party'], ['remind', 'me', 'of', 'childhood'], ['look', 'like', 'they', 'are', 'enjoying', 'a', 'nice', 'spring', 'day']]
<|startoftext|>the girls look like slaves or peasants yet the colors and tone are happy <|endoftext|>!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
<|startoftext|>the girls seem to be bursting with life and appreciation for one another <|endoftext|>!!!!!!!!!!!!

In [7]:
## Create unnatural captions for the test set
# For every caption of the split, keep its subject and attach the predicates
# of the N_un captions of *other* paintings whose subjects share the most CLIP
# token ids with it. The original caption plus these recombined sentences are
# stored together in 'captSet_CLIP_tokens', and the processed rows are added
# to df_IdCII.
from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer

df = df_full[df_full['split'] == 'test'].copy()
df.reset_index(drop=True, inplace=True)

# Encode tokens using CLIP tokenizer
subject_CLIP_tokens = [clip.tokenize(' '.join(utter)).squeeze().tolist() for utter in df['subject']]
predicate_CLIP_tokens = [clip.tokenize(' '.join(utter)).squeeze().tolist() for utter in df['predicate']]
df['subject_CLIP_tokens'] = subject_CLIP_tokens
df['predicate_CLIP_tokens'] = predicate_CLIP_tokens
df['captSet_CLIP_tokens'] = None

# Processed rows are collected here and concatenated once after the loop:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
rows_with_captSet = []

# Group by the column name (not a one-element list) so the group key is the
# painting value itself on every pandas version.
for painting, stimuli in df.groupby('painting'):
    # The candidate pool (captions of every other painting) is identical for
    # all captions of this painting, so build it -- and the token sets used
    # for the overlap count -- once per group instead of once per row.
    set_sameStyle = df[df['painting'] != painting].reset_index(drop=True)
    other_subject_sets = [set(tokens) for tokens in set_sameStyle['subject_CLIP_tokens']]

    for _, row in stimuli.iterrows():
        subject_CLIP_tokens = row['subject_CLIP_tokens']
        CLIP_tokens = row['CLIP_tokens']

        # Number of distinct token ids shared with each candidate subject.
        # NOTE(review): the sets are built from the full token lists, so any
        # special/padding ids they contain count as shared too -- confirm
        # this is intended.
        subject_set = set(subject_CLIP_tokens)
        cnts_sameTokens = [len(subject_set & other_set) for other_set in other_subject_sets]

        # Stable ascending sort, keep the last N_un positions, then reverse
        # so that the most overlapped candidate comes first.
        idx_sel_predicates = sorted(range(len(cnts_sameTokens)), key=cnts_sameTokens.__getitem__)[-N_un:]
        idx_sel_predicates.reverse() #From the most overlapped one
        sel_predicates = set_sameStyle['predicate_CLIP_tokens'].iloc[idx_sel_predicates].to_list()

        # The caption set always starts with the original caption.
        new_sentences = [CLIP_tokens]
        len_subject = subject_CLIP_tokens.index(eos_token_CLIP)

        for sel_predicate in sel_predicates:
            # Subject tokens up to (not including) eos_token_CLIP, followed by
            # the borrowed predicate without its first token, cut to the
            # length of the original caption's token list.
            sent = subject_CLIP_tokens[:len_subject] + sel_predicate[1:]
            new_sentences.append(sent[:len(CLIP_tokens)])

        row['captSet_CLIP_tokens'] = new_sentences
        rows_with_captSet.append(row)

if rows_with_captSet:
    new_rows = pd.DataFrame(rows_with_captSet)
    # Skip a still-empty accumulator instead of concatenating with it.
    df_IdCII = new_rows if df_IdCII.empty else pd.concat([df_IdCII, new_rows])

    # See some examples (taken from the last caption processed above)
    print(set_sameStyle['subject'].iloc[idx_sel_predicates].to_list())
    print(set_sameStyle['predicate'].iloc[idx_sel_predicates].to_list())
    _tokenizer = _Tokenizer()
    # Print every sentence of the set rather than fixed positions 0..5, so
    # this also works when fewer than N_un candidates exist.
    for example_sentence in new_sentences:
        print(_tokenizer.decode(example_sentence))

[['this', 'picture', 'makes', 'me', 'feel', 'sad', 'because', 'she', 'does', 'not', 'have', 'any', 'expression', 'her', 'eyes', 'look', 'lost', 'i', 'almost'], ['i', 'feel', 'sad', 'because', 'this'], ['i', 'feel', 'sad', 'because', 'the', 'workers', 'in', 'this', 'picture', 'look', 'older', 'and', 'tired', 'does', 'not'], ['this', 'makes', 'me', 'feel', 'sad', 'because', 'i', 'feel'], ['i', 'really', 'like', 'the', 'color', 'combination', 'of', 'this', 'artwork', 'it', 'does', 'make', 'me', 'feel', 'sad', 'and', 'scared', 'because', 'it']]
[['feel', 'like', 'she', 'is', 'submissive', 'and', 'being', 'controlled'], ['looks', 'like', 'a', 'painting', 'of', 'an', 'impoverished', 'shantytown'], ['look', 'like', 'a', 'healthy', 'situation'], ['as', 'though', 'i', 'am', 'grieving', 'along', 'with', 'the', 'other', 'people'], ['looks', 'like', 'someone', 'is', 'being', 'stabbed', 'to', 'death']]
<|startoftext|>i feel sad because this lady look like she resting from a hard days work <|endofte

In [8]:
# Give the accumulated rows a fresh 0..n-1 index, then write the table
# (without the index column) next to the input annotations.
df_IdCII = df_IdCII.reset_index(drop=True)
df_IdCII.to_csv('../Dataset/ArtEmis/ArtEmis_IdC/ArtEmis_IdCII.csv', index=False)