In [1]:
import os
os.environ["MODEL_DIR"] = '../model'
import re
import json
from tqdm import tqdm
from itertools import combinations, permutations
from collections import defaultdict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read the training-split captions; the first CSV column becomes the index.
train_df = pd.read_csv(
    "/home/zhiling/py3_workspace/aser_audioset/audio_Caps/train_lemma.csv",
    index_col=0,
)
train_df.head(n=5)

Unnamed: 0_level_0,youtube_id,start_time,caption
audiocap_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
91139,r1nicOVtvkQ,130,a woman talk nearby as water pour
58146,UDGBjjwyaqE,20,multiple clanging and clank sound
11542,3eJ9RynJzP8,80,"the wind be blow , insect be singe , and rustl..."
11543,3eK62q7SnVU,390,the wind be blow and rustling occur
11540,3eGXNIadwGk,30,person be whistle


In [20]:
# Read the validation-split captions; the first CSV column becomes the index.
val_df = pd.read_csv(
    "/home/zhiling/py3_workspace/aser_audioset/audio_Caps/val_lemma.csv",
    index_col=0,
)
val_df.head(n=5)

Unnamed: 0_level_0,youtube_id,start_time,caption
audiocap_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
97151,vfY_TJq7n_U,130,"rustling occur , duck quack and water splash ,..."
108945,tdWhHV3X25Q,60,an audience give applause as a man yell and a ...
107898,tw76HGONaKg,570,a man speak over intermittent keyboard tap
107893,y2bVZ7rz-5M,280,motor noise be follow by a horn honk and a sir...
107892,ti66RjZWTp0,20,a male speak as metal click and a gun fire once


In [21]:
# Read the test-split captions; the first CSV column becomes the index.
test_df = pd.read_csv(
    "/home/zhiling/py3_workspace/aser_audioset/audio_Caps/test_lemma.csv",
    index_col=0,
)
test_df.head(n=5)

Unnamed: 0_level_0,youtube_id,start_time,caption
audiocap_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
103549,7fmOlUlwoNg,20,constant rattle noise and sharp vibration
103548,6BJ455B1aAs,0,a rocket fly by follow by a loud explosion and...
103541,GOD8Bt5LfDE,100,hum and vibrate with a man and child speak and...
103540,YQSuFyFm3Lc,230,a train run on a railroad track follow by a ve...
103542,VjSEIRnLAh8,30,"food be fry , and a woman talk"


In [23]:
# Dangling fragments that make_neg appends to a caption to simulate a sentence
# that stops in the middle of a phrase.
useless_tails = ["in the", "of a", "and", "and a", "follow by", "as", "with", "a", "on the", "and then"]
# Connectives used to split a caption into separate events.
conjs = ["and", "follow by", "while", "before", "after", "as", "with"]
# Regex matching any connective, optionally preceded by ", ".
# The \b anchors restrict matches to whole words: without them "as" matched
# inside "splash", "and" inside "sand", "with" inside "within", and re.split
# produced meaningless event fragments.
conj_pattern = r"(?:, )?\b(?:" + "|".join(conjs) + r")\b"

def make_neg(anchor, p=0.2, rng=None):
    """
    Corrupt a caption with random fluency errors to build a negative example.

    Four perturbations are tried in a fixed order, each triggered independently
    with probability ``p``. Each one edits the text produced so far, except
    that event repetition picks its event from the original ``anchor``.
    A "remove event" perturbation is intentionally not included, because
    dropping an event is not necessarily a fluency issue.

    input:
    anchor: text to make neg
    p: probability of applying each individual perturbation (default 0.2)
    rng: optional random source exposing ``random()`` and ``choice()``
        (e.g. ``np.random.default_rng(seed)``); when omitted the global
        ``np.random`` state is used, so ``np.random.seed`` still applies

    output:
    neg: the corrupted text (identical to ``anchor`` if nothing triggered)
    neg_labels: binary indicators of the neg types used, ordered as
        [add_tail, repeat_event, repeat_word, delete_word]
    """
    if rng is None:
        # The np.random module itself provides random() and choice().
        rng = np.random

    neg_labels = [0, 0, 0, 0]
    ret = anchor

    # add useless tails
    if rng.random() < p:
        to_add = rng.choice(useless_tails)
        ret = f"{ret} {to_add}"
        neg_labels[0] = 1

    # add repetition
    if rng.random() < p:
        # re.split always returns at least one item and may return empty or
        # whitespace-only pieces (caption starting/ending with a connective).
        # Drop those so we never append a connective followed by nothing.
        events = [event.strip() for event in re.split(conj_pattern, anchor)]
        events = [event for event in events if event]
        if events:
            event0 = rng.choice(events)
            # Every connective except the last entry of conjs may be used here.
            conj0 = rng.choice(conjs[:-1])
            ret = f"{ret} {conj0} {event0}"
            neg_labels[1] = 1

    # repeat word
    if rng.random() < p:
        words = ret.split(" ")
        if len(words) > 2:
            idx = int(rng.choice(len(words)))
            # words[idx] ends the left slice and starts the right one,
            # so it appears twice in a row.
            ret = " ".join(words[:idx + 1] + words[idx:])
            neg_labels[2] = 1

    # delete word
    if rng.random() < p:
        words = ret.split(" ")
        if len(words) > 2:
            idx = int(rng.choice(len(words)))
            ret = " ".join(words[:idx] + words[idx + 1:])
            neg_labels[3] = 1

    return ret, neg_labels


In [24]:
# Spot-check make_neg on a single caption. Each perturbation is applied at
# random, so the caption may come back unchanged with all-zero labels.
anchor = "a baby vocalize and laugh at a woman speak"
make_neg(anchor)

('a baby vocalize and laugh at a woman speak', [0, 0, 0, 0])

In [32]:
# Same spot check run again: make_neg draws fresh random numbers on every
# call, so the perturbation applied (if any) can differ between runs.
anchor = "a baby vocalize and laugh at a woman speak"
make_neg(anchor)

('a baby vocalize and laugh at a woman', [0, 0, 0, 1])

In [19]:
# Create the "dataset" directory under the current working directory;
# exist_ok=True makes this a no-op when it is already there.
os.makedirs("dataset", exist_ok=True)

In [28]:
# Corrupt every training caption once (in row order) and tabulate the
# resulting text next to its four perturbation flags.
synthetic_train = pd.DataFrame(
    [[neg_text, *flags] for neg_text, flags in map(make_neg, train_df["caption"])]
)
synthetic_train.columns = ["caption", "add_tail", "repeat_event", "repeat_word", "delete_word"]
# "error" is 1 when at least one perturbation flag is set for the row.
synthetic_train["error"] = synthetic_train.iloc[:, 1:].max(axis=1)
synthetic_train.head()

Unnamed: 0,caption,add_tail,repeat_event,repeat_word,delete_word,error
0,a woman talk nearby as water pour in the as a ...,1,1,0,0,1
1,multiple clanging and clank sound and then,1,0,0,0,1
2,"the wind be blow , insect be singe , and rustl...",1,0,0,0,1
3,the wind be blow and rustling occur,0,0,0,0,0
4,person be whistle,0,0,0,0,0


In [29]:
# Corrupt every validation caption once (in row order) and tabulate the
# resulting text next to its four perturbation flags.
synthetic_val = pd.DataFrame(
    [[neg_text, *flags] for neg_text, flags in map(make_neg, val_df["caption"])]
)
synthetic_val.columns = ["caption", "add_tail", "repeat_event", "repeat_word", "delete_word"]
# "error" is 1 when at least one perturbation flag is set for the row.
synthetic_val["error"] = synthetic_val.iloc[:, 1:].max(axis=1)
synthetic_val.head()

Unnamed: 0,caption,add_tail,repeat_event,repeat_word,delete_word,error
0,"rustling occur , duck quack and water splash ,...",0,0,0,0,0
1,an audience give applause as a man yell and a ...,0,0,0,0,0
2,a man speak over intermittent keyboard tap,0,0,0,0,0
3,motor noise be follow by a horn honk and a sir...,0,0,0,0,0
4,a male speak as metal click and a gun fire once,0,0,0,0,0


In [30]:
# Corrupt every test caption once (in row order) and tabulate the
# resulting text next to its four perturbation flags.
synthetic_test = pd.DataFrame(
    [[neg_text, *flags] for neg_text, flags in map(make_neg, test_df["caption"])]
)
synthetic_test.columns = ["caption", "add_tail", "repeat_event", "repeat_word", "delete_word"]
# "error" is 1 when at least one perturbation flag is set for the row.
synthetic_test["error"] = synthetic_test.iloc[:, 1:].max(axis=1)
synthetic_test.head()

Unnamed: 0,caption,add_tail,repeat_event,repeat_word,delete_word,error
0,constant rattle noise and sharp vibration on the,1,0,0,0,1
1,a rocket fly by follow by a loud explosion and...,0,0,0,1,1
2,hum and vibrate with a man and child speak and...,0,0,0,0,0
3,a train run on a railroad track follow by a ve...,0,0,0,0,0
4,"food be fry , and a woman talk",0,0,0,0,0


In [31]:
# Write each synthetic split to its CSV file (the index is written as well).
for split_frame, csv_path in [
    (synthetic_train, "./dataset/synthetic_train.csv"),
    (synthetic_val, "./dataset/synthetic_val.csv"),
    (synthetic_test, "./dataset/synthetic_test.csv"),
]:
    split_frame.to_csv(csv_path, encoding="utf-8")