In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
from transformations.text.contraction.expand_contractions import ExpandContractions
from transformations.text.contraction.contract_contractions import ContractContractions
from transformations.text.emoji.emojify import Emojify, AddPositiveEmoji, AddNegativeEmoji, AddNeutralEmoji
from transformations.text.emoji.demojify import Demojify, RemovePositiveEmoji, RemoveNegativeEmoji, RemoveNeutralEmoji
from transformations.text.negation.remove_negation import RemoveNegation
from transformations.text.negation.add_negation import AddNegation
from transformations.text.contraction.expand_contractions import ExpandContractions
from transformations.text.contraction.contract_contractions import ContractContractions
from transformations.text.word_swap.change_number import ChangeNumber
from transformations.text.word_swap.change_synse import ChangeSynonym, ChangeAntonym, ChangeHyponym, ChangeHypernym
from transformations.text.word_swap.word_deletion import WordDeletion
from transformations.text.word_swap.homoglyph_swap import HomoglyphSwap
from transformations.text.word_swap.random_swap import RandomSwap
from transformations.text.insertion.random_insertion import RandomInsertion
from transformations.text.insertion.sentiment_phrase import InsertSentimentPhrase, InsertPositivePhrase, InsertNegativePhrase
from transformations.text.links.add_sentiment_link import AddSentimentLink, AddPositiveLink, AddNegativeLink
from transformations.text.links.import_link_text import ImportLinkText
from transformations.text.entities.change_location import ChangeLocation
from transformations.text.entities.change_name import ChangeName
from transformations.text.typos.char_delete import RandomCharDel
from transformations.text.typos.char_insert import RandomCharInsert
from transformations.text.typos.char_substitute import RandomCharSubst
from transformations.text.typos.char_swap import RandomCharSwap
from transformations.text.typos.char_swap_qwerty import RandomSwapQwerty 

In [3]:
from datasets import load_dataset
import pandas as pd
import random
import time
import pickle
import os
from tqdm.notebook import tqdm

In [4]:
# Transformation classes exercised by this notebook, grouped by the package
# they are imported from. The position of a class in this list is the index
# used by later cells (enumerate / zip against the `avg` array), so keep the
# order stable. InsertSentimentPhrase and AddSentimentLink are imported above
# but are not part of this list.
transformations = [
    # contraction
    ExpandContractions,
    ContractContractions,
    # emoji (emojify)
    Emojify,
    AddPositiveEmoji,
    AddNegativeEmoji,
    AddNeutralEmoji,
    # emoji (demojify)
    Demojify, 
    RemovePositiveEmoji,
    RemoveNegativeEmoji,
    RemoveNeutralEmoji,
    # entities
    ChangeLocation,
    ChangeName,
    # insertion
    InsertPositivePhrase,
    InsertNegativePhrase,
    RandomInsertion,
    # links
    AddPositiveLink,
    AddNegativeLink,
    ImportLinkText,
    # negation
    AddNegation,
    RemoveNegation,
    # typos
    RandomCharDel,
    RandomCharInsert, 
    RandomCharSubst, 
    RandomCharSwap, 
    RandomSwapQwerty,
    # word_swap
    ChangeNumber,
    ChangeSynonym, 
    ChangeAntonym, 
    ChangeHyponym, 
    ChangeHypernym,
    WordDeletion, 
    HomoglyphSwap, 
    RandomSwap
]

In [52]:
# Build one catalog frame describing every transformation: instantiate each
# class, take the frame returned by get_tran_types(), and tag its rows with
# the class name and the instance itself.
df_all = []
for transform in transformations:
    t = transform(meta=True)
    # NOTE: `df` is reused here for the per-transformation frame and is
    # overwritten with the combined frame after the loop.
    df = t.get_tran_types()
    df['transformation'] = t.__class__.__name__
    # Store the live instance so a row can later be used to call the transform.
    df['tran_fn'] = t
    df_all.append(df)
    
# No ignore_index: each per-transformation frame keeps its own index labels,
# so labels repeat in the combined frame (see the displayed output below).
df = pd.concat(df_all)

In [54]:
# Display the combined transformation catalog built in the previous cell.
df

Unnamed: 0,task_name,tran_type,transformation,tran_fn
0,sentiment,INV,ExpandContractions,<transformations.text.contraction.expand_contr...
1,topic,INV,ExpandContractions,<transformations.text.contraction.expand_contr...
0,sentiment,INV,ContractContractions,<transformations.text.contraction.contract_con...
1,topic,INV,ContractContractions,<transformations.text.contraction.contract_con...
0,sentiment,INV,Emojify,<transformations.text.emoji.emojify.Emojify ob...
...,...,...,...,...
1,topic,INV,WordDeletion,<transformations.text.word_swap.word_deletion....
0,sentiment,INV,HomoglyphSwap,<transformations.text.word_swap.homoglyph_swap...
1,topic,INV,HomoglyphSwap,<transformations.text.word_swap.homoglyph_swap...
0,sentiment,INV,RandomSwap,<transformations.text.word_swap.random_swap.Ra...


In [10]:
def pkl_save(file, path):
    """Pickle the object `file` into the file at `path`.

    The target is opened in binary write mode (an existing file is
    overwritten) and the object is serialized with
    `pickle.HIGHEST_PROTOCOL`. Returns None.
    """
    with open(path, 'wb') as sink:
        pickler = pickle.Pickler(sink, protocol=pickle.HIGHEST_PROTOCOL)
        pickler.dump(file)
        
def pkl_load(path):
    """Read the file at `path` in binary mode and return the unpickled object.

    NOTE(review): unpickling can execute arbitrary code, so only point this
    at files from a trusted source.
    """
    with open(path, 'rb') as source:
        loaded = pickle.Unpickler(source).load()
    return loaded

def init_transforms(task=None, tran=None, meta=True):
    """Build the catalog of available transformations as a DataFrame.

    Every class in the module-level `transformations` list is instantiated
    with `meta=meta`. The frame returned by the instance's
    `get_tran_types()` is tagged with two extra columns: 'transformation'
    (the class name) and 'tran_fn' (the instance itself). All frames are
    then concatenated; index labels are not reset, so they repeat.

    Parameters
    ----------
    task : optional
        When not None, keep only rows whose 'task_name' equals `task`.
    tran : optional
        When not None, keep only rows whose 'tran_type' equals `tran`.
    meta : bool
        Forwarded to each transformation's constructor.

    Returns
    -------
    pandas.DataFrame
        The (optionally filtered) catalog.
    """
    frames = []
    for transform_cls in transformations:
        instance = transform_cls(meta=meta)
        frame = instance.get_tran_types()
        frame['transformation'] = instance.__class__.__name__
        frame['tran_fn'] = instance
        frames.append(frame)

    catalog = pd.concat(frames)

    # Apply the optional filters one after the other.
    if task is not None:
        catalog = catalog[catalog['task_name'] == task]
    if tran is not None:
        catalog = catalog[catalog['tran_type'] == tran]
    return catalog

def apply_transforms(test_suites, num_transforms=2, task=None, tran=None):
    """Apply up to `num_transforms` distinct random transformations to every
    example of every test suite.

    For each (X, y) pair the catalog from `init_transforms` is shuffled and
    its transformations are tried in that order. A transformation counts only
    if the metadata it returns has a truthy 'change' entry. Successful
    transformations are chained: each one receives the output of the
    previous successful one.

    Parameters
    ----------
    test_suites : dict
        Maps a suite key to a dict with 'data' and 'target' sequences; the
        two sequences are iterated in parallel.
    num_transforms : int
        Maximum number of distinct transformations to apply per example.
        With 0 (or a negative value) examples are passed through unchanged.
    task, tran : optional
        Forwarded to `init_transforms` to restrict the catalog.

    Returns
    -------
    dict
        Same keys as `test_suites`; each value is a dict with 'data'
        (transformed inputs), 'target' (transformed labels) and 'ts' (per
        example, the names of the transformations that were applied). If
        fewer than `num_transforms` transformations are able to change an
        example, 'ts' is shorter than `num_transforms` for that example.
    """
    df = init_transforms(task=task, tran=tran, meta=True)
    new_test_suites = {}
    for i, test_suite in tqdm(test_suites.items()):
        new_X, new_y, new_ts = [], [], []
        for X, y in zip(test_suite['data'], test_suite['target']):
            # Start from the untouched example so the result is well defined
            # even when nothing can be applied.
            _X, _y = X, y
            ts = []
            if num_transforms > 0:
                # A single shuffled pass over the catalog replaces the former
                # unbounded sample-and-retry loop, so the search always ends.
                candidates = df.sample(frac=1)
                # Iterate positionally: the catalog's index labels repeat, so
                # label lookups such as series[0] are not reliable here.
                for t_fn, t_name in zip(candidates['tran_fn'],
                                        candidates['transformation']):
                    if t_name in ts:
                        # The same transformation can appear once per task.
                        continue
                    out_X, out_y, meta = t_fn.transform_Xy(_X, _y)
                    if not meta['change']:
                        continue
                    # Chain: the next transformation sees this output.
                    _X, _y = out_X, out_y
                    ts.append(t_name)
                    if len(ts) >= num_transforms:
                        break
            new_X.append(_X)
            new_y.append(_y)
            new_ts.append(ts)
        new_test_suites[i] = {'data': new_X, 'target': new_y, 'ts': new_ts}
    return new_test_suites

In [11]:
# Load the pickled SST2 test suites from the local assets directory.
# Presumably the INV_/SIB_ files hold transformed variants of `test_suites`
# (INV also appears as a 'tran_type' value in the catalog) -- TODO confirm.
test_suites = pkl_load('assets/SST2/test_suites.pkl')
INV_test_suites = pkl_load('assets/SST2/INV_test_suites.pkl')
SIB_test_suites = pkl_load('assets/SST2/SIB_test_suites.pkl')
# Disabled: the combined suites are not loaded in this notebook.
# both_test_suites = pkl_load('assets/SST2/both_test_suites.pkl')

In [19]:
# Number of labelled examples in the first test suite.
len(test_suites[0]['target'])

100

In [26]:
# Preview the first `numeg` (text, label) pairs of the first test suite.
numeg=6
list(zip(test_suites[0]['data'][:numeg],test_suites[0]['target'][:numeg]))

[('nothing but an episode of smackdown ! ', 0),
 ('stillborn except as a harsh conceptual exercise ', 0),
 ('so you can get your money back ', 0),
 ('be awed by the power and grace of one of the greatest natural sportsmen of modern times ',
  1),
 ('exciting to watch as two last-place basketball ', 0),
 ('for a fairly inexperienced filmmaker ', 0)]

In [56]:
# Use the texts of the suite stored under key 75 as the evaluation sample
# for the applicability measurements in the cells below.
examples=test_suites[75]['data']
type(examples)

list

In [58]:
# Spot-check a single sentence from the evaluation sample.
examples[75]

"it 's probably worth catching solely on its visual merits . "

In [90]:
# Number of transformation classes under test.
len(transformations)

33

In [103]:
import numpy as np
# One applicability slot per transformation. Size the array from the list
# itself so it cannot drift out of sync when transformations are added or
# removed (it used to be a hard-coded 33).
avg = np.zeros(len(transformations))

In [104]:
# For every transformation, measure the percentage of `examples` it changes,
# i.e. for which the returned metadata has a truthy 'change' entry.
n_examples = len(examples)
for tnum, trans in enumerate(transformations):
    # One instance per transformation is enough; building a new one for
    # every example only repeats the construction cost.
    t = trans(meta=True)
    n_changed = 0
    for ex in examples:
        _, metad = t(ex)
        if metad['change']:
            n_changed += 1
    # Assign instead of accumulating so re-running the cell does not inflate
    # the numbers, and normalise by the sample size so the value printed with
    # a '%' sign is a real percentage for any number of examples.
    avg[tnum] = 100.0 * n_changed / n_examples if n_examples else 0.0
    print(f"{tnum}\t transf= {t.__class__.__name__} \t avg= {avg[tnum]}%")
            
            

0	 transf= ExpandContractions 	 avg= 0.0%
1	 transf= ContractContractions 	 avg= 5.0%
2	 transf= Emojify 	 avg= 51.0%
3	 transf= AddPositiveEmoji 	 avg= 100.0%
4	 transf= AddNegativeEmoji 	 avg= 100.0%
5	 transf= AddNeutralEmoji 	 avg= 100.0%
6	 transf= Demojify 	 avg= 0.0%
7	 transf= RemovePositiveEmoji 	 avg= 100.0%
8	 transf= RemoveNegativeEmoji 	 avg= 100.0%
9	 transf= RemoveNeutralEmoji 	 avg= 100.0%
10	 transf= ChangeLocation 	 avg= 14.0%
11	 transf= ChangeName 	 avg= 12.0%
12	 transf= InsertPositivePhrase 	 avg= 100.0%
13	 transf= InsertNegativePhrase 	 avg= 100.0%
14	 transf= RandomInsertion 	 avg= 100.0%
15	 transf= AddPositiveLink 	 avg= 100.0%
16	 transf= AddNegativeLink 	 avg= 100.0%
17	 transf= ImportLinkText 	 avg= 0.0%
18	 transf= AddNegation 	 avg= 44.0%
19	 transf= RemoveNegation 	 avg= 8.0%
20	 transf= RandomCharDel 	 avg= 100.0%
21	 transf= RandomCharInsert 	 avg= 100.0%
22	 transf= RandomCharSubst 	 avg= 99.0%
23	 transf= RandomCharSwap 	 avg= 100.0%
24	 transf= Ran

In [105]:
# Per-transformation applicability scores, in `transformations` order.
avg

array([  0.,   5.,  51., 100., 100., 100.,   0., 100., 100., 100.,  14.,
        12., 100., 100., 100., 100., 100.,   0.,  44.,   8., 100., 100.,
        99., 100.,  78.,   3.,  91.,  90.,  85.,  88., 100., 100., 100.])

In [106]:
# Pair each transformation's class name with its applicability score.
ans = [
    (transform_cls.__name__, score)
    for transform_cls, score in zip(transformations, list(avg))
]
ans

[('ExpandContractions', 0.0),
 ('ContractContractions', 5.0),
 ('Emojify', 51.0),
 ('AddPositiveEmoji', 100.0),
 ('AddNegativeEmoji', 100.0),
 ('AddNeutralEmoji', 100.0),
 ('Demojify', 0.0),
 ('RemovePositiveEmoji', 100.0),
 ('RemoveNegativeEmoji', 100.0),
 ('RemoveNeutralEmoji', 100.0),
 ('ChangeLocation', 14.0),
 ('ChangeName', 12.0),
 ('InsertPositivePhrase', 100.0),
 ('InsertNegativePhrase', 100.0),
 ('RandomInsertion', 100.0),
 ('AddPositiveLink', 100.0),
 ('AddNegativeLink', 100.0),
 ('ImportLinkText', 0.0),
 ('AddNegation', 44.0),
 ('RemoveNegation', 8.0),
 ('RandomCharDel', 100.0),
 ('RandomCharInsert', 100.0),
 ('RandomCharSubst', 99.0),
 ('RandomCharSwap', 100.0),
 ('RandomSwapQwerty', 78.0),
 ('ChangeNumber', 3.0),
 ('ChangeSynonym', 91.0),
 ('ChangeAntonym', 90.0),
 ('ChangeHyponym', 85.0),
 ('ChangeHypernym', 88.0),
 ('WordDeletion', 100.0),
 ('HomoglyphSwap', 100.0),
 ('RandomSwap', 100.0)]

In [108]:
import csv
# Persist the (name, applicability) pairs as a two-column CSV with a header.
filename = "./assets/transfomation-test.csv"
# newline='' hands line-ending control to the csv module; without it, rows
# can be separated by blank lines on platforms that translate '\n'.
with open(filename, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(['Transformation name', 'Applicability'])
    csvwriter.writerows(ans)

In [112]:
# Transformations that changed fewer than half of the examples.
[(name, score) for name, score in ans if score < 50]

[('ExpandContractions', 0.0),
 ('ContractContractions', 5.0),
 ('Demojify', 0.0),
 ('ChangeLocation', 14.0),
 ('ChangeName', 12.0),
 ('ImportLinkText', 0.0),
 ('AddNegation', 44.0),
 ('RemoveNegation', 8.0),
 ('ChangeNumber', 3.0)]

In [114]:
# Transformations that changed more than three quarters of the examples.
[(name, score) for name, score in ans if score > 75]

[('AddPositiveEmoji', 100.0),
 ('AddNegativeEmoji', 100.0),
 ('AddNeutralEmoji', 100.0),
 ('RemovePositiveEmoji', 100.0),
 ('RemoveNegativeEmoji', 100.0),
 ('RemoveNeutralEmoji', 100.0),
 ('InsertPositivePhrase', 100.0),
 ('InsertNegativePhrase', 100.0),
 ('RandomInsertion', 100.0),
 ('AddPositiveLink', 100.0),
 ('AddNegativeLink', 100.0),
 ('RandomCharDel', 100.0),
 ('RandomCharInsert', 100.0),
 ('RandomCharSubst', 99.0),
 ('RandomCharSwap', 100.0),
 ('RandomSwapQwerty', 78.0),
 ('ChangeSynonym', 91.0),
 ('ChangeAntonym', 90.0),
 ('ChangeHyponym', 85.0),
 ('ChangeHypernym', 88.0),
 ('WordDeletion', 100.0),
 ('HomoglyphSwap', 100.0),
 ('RandomSwap', 100.0)]