In [261]:
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold

In [262]:
# Read both CSV files and apply the same preparation to each of them.

def _load_shuffled_frame(csv_path, seed=42):
    """Read a CSV, shuffle its rows, drop incomplete rows and cast labels to int.

    The rows are shuffled with ``seed`` before rows containing any missing
    value are removed; the index is reset after each step. The ``Hyperbole``
    and ``Metaphor`` columns are then converted to integers.
    """
    shuffled = pd.read_csv(csv_path).sample(frac=1, random_state=seed).reset_index(drop=True)
    complete = shuffled.dropna(axis=0).reset_index(drop=True)
    return complete.astype({"Hyperbole": "int", "Metaphor": "int"})


hyperbole_df = _load_shuffled_frame("data/ACL_dataset - HYPO.csv")
metaphor_df = _load_shuffled_frame('data/ACL_dataset - LCC_Label_Balanced.csv')

In [263]:
# Preview the first five rows of the shuffled, NaN-free hyperbole frame.
hyperbole_df.head(n=5)

Unnamed: 0,Sentence,Hyperbole,Metaphor
0,"Insolent boy, I'll slash you to ribbons!",1,1
1,The level of discombobulation in the realm of ...,1,1
2,His eyes were very dark.,0,0
3,It's been a long time since I found someone ni...,0,0
4,"Oh, you are soaked to the bone, monsieur.",1,1


In [264]:
# Preview the first five rows of the shuffled, NaN-free metaphor frame.
metaphor_df.head(n=5)

Unnamed: 0,Sentence,Hyperbole,Metaphor
0,"""They are the blood of the election process ,""...",1,1
1,With 70 percent of them receiving a free or re...,1,1
2,"""This action reflects the board's belief that ...",0,1
3,You know who else carried a gun ?,0,0
4,Income Group Income Range Arizona State & Loca...,0,1


In [265]:
# Every column after the first one is treated as a label column.
# NOTE: this is positional, so it must run before extra columns are appended
# to the frames; otherwise those columns would be picked up as labels too.
hyperbole_cols = hyperbole_df.columns
hyperbole_label_cols = hyperbole_cols[1:].tolist()
hyperbole_num_labels = len(hyperbole_label_cols)
print('Hyperbole label columns: ', hyperbole_label_cols)

metaphor_cols = metaphor_df.columns
metaphor_label_cols = metaphor_cols[1:].tolist()
metaphor_num_labels = len(metaphor_label_cols)
print('Metaphor label columns: ', metaphor_label_cols)

Hyperbole label columns:  ['Hyperbole', 'Metaphor']
Metaphor label columns:  ['Hyperbole', 'Metaphor']


In [266]:
# Store, per row, the label values as one array (one entry per label column).
hyperbole_df['one_hot_labels'] = list(hyperbole_df.loc[:, hyperbole_label_cols].to_numpy())
metaphor_df['one_hot_labels'] = list(metaphor_df.loc[:, metaphor_label_cols].to_numpy())

In [267]:
# Check that the one_hot_labels column is present on the hyperbole frame.
hyperbole_df.head(n=5)

Unnamed: 0,Sentence,Hyperbole,Metaphor,one_hot_labels
0,"Insolent boy, I'll slash you to ribbons!",1,1,"[1, 1]"
1,The level of discombobulation in the realm of ...,1,1,"[1, 1]"
2,His eyes were very dark.,0,0,"[0, 0]"
3,It's been a long time since I found someone ni...,0,0,"[0, 0]"
4,"Oh, you are soaked to the bone, monsieur.",1,1,"[1, 1]"


In [268]:
# Check that the one_hot_labels column is present on the metaphor frame.
metaphor_df.head(n=5)

Unnamed: 0,Sentence,Hyperbole,Metaphor,one_hot_labels
0,"""They are the blood of the election process ,""...",1,1,"[1, 1]"
1,With 70 percent of them receiving a free or re...,1,1,"[1, 1]"
2,"""This action reflects the board's belief that ...",0,1,"[0, 1]"
3,You know who else carried a gun ?,0,0,"[0, 0]"
4,Income Group Income Range Arizona State & Loca...,0,1,"[0, 1]"


In [270]:
def _label_pair_masks(frame):
    """Return boolean masks for the (Hyperbole, Metaphor) pairs (0,0), (0,1), (1,0), (1,1)."""
    hyp = frame["Hyperbole"]
    met = frame["Metaphor"]
    return [(hyp == h_val) & (met == m_val) for h_val in (0, 1) for m_val in (0, 1)]


hyperbole_conditions = _label_pair_masks(hyperbole_df)
metaphor_conditions = _label_pair_masks(metaphor_df)

# One class id per label pair, in the same order as the masks above.
choices = [0, 1, 2, 3]

# "new" holds the combined class id; np.select falls back to 0 when no mask matches.
hyperbole_df["new"] = np.select(hyperbole_conditions, choices)
metaphor_df["new"] = np.select(metaphor_conditions, choices)

In [272]:
# Check the combined class id stored in the "new" column of the hyperbole frame.
hyperbole_df.head(n=5)

Unnamed: 0,Sentence,Hyperbole,Metaphor,one_hot_labels,new
0,"Insolent boy, I'll slash you to ribbons!",1,1,"[1, 1]",3
1,The level of discombobulation in the realm of ...,1,1,"[1, 1]",3
2,His eyes were very dark.,0,0,"[0, 0]",0
3,It's been a long time since I found someone ni...,0,0,"[0, 0]",0
4,"Oh, you are soaked to the bone, monsieur.",1,1,"[1, 1]",3


In [271]:
# Check the combined class id stored in the "new" column of the metaphor frame.
metaphor_df.head(n=5)

Unnamed: 0,Sentence,Hyperbole,Metaphor,one_hot_labels,new
0,"""They are the blood of the election process ,""...",1,1,"[1, 1]",3
1,With 70 percent of them receiving a free or re...,1,1,"[1, 1]",3
2,"""This action reflects the board's belief that ...",0,1,"[0, 1]",1
3,You know who else carried a gun ?,0,0,"[0, 0]",0
4,Income Group Income Range Arizona State & Loca...,0,1,"[0, 1]",1


In [273]:
# Print the class balance of each label column and of the combined class id,
# first for the hyperbole frame and then for the metaphor frame.
for _frame in (hyperbole_df, metaphor_df):
    for _column in ("Metaphor", "Hyperbole", "new"):
        print(_frame[_column].value_counts())

# y= df["new"].values

0    796
1    622
Name: Metaphor, dtype: int64
1    709
0    709
Name: Hyperbole, dtype: int64
0    602
3    515
2    194
1    107
Name: new, dtype: int64
1    1217
0     817
Name: Metaphor, dtype: int64
0    1400
1     634
Name: Hyperbole, dtype: int64
1    700
0    700
3    517
2    117
Name: new, dtype: int64


In [275]:
# labels = list(df.one_hot_labels.values)
# comments = list(df.Sentence.values)

In [276]:
# df.rename(columns = {"Sentence":"sentence", "Hyperbole":"target"}, inplace=True)
# df.head()

In [277]:
# Per-task label arrays for each dataset (row order matches the frames).
hyperbole_df_hyp_labels = hyperbole_df["Hyperbole"].to_numpy()
hyperbole_df_met_labels = hyperbole_df["Metaphor"].to_numpy()

metaphor_df_hyp_labels = metaphor_df["Hyperbole"].to_numpy()
metaphor_df_met_labels = metaphor_df["Metaphor"].to_numpy()

In [278]:
# Combined class ids (the "new" column) as arrays, one per dataset.
hyp_new_labels = hyperbole_df["new"].to_numpy()
met_new_labels = metaphor_df["new"].to_numpy()

In [280]:
# Show the hyperbole sentences as a NumPy array.
hyperbole_df["Sentence"].to_numpy()

array(["Insolent boy, I'll slash you to ribbons!",
       'The level of discombobulation in the realm of global politics is simply colossal.',
       'His eyes were very dark.', ..., 'Another idea gone wasted.',
       'My hair is very straight.',
       'He has a very impressive knowledge about the impressionists.'],
      dtype=object)

In [281]:
# Show the metaphor sentences as a NumPy array.
metaphor_df["Sentence"].to_numpy()

array(['"They are the blood of the election process ," Huneycutt said.',
       "With 70 percent of them receiving a free or reduced-price lunch and with DC's child poverty rate well above the national average, poverty is a mountain that children in our nation's capital climb daily.",
       '"This action reflects the board\'s belief that the share price significantly undervalues Vodafone."',
       ...,
       'The policy is a prescription for social, economic and bureaucratic mayhem masquerading as muscular enforcement.',
       'Congress too must encourage and support American diplomatic leadership .',
       'Isn\'t it ironic how the "Great Society" and the " War on Poverty " created a tax paid mostly by the poor and middle class?'],
      dtype=object)

In [284]:
# Seeds for the shuffled StratifiedKFold splits; one full set of folds is written per seed.
random_state = [42, 101, 2020]

In [285]:
def _write_fold_csvs(frame, stratify_labels, target_labels, out_dir, seed, n_splits=10):
    """Write stratified k-fold train/validation CSV files for one dataset and one seed.

    For every fold ``i`` two files are written into ``out_dir``:
    ``train_<seed>_<i>.csv`` and ``val_<seed>_<i>.csv``. Each file contains the
    columns ``sentence``, ``label`` and ``id``, where ``id`` is the row position
    inside that file.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame providing the ``Sentence`` column.
    stratify_labels : numpy.ndarray
        Per-row class ids used only to stratify the folds.
    target_labels : numpy.ndarray
        Per-row labels written to the ``label`` column.
    out_dir : str
        Destination directory, including the trailing path separator.
    seed : int
        ``random_state`` of the shuffled ``StratifiedKFold``.
    n_splits : int, default 10
        Number of folds.
    """
    # DataFrame.to_csv does not create missing directories, so make sure the
    # destination exists instead of failing on the first write.
    os.makedirs(out_dir, exist_ok=True)

    # Loop-invariant: convert the sentence column once rather than per fold.
    sentences = frame["Sentence"].to_numpy()

    splitter = StratifiedKFold(n_splits=n_splits, random_state=seed, shuffle=True)
    for fold, (train_idx, val_idx) in enumerate(splitter.split(sentences, stratify_labels)):
        for prefix, row_idx in (("train_", train_idx), ("val_", val_idx)):
            fold_df = pd.DataFrame({"sentence": sentences[row_idx], "label": target_labels[row_idx]})
            fold_df["id"] = fold_df.index
            fold_df.to_csv(out_dir + prefix + str(seed) + "_" + str(fold) + ".csv", index=False)


# Folds are stratified on the combined class id ("new"), while the label that
# is written out is the dataset's own task label (Hyperbole or Metaphor).
for r in random_state:
    _write_fold_csvs(hyperbole_df, hyp_new_labels, hyperbole_df_hyp_labels, "../data/hyperbole_mtl/", r)
    _write_fold_csvs(metaphor_df, met_new_labels, metaphor_df_met_labels, "../data/metaphor_mtl/", r)

In [None]:
import json

# Initialise both results files with an empty JSON list.
f = []
for _results_path in ("../data/results/hyperbole.json", "../data/results/metaphor.json"):
    with open(_results_path, "w") as file:
        json.dump(f, file)

In [2]:
import pandas as pd

In [13]:
# Add a 1-based "id" column to the StereoSet train.csv and write the result
# back to the same path (the input file is overwritten).
_stereoset_train_csv = 'MTL-E/dataset/StereoSet/train.csv'
bias_df = pd.read_csv(_stereoset_train_csv)
bias_df = bias_df.assign(id=bias_df.index + 1)
bias_df.to_csv(_stereoset_train_csv, index=False)