In [1]:
import numpy as np
import copy
from tqdm import tqdm
import pandas as pd
import re
import gensim
from score import report_score
from sklearn.metrics import accuracy_score
import copy



In [2]:
# Run configuration shared by the cells below.
# datadir: directory that holds the train / competition-test CSV files read in the next cell.
datadir="fnc-1"
# manual_seed: passed as 'manual_seed' to both ClassificationModel configurations.
manual_seed=47
# num_train_epochs: used as-is for the relatedness model and doubled for the opinion model.
num_train_epochs=3

In [3]:
# Article bodies and headline/stance pairs are stored in separate CSV files;
# they are joined on 'Body ID' further down.
raw_train_bodies = pd.read_csv(f"{datadir}/train_bodies.csv")
raw_train_stances = pd.read_csv(f"{datadir}/train_stances.csv")
raw_test_bodies = pd.read_csv(f"{datadir}/competition_test_bodies.csv")
raw_test_stances = pd.read_csv(f"{datadir}/competition_test_stances.csv")

# Keep a handle on the string-valued gold stances before that column is integer-encoded.
true_test = raw_test_stances['Stance']

In [4]:
# Stance name -> integer class id. The reverse lookup is derived from it so the
# two tables cannot drift apart.
stance_to_int = {
    "agree": 0,
    "discuss": 1,
    "disagree": 2,
    "unrelated": 3,
}
int_to_stance = {class_id: name for name, class_id in stance_to_int.items()}

In [5]:
true_test

0        unrelated
1        unrelated
2        unrelated
3        unrelated
4        unrelated
           ...    
25408        agree
25409      discuss
25410     disagree
25411     disagree
25412        agree
Name: Stance, Length: 25413, dtype: object

In [6]:
# Handle on the string-valued test stance column, taken before it is overwritten below.
actual_test_stances = raw_test_stances['Stance']

# Replace stance names with integer class ids in both stance frames.
# NOTE: this cell is not re-runnable - once the column holds ids, looking them up
# in `stance_to_int` (whose keys are the stance names) raises KeyError.
raw_train_stances['Stance'] = raw_train_stances['Stance'].apply(stance_to_int.__getitem__)
raw_test_stances['Stance'] = raw_test_stances['Stance'].apply(stance_to_int.__getitem__)

# `true_test` still carries the stance names, so encode it separately for the metrics.
true_test_labels = true_test.apply(stance_to_int.__getitem__)

In [7]:
# Attach each article body to its headline via 'Body ID', then keep only the text
# columns (plus `labels` for the training frame).
train_df = (
    raw_train_stances
    .join(raw_train_bodies.set_index('Body ID'), on='Body ID')
    .assign(labels=lambda joined: joined['Stance'])
    .drop(columns=['Body ID', 'Stance'])
)

# The test frame carries no label column; gold labels are kept in `true_test_labels`.
test_df = (
    raw_test_stances
    .join(raw_test_bodies.set_index('Body ID'), on='Body ID')
    .drop(columns=['Body ID', 'Stance'])
)

In [8]:
def clean(s):
    """Normalise a string to lower-cased word tokens separated by single spaces.

    Every maximal run of `\\w` characters (letters, digits and underscore) is kept
    as a token; everything else (punctuation, whitespace) acts as a separator
    and is discarded.
    """
    tokens = re.findall(r'\w+', s, flags=re.UNICODE)
    return " ".join(tokens).lower()

# Normalise both text columns of both frames with `clean` (modifies the frames in place).
for frame in (train_df, test_df):
    for column in ('Headline', 'articleBody'):
        frame[column] = frame[column].apply(clean)

In [9]:
# Headline becomes the first text of each pair, the article body the second.
pair_columns = {'Headline': 'text_a', 'articleBody': 'text_b'}
train_df = train_df.rename(columns=pair_columns)
test_df = test_df.rename(columns=pair_columns)

In [10]:
train_df.head()

Unnamed: 0,text_a,text_b,labels
0,police find mass graves with at least 15 bodie...,danny boyle is directing the untitled film set...,3
1,hundreds of palestinians flee floods in gaza a...,hundreds of palestinians were evacuated from t...,0
2,christian bale passes on role of steve jobs ac...,30 year old moscow resident was hospitalized w...,3
3,hbo and apple in talks for 15 month apple tv s...,reuters a canadian soldier was shot at the can...,3
4,spider burrowed through tourist s stomach and ...,fear not arachnophobes the story of bunbury s ...,2


In [11]:
# Build the [headline, body] pairs that are passed to `predict`.
# Selecting both columns and converting once is positional, so it does not depend
# on the index being exactly 0..n-1, and it avoids two label lookups per row.
test_dl = test_df[['text_a', 'text_b']].values.tolist()

### Create dataset for Relatedness
All 'agree', 'disagree' and 'discuss' stances are mapped to a single "related" class.
Related is represented as 0 and unrelated is represented as 1.

In [12]:
# Collapse the four stance ids into a binary target:
# ids 0-2 (agree / discuss / disagree) -> 0 (related), id 3 (unrelated) -> 1.
stance_to_relatedness = {class_id: int(class_id == 3) for class_id in range(4)}

In [13]:
# Same text pairs as `train_df`, with the 4-way label replaced by the binary
# related (0) / unrelated (1) target. `assign` returns a new frame, so `train_df`
# itself is left untouched.
train_df_relatedness = train_df.assign(
    labels=train_df['labels'].apply(stance_to_relatedness.__getitem__)
)
train_df_relatedness.head()


Unnamed: 0,text_a,text_b,labels
0,police find mass graves with at least 15 bodie...,danny boyle is directing the untitled film set...,1
1,hundreds of palestinians flee floods in gaza a...,hundreds of palestinians were evacuated from t...,0
2,christian bale passes on role of steve jobs ac...,30 year old moscow resident was hospitalized w...,1
3,hbo and apple in talks for 15 month apple tv s...,reuters a canadian soldier was shot at the can...,1
4,spider burrowed through tourist s stomach and ...,fear not arachnophobes the story of bunbury s ...,0


In [14]:
# Gold binary labels for the test set: 0 = related, 1 = unrelated.
true_test_relatedness_labels = true_test_labels.apply(stance_to_relatedness.__getitem__)
true_test_relatedness_labels.head()

0    1
1    1
2    1
3    1
4    1
Name: Stance, dtype: int64

### Create dataset for Opinion
Drop all rows that are unrelated.

In [15]:
# Opinion training set: every pair whose label is not 3 (unrelated), leaving the
# three classes 0/1/2. The original row index is kept (it is not reset here).
drop_indexs = train_df.loc[train_df['labels'] == 3].index
train_df_opinion = train_df.drop(index=drop_indexs)
train_df_opinion.info()
train_df_opinion

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13427 entries, 1 to 49970
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text_a  13427 non-null  object
 1   text_b  13427 non-null  object
 2   labels  13427 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 419.6+ KB


Unnamed: 0,text_a,text_b,labels
1,hundreds of palestinians flee floods in gaza a...,hundreds of palestinians were evacuated from t...,0
4,spider burrowed through tourist s stomach and ...,fear not arachnophobes the story of bunbury s ...,2
5,nasa confirms earth will experience 6 days of ...,thousands of people have been duped by a fake ...,0
8,banksy arrested real identity revealed is the ...,if you ve seen a story floating around on your...,0
10,gateway pundit,a british rapper whose father is awaiting tria...,1
...,...,...,...
49946,boko haram denies truce to release kidnapped s...,cnn despite government claims of a ceasefire g...,1
49949,has north korean dictator kim jong un been rep...,the north korean dictator kim jong un has not ...,1
49956,nypd to stop arresting for low level marijuana...,small time drug offenders in new york city cou...,1
49969,mexico says missing students not found in firs...,the bodies found in a mass grave were confirme...,0


In [16]:
# Opinion test subset: remove the pairs whose gold label is 3 (unrelated) from both
# the text frame and the label series, then renumber both from 0 so they stay aligned.
true_test_labels_opinion = true_test_labels.copy()
drop_indexs = true_test_labels_opinion.loc[true_test_labels_opinion == 3].index

true_test_opinion_df = test_df.drop(index=drop_indexs).reset_index(drop=True)
true_test_opinion_labels = true_test_labels_opinion.drop(index=drop_indexs).reset_index(drop=True)

In [17]:
true_test_opinion_df

Unnamed: 0,text_a,text_b
0,exclusive apple to unveil the long awaited ret...,last week apple sent out the invites for its s...
1,found missing afghan soldiers spotted trying t...,the three afghanistan national army officers w...
2,report three missing afghan soldiers caught at...,toronto three missing afghan soldiers were tak...
3,3 boobed woman a fake,did a woman claiming to have a third breast pl...
4,isis might be harvesting organs iraqi ambassad...,isis is using blood money from harvesting orga...
...,...,...
7059,the success of the affordable care act is a hu...,congressional republicans evidently hoping tha...
7060,the success of the affordable care act is a hu...,did obamacare work it s worth reflecting upon ...
7061,the success of the affordable care act is a hu...,millions may lose coverage next year if congre...
7062,the success of the affordable care act is a hu...,come november the grim trudge across the incre...


In [18]:
true_test_opinion_df.info()
true_test_opinion_labels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7064 entries, 0 to 7063
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text_a  7064 non-null   object
 1   text_b  7064 non-null   object
dtypes: object(2)
memory usage: 110.5+ KB
<class 'pandas.core.series.Series'>
RangeIndex: 7064 entries, 0 to 7063
Series name: Stance
Non-Null Count  Dtype
--------------  -----
7064 non-null   int64
dtypes: int64(1)
memory usage: 55.3 KB


In [19]:
# [headline, body] pairs for the related-only test subset.
# One positional bulk conversion replaces the per-row chained label lookups, which
# were slow and only correct while the index was exactly 0..n-1.
true_test_opinion_dl = true_test_opinion_df[['text_a', 'text_b']].values.tolist()

In [20]:
from simpletransformers.classification import ClassificationModel

### Training Relatedness Model (related/unrelated)

In [21]:
# Stage-one classifier: BERT with a 2-way head (num_labels=2) for the related/unrelated labels.
# NOTE(review): the checkpoint is the *cased* BERT, but `clean` lower-cases all text
# before it reaches the model - confirm this combination is intended.
relatedness_model = ClassificationModel(
    'bert', 'bert-base-cased', 
    num_labels=2, 
    args={
        'num_train_epochs': num_train_epochs,
        'manual_seed': manual_seed,
        'max_seq_length': 256,
        'output_dir': "outputs/bert_relatedness",
        'overwrite_output_dir': True,
        'save_steps': -1,
        # NOTE(review): simpletransformers appears to name this option 'use_early_stopping'
        # (and it relies on evaluate_during_training), so this key is probably ignored - confirm.
        'early_stopping': True},
    use_cuda=True)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

In [22]:
# Training is disabled here; a previously saved checkpoint is loaded instead and
# replaces the model object constructed in the previous cell.
# NOTE(review): presumably the checkpoint directory must already exist on disk for a
# fresh-kernel run - re-enable the training call below to regenerate it.
#relatedness_model.train_model(train_df_relatedness)
relatedness_model = ClassificationModel("bert", "outputs/bert_relatedness/checkpoint-18741-epoch-3")

In [23]:
# Score the stage-one model on every test pair against the binary gold labels.
# `predict` returns (predictions, raw outputs); only the predictions are needed.
preds = relatedness_model.predict(test_dl)[0]
acc = accuracy_score(true_test_relatedness_labels, preds)
print(f" The relatedness accuracy of relatedness model is {acc}")

  0%|          | 0/25413 [00:00<?, ?it/s]

  0%|          | 0/3177 [00:00<?, ?it/s]

 The relatedness accuracy of relatedness model is 0.982922126470704


### Training Opinion Model

In [24]:
# Stage-two classifier: BERT with a 3-way head (num_labels=3) for the related-only
# pairs, configured for twice as many epochs as the relatedness model.
# NOTE(review): the checkpoint is the *cased* BERT, but `clean` lower-cases all text
# before it reaches the model - confirm this combination is intended.
opinion_model = ClassificationModel(
    'bert', 'bert-base-cased', 
    num_labels=3, 
    args={
        'num_train_epochs': 2*num_train_epochs,
        'manual_seed': manual_seed,
        'max_seq_length': 256,
        'output_dir': "outputs/bert_opinion",
        'overwrite_output_dir': True,
        # NOTE(review): simpletransformers appears to name this option 'use_early_stopping'
        # (and it relies on evaluate_during_training), so this key is probably ignored - confirm.
        'early_stopping': True},
    use_cuda=True)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

In [27]:
# Training is disabled here; a previously saved checkpoint is loaded instead and
# replaces the model object constructed in the previous cell.
#opinion_model.train_model(train_df_opinion)

# The checkpoint judged best is loaded.
# NOTE(review): the notebook does not show which data that judgement was based on;
# if it was the test set, the accuracy reported below is optimistic - confirm.
opinion_model = ClassificationModel("bert", "outputs/bert_opinion/checkpoint-3358-epoch-2")

In [29]:
# Score the opinion model in isolation: only test pairs whose gold label is not
# "unrelated" are used, so this number ignores stage-one mistakes.
preds, _ = opinion_model.predict(true_test_opinion_dl)
acc = accuracy_score(true_test_opinion_labels, preds)
# The message previously attributed this figure to the relatedness model.
print(f" The opinion accuracy of opinion model is {acc}")

  0%|          | 0/7064 [00:00<?, ?it/s]

  0%|          | 0/883 [00:00<?, ?it/s]

 The relatedness accuracy of relatedness model is 0.7628822197055493


In [30]:
class cacased_model:
    """Two-stage (cascaded) stance classifier.

    Stage one decides related vs. unrelated for every pair; stage two assigns one
    of the opinion classes to the pairs that stage one considered related.

    Both wrapped models only need a ``predict(pairs)`` method returning a
    ``(predictions, raw_outputs)`` tuple.
    """

    def __init__(self, relatedness_model, opinion_model):
        """Store the two stage models.

        relatedness_model: binary model, prediction 1 means "unrelated".
        opinion_model: model whose predictions are used as the final stance ids
            for the pairs not marked unrelated.
        """
        self.relatedness_model = relatedness_model
        self.opinion_model = opinion_model

    def predict(self, test_dl):
        """Return one stance id per input pair.

        test_dl: indexable sequence of [text_a, text_b] pairs.

        Pairs that stage one predicts as 1 (unrelated) get stance id 3; every
        other pair gets the opinion model's prediction. The result has the
        same container type and length as the stage-one predictions.
        """
        # Use the models held by this instance, not whatever globals happen to exist.
        related_preds, _ = self.relatedness_model.predict(test_dl)

        # Work on a copy so the stage-one output is left untouched.
        preds = copy.deepcopy(related_preds)

        # stance_to_relatedness = {0:0 , 1:0, 2:0, 3:1}
        opinion_idxs = []
        for i in range(len(related_preds)):
            if related_preds[i] == 1:
                preds[i] = 3  # unrelated maps straight to stance id 3
            else:
                opinion_idxs.append(i)

        # Nothing was judged related: skip stage two instead of predicting on an empty batch.
        if not opinion_idxs:
            return preds

        opinion_test_dl = [test_dl[i] for i in opinion_idxs]
        opinion_preds, _ = self.opinion_model.predict(opinion_test_dl)

        # Scatter the stage-two predictions back to their original positions.
        for i, opinion_pred in zip(opinion_idxs, opinion_preds):
            preds[i] = opinion_pred

        return preds

        


In [31]:
# The instance keeps the name `cacased_model` because later cells call
# `cacased_model.predict(...)`. That rebinding shadows the class, so on a re-run of
# this cell the class is recovered from the existing instance instead of calling it.
_cascade_cls = cacased_model if isinstance(cacased_model, type) else type(cacased_model)
cacased_model = _cascade_cls(relatedness_model, opinion_model)

In [32]:
# Run both stages over every test pair; `preds` holds one stance id per pair.
preds = cacased_model.predict(test_dl)


  0%|          | 0/25413 [00:00<?, ?it/s]

  0%|          | 0/3177 [00:00<?, ?it/s]

  0%|          | 0/6940 [00:00<?, ?it/s]

  0%|          | 0/868 [00:00<?, ?it/s]

In [33]:
len(preds)

25413

In [36]:
# Translate the numeric class ids back into stance names for `report_score`.
outputs = [int_to_stance[int(class_id)] for class_id in preds]

In [37]:
# Print the confusion matrix and overall score of the cascaded predictions
# against the gold stance names.
report_score(true_test, outputs)

-------------------------------------------------------------
|           |   agree   | disagree  |  discuss  | unrelated |
-------------------------------------------------------------
|   agree   |   1374    |    55     |    410    |    64     |
-------------------------------------------------------------
| disagree  |    184    |    282    |    170    |    61     |
-------------------------------------------------------------
|  discuss  |    609    |    157    |   3544    |    154    |
-------------------------------------------------------------
| unrelated |    22     |     8     |    125    |   18194   |
-------------------------------------------------------------
Score: 10144.75 out of 11651.25	(87.07005686085184%)


87.07005686085184

In [38]:
# Break the cascade's result down into its two stages:
#   * relatedness accuracy - did it agree with the gold label on "unrelated (3) or not"?
#   * opinion accuracy     - exact-match rate over the pairs whose gold label is not 3.
true_test_labels = true_test.apply(stance_to_int.__getitem__)

relatedness_correct = 0
opinion_correct = 0
opinion_count = 0

for i, label in enumerate(true_test_labels):
    pred = preds[i]
    gold_unrelated = label == 3
    # Correct at stage one when gold and prediction fall on the same side of the split.
    if gold_unrelated == (pred == 3):
        relatedness_correct += 1
    if not gold_unrelated:
        opinion_count += 1
        if pred == label:
            opinion_correct += 1


print(f"Relatedness Accuracy is {relatedness_correct/len(true_test_labels)}")
print(f"Opinion Accuracy is {opinion_correct/opinion_count}")


Relatedness Accuracy is 0.982922126470704
Opinion Accuracy is 0.7361268403171007
