## Introduction

- Tutorial on Mining of supporting and objecting units .
- Presenter: Harsh Shah, Meher Vivek, Rene Sherf
- Task: Find the stance of evidence towards the claim

- Inspired by [Bar-Haim et al.  2017](https://www.aclweb.org/anthology/E17-1024)
.
    - Paper: Stance Classification of Context-Dependent Claims
    - Input: Topics, Claims 
    - Output: Stance of claim towards the topic


In [1]:

from stanceDetectionModel import StanceDetectionModel

In [2]:
model = StanceDetectionModel()

In [13]:
# Import dataset
import pandas as pd
data = pd.read_csv('Dataset/claim_stance_dataset_v1.csv') 

   

## Preprocessing of Data
-                Remove Stopwords
-                Stemming
-                Lemmatization

In [14]:
claim_corrected_data = data.iloc[:,7]
claim_corrected_data_cleaned = model.remove_stop_words(claim_corrected_data)
claim_corrected_data_stemmed = model.get_stemmed_text(claim_corrected_data_cleaned) 
claim_lemmatized_reviews = model.get_lemmatized_text(claim_corrected_data_stemmed)


In [15]:
claim_corrected_data[0:3]

0    Exposure to violent video games causes at leas...
1    video game violence is not related to serious ...
2    some violent video games may actually have a p...
Name: claims.claimCorrectedText, dtype: object

In [21]:
pd.DataFrame(claim_lemmatized_reviews[0:3])

Unnamed: 0,0
0,exposur violent video game caus least temporar...
1,video game violenc relat seriou aggress behavi...
2,violent video game may actual prosoci effect c...


In [22]:
# split train and test data and handle missing values
train,test = model.load_dataset(data)
print('Train Size',train.shape)
print('Test Size', test.shape)

Train Size (974, 19)
Test Size (1286, 19)


# Select columns from  train data

In [23]:

claim_corrected_data_train = train.iloc[:,5]
claim_sentiment_data_train = train.iloc[:,17]

# Features calculation for the train data 

In [24]:
# This step will take some time as feauture calculation of the claims will perfomed here 
X_train_data = model._instance_features(claim_corrected_data_train)


## Following feaures are calculated from the claim senteces
-                negative_score
-                neutral_score
-                positive_score
-                compound_score
-                num_of_positive_words 
-                num_of_negative_words 
-                num_of_neutral_words
-                avg_tfidf_feature 
-                max_tfidf_feature

In [25]:
# claim sentences are converted into feauters
X_train_data


Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.141,0.643,0.217,0.2263,2,1,16,0.185419,0.510982
1,0.000,0.775,0.225,0.4404,1,0,10,0.283161,0.457109
2,0.000,1.000,0.000,0.0000,0,0,14,0.244215,0.367240
3,0.000,0.895,0.105,0.4019,1,0,23,0.187289,0.302452
4,0.000,1.000,0.000,0.0000,0,0,20,0.205836,0.332356
5,0.259,0.570,0.171,-0.1027,1,2,9,0.258621,0.393822
6,0.000,0.795,0.205,0.6597,2,0,21,0.157167,0.454011
7,0.000,1.000,0.000,0.0000,0,0,5,0.405347,0.632007
8,0.000,1.000,0.000,0.0000,0,0,8,0.335332,0.477992
9,0.000,0.709,0.291,0.5719,1,0,9,0.305195,0.391777


# select columns from test data

In [26]:
claim_corrected_data_test = test.iloc[:,5]
claim_target_data_test = test.iloc[:,14]
claim_sentiment_data_test = test.iloc[:,17]
topic_sentiment_data_test = test.iloc[:,2]
claim_target_data_relation = test.iloc[:,18]
topic_text = test.iloc[:,0]
stance_test = test.iloc[:,4]

In [27]:
# claim sentiment from train data
X_train_class_data =  claim_sentiment_data_train

# Training SVM model on features

In [28]:
from sklearn import svm
clf=svm.SVC(gamma='auto')
clf.fit(X_train_data,X_train_class_data)
Y_test_data = claim_corrected_data_test
Y_test_data_transfom = model._instance_features(claim_corrected_data_test)
actual = claim_sentiment_data_test
# predict claim sentiment on the test data
Y_test_class_data = clf.predict(Y_test_data_transfom)

# Confusion Matrix for predicted claim sentiment 

In [30]:
from sklearn.metrics import confusion_matrix
matrix = confusion_matrix(actual, Y_test_class_data)
print(matrix)

# Calculating precision, recall, fmeasure, accuracy from confusion matrix
accuracy = (matrix[0][0]+matrix[1][1]) / (matrix[0][0]+matrix[1][1]+matrix[0][1]+matrix[1][0])
precision = matrix[1][1] / (matrix[0][1]+matrix[1][1])
recall    = matrix[0][0] / (matrix[0][0]+matrix[1][0])
f_measure = (2 * precision * recall ) / (precision + recall)

[[579 144]
 [293 270]]


# Evaluation of model for claim prediction 

In [31]:
print("  Accuracy    \t        precision  \t         Recall  \t    f_measure "   "\n", accuracy, precision, recall,f_measure )

  Accuracy    	        precision  	         Recall  	    f_measure 
 0.6601866251944012 0.6521739130434783 0.6639908256880734 0.6580293215138084


# Calculating stance 

In [32]:
predicted_stance =  topic_sentiment_data_test * claim_target_data_relation * Y_test_class_data

In [33]:
predicted_stance

0       1.0
1       1.0
2       1.0
3       1.0
4       1.0
5       1.0
6       1.0
7       1.0
8       1.0
9       1.0
10      1.0
12      1.0
13     -1.0
14     -1.0
15      1.0
16      1.0
17      1.0
18      1.0
19      1.0
20      1.0
21      1.0
22      1.0
23      1.0
24     -1.0
25      1.0
26      1.0
27     -1.0
28      1.0
29     -1.0
30      1.0
       ... 
1810   -1.0
1811   -1.0
1812    1.0
1813   -1.0
1814   -1.0
1815   -1.0
1816   -1.0
1817   -1.0
1818    1.0
1819    1.0
1820    1.0
1821   -1.0
1822   -1.0
1823    1.0
1824   -1.0
1825   -1.0
1826    1.0
1827    1.0
1828    1.0
1829    1.0
1830   -1.0
1831   -1.0
1832   -1.0
1833   -1.0
1834    1.0
1835   -1.0
1836   -1.0
1837   -1.0
1838   -1.0
1839   -1.0
Length: 1286, dtype: float64

# Confusion Matrix for stance prediction 

In [35]:
stance_filtered = stance_test.replace('PRO', 1)
stance_filtered = stance_filtered.replace('CON', -1)
# pro/ con to 1 / -1 
matrix = confusion_matrix(stance_filtered, predicted_stance)
print(matrix)

[[391 227]
 [211 457]]


# Evaluation of model for stance prediction 

In [36]:
accuracy_stance = (matrix[0][0]+matrix[1][1]) /(matrix[0][0]+matrix[1][1]+matrix[0][1]+matrix[1][0]) 
precision_stance = matrix[1][1] / (matrix[0][1]+matrix[1][1])
recall_stance    = matrix[0][0] / (matrix[0][0]+matrix[1][0])

f_measure_stance = (2 * precision * recall ) / (precision + recall)

predicted_stances = predicted_stance.replace(1, 'PRO')
predicted_stance = predicted_stances.replace(-1,'CON')
predicted_stance = (predicted_stance)

In [37]:
print("  Accuracy    \t        precision  \t         Recall "   "\n", accuracy_stance, precision_stance, recall_stance )


  Accuracy    	        precision  	         Recall 
 0.6594090202177294 0.6681286549707602 0.6495016611295681


# combining the result as a dataframe

In [38]:
DataFrameCombination = pd.concat([topic_text,claim_corrected_data_test,stance_test],axis =1)
DataFrameCombination['predicted.Stance'] = predicted_stance

# Printing the final result comparing actual stance with predicted stance ( calculated using formula)

In [39]:
DataFrameCombination

Unnamed: 0,topicText,claims.claimCorrectedText,claims.stance,predicted.Stance
0,This house believes that the sale of violent v...,Exposure to violent video games causes at leas...,PRO,PRO
1,This house believes that the sale of violent v...,video game violence is not related to serious ...,CON,PRO
2,This house believes that the sale of violent v...,some violent video games may actually have a p...,CON,PRO
3,This house believes that the sale of violent v...,exposure to violent video games causes both sh...,PRO,PRO
4,This house believes that the sale of violent v...,Violent video games increase the violent tende...,PRO,PRO
5,This house believes that the sale of violent v...,No conclusive link was found between video gam...,CON,PRO
6,This house believes that the sale of violent v...,violent video games are significantly associat...,PRO,PRO
7,This house believes that the sale of violent v...,video game publishers unethically train childr...,PRO,PRO
8,This house believes that the sale of violent v...,violent video games may increase mild forms of...,PRO,PRO
9,This house believes that the sale of violent v...,exposure to violent video games results in inc...,PRO,PRO
