In [1]:
import numpy as np
import pandas as pd
import pickle
from IPython.display import display_html
import torch
from transformers import BertForSequenceClassification, AutoTokenizer, TextClassificationPipeline



In [2]:
import os
import sys
# Directories that are put on the module search path (presumably they hold the
# project-local modules imported in the next cell).
shared_path = '../shared/'
eval_path = '../evaluation/'
mmd_path = '../mmd/'
clr_path = '../contra-lexrank/'

# Append each directory at most once so that re-running the cell does not
# pile up duplicate entries in sys.path.
for _module_dir in (shared_path, eval_path, mmd_path, clr_path):
    if _module_dir not in sys.path:
        sys.path.append(_module_dir)

In [3]:
from Argument import Argument
from DataHandler import DataHandler

from SilhouetteCoefficient import SilhouetteCoefficient
from EdgeCorrelation import EdgeCorrelation
from SentenceArgReAllocator import SentenceArgReAllocator
from MMDBase import MMDBase
from TradeOffScorer import TradeOffScorer

The following shows some arguments from the corpus used by [Alshomary et al. (2020)](#Alshomary.2020), henceforth ArgsMe-Snippet corpus. This analysis aims at getting a gut feeling on how difficult the task of CAS is. Since this is a manually performed analysis, it is done only for a small subset of the arguments.

The pickle file already contains sentence embeddings that are computed using SBERT by [Reimers and Gurevych (2019)](#Reimers.2019) that will be used in a later step.

In [4]:
# Path to the pickled ArgsMe-Snippet corpus (per the text above, it already contains the SBERT sentence embeddings).
AMSC_PATH = '../../not-gitted/dataset_as_json_file.pickle'

In [5]:
# Load the corpus from the binary file at AMSC_PATH.
# NOTE(review): the file is a pickle; presumably load_bin unpickles it, which can run
# arbitrary code for a tampered file — only load files from a trusted source.
data = DataHandler()
data.load_bin(AMSC_PATH)

# General information

We discard arguments that fall short of minimum length (3 sentences).

In [6]:
# Apply the length filter built with length=3 (per the text above: arguments with fewer than 3 sentences are discarded).
filtered_arguments = data.get_filtered_arguments([DataHandler.get_args_filter_length(length=3)])

In [7]:
# NOTE(review): the original corpus size "100" is hard-coded in the message; it should be
# derived from the unfiltered data — TODO confirm how DataHandler exposes the full argument list.
display_html(f'The filter reduced the corpus size from 100 arguments to {len(filtered_arguments)}.', raw=True)

Look at some stats about the length.

In [8]:
# Per-context summary of the argument lengths, measured in sentences.
contexts = DataHandler.get_query_context_keys(filtered_arguments)
records = []
for c in contexts:
    arguments_c = DataHandler.get_query_context(filtered_arguments, c)
    # Count the sentences once per argument; every statistic below is derived from this list.
    lengths = [len(a.sentences) for a in arguments_c]

    records.append(dict(
        context=c,
        number_of_args=len(arguments_c),
        mean_length_in_sentences=np.mean(lengths),
        std_length_in_sentences=np.std(lengths),
        min_length_in_sentences=min(lengths),
        max_length_in_sentences=max(lengths),
        argument_lengths=lengths,
    ))

pd.DataFrame.from_records(records)

Unnamed: 0,context,number_of_args,mean_length_in_sentences,std_length_in_sentences,min_length_in_sentences,max_length_in_sentences,argument_lengths
0,vegan,8,34.875,31.640312,3,84,"[18, 84, 84, 5, 47, 3, 32, 6]"
1,feminism,9,7.333333,6.765928,3,26,"[4, 7, 3, 4, 5, 8, 5, 4, 26]"
2,brexit,9,15.555556,14.667508,3,48,"[33, 9, 14, 4, 7, 48, 3, 19, 3]"
3,death_penalty,9,14.111111,9.085329,4,36,"[6, 12, 13, 6, 4, 36, 15, 18, 17]"
4,trump,5,16.8,2.481935,14,20,"[14, 19, 20, 17, 14]"
5,google,8,8.75,4.351724,3,17,"[13, 3, 6, 10, 6, 17, 10, 5]"
6,nuclear_energy,3,36.0,31.822424,13,81,"[81, 13, 14]"
7,climate change,6,15.5,15.777093,4,50,"[7, 8, 9, 15, 4, 50]"
8,abortion,10,15.8,13.962808,3,48,"[6, 3, 10, 17, 32, 24, 6, 6, 6, 48]"
9,donald trump,10,13.9,14.727186,3,53,"[6, 3, 28, 9, 7, 12, 3, 12, 6, 53]"


# Snippet Analysis

## Feminism

Let's have a more detailed look at some arguments and their generic snippets. We start with a few short arguments to see how complicated it is to keep everything in mind. Therefore, we choose some arguments from context 'feminism'. Highlighting marks snippet sentences.

In [10]:
# Arguments of the 'feminism' query context, taken from the length-filtered corpus.
feminism_args = DataHandler.get_query_context(filtered_arguments, 'feminism')

def print_arg(arg, hl=None):
    """Show an argument sentence by sentence, as HTML and as LaTeX source.

    The HTML version is rendered with ``display_html``; the LaTeX ``tabularx``
    source is printed to stdout. Each sentence is prefixed with its 1-based,
    zero-padded position.

    Parameters
    ----------
    arg
        Object providing ``arg_id`` (str), ``sentences`` (iterable of str)
        and ``snippet``.
    hl
        Optional collection of 0-based sentence indices to highlight. When
        ``None``, a sentence is highlighted if ``s in arg.snippet`` holds.
    """
    import html  # local import: only needed to escape text for the HTML rendering

    # Sentence text and id are escaped so that '<', '>' and '&' in the corpus
    # cannot break the rendered markup.
    html_str = f'<p style="text-align:right">{html.escape(arg.arg_id, quote=False)}</p><div style="border: solid black 1px; padding-left: 5px">'
    # NOTE(review): the LaTeX output inserts arg_id and the sentences verbatim;
    # characters such as %, & or _ are not escaped and may need manual fixing.
    tex_str = r"""\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{""" + arg.arg_id + r"""}}\\
        \hline""" + '\n'
    for idx, s in enumerate(arg.sentences):
        # Explicit indices take precedence; otherwise fall back to the stored snippet.
        highlighted = (s in arg.snippet) if hl is None else (idx in hl)
        number = f'{idx+1:02d}'
        background = 'yellow' if highlighted else 'white'
        html_str += f'<span style="color:grey">{number} </span><span style="width:50%; background-color:{background}">{html.escape(s, quote=False)}</span><br>'
        tex_str += '\\textcolor{gray}{' + number + '}&' + ('\\hl' if highlighted else '') + '{' + s + '}\\\\\n'
    display_html(html_str + '</div>', raw=True)
    # Backslashes are written explicitly; '\h' and '\e' are invalid escape sequences.
    print(tex_str + '\\hline\n\\end{tabularx}')
    
# Show the first four feminism arguments; without explicit indices the stored snippet is highlighted.
for _position in range(4):
    print_arg(feminism_args[_position])

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{feminism-01}}\\
        \hline
\textcolor{gray}{01}&{Do American women still need feminism?}\\
\textcolor{gray}{02}&\hl{A controversial social media movement called Women Against Feminism features women explaining " mostly in "selfies" with handwritten signs " why they do not.}\\
\textcolor{gray}{03}&{Feminist responses have ranged from bafflement to vitriol or mockery to arguments that these women don"t know what feminism is.}\\
\textcolor{gray}{04}&\hl{But while this new movement has its silly aspects, it raises some much-needed questions about feminism"s present and future state " and, in the weeks since it first attracted notice, many prominent feminists have helped validate some of the criticisms.}\\
\hline
\end{tabularx}


\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{feminism-02}}\\
        \hline
\textcolor{gray}{01}&{Women don't need feminism.}\\
\textcolor{gray}{02}&{They are highly capable of taking care of themselves, but you know who does?}\\
\textcolor{gray}{03}&{Women who get splashed in the face with acid because they want education.}\\
\textcolor{gray}{04}&{Woman who are mistreated and not respected.}\\
\textcolor{gray}{05}&\hl{Women do not need modern Feminism.}\\
\textcolor{gray}{06}&\hl{They do want equality just as men, but feminism is more just than opportunities.}\\
\textcolor{gray}{07}&{It is about equality or more likely dominating men.}\\
\hline
\end{tabularx}


\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{feminism-03}}\\
        \hline
\textcolor{gray}{01}&\hl{Feminism says they want the equality but the definition of the Feminism doesn't say anything about promoting men's rights so it basically assumes men are not oppressed so it's unnecessary to promote the men's rights.}\\
\textcolor{gray}{02}&{but this is completely a myth because men are oppressed all the time from every single way.}\\
\textcolor{gray}{03}&\hl{so modern Feminism is not about equality it's about Female Supremacy to suppress to dominant over men.}\\
\hline
\end{tabularx}


\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{feminism-04}}\\
        \hline
\textcolor{gray}{01}&{One might assume that Women Against Feminism is a traditionalist backlash against gender equality.}\\
\textcolor{gray}{02}&\hl{Yet many of the women say they reject feminism precisely because they are pro-equality.}\\
\textcolor{gray}{03}&\hl{A blogger who goes by AstrokidNJ has analyzed a week"s worth of posts on Women Against Feminism and found that 46 percent were egalitarian, 19 percent endorsed men"s issues, and 12 percent criticized feminist intolerance toward dissent.}\\
\textcolor{gray}{04}&{Only 23 percent reflected traditionalist views such as support for distinct sex roles, chivalry, or full-time motherhood.}\\
\hline
\end{tabularx}


At first sight, the shown arguments convey a con stance towards the issue. Their criticism concentrates on the aspect that modern feminism is not about equality anymore, and this triggered the movement _Women Against Feminism_. Looking at the highlighted sentences &mdash; the snippets &mdash; leads to the observation that they capture the lack-of-equality aspect pretty well despite the different wording. But since this is the most prevalent aspect, every snippet contains it. Solely the first argument has a slightly different perspective: It rather considers criticism of the movement. Anyhow, it is noticeable that these rather short arguments do not capture many more aspects; thus, contrastive summarization can only try to highlight different details on the present aspect(s) (instead of highlighting different aspects).

If we try to select sentences for a contrastive snippet from the above arguments, we most likely leave the first snippet as it is. For the second argument, we select its second sentence instead of the sixth since it provides the autonomy aspect. And to increase readability, we select the first instead of the fifth.

In [10]:
# Manually chosen contrastive snippets, stored as 0-based sentence indices.
# NOTE(review): the text above says the first snippet is left as it is, and the generic
# snippet of feminism-01 highlights sentences 02 and 04 (indices [1, 3]) — confirm that
# [0, 2] is intended, since this value presumably feeds the evaluation further down.
feminism_args[0].excerpt_indices = [0,2]
feminism_args[1].excerpt_indices = [0,1]
print_arg(feminism_args[1], hl=[0,1])

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{feminism-02}}\\
        \hline
\textcolor{gray}{01}&\hl\textcolor{gray}{02}&\hl{Women who get splashed in the face with acid because they want education.}\\
{Woman who are mistreated and not respected.}\\
{Women do not need modern Feminism.}\\
{They do want equality just as men, but feminism is more just than opportunities.}\\
{It is about equality or more likely dominating men.}\\
\hline
\end{tabularx}


In argument feminism-03, there is only one other possibility to select. But since the second sentence builds on the first, and the third is a kind of conclusion, we cannot reject the first sentence. By taking the only possible option, we also add information on oppression of men.

In [11]:
# Contrastive snippet for feminism-03: sentences 1 and 2 (0-based indices 0 and 1).
feminism_args[2].excerpt_indices = [0,1]
print_arg(feminism_args[2], hl=[0,1])

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{feminism-03}}\\
        \hline
\textcolor{gray}{01}&\hl\textcolor{gray}{02}&\hl{so modern Feminism is not about equality it's about Female Supremacy to suppress to dominant over men.}\\
\hline
\end{tabularx}


Please note that sentence 3 of feminism-03 is quite similar to sentence 2 of feminism-04. And as we throw sentence 3 out of the snippet of feminism-03, we have the possibility to keep sentence 2 for the last snippet. Sentence 3 should still be a part of the snippet because it provides additional statistics that the other arguments do not have.

In conclusion, we saw that these rather short arguments do not cover multiple aspects. Consequently, contrastive argument summarization cannot select another aspect, but it has to highlight different details, which indeed seems to be possible.

## Trump

Now, we continue with arguments that have more sentences and hopefully more aspects they cover. Since it might be difficult to keep multiple arguments with more than 30 sentences in mind, we look at some from context 'trump' and 'death_penalty' with 10&ndash;20 sentences.

In [11]:
# Arguments of the 'trump' query context, taken from the length-filtered corpus.
trump_args = DataHandler.get_query_context(filtered_arguments, 'trump')

We skipped trump-01 at this point because it was either meant as a rhetorical joke, or the author misunderstood the topic. Argument trump-03 falls short of the length requirement.

In [12]:
# trump_args[0] is deliberately not printed (see the text above); show the next three arguments.
for _position in (1, 2, 3):
    print_arg(trump_args[_position])

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{trump-02}}\\
        \hline
\textcolor{gray}{01}&\hl{Trump is the most openly racist Presidential candidate since pre Civil Rights.}\\
\textcolor{gray}{02}&{Source 1: ( here comes Donald Trump, who started his campaign by ranting about how Mexican immigrants are rapists and drug dealers (...) After a Black Lives Matter protester was punched and kicked at a Trump rally, Trump said "Maybe he should have been roughed up."}\\
\textcolor{gray}{03}&{And he retweeted a graphic with fake statistics about black people supposedly murdering whites, which turns out to have been created by a neo-Nazi.}\\
\textcolor{gray}{04}&{(...) the thing Trump is trying to communicate with this story.}\\
\textcolor{gray}{05}&{(...) Your Muslim friends and neighbors?}\\
\textcolor{gray}{06}&{They're not the assimilated, patriotic Americans they want you to believe.}\\
\textcolor{gray}{07}&{They're not regular people with jobs and f

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{trump-04}}\\
        \hline
\textcolor{gray}{01}&{AustinMe forfeited this round.}\\
\textcolor{gray}{02}&{Pro Here is the source I will be using: - This website lists 13 reasons why Trump will make a great president.}\\
\textcolor{gray}{03}&{I am only going to copy and comment on two of them.}\\
\textcolor{gray}{04}&{12.}\\
\textcolor{gray}{05}&{"Trump is planning to build a great, great wall (yes"two "greats") on our southern border between the U.S. and Mexico.}\\
\textcolor{gray}{06}&{The best part?}\\
\textcolor{gray}{07}&{He"s planning to have Mexico pay for that wall, and mark his words, "" I will immediately terminate President Obama"s illegal executive order on immigration.""}\\
\textcolor{gray}{08}&{"Plus, with less authentic delicious Mexican food here in the U.S., obesity rates will probably drop."}\\
\textcolor{gray}{09}&\hl{-regardless of of Trump's racist bigotry used to appeal to an old, rac

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{trump-05}}\\
        \hline
\textcolor{gray}{01}&{First of all, I support O'Malley, but that isn't important.}\\
\textcolor{gray}{02}&{It looks like this debate comes down to Trump's stance on immigration.}\\
\textcolor{gray}{03}&{I showed clear harms from racism - shattered communities, (look at Ferguson if you want proof lol) poor mental and physical health, economic downturns, etc..}\\
\textcolor{gray}{04}&{If Trump becomes president and embraces racism, all these impacts will go nationwide.}\\
\textcolor{gray}{05}&{My opponent has no response to this, so insofar as I can prove Trump is racist, you vote neg.}\\
\textcolor{gray}{06}&{And I showed clear evidence of that, but if you want more, here's how illegal immigrants are treated.}\\
\textcolor{gray}{07}&{Source 3: ( These Minutemen inspired by rightwing Republican Pat Buchanan's claim that urgent action is needed to preserve the U.S. as an ethnicall

In [14]:
# Snippet sentences for the three trump arguments, as 0-based sentence indices.
# NOTE(review): the evaluation text states the trump snippets were not changed compared
# to the generic ones — presumably these indices mirror the generic snippets; confirm.
trump_args[1].excerpt_indices = [0,9]
trump_args[2].excerpt_indices = [8,17]
trump_args[3].excerpt_indices = [8,16]

The arguments above convey con stance towards Trump. They express criticism that addresses mainly racism and his position towards immigration. trump-02 starts with showing that Trump is a racist, continues with declaring racism is wrong, and finally gives evidence that it harms society. Its corresponding snippet points out that he is a racist and racism is wrong. trump-04 takes up some points made by pro-side regarding racism and immigration. Its first snippet sentence is the author's comment on the pro-point, while the second seems to be part of the pro-point itself. The last argument gives reasons why racism is bad, especially on a nationwide scale. Furthermore, it mentions problems with a militia that hunts immigrants. Its snippet captures the latter and that Trump causes such organizations.

Finding other snippets that introduce more contrastiveness seems to be difficult (if we also want to preserve some representativeness): Even though the main aspects are racism and immigration, each of the arguments shows different facets and is thereby already sufficiently contrastive. 

## Death penalty

In [13]:
# Arguments of the 'death_penalty' query context, taken from the length-filtered corpus.
dp_arguments = DataHandler.get_query_context(filtered_arguments, 'death_penalty')

In [14]:
# Show three death-penalty arguments; without explicit indices the stored snippet is highlighted.
for _position in (5, 6, 7):
    print_arg(dp_arguments[_position])

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{death_penalty-07}}\\
        \hline
\textcolor{gray}{01}&{Rebuttal 1: Why not just do it with a firing squad?}\\
\textcolor{gray}{02}&{Let them donate the guns and ammunition to kill them.}\\
\textcolor{gray}{03}&{Let the family or concerned citizens execute them or you can just hang them or use the electric chair.}\\
\textcolor{gray}{04}&{All of these are cost efficient ways of executing the death penalty.}\\
\textcolor{gray}{05}&{My opponent may also state that the trial will take longer, but the trial is going to cost money anyways.}\\
\textcolor{gray}{06}&{Death penalty or no death penalty.}\\
\textcolor{gray}{07}&{We can shorten the trials.}\\
\textcolor{gray}{08}&{My opponent also needs to take into account how much money it takes to feed someone for life.}\\
\textcolor{gray}{09}&{Rebuttal 2: These are just taken by citizens who have not witnessed the death penalty and don't know the effect of the d

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{death_penalty-08}}\\
        \hline
\textcolor{gray}{01}&{My opponent speaks no form of fact or statistic but morally on his/her opinion.}\\
\textcolor{gray}{02}&{I could sit here and talk about how I think the death penalty is good.}\\
\textcolor{gray}{03}&{But i'm not using facts or sources.}\\
\textcolor{gray}{04}&{If you need a reason on why the death penalty is a good idea; here are a few.}\\
\textcolor{gray}{05}&\hl{One small but important reason on why the death penalty is a good idea is that it decreases the prisoner population, which saves even more tax payers money.}\\
\textcolor{gray}{06}&\hl{Another reason why the death penalty is a good idea, Once a criminal is executed, he cannot kill again.}\\
\textcolor{gray}{07}&{The case for this would be Kenneth Allen McDuff who was on Death Row when the death penalty was declared unconstitutional in the 1970's.}\\
\textcolor{gray}{08}&{His sentence was

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{death_penalty-09}}\\
        \hline
\textcolor{gray}{01}&{The death penalty has a purpose.}\\
\textcolor{gray}{02}&{People who have killed many and even thousands cant be sentence to death?}\\
\textcolor{gray}{03}&{Death penalty should exist because when someone commits a crime that's so violent the end result can cause hundreds to die.}\\
\textcolor{gray}{04}&{If there is no death penalty then the maniac will still kill more people.}\\
\textcolor{gray}{05}&{Death penalty is used to get rid of bad people so bad that hundreds die.}\\
\textcolor{gray}{06}&{If you think the Death penalty is bad because it kills people you must not know what the purpose of it is.}\\
\textcolor{gray}{07}&{The death penalty is a way to put fear into the criminal and to put fear into others.}\\
\textcolor{gray}{08}&{Crime has decreased and the death penalty still lives.}\\
\textcolor{gray}{09}&{Several tests have shown that the 

In contrast to the Trump debate, death penalty comprises more aspects, e.g. costs, deterrence, number of inmates, elimination of repeated crimes (committed by one person). Looking at the snippets leads to the observation that they briefly mention nearly all of the aspects, and thus they are not contrastive. To distribute the information across the different snippets, the following selections seem suitable. death_penalty-07's snippet keeps sentence 28 and takes sentence 35 instead of 29.

In [17]:
# Contrastive snippet for the first printed death-penalty argument: sentences 28 and 35 (0-based indices 27 and 34).
dp_arguments[5].excerpt_indices = [27,34]
print_arg(dp_arguments[5], hl=[27,34])

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{death_penalty-07}}\\
        \hline
{Rebuttal 1: Why not just do it with a firing squad?}\\
{Let them donate the guns and ammunition to kill them.}\\
{Let the family or concerned citizens execute them or you can just hang them or use the electric chair.}\\
{All of these are cost efficient ways of executing the death penalty.}\\
{My opponent may also state that the trial will take longer, but the trial is going to cost money anyways.}\\
{Death penalty or no death penalty.}\\
{We can shorten the trials.}\\
{My opponent also needs to take into account how much money it takes to feed someone for life.}\\
{Rebuttal 2: These are just taken by citizens who have not witnessed the death penalty and don't know the effect of the death penalty.}\\
{You also fail to understand the variety of states.}\\
{States have different Almost all of these non-death penalty states, are generally small states.}\\
{You also have to

Since the decrease of prisoner population, the consequent savings, and the elimination of repeated crimes are not part of the first argument's snippet, we adhere to the snippet for death_penalty-08. Regarding the last argument (death_penalty-09), we choose sentence 7 to add the fear aspect, and sentence 10 to introduce the human rights question.

In [18]:
# dp_arguments[6] gets sentences 5 and 6 (indices 4, 5); dp_arguments[7] gets
# sentences 7 and 10 (indices 6, 9), as described in the text above.
dp_arguments[6].excerpt_indices = [4,5]
dp_arguments[7].excerpt_indices = [6,9]
print_arg(dp_arguments[7], hl=[6,9])

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{death_penalty-09}}\\
        \hline
{The death penalty has a purpose.}\\
{People who have killed many and even thousands cant be sentence to death?}\\
{Death penalty should exist because when someone commits a crime that's so violent the end result can cause hundreds to die.}\\
{If there is no death penalty then the maniac will still kill more people.}\\
{Death penalty is used to get rid of bad people so bad that hundreds die.}\\
{If you think the Death penalty is bad because it kills people you must not know what the purpose of it is.}\\
\textcolor{gray}{07}&\hl{Crime has decreased and the death penalty still lives.}\\
{Several tests have shown that the death penalty is an effective deterrent of future crimes.}\\
\textcolor{gray}{10}&\hl{To say that we are lowering ourselves to that level is wrong.}\\
{Killing over 200 innocent lives.}\\
{We have to look as well at the possibilities of these criminals co

# Sentence graph

This section sheds light on the semantic similarity of sentences according to the sentence embeddings. To do so, we look at the feminism context along with the sentence similarity graphs. Nodes of the graphs are sentences; each is annotated with the argument it belongs to and the sentence number as in the text block. Edges represent similarity: the lighter the edge, the lower the similarity, and vice versa.

In [19]:
# Reprint the first feminism argument (stored snippet highlighted) next to its similarity graph.
print_arg(feminism_args[0])

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{feminism-01}}\\
        \hline
\textcolor{gray}{01}&{Do American women still need feminism?}\\
\textcolor{gray}{02}&\hl{A controversial social media movement called Women Against Feminism features women explaining " mostly in "selfies" with handwritten signs " why they do not.}\\
\textcolor{gray}{03}&{Feminist responses have ranged from bafflement to vitriol or mockery to arguments that these women don"t know what feminism is.}\\
\textcolor{gray}{04}&\hl{But while this new movement has its silly aspects, it raises some much-needed questions about feminism"s present and future state " and, in the weeks since it first attracted notice, many prominent feminists have helped validate some of the criticisms.}\\
\hline
\end{tabularx}


<img src="feminism-01.png" height="500" width="500"/>

We can observe that the snippet sentences are central in terms of being similar to the remaining ones, and therefore, they are representative for the argument.

In [20]:
# Reprint the second feminism argument (stored snippet highlighted) next to its similarity graph.
print_arg(feminism_args[1])

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{feminism-02}}\\
        \hline
\textcolor{gray}{01}&{Women don't need feminism.}\\
\textcolor{gray}{02}&{They are highly capable of taking care of themselves, but you know who does?}\\
\textcolor{gray}{03}&{Women who get splashed in the face with acid because they want education.}\\
\textcolor{gray}{04}&{Woman who are mistreated and not respected.}\\
\textcolor{gray}{05}&\hl{Women do not need modern Feminism.}\\
\textcolor{gray}{06}&\hl{They do want equality just as men, but feminism is more just than opportunities.}\\
\textcolor{gray}{07}&{It is about equality or more likely dominating men.}\\
\hline
\end{tabularx}


<img src="feminism-02.png" height="500" width="500"/>

We can observe a similar pattern as in the previous graph: The snippet sentences (5 and 6) have a higher degree centrality than the others.

In [21]:
# Reprint the third feminism argument (stored snippet highlighted) next to its similarity graph.
print_arg(feminism_args[2])

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{feminism-03}}\\
        \hline
\textcolor{gray}{01}&\hl{Feminism says they want the equality but the definition of the Feminism doesn't say anything about promoting men's rights so it basically assumes men are not oppressed so it's unnecessary to promote the men's rights.}\\
\textcolor{gray}{02}&{but this is completely a myth because men are oppressed all the time from every single way.}\\
\textcolor{gray}{03}&\hl{so modern Feminism is not about equality it's about Female Supremacy to suppress to dominant over men.}\\
\hline
\end{tabularx}


<img src="feminism-03.png" height="500" width="500"/>

Again, the snippet seems well selected to represent the argument.

For CAS, it is interesting to see similarities across single arguments. The following graphs display the above arguments' sentences, and only the snippet sentences have colored nodes (generic snippet in the left-hand side graph, and previously selected contrastive snippet on the right-hand side).

<img src="feminism-01-02-03.png" height="500" width="1200"/>

The contrastive snippet of feminism-02 was completely changed compared to the generic one. At least sentence 2 seems to be less connected than sentence 6, and thus, a step towards more contrastiveness among the snippets. A similar situation holds for feminism-03:03 which was rejected to include feminism-03:02 instead. Based on this, it seems possible to infer contrastiveness and corresponding snippets from similarity of sentence embeddings.

Now, we will look at the snippet graph as above and think about which sentences are not sufficiently contrastive in the context. We start with two short arguments from context 'climate change'. As we know from the feminism example, we expect rather few distinct aspects but different facets and details to extract with the contrastive snippet.

In [22]:
# Arguments of the 'climate change' query context, taken from the length-filtered corpus.
cc_arguments = DataHandler.get_query_context(filtered_arguments, 'climate change')

| <img src="cc-06-08-heatmap.png" width="700" height="500" /> | <img src="cc-06-08.png" width="700" height="500" /> |
|:---:|:---:|
| inter-sentence similarities | sentence similarity graph |

The figure above pictures sentences from climate change-06 and 08. The heatmap on the left-hand side displays the same similarities as the edges in the graph (CC-08 on the x-axis, CC-06 on the y-axis). We see that snippet sentences are connected not only among sentences from their arguments but also across argument borders. In particular, CC-06:02 is similar to CC-08:01 and CC-08:02, and thus is a good candidate to be dropped for a contrastive snippet. Sentences CC-06:01 and CC-06:03 are pretty connected among the sentences of their argument and therefore might be representative while they are less similar to sentences of CC-08. If we select CC-06:01 and CC-06:03 as the contrastive snippet, we choose CC-08:03 instead of CC-08:01 to further increase contrastiveness between the snippets. The arguments' texts are shown below. The first print highlights generic snippets and the second highlights the contrastive ones.

In [23]:
# Generic snippets, highlighted via explicit 0-based sentence indices
# (sentences 2 and 4 of cc_arguments[2], sentences 1 and 2 of cc_arguments[4]).
print_arg(cc_arguments[2], hl=[1,3])
print_arg(cc_arguments[4], hl=[0,1])

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{climate change-06}}\\
        \hline
{My position on climate change is that not only do I believe in it but I believe it is man caused.}\\
\textcolor{gray}{02}&\hl{97% of scientists believe that climate change is primarily human caused.}\\
\textcolor{gray}{04}&\hl{I wish my opponent the best of luck.}\\
{Sources(s): Shaftel, H. (Ed.)}\\
{.}\\
{(2012, January 5).}\\
{Global Climate Change: Consensus.}\\
\hline
\end{tabularx}


\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{climate change-08}}\\
        \hline
\textcolor{gray}{01}&\hl\textcolor{gray}{02}&\hl{Out of 918 peer-reviewed scientific papers on this subject, 0% disagreed that climate change is happening, but in newspaper articles, 53% were unsure.}\\
{This proves that climate change is happening, but scientists are having trouble conveying the information and other data to the people of the world.}\\
\hline
\end{tabularx}


Both of the generic snippets point out that climate change is a proven fact and that it causes global warming. climate change-06 also states, it is human-caused.

In [24]:
# Contrastive snippets: store the manually chosen 0-based sentence indices on the
# arguments and print both arguments with exactly those sentences highlighted.
cc_arguments[2].excerpt_indices = [0,2]
cc_arguments[4].excerpt_indices = [1,2]
print_arg(cc_arguments[2], hl=[0,2])
print_arg(cc_arguments[4], hl=[1,2])

\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{climate change-06}}\\
        \hline
\textcolor{gray}{01}&\hl{I will start My opening argument to state that global warming is a proven fact and anyone that disagrees with me is ignorant on this subject.}\\
\textcolor{gray}{03}&\hl{Also the arguments stated by Human-caused climate change deniers are pointless and not fact driven.}\\
{I wish my opponent the best of luck.}\\
{Sources(s): Shaftel, H. (Ed.)}\\
{.}\\
{(2012, January 5).}\\
{Global Climate Change: Consensus.}\\
\hline
\end{tabularx}


\noindent\begin{tabularx}{\textwidth}{|rX|}
		\multicolumn{2}{r}{\textcolor{gray}{climate change-08}}\\
        \hline
{Climate Change is causing the Earth to warm up measurably, and there are already signs of disaster.}\\
\textcolor{gray}{02}&\hl\textcolor{gray}{03}&\hl{This proves that climate change is happening, but scientists are having trouble conveying the information and other data to the people of the world.}\\
\hline
\end{tabularx}


The contrastive snippet of climate change-06 shifts the focus solely on human-caused climate change aspect. climate change-08's contrastive snippet adheres to sentence 02 and adds new background information with sentence 03. Comparing both CC-08's and CC-06's generic snippets with their contrastive counterparts, leads to the observation that the latter are indeed more contrastive.

This example has shown that finding contrastive snippets is also possible using only sentence similarity information. Also, without the need to keep the arguments in mind, it seems to be easier.

# Remarks on snippet creation
This examination investigated how contrastive the generic snippets already are and how difficult it is to select contrastive ones. It turned out that most of the generic snippets analyzed here have the potential to be more contrastive. Finding a contrastive snippet for a given context is a non-trivial task but is manually feasible with short arguments.

# Evaluation measures

This section reveals how the automatic metrics evaluate the generic and contrastive snippets as created above. We assess the three dimensions representativeness, contrastiveness, and argumentativeness. The former two are computed based on the spatial position of the sentences in the embedding space. Since argumentativeness is not encoded in the same way, we employ automatic argument quality assessment by [Gretz et al. (2020)](#Gretz.2020). For all measures, higher values are better.

In [25]:
# The eleven arguments whose excerpt_indices were assigned in the cells above,
# grouped by context (climate change, feminism, death penalty, trump).
args_to_evaluate = [
    cc_arguments[2],
    cc_arguments[4],
    feminism_args[0],
    feminism_args[1],
    feminism_args[2],
    dp_arguments[5],
    dp_arguments[6],
    dp_arguments[7],
    trump_args[1],
    trump_args[2],
    trump_args[3],
]

In [26]:
# Silhouette coefficient for the selected arguments as they are.
# NOTE(review): presumably this scores the generic snippets (arg.snippet) — confirm against SilhouetteCoefficient.
silhouette = SilhouetteCoefficient()
silhouette_generic = silhouette.silhouette_coefficient(args_to_evaluate)

In [27]:
# Edge correlation for the same arguments; the results expose a `.correlation` attribute that is read when the table is built.
edge = EdgeCorrelation()
edge_generic = edge.edge_correlation(args_to_evaluate)

In [28]:
# Pseudo-clustering: build a second set of arguments (new_args) via SentenceArgReAllocator.
# NOTE(review): presumably the re-allocation uses the manually assigned excerpt_indices
# as the contrastive snippets — confirm against SentenceArgReAllocator.
realloc = SentenceArgReAllocator()
realloc.prepare_snippet_embeddings(args_to_evaluate)
realloc.re_allocate(args_to_evaluate)
new_args = realloc.convert_to_argument()

In [29]:
# Same silhouette metric as before, now computed on the re-allocated arguments (new_args).
silhouette = SilhouetteCoefficient()
silhouette_contrastive = silhouette.silhouette_coefficient(new_args)

In [30]:
# Same edge-correlation metric as before, now computed on the re-allocated arguments (new_args).
edge = EdgeCorrelation()
edge_contrastive = edge.edge_correlation(new_args)

The first assumption states that contrastive snippets form better clusters than the generic counterparts. In order to measure this difference in clustering performance, we compute the silhouette coefficient, which relates the intra- and inter-cluster distances, and
the edge correlation, which inspects the correlation between similarity and cluster belonging. Results are shown in the table below.

In [31]:
# One row per metric/snippet type; the transposed view shows one row per context.
# For the edge correlation only the `.correlation` attribute of each result is kept.
_metric_rows = {
    'silhouette_generic': silhouette_generic,
    'silhouette_contrastive': silhouette_contrastive,
    'edge_generic': {k: v.correlation for k, v in edge_generic.items()},
    'edge_contrastive': {k: v.correlation for k, v in edge_contrastive.items()},
}
table = pd.DataFrame.from_records(list(_metric_rows.values()))
table.index = list(_metric_rows)
table.transpose()

Unnamed: 0,silhouette_generic,silhouette_contrastive,edge_generic,edge_contrastive
trump,0.026988,0.053604,0.185904,0.194402
climate change,-0.013672,0.040551,0.011311,0.164829
feminism,0.029515,0.244845,0.098836,0.431865
death_penalty,-0.025475,0.045079,-0.010937,0.142791


In [44]:
# Add the absolute improvement (contrastive minus generic) for both metrics,
# each placed right after the pair of columns it is derived from.
ltable = table.transpose()

s_g_c = ltable['silhouette_contrastive'].values - ltable['silhouette_generic'].values
# Relative silhouette change; computed but currently not added to the table.
rel_s_g_c = s_g_c / abs(ltable['silhouette_generic'].values)
ltable.insert(loc=2, column='diff', value=s_g_c)
#ltable.insert(loc = 3,
#          column = 'rel diff',
#          value = rel_s_g_c)

e_g_c = ltable['edge_contrastive'].values - ltable['edge_generic'].values
ltable.insert(loc=5, column='diff_e', value=e_g_c)
ltable

Unnamed: 0,silhouette_generic,silhouette_contrastive,diff,edge_generic,edge_contrastive,diff_e
trump,0.026988,0.053604,0.026616,0.185904,0.194402,0.008498
climate change,-0.013672,0.040551,0.054223,0.011311,0.164829,0.153518
feminism,0.029515,0.244845,0.21533,0.098836,0.431865,0.333029
death_penalty,-0.025475,0.045079,0.070554,-0.010937,0.142791,0.153728


In [45]:
# Export the comparison table as LaTeX source. print() is used on purpose so
# that the markup is shown verbatim (ready for copy-and-paste) instead of as
# an escaped string repr.
latex_source = ltable.to_latex()
print(latex_source)

\begin{tabular}{lrrrrrr}
\toprule
{} &  silhouette\_generic &  silhouette\_contrastive &      diff &  edge\_generic &  edge\_contrastive &    diff\_e \\
\midrule
trump          &            0.026988 &                0.053604 &  0.026616 &      0.185904 &          0.194402 &  0.008498 \\
climate change &           -0.013672 &                0.040551 &  0.054223 &      0.011311 &          0.164829 &  0.153518 \\
feminism       &            0.029515 &                0.244845 &  0.215330 &      0.098836 &          0.431865 &  0.333029 \\
death\_penalty  &           -0.025475 &                0.045079 &  0.070554 &     -0.010937 &          0.142791 &  0.153728 \\
\bottomrule
\end{tabular}



We can observe that both the silhouette coefficient and the edge correlation yield higher values for the contrastive snippets than for the generic ones, as expected. Since we did not change the contrastive snippets compared to the generic ones for the context 'trump', we observe only a marginal change there. Context 'feminism' shows the largest increase, followed by death_penalty and climate change. Furthermore, we see that both metrics depict a similar situation.

We now focus on representativeness which we measure by the weighted degree centrality in the sentence similarity graph (what we did when we looked at the graphs earlier, just with numbers), and the cosine similarity between the snippets and the complete argument.

In [32]:
# Run the trade-off scorer over the arguments under evaluation.
# NOTE(review): transform() presumably attaches the `soc_sn` / `soc_ex`
# attributes that the representativeness cell below reads from each argument;
# the implementation is not visible here — confirm.
scorer = TradeOffScorer()
scorer.transform(args_to_evaluate)

In [33]:
# Borrow the cosine kernel from MMDBase as the sentence-similarity function.
# NOTE(review): gamma and lambda are set to zero, presumably because they do
# not influence the cosine kernel — confirm against MMDBase.
_kernel_provider = MMDBase(param_gamma=.0, param_lambda=.0)
sim_func = _kernel_provider.cosine_kernel_matrix

In [34]:
def _summed_rows(sim_mat, first, second):
    """Return the summed similarity-matrix rows `first` and `second` as a float."""
    return float(sum(sim_mat[first]) + sum(sim_mat[second]))


# One record per argument: degree centrality and snippet-original comparison
# (soc) for the generic and the contrastive snippet, plus their differences.
representativeness = []
for argument in args_to_evaluate:
    similarities = sim_func(torch.tensor(argument.sentence_embeddings))

    # Generic snippet: locate its two sentences inside the argument.
    generic_first = argument.sentences.index(argument.snippet[0])
    generic_second = argument.sentences.index(argument.snippet[1])
    # Contrastive snippet: stored directly as sentence positions.
    contrastive_first = argument.excerpt_indices[0]
    contrastive_second = argument.excerpt_indices[1]

    generic_centrality = _summed_rows(similarities, generic_first, generic_second)
    contrastive_centrality = _summed_rows(similarities, contrastive_first, contrastive_second)

    record = {
        'id': argument.arg_id,
        'generic_degree_centrality': generic_centrality,
        'contrastive_degree_centrality': contrastive_centrality,
        'change_in_degree_centrality': contrastive_centrality - generic_centrality,
        'generic_soc': argument.soc_sn,
        'contrastive_soc': argument.soc_ex,
        'change_in_soc': argument.soc_ex - argument.soc_sn,
    }
    representativeness.append(record)

pd.DataFrame.from_records(representativeness)

Unnamed: 0,id,generic_degree_centrality,contrastive_degree_centrality,change_in_degree_centrality,generic_soc,contrastive_soc,change_in_soc
0,climate change-06,4.668279,4.668279,0.0,0.631397,0.631397,5.960464e-08
1,climate change-08,4.336589,3.66994,-0.666649,0.878635,0.830405,-0.04822963
2,feminism-01,5.767579,5.730258,-0.037321,0.947536,0.94431,-0.003226697
3,feminism-02,5.980761,5.268937,-0.711823,0.872578,0.787552,-0.08502597
4,feminism-03,3.902555,3.64998,-0.252576,0.927434,0.940326,0.01289177
5,death_penalty-07,19.561958,19.500824,-0.061134,0.697997,0.700546,0.002549171
6,death_penalty-08,10.267973,10.267973,0.0,0.758006,0.758006,1.192093e-07
7,death_penalty-09,18.41498,15.602402,-2.812578,0.875187,0.789306,-0.08588105
8,trump-02,15.187593,15.187593,0.0,0.79056,0.79056,0.0
9,trump-04,8.598515,8.598515,0.0,0.605476,0.605476,-1.788139e-07


The change in degree centrality is zero where we left the snippets unchanged. All other contrastive snippets show a decrease compared to the generic ones, but this is expected, as we have to give up some representativeness in order to integrate contrastiveness. Put differently, we take a step away from the snippet that represents the argument optimally.

The snippet-original comparison (soc), i.e. the cosine similarity between the snippet and the complete argument, shows a slightly different situation. First, note that `change_in_soc` should be zero wherever `change_in_degree_centrality` is zero, too. The tiny non-zero values there (on the order of $10^{-7}$) are floating-point rounding errors. In most cases where the snippet changed, soc shows a negative change, too. In contrast, feminism-03 and death_penalty-07 (and, within floating-point error only, death_penalty-08) depict an increase in similarity between the contrastive snippet and the argument compared to the generic snippet and the argument. This indicates that the contrastive snippet matches the argument better than the generic one, and therefore there is no loss in representativeness.

Finally, we consider the argumentative nature of the summarized texts. We fine-tuned a pre-trained BERT model on the regression task defined by [Gretz et al. (2020)](#Gretz.2020). Target values are their weighted-average scores. They reported a Spearman correlation of $\rho = .47$ and a Pearson correlation coefficient of $r=.51$. Our model achieves similar results:
```log
2021-08-03 13:17:30,787 __main__ 	 [INFO] 	 Pearson r: (0.5152595582724522, 0.0)
2021-08-03 13:17:30,787 __main__ 	 [INFO] 	 Spearman p: SpearmanrResult(correlation=0.4673681293955043, pvalue=0.0)
```

In [35]:
# Load the BERT model fine-tuned on the argument-quality regression task from
# the local results directory (no download is attempted for the model), and
# pair it with the stock tokenizer of its base model.
model = BertForSequenceClassification.from_pretrained(
    '../bert-finetuning/results/argQ-bert-base-uncased',
    local_files_only=True,
)
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

# The device is a construction-time option of the pipeline; -1 selects the CPU.
# It is set here because passing it when calling the pipeline has no defined
# effect.
pipeline = TextClassificationPipeline(
    model=model,
    tokenizer=tokenizer,
    framework='pt',
    task='ArgQ',
    device=-1,
)

In [36]:
# Each generic snippet is scored as one text: its sentences joined by a space.
generic_snippets = [
    " ".join(argument.snippet)
    for argument in args_to_evaluate
]

In [None]:
# Score the generic snippets with the fine-tuned model.
# `device` is fixed when the pipeline object is constructed and is not a
# call-time option, so it is not passed here.
generic_results = pipeline(generic_snippets)

In [None]:
# Each contrastive snippet is scored as one text: the sentences selected by
# `excerpt_indices`, joined by a space.
contrastive_snippets = []
for argument in args_to_evaluate:
    excerpt_sentences = np.take(argument.sentences, argument.excerpt_indices)
    contrastive_snippets.append(" ".join(excerpt_sentences))

In [None]:
# Score the contrastive snippets with the fine-tuned model.
# `device` is fixed when the pipeline object is constructed and is not a
# call-time option, so it is not passed here.
contrastive_results = pipeline(contrastive_snippets)

In [None]:
# Pair the two pipeline outputs per argument; both result lists are in the
# same order as `args_to_evaluate`, so they are addressed by position.
arg_score_records = []
for position, argument in enumerate(args_to_evaluate):
    generic_score = generic_results[position]['score']
    contrastive_score = contrastive_results[position]['score']
    record = {
        'id': argument.arg_id,
        'generic_snippets_argumentativeness': generic_score,
        'contrastive_snippets_argumentativeness': contrastive_score,
        'change': contrastive_score - generic_score,
    }
    arg_score_records.append(record)

In [None]:
# Show the per-argument scores as a table (one row per argument).
pd.DataFrame(arg_score_records)

We assess the argumentativeness/quality for all snippet sentences jointly. The results show slightly decreasing to slightly increasing, but mostly stable values.

# Remarks on evaluation
The assessment of the contrastiveness shows the expected results. We can take the fact that the scores for context 'trump' changed only slightly as a hint that the generic snippets are already contrastive while also representing their arguments. A decrease in the representativeness assessment was expected, too. Only the increase of the snippet-original comparison for some arguments gives rise to the question of whether the metrics are suitable or whether we face an unexpected phenomenon of representative contrastive snippets. Regarding the argumentativeness, we have to keep in mind that the scores themselves are results of another model, which might not reflect reality perfectly.

# References

<a id="Alshomary.2020"></a> 
Milad Alshomary, Nick Düsterhus, and Henning Wachsmuth. 2020. Extractive snippet generation for arguments. In SIGIR ’20, pages 1969–1972, New York, NY. Association for Computing Machinery.

<a id="Reimers.2019"></a> 
Nils Reimers and Iryna Gurevych. 2019. Sentence-bert: Sentence embeddings using siamese bert-networks. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics.

<a id="Gretz.2020"></a> 
Shai Gretz, Roni Friedman, Edo Cohen-Karlik, Assaf Toledo, Dan Lahav, Ranit Aharonov, and Noam Slonim. 2020. A large-scale dataset for argument quality ranking: Construction and analysis. Proceedings of the AAAI Conference on Artificial Intelligence, 34(05):7805–7813.