In [1]:
### This notebook contains rough code for constructing Latex tables with experimental results

In [72]:
from tabulate import tabulate
from texttable import Texttable

import latextable
import numpy as np

In [73]:
import sys
sys.path.append('../src')

from misc import open_dict, get_file_names


In [143]:
settings = open_dict('../results/ablation_testing/settings_params_from_chosen_each_timeProppLearner_from_allen_short.p')

### Classifier Results

In [144]:
## #create dict arranged differently: datast, sampling strategy, experiment, acc/prec/recall/f1

datasetNames = list(settings.keys())
print(datasetNames)
experimentNames = list(settings[datasetNames[0]]['results'].keys())
print(experimentNames)
samplingStrategies = list(settings[datasetNames[0]]['results'][experimentNames[0]].keys())
print(samplingStrategies)
measures = list(settings[datasetNames[0]]['results'][experimentNames[0]][samplingStrategies[0]].keys())
print(measures)

['ProppLearner_from_gold', 'ProppLearner_from_allen', 'ProppLearner_from_heads_only', 'LitBank_from_gold', 'LitBank_from_allen', 'CEN_from_allen', 'CEN_from_heads_only', 'ProppLearner_from_allen_short']
['original features', 'index features', 'CoreNLP', 'additional features', 'additional features plus CN']
['normal', 'over', 'over_under']
['accuracy', 'precision', 'recall', 'f1']


In [145]:
resultsByName = {}

for datasetName in datasetNames:

    # if datasetName in ['ProppLearner_from_allen_short']:
    #     continue
    
    resultsByName[datasetName] = {}


    for samplingStrategyName in samplingStrategies:
        resultsByName[datasetName][samplingStrategyName] = {}

        for experimentName in experimentNames:
            resultsByName[datasetName][samplingStrategyName][experimentName] = {}

            for measureName in measures:
                resultsByName[datasetName][samplingStrategyName][experimentName][measureName] = 0

In [146]:
for datasetName, dataset in settings.items():
    # if datasetName in ['ProppLearner_from_allen_short']:
    #     continue
    
    results = dataset['results']
    for experimentName, experiment in results.items():
        for samplingStrategyName, samplingStrategy in experiment.items():
            for measureName, measure in samplingStrategy.items():
                resultsByName[datasetName][samplingStrategyName][experimentName][measureName] = measure


#### Table 1 (Original ALlenNLP vs using CoreNLP)

In [125]:
# rows = [['','Normal', 'Over', 'Over Under', 'Normal', 'Over', 'Over Under']]
# for expName, exp in settings.items():

#     row = [expName]

#     for resName in ['original features', 'CoreNLP']:

#         for key, val in exp['results'][resName].items():
#             row.append('$' + str(val['avg']) + '$')
        
#     rows.append(row)

rows = [['Sampling method','Accuracy', 'Precision', 'Recall','F-score','Accuracy', 'Precision', 'Recall','F-score']]

for datasetName, dataset in resultsByName.items():

    for samplingStratName, samplingStrat in dataset.items():

        row = ['$' + datasetName + ' ' + samplingStratName + '$']
        count = 0

        while count <= 1:
            for expName, exp in samplingStrat.items():
                
                if count == 0:

                    if expName == 'CoreNLP':

                        for measureName, res in exp.items():
                            row.append('$' +  str(res['avg'])  + '$')

                        count += 1
                
                elif count == 1:
                    if expName == 'original features':
                        for measureName, res in exp.items():
                            row.append('$' +  str(res['avg'])  + '$')

                        count += 1

                

        rows.append(row)

TypeError: 'int' object is not subscriptable

In [126]:
capt = 'A comparison between the the classification model in the original approach when features are constructed using statistical methods (CoreNLP), and the same model when features are constructed using methods based on PTLMs (AllenNLP).'

table = Texttable()
table.set_cols_align(["c"] * len(rows[0]))
table.set_deco(Texttable.HEADER | Texttable.VLINES)
table.add_rows(rows)

print('\nTexttable Table')
print(table.draw())

print('\nTexttable Latex:')
print(latextable.draw_latex(table, caption=capt))


Texttable Table
Samplin | Accurac | Precis | Recall | F-scor | Accura | Precis | Recall | F-scor
   g    |    y    |  ion   |        |   e    |   cy   |  ion   |        |   e   
method  |         |        |        |        |        |        |        |       
$ProppL | $0.96$  | $0.69$ | $0.91$ | $0.78$ | $0.97$ | $0.74$ | $0.95$ | $0.83$
earner_ |         |        |        |        |        |        |        |       
from_go |         |        |        |        |        |        |        |       
  ld    |         |        |        |        |        |        |        |       
normal$ |         |        |        |        |        |        |        |       
$ProppL | $0.97$  | $0.81$ | $0.8$  | $0.8$  | $0.97$ | $0.75$ | $0.94$ | $0.83$
earner_ |         |        |        |        |        |        |        |       
from_go |         |        |        |        |        |        |        |       
  ld    |         |        |        |        |        |        |        |       
 over$  |  

In [114]:
capt = 'A comparison between the the classification model in the original approach when features are constructed using statistical methods (CoreNLP), and the same model when features are constructed using methods based on PTLMs (AllenNLP).'

table = Texttable()
table.set_cols_align(["c"] * len(rows[0]))
table.set_deco(Texttable.HEADER | Texttable.VLINES)
table.add_rows(rows)

print('\nTexttable Table')
print(table.draw())

print('\nTexttable Latex:')
print(latextable.draw_latex(table, caption=capt))


Texttable Table
Samplin | Accurac | Precis | Recall | F-scor | Accura | Precis | Recall | F-scor
   g    |    y    |  ion   |        |   e    |   cy   |  ion   |        |   e   
method  |         |        |        |        |        |        |        |       
$ProppL | $0.97$  | $0.74$ | $0.95$ | $0.83$ | $0.96$ | $0.69$ | $0.91$ | $0.78$
earner_ |         |        |        |        |        |        |        |       
from_go |         |        |        |        |        |        |        |       
  ld    |         |        |        |        |        |        |        |       
normal$ |         |        |        |        |        |        |        |       
$ProppL | $0.97$  | $0.75$ | $0.94$ | $0.83$ | $0.97$ | $0.81$ | $0.8$  | $0.8$ 
earner_ |         |        |        |        |        |        |        |       
from_go |         |        |        |        |        |        |        |       
  ld    |         |        |        |        |        |        |        |       
 over$  |  

#### Table 2 (Indexes)

In [138]:
# rows = [['','Normal', 'Over', 'Over Under']]
# for expName, exp in settings.items():

#     if exp['OrigOnly']:
#         continue
    
#     row = [expName]

#     for resName in ['index features']:

#         for key, val in exp['results'][resName].items():
#             row.append('$' + str(val['avg']) + '$')
        
#     rows.append(row)


rows = [['Sampling method','Accuracy', 'Precision', 'Recall','F-score']]

for datasetName, dataset in resultsByName.items():

    if settings[datasetName]['OrigOnly']:
        continue

    for samplingStratName, samplingStrat in dataset.items():

        row = ['$' + datasetName + ' ' + samplingStratName + '$']

        for expName, exp in samplingStrat.items():
        
            if expName == 'index features':

                for measureName, res in exp.items():
                    row.append('$' +  str(res['avg'])  + '$')

        rows.append(row)

In [140]:
table = Texttable()
table.set_cols_align(["c"] * len(rows[0]))
table.set_deco(Texttable.HEADER | Texttable.VLINES)
table.add_rows(rows)

print('\nTexttable Table')
print(table.draw())

print('\nTexttable Latex:')
print(latextable.draw_latex(table, caption="F-Scores for the classification model using Index features."))


Texttable Table
          Sampling method            | Accuracy | Precision | Recall | F-score
  $ProppLearner_from_gold normal$    |  $0.97$  |  $0.71$   | $0.95$ | $0.81$ 
   $ProppLearner_from_gold over$     |  $0.97$  |  $0.77$   | $0.95$ | $0.85$ 
$ProppLearner_from_gold over_under$  |  $0.97$  |   $0.8$   | $0.91$ | $0.85$ 
  $ProppLearner_from_allen normal$   |  $0.83$  |  $0.59$   | $0.87$ | $0.71$ 
   $ProppLearner_from_allen over$    |  $0.85$  |  $0.65$   | $0.82$ | $0.72$ 
$ProppLearner_from_allen over_under$ |  $0.85$  |  $0.65$   | $0.81$ | $0.72$ 
     $LitBank_from_gold normal$      |  $0.93$  |  $0.22$   | $0.73$ | $0.34$ 
      $LitBank_from_gold over$       |  $0.96$  |  $0.33$   | $0.62$ | $0.42$ 
   $LitBank_from_gold over_under$    |  $0.96$  |  $0.34$   | $0.62$ | $0.44$ 
    $LitBank_from_allen normal$      |  $0.93$  |  $0.32$   | $0.72$ | $0.43$ 
     $LitBank_from_allen over$       |  $0.95$  |  $0.39$   | $0.55$ | $0.45$ 
  $LitBank_from_allen over_under$  

#### Table 2 (Additional Features)

In [141]:
# rows = [['','Normal', 'Over', 'Over Under']]
# for expName, exp in settings.items():

#     if exp['OrigOnly']:
#         continue
    
#     row = [expName]

#     for resName in ['additional features']:

#         for key, val in exp['results'][resName].items():
#             row.append('$' + str(val['avg']) + '$')
        
#     rows.append(row)



rows = [['Sampling method','Accuracy', 'Precision', 'Recall','F-score']]

for datasetName, dataset in resultsByName.items():

    if settings[datasetName]['OrigOnly']:
        continue

    for samplingStratName, samplingStrat in dataset.items():

        row = ['$' + datasetName + ' ' + samplingStratName + '$']

        for expName, exp in samplingStrat.items():
        
            if expName == 'additional features':

                for measureName, res in exp.items():
                    row.append('$' +  str(res['avg'])  + '$')

        rows.append(row)

In [142]:
table = Texttable()
table.set_cols_align(["c"] * len(rows[0]))
table.set_deco(Texttable.HEADER | Texttable.VLINES)
table.add_rows(rows)

print('\nTexttable Table')
print(table.draw())

print('\nTexttable Latex:')
print(latextable.draw_latex(table, caption="F-Scores for the classification model using Index features, the quotation feature and the dispersion feature."))


Texttable Table
          Sampling method            | Accuracy | Precision | Recall | F-score
  $ProppLearner_from_gold normal$    |  $0.97$  |  $0.79$   | $0.92$ | $0.85$ 
   $ProppLearner_from_gold over$     |  $0.97$  |   $0.8$   | $0.92$ | $0.85$ 
$ProppLearner_from_gold over_under$  |  $0.97$  |  $0.79$   | $0.92$ | $0.84$ 
  $ProppLearner_from_allen normal$   |  $0.84$  |  $0.62$   | $0.84$ | $0.71$ 
   $ProppLearner_from_allen over$    |  $0.85$  |  $0.65$   | $0.82$ | $0.72$ 
$ProppLearner_from_allen over_under$ |  $0.85$  |  $0.65$   | $0.81$ | $0.72$ 
     $LitBank_from_gold normal$      |  $0.93$  |  $0.22$   | $0.7$  | $0.34$ 
      $LitBank_from_gold over$       |  $0.96$  |  $0.32$   | $0.62$ | $0.41$ 
   $LitBank_from_gold over_under$    |  $0.95$  |  $0.29$   | $0.77$ | $0.42$ 
    $LitBank_from_allen normal$      |  $0.94$  |  $0.32$   | $0.49$ | $0.38$ 
     $LitBank_from_allen over$       |  $0.92$  |   $0.3$   | $0.79$ | $0.43$ 
  $LitBank_from_allen over_under$  

#### Table 4 (15 stories ProppLearner Allen)

In [None]:
rows = [['Sampling method','Accuracy', 'Precision', 'Recall','F-score']]

for datasetName, dataset in resultsByName.items():

    if settings[datasetName]['OrigOnly']:
        continue

    for samplingStratName, samplingStrat in dataset.items():

        row = ['$' + datasetName + ' ' + samplingStratName + '$']

        for expName, exp in samplingStrat.items():
        
            if expName == 'additional features':

                for measureName, res in exp.items():
                    row.append('$' +  str(res['avg'])  + '$')

        rows.append(row)

In [148]:
resultsByName['ProppLearner_from_allen_short']

{'normal': {'original features': {'accuracy': {'avg': 0.85,
    'all': array([0.80176211, 0.86784141, 0.8061674 , 0.85022026, 0.83700441,
           0.85462555, 0.84955752, 0.84955752, 0.87610619, 0.84955752,
           0.84140969, 0.81938326, 0.88105727, 0.84581498, 0.83700441,
           0.86343612, 0.80088496, 0.85840708, 0.86725664, 0.84955752,
           0.82378855, 0.86784141, 0.86343612, 0.85022026, 0.86343612,
           0.87665198, 0.84070796, 0.84955752, 0.82300885, 0.81415929,
           0.85022026, 0.8061674 , 0.87665198, 0.81057269, 0.8722467 ,
           0.82819383, 0.90265487, 0.86725664, 0.86283186, 0.84513274,
           0.82378855, 0.86343612, 0.85462555, 0.77973568, 0.88105727,
           0.85022026, 0.84955752, 0.85840708, 0.86283186, 0.85840708,
           0.85022026, 0.85022026, 0.84140969, 0.84581498, 0.84140969,
           0.86343612, 0.90707965, 0.80973451, 0.84070796, 0.80973451,
           0.8722467 , 0.88105727, 0.84140969, 0.84581498, 0.85022026,
          

In [153]:
rows = [['Sampling method','Accuracy', 'Precision', 'Recall','F-score']]

for datasetName, dataset in resultsByName.items():

    if datasetName != 'ProppLearner_from_allen_short':
        continue

    for samplingStratName, samplingStrat in dataset.items():

        row = ['$' + datasetName + ' ' + samplingStratName + '$']

        for expName, exp in samplingStrat.items():
        
            if expName == 'additional features':

                for measureName, res in exp.items():
                    row.append('$' +  str(res['avg'])  + '$')

        rows.append(row)

In [155]:
table = Texttable()
table.set_cols_align(["c"] * len(rows[0]))
table.set_deco(Texttable.HEADER | Texttable.VLINES)
table.add_rows(rows)

print('\nTexttable Table')
print(table.draw())

print('\nTexttable Latex:')
print(latextable.draw_latex(table, caption="F-Scores for the classification model, first 15 stories of ProppLearner SpanBERT"))


Texttable Table
           Sampling method             | Accuracy | Precision | Recall | F-score
$ProppLearner_from_allen_short normal$ |  $0.84$  |  $0.62$   | $0.84$ | $0.71$ 
 $ProppLearner_from_allen_short over$  |  $0.85$  |  $0.65$   | $0.82$ | $0.72$ 
    $ProppLearner_from_allen_short     |  $0.86$  |  $0.68$   | $0.77$ | $0.72$ 
             over_under$               |          |           |        |        

Texttable Latex:
\begin{table}
	\begin{center}
		\begin{tabular}{c|c|c|c|c}
			Sampling method & Accuracy & Precision & Recall & F-score \\
			\hline
			$ProppLearner_from_allen_short normal$ & $0.84$ & $0.62$ & $0.84$ & $0.71$ \\
			$ProppLearner_from_allen_short over$ & $0.85$ & $0.65$ & $0.82$ & $0.72$ \\
			$ProppLearner_from_allen_short over_under$ & $0.86$ & $0.68$ & $0.77$ & $0.72$ \\
		\end{tabular}
	\end{center}
	\caption{F-Scores for the classification model, first 15 stories of ProppLearner SpanBERT}
\end{table}


### Full Pipeline Results

#### Select Best Resuls

In [8]:
print('results -1:')
print(np.load('../results/full_pipeline/results.npy'))
print()

for i in range(5):
    print('results',str(i)+':')
    print(np.load('../results/full_pipeline/results' + str(i) + '.npy'))

results -1:
[[[0.51820451 0.67615287 0.51820451 0.56943779]
  [0.57911675 0.66920491 0.57911675 0.60328613]
  [0.54826377 0.67615287 0.54826377 0.58798291]
  [0.54596843 0.66920491 0.54596843 0.58363998]]

 [[0.50500869 0.47331535 0.50500869 0.46565982]
  [0.54261852 0.4614526  0.54261852 0.47724834]
  [0.51124749 0.47331535 0.51124749 0.46928457]
  [0.53490675 0.4614526  0.53490675 0.47303411]]

 [[0.25094544 0.20032738 0.25094544 0.19247444]
  [0.26525446 0.20032738 0.26525446 0.1941876 ]
  [0.26099256 0.20032738 0.26099256 0.19307671]
  [0.25520734 0.20032738 0.25520734 0.19358533]]]

results 0:
[[[0.5280132  0.67932327 0.5280132  0.57787541]
  [0.58562531 0.67266632 0.58562531 0.60958732]
  [0.55688612 0.67932327 0.55688612 0.59545249]
  [0.55412102 0.67266632 0.55412102 0.59105327]]

 [[0.5031799  0.47290169 0.5031799  0.4615705 ]
  [0.5425092  0.46149646 0.5425092  0.47402802]
  [0.50879764 0.47290169 0.50879764 0.4648382 ]
  [0.53586727 0.46149646 0.53586727 0.4703454 ]]

 [[0.2

In [6]:
results0 = np.load('../results/full_pipeline/results' + str(0) + '.npy')
results1 = np.load('../results/full_pipeline/results' + str(1) + '.npy')
results4 = np.load('../results/full_pipeline/results' + str(4) + '.npy')

In [21]:
results = results0

best = [0,1,4]

rows = [['','Accuracy','Recall','Precision','F1']]
settingNames = ['ProppLearner', 'CEN', 'LitBank']

for i, (settingName, settingNum) in enumerate(zip(settingNames, range(len(settingNames)))):

    results = np.load('../results/full_pipeline/results' + str(best[i]) + '.npy')

    rowNames = ['No Post-processing', "Remove `and's and remove `None'","Remove `None'" , "Remove `and's"]
    
    for rowName, result in zip(rowNames, results[settingNum]):
        row = []
        row.append(rowName)
        for item in result:
            row.append('$' + str(np.round(item, 2)) + '$')

        rows.append(row)

In [22]:
table = Texttable()
table.set_cols_align(["c"] * len(rows[0]))
table.set_deco(Texttable.HEADER | Texttable.VLINES)
table.add_rows(rows)

print('\nTexttable Table')
print(table.draw())

print('\nTexttable Latex:')
print(latextable.draw_latex(table, caption="A comparison of F-scores the the classification model when features are constructed using AllenNLP vs using CoreNLP."))


Texttable Table
                                | Accuracy | Recall | Precision |   F1  
      No Post-processing        |  $0.53$  | $0.68$ |  $0.53$   | $0.58$
Remove `and's and remove `None' |  $0.59$  | $0.67$ |  $0.59$   | $0.61$
         Remove `None'          |  $0.56$  | $0.68$ |  $0.56$   | $0.6$ 
         Remove `and's          |  $0.55$  | $0.67$ |  $0.55$   | $0.59$
      No Post-processing        |  $0.52$  | $0.48$ |  $0.52$   | $0.47$
Remove `and's and remove `None' |  $0.56$  | $0.47$ |  $0.56$   | $0.48$
         Remove `None'          |  $0.53$  | $0.48$ |  $0.53$   | $0.48$
         Remove `and's          |  $0.55$  | $0.47$ |  $0.55$   | $0.48$
      No Post-processing        |  $0.27$  | $0.22$ |  $0.27$   | $0.2$ 
Remove `and's and remove `None' |  $0.3$   | $0.22$ |   $0.3$   | $0.21$
         Remove `None'          |  $0.28$  | $0.22$ |  $0.28$   | $0.2$ 
         Remove `and's          |  $0.29$  | $0.22$ |  $0.29$   | $0.21$

Texttable Latex:
\begin{table}
	\