Commit

Merge pull request #24 from tomxie0217/master
2020 evaluations 15-CIST
cmkumar87 committed Oct 17, 2020
2 parents b4f1dbe + ffd47c5 commit e35b2ca
Showing 69,973 changed files with 9,628,017 additions and 42 deletions.
The diff is too large to display in full; only the first 3,000 changed files are loaded.
84 changes: 42 additions & 42 deletions CLSciSumm_2020_Evaluation/15-CIST/runs/readme.csv
@@ -41,45 +41,45 @@ run39,Jaccard-Focused_POS-Vote,Jaccard-Focused_SV-DPPs
run40,Jaccard-Focused_POS-XGB,Jaccard-Focused_GCN
run41,Jaccard-Focused_POS-XGB,Jaccard-Focused_QD-DPPs
run42,Jaccard-Focused_POS-XGB,Jaccard-Focused_SV-DPPs
run43,Voting-1.1_CON-CT-FastText,Voting-1.1_GCN
run44,Voting-1.1_CON-CT-FastText,Voting-1.1_QD-DPPs
run45,Voting-1.1_CON-CT-FastText,Voting-1.1_SV-DPPs
run46,Voting-1.1_CON-POS-Vote,Voting-1.1_GCN
run47,Voting-1.1_CON-POS-Vote,Voting-1.1_QD-DPPs
run48,Voting-1.1_CON-POS-Vote,Voting-1.1_SV-DPPs
run49,Voting-1.1_CON-RT-FastText,Voting-1.1_GCN
run50,Voting-1.1_CON-RT-FastText,Voting-1.1_QD-DPPs
run51,Voting-1.1_CON-RT-FastText,Voting-1.1_SV-DPPs
run52,Voting-1.1_POS-ADB,Voting-1.1_GCN
run53,Voting-1.1_POS-ADB,Voting-1.1_QD-DPPs
run54,Voting-1.1_POS-ADB,Voting-1.1_SV-DPPs
run55,Voting-1.1_POS-LR,Voting-1.1_GCN
run56,Voting-1.1_POS-LR,Voting-1.1_QD-DPPs
run57,Voting-1.1_POS-LR,Voting-1.1_SV-DPPs
run58,Voting-1.1_POS-Vote,Voting-1.1_GCN
run59,Voting-1.1_POS-Vote,Voting-1.1_QD-DPPs
run60,Voting-1.1_POS-Vote,Voting-1.1_SV-DPPs
run61,Voting-1.1_POS-XGB,Voting-1.1_GCN
run62,Voting-1.1_POS-XGB,Voting-1.1_QD-DPPs
run63,Voting-1.1_POS-XGB,Voting-1.1_SV-DPPs
run64,Voting-2.0_CON-CT-FastText,Voting-2.0_GCN
run65,Voting-2.0_CON-CT-FastText,Voting-2.0_QD-DPPs
run66,Voting-2.0_CON-CT-FastText,Voting-2.0_SV-DPPs
run67,Voting-2.0_CON-POS-Vote,Voting-2.0_GCN
run68,Voting-2.0_CON-POS-Vote,Voting-2.0_QD-DPPs
run69,Voting-2.0_CON-POS-Vote,Voting-2.0_SV-DPPs
run70,Voting-2.0_CON-RT-FastText,Voting-2.0_GCN
run71,Voting-2.0_CON-RT-FastText,Voting-2.0_QD-DPPs
run72,Voting-2.0_CON-RT-FastText,Voting-2.0_SV-DPPs
run73,Voting-2.0_POS-ADB,Voting-2.0_GCN
run74,Voting-2.0_POS-ADB,Voting-2.0_QD-DPPs
run75,Voting-2.0_POS-ADB,Voting-2.0_SV-DPPs
run76,Voting-2.0_POS-LR,Voting-2.0_GCN
run77,Voting-2.0_POS-LR,Voting-2.0_QD-DPPs
run78,Voting-2.0_POS-LR,Voting-2.0_SV-DPPs
run79,Voting-2.0_POS-Vote,Voting-2.0_GCN
run80,Voting-2.0_POS-Vote,Voting-2.0_QD-DPPs
run81,Voting-2.0_POS-Vote,Voting-2.0_SV-DPPs
run82,Voting-2.0_POS-XGB,Voting-2.0_GCN
run83,Voting-2.0_POS-XGB,Voting-2.0_QD-DPPs
run84,Voting-2.0_POS-XGB,Voting-2.0_SV-DPPs
run43,Voting-1.2_CON-CT-FastText,Voting-1.2_GCN
run44,Voting-1.2_CON-CT-FastText,Voting-1.2_QD-DPPs
run45,Voting-1.2_CON-CT-FastText,Voting-1.2_SV-DPPs
run46,Voting-1.2_CON-POS-Vote,Voting-1.2_GCN
run47,Voting-1.2_CON-POS-Vote,Voting-1.2_QD-DPPs
run48,Voting-1.2_CON-POS-Vote,Voting-1.2_SV-DPPs
run49,Voting-1.2_CON-RT-FastText,Voting-1.2_GCN
run50,Voting-1.2_CON-RT-FastText,Voting-1.2_QD-DPPs
run51,Voting-1.2_CON-RT-FastText,Voting-1.2_SV-DPPs
run52,Voting-1.2_POS-ADB,Voting-1.2_GCN
run53,Voting-1.2_POS-ADB,Voting-1.2_QD-DPPs
run54,Voting-1.2_POS-ADB,Voting-1.2_SV-DPPs
run55,Voting-1.2_POS-LR,Voting-1.2_GCN
run56,Voting-1.2_POS-LR,Voting-1.2_QD-DPPs
run57,Voting-1.2_POS-LR,Voting-1.2_SV-DPPs
run58,Voting-1.2_POS-Vote,Voting-1.2_GCN
run59,Voting-1.2_POS-Vote,Voting-1.2_QD-DPPs
run60,Voting-1.2_POS-Vote,Voting-1.2_SV-DPPs
run61,Voting-1.2_POS-XGB,Voting-1.2_GCN
run62,Voting-1.2_POS-XGB,Voting-1.2_QD-DPPs
run63,Voting-1.2_POS-XGB,Voting-1.2_SV-DPPs
run64,Voting-2.1_CON-CT-FastText,Voting-2.1_GCN
run65,Voting-2.1_CON-CT-FastText,Voting-2.1_QD-DPPs
run66,Voting-2.1_CON-CT-FastText,Voting-2.1_SV-DPPs
run67,Voting-2.1_CON-POS-Vote,Voting-2.1_GCN
run68,Voting-2.1_CON-POS-Vote,Voting-2.1_QD-DPPs
run69,Voting-2.1_CON-POS-Vote,Voting-2.1_SV-DPPs
run70,Voting-2.1_CON-RT-FastText,Voting-2.1_GCN
run71,Voting-2.1_CON-RT-FastText,Voting-2.1_QD-DPPs
run72,Voting-2.1_CON-RT-FastText,Voting-2.1_SV-DPPs
run73,Voting-2.1_POS-ADB,Voting-2.1_GCN
run74,Voting-2.1_POS-ADB,Voting-2.1_QD-DPPs
run75,Voting-2.1_POS-ADB,Voting-2.1_SV-DPPs
run76,Voting-2.1_POS-LR,Voting-2.1_GCN
run77,Voting-2.1_POS-LR,Voting-2.1_QD-DPPs
run78,Voting-2.1_POS-LR,Voting-2.1_SV-DPPs
run79,Voting-2.1_POS-Vote,Voting-2.1_GCN
run80,Voting-2.1_POS-Vote,Voting-2.1_QD-DPPs
run81,Voting-2.1_POS-Vote,Voting-2.1_SV-DPPs
run82,Voting-2.1_POS-XGB,Voting-2.1_GCN
run83,Voting-2.1_POS-XGB,Voting-2.1_QD-DPPs
run84,Voting-2.1_POS-XGB,Voting-2.1_SV-DPPs
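
The rows above pair each run identifier with the two component-method labels used for that run; within this hunk the only change is that the Voting-1.1 and Voting-2.0 prefixes become Voting-1.2 and Voting-2.1 for runs 43 through 84. As a reading aid only, the following minimal Python sketch loads readme.csv into a run-to-methods mapping. It assumes three comma-separated fields per row, a run identifier in the first column, and no header row; the function name and default path are illustrative and not part of the submission.

    import csv

    def load_runs(path="CLSciSumm_2020_Evaluation/15-CIST/runs/readme.csv"):
        """Map each run id to its pair of component-method labels.

        Assumes rows of the form "run43,Voting-1.2_CON-CT-FastText,Voting-1.2_GCN";
        rows with a different shape are skipped.
        """
        runs = {}
        with open(path, newline="", encoding="utf-8") as f:
            for row in csv.reader(f):
                if len(row) == 3 and row[0].strip().startswith("run"):
                    run_id, method_a, method_b = (field.strip() for field in row)
                    runs[run_id] = (method_a, method_b)
        return runs

    # Example: look up the methods behind run43 (per the renamed row above).
    # load_runs().get("run43") -> ('Voting-1.2_CON-CT-FastText', 'Voting-1.2_GCN')
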
1 change: 1 addition & 0 deletions CLSciSumm_2020_Evaluation/15-CIST/setup/run1/err.log
@@ -0,0 +1 @@
rm: cannot remove 'output/*': No such file or directory
Binary file not shown.
@@ -0,0 +1,29 @@
Citance Number,Reference Article,Citing Article,Citation Marker Offset,Citation Marker,Citation Offset,Citation Text,Citation Text Clean,Reference Offset,Reference Text,Discourse Facet
2,A00-2018,N10-1002,0,"Charniak, 2000",0,"As a benchmark VPC extraction system, we use the Charniak parser (Charniak, 2000)","As a benchmark VPC extraction system, we use the Charniak parser (Charniak, 2000)","'90' , '91'","<S sid=""90"" ssid=""1"">We created a parser based upon the maximumentropy-inspired model of the last section, smoothed using standard deleted interpolation.</S>
<S sid=""91"" ssid=""2"">As the generative model is top-down and we use a standard bottom-up best-first probabilistic chart parser [2,7], we use the chart parser as a first pass to generate candidate possible parses to be evaluated in the second pass by our probabilistic model.</S>",Method_Citation
3,A00-2018,W11-0610,0,"Charniak, 2000",0,"Each of these scores can be calculated from a provided syntactic parse tree, and to generate these we made use of the Charniakparser (Charniak, 2000), also trained on the Switch board tree bank","Each of these scores can be calculated from a provided syntactic parse tree, and to generate these we made use of the Charniak parser (Charniak, 2000), also trained on the Switch board tree bank",'5',"<S sid=""5"" ssid=""1"">We present a new parser for parsing down to Penn tree-bank style parse trees [16] that achieves 90.1% average precision/recall for sentences of length &lt; 40, and 89.5% for sentences of length &lt; 100, when trained and tested on the previously established [5,9,10,15,17] &amp;quot;standard&amp;quot; sections of the Wall Street Journal tree-bank.</S>",Method_Citation
4,A00-2018,W06-3119,0,"Charniak, 2000",0,"We then use Charniak? s parser (Charniak, 2000) to generate the most likely parse tree for each English target sentence in the training corpus","We then use Charniak's parser (Charniak, 2000) to generate the most likely parse tree for each English target sentence in the training corpus",'90',"<S sid=""90"" ssid=""1"">We created a parser based upon the maximumentropy-inspired model of the last section, smoothed using standard deleted interpolation.</S>",Method_Citation
5,A00-2018,N03-2024,0,"Charniak, 2000",0,"We were interested in the occurrence of features such as type and number of premodifiers, presence and type of post modifiers, and form of name reference for people. We constructed a large, automatically annotated corpus by merging the output of Charniak? s statistical parser (Charniak, 2000) with that of the IBM named entity recognition system Nominator (Wacholder et al,1997)","We were interested in the occurrence of features such as type and number of premodifiers, presence and type of post modifiers, and form of name reference for people. We constructed a large, automatically annotated corpus by merging the output of Charniak's statistical parser (Charniak, 2000) with that of the IBM named entity recognition system Nominator (Wacholder et al,1997)","'48','49','51'","<S sid=""48"" ssid=""17"">Maximum-entropy models have two benefits for a parser builder.</S>
<S sid=""49"" ssid=""18"">First, as already implicit in our discussion, factoring the probability computation into a sequence of values corresponding to various &amp;quot;features&amp;quot; suggests that the probability model should be easily changeable &#8212; just change the set of features used.</S>
<S sid=""51"" ssid=""20"">Second, and this is a point we have not yet mentioned, the features used in these models need have no particular independence of one another.</S>",Method_Citation
6,A00-2018,N06-1039,0,"Charniak, 2000",0,"After getting a set of basic clusters, we pass them to an existing statistical parser (Charniak, 2000) and rule-based tree normalizer to obtain a GLARFstructure for each sentence in every article","After getting a set of basic clusters, we pass them to an existing statistical parser (Charniak, 2000) and rule-based tree normalizer to obtain a GLARF structure for each sentence in every article","'90','91','92','93','94'","<S sid=""90"" ssid=""1"">We created a parser based upon the maximumentropy-inspired model of the last section, smoothed using standard deleted interpolation.</S>
<S sid=""91"" ssid=""2"">As the generative model is top-down and we use a standard bottom-up best-first probabilistic chart parser [2,7], we use the chart parser as a first pass to generate candidate possible parses to be evaluated in the second pass by our probabilistic model.</S>
<S sid=""92"" ssid=""3"">For runs with the generative model based upon Markov grammar statistics, the first pass uses the same statistics, but conditioned only on standard PCFG information.</S>
<S sid=""93"" ssid=""4"">This allows the second pass to see expansions not present in the training corpus.</S>
<S sid=""94"" ssid=""5"">We use the gathered statistics for all observed words, even those with very low counts, though obviously our deleted interpolation smoothing gives less emphasis to observed probabilities for rare words.</S>",Method_Citation
7,A00-2018,C04-1180,0,2000,0,"The levels of accuracy and robustness recently achieved by statistical parsers (e.g. Collins (1999), Charniak (2000)) have led to their use in a number of NLP applications, such as question-answering (Pasca and Harabagiu, 2001), machine translation (Charniak et al, 2003), sentence simplification (Carroll et al, 1999), and a linguist? s search engine (Resnik and Elkiss, 2003)","The levels of accuracy and robustness recently achieved by statistical parsers (e.g. Collins (1999), Charniak (2000)) have led to their use in a number of NLP applications, such as question-answering (Pasca and Harabagiu, 2001), machine translation (Charniak et al, 2003), sentence simplification (Carroll et al, 1999), and a linguist? s search engine (Resnik and Elkiss, 2003)",0,�NA�,Result_Citation
8,A00-2018,W05-0638,0,"Charniak, 2000",0,"In CoNLL-2005, full parsing trees are provided by two full parsers: the Collins parser (Collins, 1999) and the Charniak parser (Charniak, 2000)","In CoNLL-2005, full parsing trees are provided by two full parsers: the Collins parser (Collins, 1999) and the Charniak parser (Charniak, 2000)",'90',"<S sid=""90"" ssid=""1"">We created a parser based upon the maximumentropy-inspired model of the last section, smoothed using standard deleted interpolation.</S>",Method_Citation
9,A00-2018,P05-1065,0,"Charniak, 2000",0,"We also use a standard statistical parser (Charniak, 2000) to provide syntactic analysis","We also use a standard statistical parser (Charniak, 2000) to provide syntactic analysis","'90','91','92','93','94'","<S sid=""90"" ssid=""1"">We created a parser based upon the maximumentropy-inspired model of the last section, smoothed using standard deleted interpolation.</S>
<S sid=""91"" ssid=""2"">As the generative model is top-down and we use a standard bottom-up best-first probabilistic chart parser [2,7], we use the chart parser as a first pass to generate candidate possible parses to be evaluated in the second pass by our probabilistic model.</S>
<S sid=""92"" ssid=""3"">For runs with the generative model based upon Markov grammar statistics, the first pass uses the same statistics, but conditioned only on standard PCFG information.</S>
<S sid=""93"" ssid=""4"">This allows the second pass to see expansions not present in the training corpus.</S>
<S sid=""94"" ssid=""5"">We use the gathered statistics for all observed words, even those with very low counts, though obviously our deleted interpolation smoothing gives less emphasis to observed probabilities for rare words.</S>",Method_Citation
10,A00-2018,P05-1065,0,"Charniak, 2000",0,"For each article, we calculated the per cent age of a) all word instances (tokens) and b) all unique words (types) not on these lists, resulting in three token OOV rate features and three type OOV rate features per article. The parse features are generated using the Charniak parser (Charniak, 2000) trained on the standard Wall Street Journal Treebank corpus","For each article, we calculated the percentage of a) all word instances (tokens) and b) all unique words (types) not on these lists, resulting in three token OOV rate features and three type OOV rate features per article. The parse features are generated using the Charniak parser (Charniak, 2000) trained on the standard Wall Street Journal Treebank corpus","'38','39','40'","<S sid=""38"" ssid=""7"">To compute a probability in a log-linear model one first defines a set of &amp;quot;features&amp;quot;, functions from the space of configurations over which one is trying to compute probabilities to integers that denote the number of times some pattern occurs in the input.</S>
<S sid=""39"" ssid=""8"">In our work we assume that any feature can occur at most once, so features are boolean-valued: 0 if the pattern does not occur, 1 if it does.</S>
<S sid=""40"" ssid=""9"">In the parser we further assume that features are chosen from certain feature schemata and that every feature is a boolean conjunction of sub-features.</S>",Method_Citation
11,A00-2018,P04-1040,0,2000,0,"The evaluation of the transformed output of the parsers of Charniak (2000) and Collins (1999) gives 90 %unlabelled and 84 %labelled accuracy with respect to dependencies, when measured against a dependency corpus derived from the Penn Treebank.The paper is organized as follows","The evaluation of the transformed output of the parsers of Charniak (2000) and Collins (1999) gives 90 % unlabelled and 84 % labelled accuracy with respect to dependencies, when measured against a dependency corpus derived from the Penn Treebank. The paper is organized as follows",'174',"<S sid=""174"" ssid=""1"">We have presented a lexicalized Markov grammar parsing model that achieves (using the now standard training/testing/development sections of the Penn treebank) an average precision/recall of 91.1% on sentences of length &lt; 40 and 89.5% on sentences of length &lt; 100.</S>",Result_Citation
13,A00-2018,P04-1040,0,2000,0,"As an alternative to hard coded heuristics, Blaheta and Charniak (2000) proposed to recover the Penn functional tags automatically","As an alternative to hard coded heuristics, Blaheta and Charniak (2000) proposed to recover the Penn functional tags automatically",'85',"<S sid=""85"" ssid=""54"">As partition-function calculation is typically the major on-line computational problem for maximum-entropy models, this simplifies the model significantly.</S>",Method_Citation
17,A00-2018,N06-1022,0,2000,0,"The parser of Charniak (2000) is also a two-stage ctf model, where the first stage is a smoothed Markov grammar (it uses up to three previous constituents as context), and the second stage is a lexicalized Markov grammar with extra annotations about parents and grandparents","The parser of Charniak (2000) is also a two-stage ctf model, where the first stage is a smoothed Markov grammar (it uses up to three previous constituents as context), and the second stage is a lexicalized Markov grammar with extra annotations about parents and grandparents","'63','143','146'","<S sid=""63"" ssid=""32"">As we discuss in more detail in Section 5, several different features in the context surrounding c are useful to include in H: the label, head pre-terminal and head of the parent of c (denoted as lp, tp, hp), the label of c's left sibling (lb for &amp;quot;before&amp;quot;), and the label of the grandparent of c (la).</S><S sid=""143"" ssid=""34"">The first is simply that if we first guess the pre-terminal, when we go to guess the head the first thing we can condition upon is the pre-terminal, i.e., we compute p(h I t).</S><S sid=""146"" ssid=""37"">The second major reason why first guessing the pre-terminal makes so much difference is that it can be used when backing off the lexical head in computing the probability of the rule expansion.</S>",Method_Citation
18,A00-2018,N06-1022,0,2000,0,"Most recently, McDonald et al (2005) have implemented a dependency parser with good accuracy (it is almost as good at dependency parsing as Charniak (2000)) and very impressive speed (it is about ten times faster than Collins (1997) and four times faster than Charniak (2000))","Most recently, McDonald et al (2005) have implemented a dependency parser with good accuracy (it is almost as good at dependency parsing as Charniak (2000)) and very impressive speed (it is about ten times faster than Collins (1997) and four times faster than Charniak (2000))","'78','79'","???<S sid=""78"" ssid=""47"">With some prior knowledge, non-zero values can greatly speed up this process because fewer iterations are required for convergence.</S> <S sid=""79"" ssid=""48"">We comment on this because in our example we can substantially speed up the process by choosing values picked so that, when the maximum-entropy equation is expressed in the form of Equation 4, the gi have as their initial values the values of the corresponding terms in Equation 7.</S>",Method_Citation
19,A00-2018,H05-1035,0,"Charniak, 2000",0,"The feature set contains complex information extracted automatically from candidate syntax trees generated by parsing (Charniak, 2000), trees that will be improved by more accurate PP-attachment decisions","The feature set contains complex information extracted automatically from candidate syntax trees generated by parsing (Charniak, 2000), trees that will be improved by more accurate PP-attachment decisions",'90',"<S sid=""90"" ssid=""1"">We created a parser based upon the maximumentropy-inspired model of the last section, smoothed using standard deleted interpolation.</S>",Method_Citation
20,A00-2018,P04-1042,0,2000,0,"Note that the dependency figures of Dienes lag behind even the parsed results for Johnson? s model; this may well be due to the fact that Dienes built his model as an extension of Collins (1999), which lags behind Charniak (2000) by about 1.3-1.5% .Manual investigation of errors on English gold standard data revealed two major issues that suggest further potential for improvement in performance without further increase in algorithmic complexity or training set size","Note that the dependency figures of Dienes lag behind even the parsed results for Johnson's model; this may well be due to the fact that Dienes built his model as an extension of Collins (1999), which lags behind Charniak (2000) by about 1.3-1.5%. Manual investigation of errors on English gold standard data revealed two major issues that suggest further potential for improvement in performance without further increase in algorithmic complexity or training set size",'174',"<S sid=""174"" ssid=""1"">We have presented a lexicalized Markov grammar parsing model that achieves (using the now standard training/testing/development sections of the Penn treebank) an average precision/recall of 91.1% on sentences of length &lt; 40 and 89.5% on sentences of length &lt; 100.</S>",Result_Citation
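
The annotation rows added above keep the header given in the first line of the hunk; several fields (Citation Text, Reference Text) are quoted and contain commas, doubled double-quotes, and embedded newlines, so a single logical row spans multiple physical lines of the file. Below is a minimal Python sketch for reading such a file; it assumes the header is unchanged, and the helper name, path argument, and choice of returned fields are illustrative only.

    import csv

    def read_citances(path):
        """Iterate over citance annotations in a CSV shaped like the one above.

        csv.DictReader is used (with newline="" on open) so that quoted cells
        containing commas, doubled quotes and newlines, such as the multi-sentence
        Reference Text fields, are parsed as single values.
        """
        with open(path, newline="", encoding="utf-8") as f:
            for row in csv.DictReader(f):
                yield {
                    "citance": row["Citance Number"],
                    "reference_article": row["Reference Article"],
                    "citing_article": row["Citing Article"],
                    "citation_text": row["Citation Text Clean"],
                    "reference_text": row["Reference Text"],
                    "facet": row["Discourse Facet"],
                }

    # Example: count annotations per discourse facet.
    # from collections import Counter
    # Counter(r["facet"] for r in read_citances("citances.csv"))
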