/
list.bib
1436 lines (1311 loc) · 91.2 KB
/
list.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@inproceedings{ScharpfSG21,
title = {Fast Linking of Mathematical Wikidata Entities in Wikipedia Articles Using Annotation Recommendation},
url = {https://doi.org/10.1145/3442442.3452348},
doi = {10.1145/3442442.3452348},
pages = {602--609},
booktitle = {Companion of The Web Conference 2021, Virtual Event / Ljubljana, Slovenia, April 19-23, 2021},
publisher = {{ACM} / {IW3C2}},
author = {Scharpf, Philipp and Schubotz, Moritz and Gipp, Bela},
editor = {Leskovec, Jure and Grobelnik, Marko and Najork, Marc and Tang, Jie and Zia, Leila},
date = {2021},
keywords = {!ms\_author, {SchubotzCV}},
bibsource = {dblp computer science bibliography, https://dblp.org},
biburl = {https://dblp.org/rec/conf/www/ScharpfSG21.bib},
timestamp = {Mon, 07 Jun 2021 14:34:36 +0200},
preprint = {https://arxiv.org/pdf/2104.05111.pdf},
}
@inproceedings{OstendorffRSG20,
title = {Pairwise Multi-Class Document Classification for Semantic Relations Between Wikipedia Articles},
booktitle = {Proceedings of the Annual International {ACM}/{IEEE} Joint Conference on Digital Libraries ({JCDL})},
author = {Ostendorff, Malte and Ruas, Terry and Schubotz, Moritz and Gipp, Bela},
date = {2020-08},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_preprint, jabref\_imp2},
oldkey = {Ostendorff2020},
topic = {wiki},
}
@article{GreinerPetterYRM20,
  author       = {Greiner-Petter, André and Youssef, Abdou and Ruas, Terry and Miller, Bruce R. and Schubotz, Moritz and Aizawa, Akiko and Gipp, Bela},
  title        = {Math-word embedding in math search and semantic extraction},
  journaltitle = {Scientometrics},
  shortjournal = {Scientometrics},
  date         = {2020-06-09},
  langid       = {english},
  issn         = {0138-9130, 1588-2861},
  doi          = {10.1007/s11192-020-03502-9},
  url          = {http://link.springer.com/10.1007/s11192-020-03502-9},
  urldate      = {2020-06-16},
  keywords     = {!ag\_author, !bg\_author, !ms\_author, !tr\_author, math search, mathematical information retrieval, nlp, nlp\_embeddings},
}
@inproceedings{ScharpfSYH20,
title = {Classification and Clustering of {arXiv} Documents, Sections, and Abstracts Comparing Encodings of Natural and Mathematical Language},
rights = {All rights reserved},
booktitle = {Proceedings of the {ACM}/{IEEE} Joint Conference on Digital Libraries ({JCDL})},
author = {Scharpf, Philipp and Schubotz, Moritz and Youssef, Abdou and Hamborg, Felix and Meuschke, Norman and Gipp, Bela},
date = {2020-06},
keywords = {!bg, !bg\_author, !fh, !fh\_author, !ms, !ms\_author, !nm, !nm\_author, \#nosource, jabref\_imp1\_clean},
ids = {ScharpfSYH20a},
oldkey = {Scharpf2020},
topic = {mathir},
}
@inproceedings{SchubotzGMT20,
  author     = {Schubotz, Moritz and Greiner-Petter, André and Meuschke, Norman and Teschke, Olaf and Gipp, Bela},
  title      = {Mathematical Formulae in Wikimedia Projects 2020},
  booktitle  = {Proceedings of the {ACM}/{IEEE} Joint Conference on Digital Libraries ({JCDL})},
  date       = {2020-05-06},
  doi        = {10.1145/3383583.3398557},
  eprinttype = {arxiv},
  eprint     = {2003.09417},
  url        = {http://arxiv.org/abs/2003.09417},
  urldate    = {2020-06-11},
  abstract   = {This poster summarizes our contributions to Wikimedia's processing pipeline for mathematical formulae. We describe how we have supported the transition from rendering formulae as course-grained {PNG} images in 2001 to providing modern semantically enriched language-independent {MathML} formulae in 2020. Additionally, we describe our plans to improve the accessibility and discoverability of mathematical knowledge in Wikimedia projects further.},
  keywords   = {!ms\_author, !nm\_author},
}
@inproceedings{GreinerPetterSMB20,
title = {Discovering Mathematical Objects of Interest - a Study of Mathematical Notations},
doi = {10.1145/3366423.3380218},
booktitle = {Proceedings of the Web Conference 2020 ({WWW}'20), April 20–24, 2020, Taipei, Taiwan},
author = {Greiner-Petter, André and Schubotz, Moritz and Müller, Fabian and Breitinger, Corinna and Cohl, Howard S. and Aizawa, Akiko and Gipp, Bela},
date = {2020-04},
keywords = {!bg, !bg\_author, !bg\_preprint, !cb, !cb\_author, !ms, !ms\_author, !ms\_cv, !ms\_preprint, {DFG}1259-1, jabref\_imp2, old\_tex\_field\_preprint},
core = {0;Core Rank A*;http://portal.core.edu.au/conf-ranks/1548/},
oldkey = {GreinerPetter2020},
preprint = {https://ag-gipp.github.io/bib/preprints/greinerpetter2020.pdf},
topic = {mathir},
}
@incollection{FoltynekRSM20,
location = {Cham},
title = {Detecting Machine-Obfuscated Plagiarism},
series = {Lecture Notes in Computer Science},
volume = {12051},
rights = {Creative Commons Attribution-{ShareAlike} 4.0 International License ({CC}-{BY}-{SA})},
isbn = {978-3-030-43686-5 978-3-030-43687-2},
url = {http://link.springer.com/10.1007/978-3-030-43687-2_68},
abstract = {Research on academic integrity has identified online paraphrasing tools as a severe threat to the effectiveness of plagiarism detection systems. To enable the automated identification of machine-paraphrased text, we make three contributions. First, we evaluate the effectiveness of six prominent word embedding models in combination with five classifiers for distinguishing human-written from machine-paraphrased text. The best performing classification approach achieves an accuracy of 99.0\% for documents and 83.4\% for paragraphs. Second, we show that the best approach outperforms human experts and established plagiarism detection systems for these classification tasks. Third, we provide a Web application that uses the best performing classification approach to indicate whether a text underwent machine-paraphrasing. The data and code of our study are openly available.},
pages = {816--827},
booktitle = {Sustainable Digital Communities},
publisher = {Springer International Publishing},
author = {Foltýnek, Tomáš and Ruas, Terry and Scharpf, Philipp and Meuschke, Norman and Schubotz, Moritz and Grosky, William and Gipp, Bela},
editor = {Sundqvist, Anneli and Berget, Gerd and Nolin, Jan and Skjerdingstad, Kjell Ivar},
date = {2020-03},
langid = {english},
doi = {10.1007/978-3-030-43687-2_68},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_preprint, !nm, !nm\_author, !nm\_preprint, jabref\_imp1\_clean},
ids = {FoltynekRSM20a},
oldkey = {Foltynek2020},
topic = {pd},
}
@inproceedings{IhleSMG20,
location = {New York, {NY}, {USA}},
title = {A First Step Towards Content Protecting Plagiarism Detection},
isbn = {978-1-4503-7585-6},
url = {https://doi.org/10.1145/3383583.3398620},
doi = {10.1145/3383583.3398620},
series = {{JCDL} '20},
abstract = {Plagiarism detection systems are essential tools for safeguarding academic and educational integrity. However, today's systems require disclosing the full content of the input documents and the document collection to which the input documents are compared. Moreover, the systems are centralized and under the control of individual, typically commercial providers. This situation raises procedural and legal concerns regarding the confidentiality of sensitive data, which can limit or prohibit the use of plagiarism detection services. To eliminate these weaknesses of current systems, we seek to devise a plagiarism detection approach that does not require a centralized provider nor exposing any content as cleartext. This paper presents the initial results of our research. Specifically, we employ Private Set Intersection to devise a content-protecting variant of the citation-based similarity measure Bibliographic Coupling implemented in our plagiarism detection system {HyPlag}. Our evaluation shows that the content-protecting method achieves the same detection effectiveness as the original method while making common attacks to disclose the protected content practically infeasible. Our future work will extend this successful proof-of-concept by devising plagiarism detection methods that can analyze the entire content of documents without disclosing it as cleartext.},
pages = {341--344},
booktitle = {Proceedings of the {ACM}/{IEEE} Joint Conference on Digital Libraries ({JCDL})},
publisher = {Association for Computing Machinery},
author = {Ihle, Cornelius and Schubotz, Moritz and Meuschke, Norman and Gipp, Bela},
date = {2020},
keywords = {!bg, !bg\_author, !ci, !ci\_author, !ms, !ms\_author, !nm, !nm\_author, \#nosource, decentralized\_open\_science, jabref\_imp1\_clean, plagiarism detection, private computation, similarity detection},
ids = {IhleSMG20a},
oldkey = {Ihle2020},
topic = {pd},
core = {A*;Core Rank A*;http://portal.core.edu.au/conf-ranks/2085/},
preprint = {https://www.gipp.com/wp-content/papercite-data/pdf/ihle2020.pdf},
}
@inproceedings{SchubotzTSM19,
location = {Czech Republic},
title = {Forms of Plagiarism in Digital Mathematical Libraries},
series = {Lecture Notes in Computer Science},
volume = {11617},
rights = {Creative Commons Attribution 4.0 International License ({CC}-{BY})},
doi = {10.1007/978-3-030-23250-4_18},
abstract = {We report on an exploratory analysis of the forms of plagiarism observable in mathematical publications, which we identified by investigating editorial notes from {zbMATH}. While most cases we encountered were simple copies of earlier work, we also identified several forms of disguised plagiarism. We investigated 11 cases in detail and evaluate how current plagiarism detection systems perform in identifying these cases. Moreover, we describe the steps required to discover these and potentially undiscovered cases in the future.},
pages = {258--274},
booktitle = {Proceedings International Conference on Intelligent Computer Mathematics},
author = {Schubotz, Moritz and Teschke, Olaf and Stange, Vincent and Meuschke, Norman and Gipp, Bela},
date = {2019-07},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, !nm, !nm\_author, !nm\_preprint, {DFG}1259-1, jabref\_imp1\_clean, old\_tex\_field\_preprint},
ids = {SchubotzTSM19a},
oldkey = {Schubotz2019},
preprint = {https://ag-gipp.github.io/bib/preprints/schubotz2019.pdf},
topic = {pd},
}
@article{GreinerPetterSCG19,
title = {Semantic Preserving Bijective Mappings for Expressions Involving Special Functions in Computer Algebra Systems and Document Preparation Systems},
volume = {71},
issn = {2050-3806},
doi = {10.1108/ajim-08-2018-0185},
pages = {415--439},
number = {3},
journaltitle = {Aslib Journal of Information Management},
author = {Greiner-Petter, André and Schubotz, Moritz and Cohl, Howard S. and Gipp, Bela},
date = {2019-07},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, {DFG}1259-1, jabref\_imp2, old\_tex\_field\_preprint},
biburl = {https://www.emeraldinsight.com/action/showCitFormats?doi=10.1108%2FAJIM-08-2018-0185},
oldkey = {GreinerPetter2019},
preprint = {https://ag-gipp.github.io/bib/preprints/greinerpetter2019.pdf},
topic = {mathir},
}
@inproceedings{MeuschkeSSK19,
location = {Urbana-Champaign, Illinois, {USA}},
title = {Improving Academic Plagiarism Detection for {STEM} Documents by Analyzing Mathematical Content and Citations},
rights = {Creative Commons Attribution 4.0 International License ({CC}-{BY})},
isbn = {978-1-72811-547-4},
doi = {10.1109/jcdl.2019.00026},
abstract = {Identifying academic plagiarism is a pressing task for educational and research institutions, publishers, and funding agencies. Current plagiarism detection systems reliably find instances of copied and moderately reworded text. However, reliably detecting concealed plagiarism, such as strong paraphrases, translations, and the reuse of nontextual content and ideas is an open research problem. In this paper, we extend our prior research on analyzing mathematical content and academic citations. Both are promising approaches for improving the detection of concealed academic plagiarism primarily in Science, Technology, Engineering and Mathematics ({STEM}). We make the following contributions: i) We present a two-stage detection process that combines similarity assessments of mathematical content, academic citations, and text. ii) We introduce new similarity measures that consider the order of mathematical features and outperform the measures in our prior research. iii) We compare the effectiveness of the math-based, citation-based, and text-based detection approaches using confirmed cases of academic plagiarism. iv) We demonstrate that the combined analysis of math-based and citation-based content features allows identifying potentially suspicious cases in a collection of 102K {STEM} documents. Overall, we show that analyzing the similarity of mathematical content and academic citations is a striking supplement for conventional text-based detection approaches for academic literature in the {STEM} disciplines. The data and code of our study are openly available at https://purl.org/{hybridPD}},
pages = {120--129},
booktitle = {Proceedings of the Annual International {ACM}/{IEEE} Joint Conference on Digital Libraries ({JCDL})},
author = {Meuschke, Norman and Stange, Vincent and Schubotz, Moritz and Kramer, Michael and Gipp, Bela},
date = {2019-06},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, !nm, !nm\_author, !nm\_preprint, 21InfWissHb, {DFG}1259-1, jabref\_imp1\_clean, old\_tex\_field\_preprint, pd\_litrev19},
ids = {MeuschkeSSK19a},
core = {0;Core Rank A*;http://portal.core.edu.au/conf-ranks/2085/},
oldkey = {Meuschke2019},
preprint = {https://www.gipp.com/wp-content/papercite-data/pdf/meuschke2019.pdf},
topic = {pd},
}
@inproceedings{WortnerSLG19,
location = {Urbana-Champaign, {IL}, {USA}},
title = {Securing the Integrity of Time Series Data in Open Science Projects Using Blockchain-Based Trusted Timestamping},
booktitle = {Proceedings of the Workshop on Web Archiving and Digital Libraries ({WADL}) co-located with the Annual International {ACM}/{IEEE} Joint Conference on Digital Libraries ({JCDL})},
author = {Wortner, Patrick and Schubotz, Moritz and Breitinger, Corinna and Leible, Stephan and Gipp, Bela},
date = {2019-06},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, decentralized\_open\_science, jabref\_imp2, old\_tex\_field\_preprint},
oldkey = {Wortner2019},
preprint = {https://www.gipp.com/wp-content/papercite-data/pdf/wortner2019.pdf},
topic = {blockchain},
}
@inproceedings{ScharpfSCG19,
title = {Towards Formula Concept Discovery and Recognition},
volume = {2414},
url = {http://ceur-ws.org/Vol-2414/paper11.pdf},
series = {{CEUR} Workshop Proceedings},
pages = {108--115},
booktitle = {Proceedings of the 4th Joint Workshop on Bibliometric-Enhanced Information Retrieval and Natural Language Processing for Digital Libraries ({BIRNDL} 2019) co-located with the 42nd Annual International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval, Paris, France, July 25, 2019},
publisher = {{CEUR}-{WS}.org},
author = {Scharpf, Philipp and Schubotz, Moritz and Cohl, Howard S. and Gipp, Bela},
editor = {Chandrasekaran, Muthu Kumar and Mayr, Philipp},
date = {2019},
keywords = {!bg, !bg\_author, !ms, !ms\_author, !ms\_cv, \#nosource, {DFG}1259-1, jabref\_imp2, old\_tex\_field\_preprint, ⛔ No {DOI} found},
ids = {ScharpfSCG19a},
biburl = {https://dblp.org/rec/bib/conf/sigir/ScharpfSCG19},
oldkey = {Scharpf2019a},
preprint = {http://ceur-ws.org/Vol-2414/paper11.pdf},
topic = {mathir},
}
@inproceedings{GreinerPetterRSA19,
location = {Paris, France},
title = {Why Machines Cannot Learn Mathematics, Yet},
booktitle = {4th Joint Workshop on Bibliometric-Enhanced Information Retrieval and Natural Language Processing for Digital Libraries co-located with the 42nd Annual International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval},
author = {Greiner-Petter, André and Ruas, Terry and Schubotz, Moritz and Aizawa, Akiko and Grosky, William and Gipp, Bela},
date = {2019},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, {DFG}1259-1, jabref\_imp2, old\_tex\_field\_preprint, ⛔ No {DOI} found},
ids = {GreinerPetter2019b},
oldkey = {GreinerPetter2019a},
topic = {mathir},
url_orig = {http://ceur-ws.org/Vol-2414/paper14.pdf},
}
@inproceedings{ScharpfMSB19,
title = {{AnnoMathTeX} - a Formula Identifier Annotation Recommender System for {STEM} Documents},
url = {https://doi.org/10.1145/3298689.3347042},
doi = {10.1145/3298689.3347042},
pages = {532--533},
booktitle = {Proceedings of the 13th {ACM} Conference on Recommender Systems 2019, Copenhagen, Denmark, September 16-20, 2019},
publisher = {{ACM}},
author = {Scharpf, Philipp and Mackerracher, Ian and Schubotz, Moritz and Beel, Jöran and Breitinger, Corinna and Gipp, Bela},
editor = {Bogers, Toine and Said, Alan and Brusilovsky, Peter and Tikk, Domonkos},
date = {2019},
keywords = {!bg, !bg\_author, !bg\_preprint, !cb, !cb\_author, !jb, !jb\_author, !ms, !ms\_author, !ms\_cv, !ms\_preprint, {DFG}1259-1, jabref\_imp2, old\_tex\_field\_preprint},
biburl = {https://dblp.org/rec/bib/conf/recsys/ScharpfMSBBG19},
core = {B;Core Rank B;http://portal.core.edu.au/conf-ranks/28/},
homepage = {https://annomathtex.wmflabs.org/},
oldkey = {Scharpf2019b},
preprint = {https://www.gipp.com/wp-content/papercite-data/pdf/scharpf2019b.pdf},
topic = {mathir},
}
@article{HulekMST19,
title = {Mathematical Research Data – an Analysis Through {zbMATH} References},
volume = {113},
issn = {1027-488X},
url = {https://www.ems-ph.org/journals/show_pdf.php?issn=1027-488X&vol=9&iss=113&rank=14},
doi = {10.4171/news/113/14},
pages = {54--57},
journaltitle = {European Mathematical Society Newsletter},
author = {Hulek, Klaus and Müller, Fabian and Schubotz, Moritz and Teschke, Olaf},
date = {2019},
keywords = {!ms, !ms\_author, !ms\_cv, \#nosource, jabref\_imp2, old\_tex\_field\_preprint},
biburl = {https://zbmath.org/bibtex/07111212.bib},
oldkey = {Hulek19},
publisher = {European Mathematical Society (EMS) Publishing House, Zurich},
}
@article{LeibleSSG19,
title = {A Review on Blockchain Technology and Blockchain Projects Fostering Open Science},
volume = {2},
issn = {2624-7852},
url = {https://www.frontiersin.org/article/10.3389/fbloc.2019.00016},
doi = {10.3389/fbloc.2019.00016},
abstract = {Many sectors, like finance, medicine, manufacturing, and education, use blockchain applications to profit from the unique bundle of characteristics of this technology. Blockchain technology ({BT}) promises benefits in trustability, collaboration, organization, identification, credibility, and transparency. In this paper, we conduct an analysis in which we show how open science can benefit from this technology and its properties. For this, we determined the requirements of an open science ecosystem and compared them with the characteristics of {BT} to prove that the technology suits as an infrastructure. We also review literature and promising blockchain-based projects for open science to describe the current research situation. To this end, we examine the projects in particular for their relevance and contribution to open science and categorize them afterwards according to their primary purpose. Several of them already provide functionalities that can have a positive impact on current research workflows. So, {BT} offers promising possibilities for its use in science, but why is it then not used on a large-scale in that area? To answer this question, we point out various shortcomings, challenges, unanswered questions, and research potentials that we found in the literature and identified during our analysis. These topics shall serve as starting points for future research to foster the {BT} for open science and beyond, especially in the long-term.},
pages = {16},
journaltitle = {Frontiers in Blockchain},
author = {Leible, Stephan and Schlager, Steffen and Schubotz, Moritz and Gipp, Bela},
date = {2019},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_preprint, jabref\_imp2},
biburl = {https://www.frontiersin.org/articles/10.3389/fbloc.2019.00016/bibTex},
oldkey = {Leible2019},
preprint = {https://www.gipp.com/wp-content/papercite-data/pdf/leible2019.pdf},
topic = {blockchain},
}
@article{SchubotzT19,
title = {Four Decades of {TeX} at {zbMATH}},
volume = {112},
issn = {1027-488X},
url = {http://www.ems-ph.org/journals/show_pdf.php?issn=1027-488x&vol=6&iss=112&rank=15},
doi = {10.4171/news/112/15},
pages = {50--52},
journaltitle = {European Mathematical Society Newsletter},
author = {Schubotz, Moritz and Teschke, Olaf},
date = {2019},
keywords = {!ms, !ms\_author, !ms\_cv, \#nosource, jabref\_imp2, old\_tex\_field\_preprint},
biburl = {https://zbmath.org/bibtex/07065264.bib},
oldkey = {Schubotz2019b},
publisher = {European Mathematical Society (EMS) Publishing House, Zurich},
}
@inproceedings{LeibleSSG18,
title = {Fostering Open Science by Using Blockchain Technology},
url = {https://zenodo.org/record/2454725/files/Blockchain-For-Science%20Poster-1.0-final.pdf?download=1},
doi = {10.5281/zenodo.2454725},
author = {Leible, Stephan and Schlager, Steffen and Schubotz, Moritz and Gipp, Bela},
date = {2018-11},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, jabref\_imp2, old\_tex\_field\_preprint},
biburl = {https://zenodo.org/record/2454725/export/hx#.XCJgpMYo8ax},
oldkey = {Leible2018},
url_orig = {https://doi.org/10.5281/zenodo.2454725},
}
@article{SchubotzBHG18,
title = {Repurposing Open Source Tools for Open Science: A Practical Guide},
url = {https://zenodo.org/record/2453415/files/bc4openScience.pdf?download=1},
doi = {10.5281/zenodo.2453415},
author = {Schubotz, Moritz and Breitinger, Corinna and Hepp, Thomas and Gipp, Bela},
date = {2018-11},
keywords = {!bg, !bg\_author, !bg\_preprint, !cb, !cb\_author, !ms, !ms\_author, !ms\_cv, !ms\_preprint, jabref\_imp2, old\_tex\_field\_preprint},
biburl = {https://zenodo.org/record/2453415/export/hx#.XCJhJ8Yo8aw},
oldkey = {Schubotz2018d},
url_orig = {https://doi.org/10.5281/zenodo.2453415},
}
@inproceedings{MeuschkeSSG18,
location = {Ann Arbor, {MI}, {USA}},
title = {{HyPlag}: A Hybrid Approach to Academic Plagiarism Detection},
rights = {Creative Commons Attribution 4.0 International License ({CC}-{BY})},
isbn = {978-1-4503-5657-2},
doi = {10.1145/3209978.3210177},
abstract = {Current plagiarism detection systems reliably find instances of copied and moderately altered text, but often fail to detect strong paraphrases, translations, and the reuse of non-textual content and ideas. To improve upon the detection capabilities for such concealed content reuse in academic publications, we make four contributions: i) We present the first plagiarism detection approach that combines the analysis of mathematical expressions, images, citations and text. ii) We describe the implementation of this hybrid detection approach in the research prototype {HyPlag}. iii) We present novel visualization and interaction concepts to aid users in reviewing content similarities identified by the hybrid detection approach. iv) We demonstrate the usefulness of the hybrid detection and result visualization approaches by using {HyPlag} to analyze a confirmed case of content reuse present in a retracted research publication.},
pages = {1321--1324},
booktitle = {Proceedings of the 41st International {ACM} {SIGIR} Conference on Research \& Development in Information Retrieval},
author = {Meuschke, Norman and Stange, Vincent and Schubotz, Moritz and Gipp, Bela},
date = {2018-06},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, !nm, !nm\_author, !nm\_preprint, 21InfWissHb, {DFG}1259-1, jabref\_imp1\_clean, old\_tex\_field\_preprint, pd},
ids = {MeuschkeSSG18a},
biburl = {https://dblp.uni-trier.de/rec/bibtex/conf/sigir/MeuschkeSSG18},
core = {A*},
oldkey = {Meuschke2018a},
preprint = {https://www.gipp.com/wp-content/papercite-data/pdf/meuschke2018a.pdf},
topic = {pd},
}
@inproceedings{SchubotzGSM18,
author = {Schubotz, Moritz and Greiner-Petter, André and Scharpf, Philipp and Meuschke, Norman and Cohl, Howard S. and Gipp, Bela},
title = {Improving the Representation and Conversion of Mathematical Formulae by Considering their Textual Context},
booktitle = {Proceedings of the 18th {ACM}/{IEEE} on Joint Conference on Digital Libraries ({JCDL})},
date = {2018-06},
location = {Fort Worth, {USA}},
publisher = {{ACM}},
pages = {233--242},
isbn = {978-1-4503-5178-2},
doi = {10.1145/3197026.3197058},
url = {http://doi.acm.org/10.1145/3197026.3197058},
url_orig = {http://doi.acm.org/10.1145/3197026.3197058},
preprint = {https://arxiv.org/pdf/1804.04956.pdf},
rights = {Creative Commons Attribution 4.0 International License ({CC}-{BY})},
abstract = {Mathematical formulae represent complex semantic information in a concise form. Especially in Science, Technology, Engineering, and Mathematics, mathematical formulae are crucial to communicate information, e.g., in scientific papers, and to perform computations using computer algebra systems. Enabling computers to access the information encoded in mathematical formulae requires machine-readable formats that can represent both the presentation and content, i.e., the semantics, of formulae. Exchanging such information between systems additionally requires conversion methods for mathematical representation formats. We analyze how the semantic enrichment of formulae improves the format conversion process and show that considering the textual context of formulae reduces the error rate of such conversions. Our main contributions are: (1) providing an openly available benchmark dataset for the mathematical format conversion task consisting of a newly created test collection, an extensive, manually curated gold standard and task-specific evaluation metrics; (2) performing a quantitative evaluation of state-of-the-art tools for mathematical format conversions; (3) presenting a new approach that considers the textual context of formulae to reduce the error rate for mathematical format conversions. Our benchmark dataset facilitates future research on mathematical format conversions as well as research on many problems in mathematical information retrieval. Because we annotated and linked all components of formulae, e.g., identifiers, operators and other entities, to Wikidata entries, the gold standard can, for instance, be used to train methods for formula concept discovery and recognition. Such methods can then be applied to improve mathematical information retrieval systems, e.g., for semantic formula search, recommendation of mathematical content, or detection of mathematical plagiarism.},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, !nm, !nm\_author, !nm\_preprint, {DFG}1259-1, jabref\_imp1\_clean, old\_tex\_field\_preprint},
ids = {Schubotz2018c, SchubotzGSM18a},
oldkey = {SchubotzGSMCG18},
biburl = {https://dblp.org/rec/bib/conf/jcdl/SchubotzGSMCG18},
core = {0;Core Rank A*;http://portal.core.edu.au/conf-ranks/2085/},
note = {tex.ids: Schubotz2018c, {SchubotzGSM}18a
tex.biburl: https://dblp.org/rec/bib/conf/jcdl/{SchubotzGSMCG}18
tex.core: 0;Core Rank A*;http://portal.core.edu.au/conf-ranks/2085/
tex.oldkey: {SchubotzGSMCG}18
tex.preprint: https://arxiv.org/pdf/1804.04956.pdf
tex.url\_orig: http://doi.acm.org/10.1145/3197026.3197058},
}
@inproceedings{SchubotzSDN18,
author = {Schubotz, Moritz and Scharpf, Philipp and Dudhat, Kaushal and Nagar, Yash and Hamborg, Felix and Gipp, Bela},
title = {Introducing {MathQA} - a Math-Aware Question Answering System},
booktitle = {Proceedings of the Annual International {ACM}/{IEEE} Joint Conference on Digital Libraries ({JCDL}), Workshop on Knowledge Discovery},
date = {2018-06},
location = {Fort Worth, {USA}},
doi = {10.1108/idd-06-2018-0022},
preprint = {https://www.emeraldinsight.com/eprint/FXthtRDDEGcMVzInHphu/full},
keywords = {!bg, !bg\_author, !bg\_preprint, !fh, !fh\_author, !ms, !ms\_author, !ms\_cv, !ms\_preprint, {DFG}1259-1, jabref\_imp2, old\_tex\_field\_preprint},
topic = {mathir},
oldkey = {Schubotz2018a},
biburl = {https://www.emeraldinsight.com/action/showCitFormats?doi=10.1108%2FIDD-06-2018-0022},
note = {tex.biburl: https://www.emeraldinsight.com/action/{showCitFormats}?doi=10.1108\%2FIDD-06-2018-0022
tex.oldkey: Schubotz2018a
tex.preprint: https://www.emeraldinsight.com/eprint/{FXthtRDDEGcMVzInHphu}/full
tex.topic: mathir},
}
@inproceedings{PetersenSG18,
author = {Petersen, Felix and Schubotz, Moritz and Gipp, Bela},
title = {Towards Formula Translation Using Recursive Neural Networks},
booktitle = {Proceedings of the 11th Conference on Intelligent Computer Mathematics ({CICM})},
date = {2018},
location = {Hagenberg, Austria},
url = {http://arxiv.org/abs/1811.04234},
preprint = {https://www.gipp.com/wp-content/papercite-data/pdf/petersen2018.pdf},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, {DFG}1259-1, jabref\_imp2, old\_tex\_field\_preprint, ⛔ No {DOI} found},
topic = {mathir},
oldkey = {Petersen2018},
biburl = {https://dblp.org/rec/bib/journals/corr/abs-1811-04234},
note = {tex.biburl: https://dblp.org/rec/bib/journals/corr/abs-1811-04234
tex.oldkey: Petersen2018
tex.preprint: https://www.gipp.com/wp-content/papercite-data/pdf/petersen2018.pdf
tex.topic: mathir},
}
@inproceedings{ScharpfSG18,
title = {Representing Mathematical Formulae in Content {MathML} Using Wikidata},
volume = {2132},
url = {http://ceur-ws.org/Vol-2132/paper5.pdf},
series = {{CEUR} Workshop Proceedings},
pages = {46--59},
booktitle = {Proceedings of the 3rd Joint Workshop on Bibliometric-Enhanced Information Retrieval and Natural Language Processing for Digital Libraries ({BIRNDL} 2018) co-located with the 41st International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval ({SIGIR} 2018), Ann Arbor, {USA}, July 12, 2018},
publisher = {{CEUR}-{WS}.org},
author = {Scharpf, Philipp and Schubotz, Moritz and Gipp, Bela},
editor = {Mayr, Philipp and Chandrasekaran, Muthu Kumar and Jaidka, Kokil},
date = {2018},
note = {tex.biburl: https://dblp.org/rec/bib/conf/sigir/{ScharpfSG}18
tex.oldkey: Scharpf2018
tex.preprint: https://ag-gipp.github.io/bib/preprints/scharpf2018.pdf
tex.topic: mathir},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, {DFG}1259-1, jabref\_imp2, old\_tex\_field\_preprint, ⛔ No {DOI} found},
biburl = {https://dblp.org/rec/bib/conf/sigir/ScharpfSG18},
oldkey = {Scharpf2018},
preprint = {https://ag-gipp.github.io/bib/preprints/scharpf2018.pdf},
topic = {mathir},
}
@inproceedings{Schubotz18CICM,
title = {Generating {OpenMath} Content Dictionaries from Wikidata},
doi = {10.5281/zenodo.1409946},
booktitle = {Joint Proceedings of the {CME}-{EI}, {FMM}, {CAAT}, {FVPS}, {M3SRD}, {OpenMath} Workshops, Doctoral Program and Work in Progress at the Conference on Intelligent Computer Mathematics 2018 co-located with the 11th Conference on Intelligent Computer Mathematics ({CICM} 2018)},
author = {Schubotz, Moritz},
editor = {Hasan, Osman and Youssef, Abdou and Naumowicz, Adam and Farmer, William and Kaliszyk, Cezary and Gallois-Wong, Diane and Rabe, Florian and Dos Reis, Gabriel and Passmore, Grant and Davenport, James and Pfeiffer, Markus and Kohlhase, Michael and Autexier, Serge and Tahar, Sofiene and Koprucki, Thomas and Siddique, Umair and Neuper, Walther and Windsteiger, Wolfgang and Schreiner, Wolfgang and Sperber, Wolfram and Kovács, Zoltán},
date = {2018},
note = {tex.oldkey: Schubotz2018b
tex.preprint: https://github.com/ag-gipp/18CicmWikidata/releases/download/build-master-2018-10-16-15/main.pdf
tex.topic: mathir},
keywords = {!ms, !ms\_author, !ms\_cv, !ms\_preprint, {DFG}1259-1, jabref\_imp2, old\_tex\_field\_preprint},
oldkey = {Schubotz2018b},
preprint = {https://github.com/ag-gipp/18CicmWikidata/releases/download/build-master-2018-10-16-15/main.pdf},
topic = {mathir},
}
@inproceedings{HamborgLSH18,
title = {Giveme5W: Main Event Retrieval from News Articles by Extraction of the Five Journalistic W Questions},
volume = {10766},
url = {https://doi.org/10.1007/978-3-319-78105-1_39},
doi = {10.1007/978-3-319-78105-1_39},
series = {Lecture Notes in Computer Science},
pages = {356--366},
booktitle = {Transforming Digital Worlds - 13th International Conference, {iConference} 2018, Sheffield, {UK}, March 25-28, 2018, Proceedings},
publisher = {Springer},
author = {Hamborg, Felix and Lachnit, Soeren and Schubotz, Moritz and Hepp, Thomas and Gipp, Bela},
editor = {Chowdhury, Gobinda and {McLeod}, Julie and Gillet, Valerie J. and Willett, Peter},
date = {2018},
note = {tex.biburl: https://dblp.org/rec/bib/conf/iconference/{HamborgLSHG}18
tex.oldkey: Hamborg2018a
tex.preprint: https://www.gipp.com/wp-content/papercite-data/pdf/hamborg2018.pdf
tex.topic: newsanalysis},
keywords = {!bg, !bg\_author, !bg\_preprint, !fh, !fh\_author, !ms, !ms\_author, !ms\_cv, !ms\_preprint, jabref\_imp2, old\_tex\_field\_preprint},
biburl = {https://dblp.org/rec/bib/conf/iconference/HamborgLSHG18},
oldkey = {Hamborg2018a},
preprint = {https://www.gipp.com/wp-content/papercite-data/pdf/hamborg2018.pdf},
topic = {newsanalysis},
}
@inproceedings{Schubotz18GI,
title = {Mathematische Formeln in Wikipedia},
doi = {10.17877/de290r-19676},
pages = {1635--1638},
booktitle = {Beiträge zum Mathematikunterricht 2018},
publisher = {Gesellschaft für Didaktik der Mathematik},
author = {Schubotz, Moritz},
editor = {{Fachgruppe Didaktik der Mathematik der Universität Paderborn}},
date = {2018},
langid = {german},
note = {tex.biburl: https://search.datacite.org/works/10.17877/{DE}290R-19676
tex.oldkey: Schubotz2018
tex.preprint: https://eldorado.tu-dortmund.de/bitstream/2003/37681/1/{BzMU}18\_SCHUBOTZ\_mathwiki.pdf
tex.topic: mathir},
keywords = {!ms, !ms\_author, !ms\_cv, !ms\_preprint, {DFG}1259-1, jabref\_imp2, old\_tex\_field\_preprint},
biburl = {https://search.datacite.org/works/10.17877/DE290R-19676},
oldkey = {Schubotz2018},
preprint = {https://eldorado.tu-dortmund.de/bitstream/2003/37681/1/BzMU18_SCHUBOTZ_mathwiki.pdf},
topic = {mathir},
}
@article{HeckS18,
title = {{DiViDu} - an Open Source Solution for Dual Task Experiments with Integrated Divided Visual Field Paradigm},
volume = {6},
url = {https://doi.org/10.5334/jors.199},
doi = {10.5334/jors.199},
journaltitle = {Journal of Open Research Software},
author = {Heck, Nina and Schubotz, Moritz},
date = {2018},
note = {tex.biburl: https://kops.uni-konstanz.de/handle/123456789/42726
tex.oldkey: Heck2018
tex.publisher: Ubiquity Press, Ltd.},
keywords = {!ms, !ms\_author, !ms\_cv, \#nosource, jabref\_imp2, old\_tex\_field\_preprint},
biburl = {https://kops.uni-konstanz.de/handle/123456789/42726},
oldkey = {Heck2018},
publisher = {Ubiquity Press, Ltd.},
}
@incollection{GreinerPetterSCG18,
title = {{MathTools}: An open {API} for convenient {MathML} handling},
volume = {11006},
url = {https://www.gipp.com/wp-content/papercite-data/pdf/greinerpetter2018.pdf},
series = {Lecture Notes in Computer Science},
pages = {104--110},
booktitle = {Intelligent Computer Mathematics - 11th International Conference, {CICM} 2018, Hagenberg, Austria, August 13-17, 2018, Proceedings},
publisher = {Springer},
author = {Greiner-Petter, André and Schubotz, Moritz and Cohl, Howard S. and Gipp, Bela},
editor = {Rabe, Florian and Farmer, William M. and Passmore, Grant O. and Youssef, Abdou},
date = {2018},
doi = {10.1007/978-3-319-96812-4_9},
note = {tex.ids: {GreinerPetterSCG}18a
tex.biburl: https://dblp.org/rec/bib/conf/mkm/Greiner-{PetterS}18
tex.oldkey: Greiner-Petter2018
tex.topic: mathir
tex.url\_orig: https://doi.org/10.1007/978-3-319-96812-4\_9
{seriesTitle}: Lecture Notes in Computer Science},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, {DFG}1259-1, jabref\_imp2, old\_tex\_field\_preprint},
ids = {GreinerPetterSCG18a},
biburl = {https://dblp.org/rec/bib/conf/mkm/Greiner-PetterS18},
oldkey = {Greiner-Petter2018},
topic = {mathir},
url_orig = {https://doi.org/10.1007/978-3-319-96812-4_9},
}
@inproceedings{CohlGS18,
title = {Automated Symbolic and Numerical Testing of {DLMF} Formulae Using Computer Algebra Systems},
volume = {11006},
url = {http://hcohl.sdf.org/CICM2018_Chap4.pdf},
doi = {10.1007/978-3-319-96812-4_4},
series = {Lecture Notes in Computer Science},
pages = {39--52},
booktitle = {Intelligent Computer Mathematics - 11th International Conference, {CICM} 2018, Hagenberg, Austria, August 13-17, 2018, Proceedings},
publisher = {Springer},
author = {Cohl, Howard S. and Greiner-Petter, André and Schubotz, Moritz},
editor = {Rabe, Florian and Farmer, William M. and Passmore, Grant O. and Youssef, Abdou},
date = {2018},
note = {tex.biburl: https://dblp.uni-trier.de/rec/bibtex/conf/mkm/{CohlGS}18
tex.oldkey: Cohl2018
tex.url\_orig: https://doi.org/10.1007/978-3-319-96812-4\_4},
keywords = {!ms, !ms\_author, !ms\_cv, !ms\_preprint, {DFG}1259-1, jabref\_imp2, old\_tex\_field\_preprint},
biburl = {https://dblp.uni-trier.de/rec/bibtex/conf/mkm/CohlGS18},
oldkey = {Cohl2018},
url_orig = {https://doi.org/10.1007/978-3-319-96812-4_4},
}
@inproceedings{HamborgBSL18,
author = {Hamborg, Felix and Breitinger, Corinna and Schubotz, Moritz and Lachnit, Soeren and Gipp, Bela},
editor = {Chen, Jiangping and Gonçalves, Marcos André and Allen, Jeff M. and Fox, Edward A. and Kan, Min-Yen and Petras, Vivien},
title = {Extraction of Main Event Descriptors from News Articles by Answering the Journalistic Five W and One H Questions},
booktitle = {Proceedings of the 18th Annual International {ACM}/{IEEE} Joint Conference on Digital Libraries ({JCDL}) Fort Worth, {TX}, {USA}, June 03-07, 2018},
date = {2018},
publisher = {{ACM}},
pages = {339--340},
doi = {10.1145/3197026.3203899},
url = {http://doi.acm.org/10.1145/3197026.3203899},
preprint = {https://www.gipp.com/wp-content/papercite-data/pdf/hamborg2018a.pdf},
keywords = {!bg, !bg\_author, !bg\_preprint, !cb, !cb\_author, !fh, !fh\_author, !ms, !ms\_author, !ms\_cv, !ms\_preprint, jabref\_imp2, old\_tex\_field\_preprint},
topic = {newsanalysis},
core = {A*},
oldkey = {Hamborg2018},
biburl = {https://dblp.uni-trier.de/rec/bibtex/conf/jcdl/HamborgBSLG18},
note = {tex.biburl: https://dblp.uni-trier.de/rec/bibtex/conf/jcdl/{HamborgBSLG}18
tex.core: A*
tex.oldkey: Hamborg2018
tex.preprint: https://www.gipp.com/wp-content/papercite-data/pdf/hamborg2018a.pdf
tex.topic: newsanalysis},
}
@inproceedings{MeuschkeSHS17,
location = {Singapore},
title = {Analyzing Mathematical Content to Detect Academic Plagiarism},
rights = {Creative Commons Attribution 4.0 International License ({CC}-{BY})},
isbn = {978-1-4503-4918-5},
url = {http://doi.acm.org/10.1145/3132847.3133144},
doi = {10.1145/3132847.3133144},
abstract = {This paper presents, to our knowledge, the first study on analyzing mathematical expressions to detect academic plagiarism. We make the following contributions. First, we investigate confirmed cases of plagiarism to categorize the similarities of mathematical content commonly found in plagiarized publications. From this investigation, we derive possible feature selection and feature comparison strategies for developing math-based detection approaches and a ground truth for our experiments. Second, we create a test collection by embedding confirmed cases of plagiarism into the {NTCIR}-11 {MathIR} Task dataset, which contains approx. 60 million mathematical expressions in 105,120 documents from {arXiv}.org. Third, we develop a first math-based detection approach by implementing and evaluating different feature comparison approaches using an open source parallel data processing pipeline built using the Apache Flink framework. The best performing approach identifies all but two of our real-world test cases at the top rank and achieves a mean reciprocal rank of 0.86. The results show that mathematical expressions are promising text-independent features to identify academic plagiarism in large collections. To facilitate future research on math-based plagiarism detection, we make our source code and data available. © 2017 Copyright held by the owner/author(s). Publication rights licensed to {ACM}.},
pages = {2211--2214},
booktitle = {Proceedings {ACM} Conference on Information and Knowledge Management ({CIKM})},
publisher = {{ACM}},
author = {Meuschke, Norman and Schubotz, Moritz and Hamborg, Felix and Skopal, Tomas and Gipp, Bela},
date = {2017-11},
note = {tex.ids: {MeuschkeSHS}17a, {MeuschkeSHS}17b
tex.biburl: https://dblp.org/rec/bib/conf/cikm/{MeuschkeSHSG}17
tex.core: A;Core Rank A;http://portal.core.edu.au/conf-ranks/25/
tex.oldkey: Meuschke2017b
tex.owner: norman
tex.preprint: https://ag-gipp.github.io/bib/preprints/meuschke2017b.pdf
tex.topic: pd},
keywords = {!bg, !bg\_author, !bg\_preprint, !fh, !fh\_author, !ms, !ms\_author, !ms\_cv, !ms\_preprint, !nm, !nm\_author, !nm\_preprint, 21InfWissHb, jabref\_imp1\_clean, old\_tex\_field\_preprint, pd\_litrev19},
ids = {MeuschkeSHS17a, MeuschkeSHS17b},
biburl = {https://dblp.org/rec/bib/conf/cikm/MeuschkeSHSG17},
core = {A;Core Rank A;http://portal.core.edu.au/conf-ranks/25/},
oldkey = {Meuschke2017b},
owner = {norman},
preprint = {https://ag-gipp.github.io/bib/preprints/meuschke2017b.pdf},
topic = {pd},
}
@inproceedings{SchwarzerBSM17,
author = {Schwarzer, Malte and Breitinger, Corinna and Schubotz, Moritz and Meuschke, Norman and Gipp, Bela},
title = {Citolytics: A Link-based Recommender System for Wikipedia},
shorttitle = {Citolytics},
booktitle = {Proceedings of the 11th {ACM} Conference on Recommender Systems ({RecSys})},
date = {2017-08},
publisher = {{ACM}},
pages = {360--361},
isbn = {978-1-4503-4652-8},
doi = {10.1145/3109859.3109981},
preprint = {https://ag-gipp.github.io/bib/preprints/schwarzer2017.pdf},
rights = {Creative Commons Attribution 4.0 International License ({CC}-{BY})},
abstract = {We present Citolytics - a novel link-based recommendation system for Wikipedia articles. In a preliminary study, Citolytics achieved promising results compared to the widely used text-based approach of Apache Lucene's {MoreLikeThis} ({MLT}). In this demo paper, we describe how we plan to integrate Citolytics into the Wikipedia infrastructure by using Elasticsearch and Apache Flink to serve recommendations for Wikipedia articles. Additionally, we propose a large-scale online evaluation design using the Wikipedia Android app. Working with Wikipedia data has several unique advantages. First, the availability of a very large user sample contributes to statistically significant results. Second, the openness of Wikipedia's architecture allows making our source code and evaluation data public, thus benefiting other researchers. If link-based recommendations show promise in our online evaluation, a deployment of the presented system within Wikipedia would have a far-reaching impact on Wikipedia's more than 30 million users.},
keywords = {!bg, !bg\_author, !bg\_preprint, !cb, !cb\_author, !ms, !ms\_author, !ms\_cv, !ms\_preprint, !nm, !nm\_author, !nm\_preprint, jabref\_imp1\_clean, old\_tex\_field\_preprint},
topic = {rec},
ids = {SchwarzerBSM17a},
oldkey = {Schwarzer2017},
biburl = {https://dblp.org/rec/bib/conf/recsys/SchwarzerBSMG17},
note = {tex.ids: {SchwarzerBSM}17a
tex.biburl: https://dblp.org/rec/bib/conf/recsys/{SchwarzerBSMG}17
tex.oldkey: Schwarzer2017
tex.preprint: https://ag-gipp.github.io/bib/preprints/schwarzer2017.pdf
tex.topic: rec},
}
@incollection{SchubotzKMH17,
location = {Cham},
title = {Evaluating and Improving the Extraction of Mathematical Identifier Definitions},
volume = {10456},
rights = {Creative Commons Attribution 4.0 International License ({CC}-{BY})},
isbn = {978-3-319-65812-4 978-3-319-65813-1},
url = {http://link.springer.com/10.1007/978-3-319-65813-1_7},
series = {Lecture Notes in Computer Science},
abstract = {Mathematical formulae in academic texts significantly contribute to the overall semantic content of such texts, especially in the fields of Science, Technology, Engineering and Mathematics. Knowing the definitions of the identifiers in mathematical formulae is essential to understand the semantics of the formulae. Similar to the sense-making process of human readers, mathematical information retrieval systems can analyze the text that surrounds formulae to extract the definitions of identifiers occurring in the formulae. Several approaches for extracting the definitions of mathematical identifiers from documents have been proposed in recent years. So far, these approaches have been evaluated using different collections and gold standard datasets, which prevented comparative performance assessments. To facilitate future research on the task of identifier definition extraction, we make three contributions. First, we provide an automated evaluation framework, which uses the dataset and gold standard of the {NTCIR}-11 Math Retrieval Wikipedia task. Second, we compare existing identifier extraction approaches using the developed evaluation framework. Third, we present a new identifier extraction approach that uses machine learning to combine the well-performing features of previous approaches. The new approach increases the precision of extracting identifier definitions from 17.85\% to 48.60\%, and increases the recall from 22.58\% to 28.06\%. The evaluation framework, the dataset and our source code are openly available at: https://ident.formulasearchengine.com.},
pages = {82--94},
booktitle = {Experimental {IR} Meets Multilinguality, Multimodality, and Interaction},
publisher = {Springer International Publishing},
author = {Schubotz, Moritz and Krämer, Leonard and Meuschke, Norman and Hamborg, Felix and Gipp, Bela},
editor = {Jones, Gareth J.F. and Lawless, Séamus and Gonzalo, Julio and Kelly, Liadh and Goeuriot, Lorraine and Mandl, Thomas and Cappellato, Linda and Ferro, Nicola},
date = {2017-08},
doi = {10.1007/978-3-319-65813-1_7},
note = {Series Title: Lecture Notes in Computer Science
tex.ids: {SchubotzKMH}17a
tex.biburl: https://dblp.org/rec/bib/conf/clef/{SchubotzKMHG}17
tex.oldkey: Schubotz2017
tex.preprint: https://ag-gipp.github.io/bib/preprints/schubotz2017.pdf
tex.topic: mathir},
keywords = {!bg, !bg\_author, !bg\_preprint, !fh, !fh\_author, !ms, !ms\_author, !ms\_cv, !ms\_preprint, !nm, !nm\_author, !nm\_preprint, jabref\_imp1\_clean, old\_tex\_field\_preprint},
ids = {SchubotzKMH17a},
biburl = {https://dblp.org/rec/bib/conf/clef/SchubotzKMHG17},
oldkey = {Schubotz2017},
preprint = {https://ag-gipp.github.io/bib/preprints/schubotz2017.pdf},
topic = {mathir},
}
@incollection{SchubotzMHC17,
title = {{VMEXT}: A Visualization Tool for Mathematical Expression Trees},
volume = {10383},
rights = {Creative Commons Attribution 4.0 International License ({CC}-{BY})},
isbn = {978-3-319-62074-9},
url = {https://doi.org/10.1007/978-3-319-62075-6_24},
doi = {10.1007/978-3-319-62075-6_24},
series = {Lecture Notes in Computer Science},
shorttitle = {{VMEXT}},
abstract = {Mathematical expressions can be represented as a tree consisting of terminal symbols, such as identifiers or numbers (leaf nodes), and functions or operators (non-leaf nodes). Expression trees are an important mechanism for storing and processing mathematical expressions as well as the most frequently used visualization of the structure of mathematical expressions. Typically, researchers and practitioners manually visualize expression trees using general-purpose tools. This approach is laborious, redundant, and error-prone. Manual visualizations represents a user’s notion of what the markup of an expression should be, but not necessarily what the actual markup is. This paper presents {VMEXT} – a free and open source tool to directly visualize expression trees from parallel {MathML}. {VMEXT} simultaneously visualizes the presentation elements and the semantic structure of mathematical expressions to enable users to quickly spot deficiencies in the Content {MathML} markup that does not affect the presentation of the expression. Identifying such discrepancies previously required reading the verbose and complex {MathML} markup. {VMEXT} also allows one to visualize similar and identical elements of two expressions. Visualizing expression similarity can support developers in designing retrieval approaches and enable improved interaction concepts for users of mathematical information retrieval systems. We demonstrate {VMEXT}’s visualizations in two web-based applications. The first application presents the visualizations alone. The second application shows a possible integration of the visualizations in systems for mathematical knowledge management and mathematical information retrieval. The application converts {LaTeX} input to parallel {MathML}, computes basic similarity measures for mathematical expressions, and visualizes the results using {VMEXT}.},
pages = {340--355},
booktitle = {Intelligent Computer Mathematics},
publisher = {Springer},
author = {Schubotz, Moritz and Meuschke, Norman and Hepp, Thomas and Cohl, Howard S. and Gipp, Bela},
editor = {Geuvers, Herman and England, Matthew and Hasan, Osman and Rabe, Florian and Teschke, Olaf},
date = {2017-07},
note = {tex.ids: {SchubotzMHC}17a, {SchubotzMHC}17b
tex.biburl: http://dblp.uni-trier.de/rec/bib/conf/mkm/{SchubotzMHCG}17
tex.oldkey: vmext17
tex.preprint: https://arxiv.org/pdf/1707.03540.pdf
tex.topic: mathir},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, !nm, !nm\_author, !nm\_preprint, jabref\_imp1\_clean, old\_tex\_field\_preprint},
ids = {SchubotzMHC17a, SchubotzMHC17b},
biburl = {http://dblp.uni-trier.de/rec/bib/conf/mkm/SchubotzMHCG17},
oldkey = {vmext17},
preprint = {https://arxiv.org/pdf/1707.03540.pdf},
topic = {mathir},
}
@inproceedings{MeuschkeSSG17,
author = {Meuschke, Norman and Siebeck, Nicolas and Schubotz, Moritz and Gipp, Bela},
title = {Analyzing Semantic Concept Patterns to Detect Academic Plagiarism},
booktitle = {Proceedings of the International Workshop on Mining Scientific Publications ({WOSP}) co-located with the {ACM}/{IEEE} Joint Conference on Digital Libraries ({JCDL})},
date = {2017-06},
location = {Toronto, Canada},
publisher = {{IEEE} Computer Society},
pages = {46--53},
isbn = {978-1-4503-5388-5},
doi = {10.1145/3127526.3127535},
preprint = {https://ag-gipp.github.io/bib/preprints/meuschke2017a.pdf},
rights = {Creative Commons Attribution 4.0 International License ({CC}-{BY})},
abstract = {Detecting academic plagiarism is a pressing problem, e.g., for educational and research institutions, funding agencies, and academic publishers. Existing plagiarism detection systems reliably identify copied text, or near copies of text, but often fail to detect disguised forms of academic plagiarism, such as paraphrases, translations, and idea plagiarism. We present Semantic Concept Pattern Analysis - an approach that performs an integrated analysis of semantic text relatedness and structural text similarity. Using 25 officially retracted academic plagiarism cases, we demonstrate that our approach can detect plagiarism that established text matching approaches would not identify. We view our approach as a promising addition to improve the detection capabilities for strong paraphrases. We plan to further improve Semantic Concept Pattern Analysis and include the approach as part of an integrated detection process that analyzes heterogeneous similarity features to better identify the many possible forms of plagiarism in academic documents.},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, !nm, !nm\_author, !nm\_preprint, jabref\_imp1\_clean, old\_tex\_field\_preprint, pd\_litrev19},
ids = {MeuschkeSSG17a},
oldkey = {Meuschke2017},
biburl = {https://dblp.org/rec/bib/conf/jcdl/MeuschkeSSG17},
note = {tex.ids: {MeuschkeSSG}17a
tex.biburl: https://dblp.org/rec/bib/conf/jcdl/{MeuschkeSSG}17
tex.oldkey: Meuschke2017
tex.preprint: https://ag-gipp.github.io/bib/preprints/meuschke2017a.pdf},
}
@inproceedings{DahmSMG17,
title = {A Vision for Performing Social and Economic Data Analysis using Wikipedia's Edit History},
rights = {Creative Commons Attribution 4.0 International License ({CC}-{BY})},
isbn = {978-1-4503-4914-7},
url = {http://doi.acm.org/10.1145/3041021.3053363},
doi = {10.1145/3041021.3053363},
abstract = {In this vision paper, we suggest combining two lines of research to study the collective behavior of Wikipedia contributors. The first line of research analyzes Wikipedia's edit history to quantify the quality of individual contributions and the resulting reputation of the contributor. The second line of research surveys Wikipedia contributors to gain insights, e.g., on their personal and professional background, socioeconomic status, or motives to contribute to Wikipedia. While both lines of research are valuable on their own, we argue that the combination of both approaches could yield insights that exceed the sum of the individual parts. Linking survey data to contributor reputation and content-based quality metrics could provide a large-scale, public domain data set to perform user modeling, i.e. deducing interest profiles of user groups. User profiles can, among other applications, help to improve recommender systems. The resulting dataset can also enable a better understanding and improved prediction of high quality Wikipedia content and successful Wikipedia contributors. Furthermore, the dataset can enable novel research approaches to investigate team composition and collective behavior as well as help to identify domain experts and young talents. We report on the status of implementing our large-scale, content-based analysis of the Wikipedia edit history using the big data processing framework Apache Flink. Additionally, we describe our plans to conduct a survey among Wikipedia contributors to enhance the content-based quality metrics.},
pages = {1627--1634},
booktitle = {Proceedings of the 26th International Conference on World Wide Web Companion},
publisher = {{ACM}},
author = {Dahm, Erik and Schubotz, Moritz and Meuschke, Norman and Gipp, Bela},
date = {2017-04},
note = {tex.ids: {DahmSMG}17a
tex.oldkey: Dahm2017
tex.preprint: https://ag-gipp.github.io/bib/preprints/dahm2017.pdf
tex.topic: wiki},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, !nm, !nm\_author, !nm\_preprint, jabref\_imp1\_clean, old\_tex\_field\_preprint, wikipedia},
ids = {DahmSMG17a},
oldkey = {Dahm2017},
preprint = {https://ag-gipp.github.io/bib/preprints/dahm2017.pdf},
topic = {wiki},
}
@inproceedings{CorneliS17,
title = {math.wikipedia.org: A vision for a collaborative semi-formal, language independent math(s) encyclopedia},
url = {https://www.research.ed.ac.uk/portal/files/32938085/corneli2017math.pdf},
booktitle = {Conference on Artificial Intelligence and Theorem Proving},
author = {Corneli, Joe and Schubotz, Moritz},
date = {2017},
note = {tex.biburl: https://www.research.ed.ac.uk/portal/en/publications/mathwikipediaorg-a-vision-for-a-collaborative-semiformal-language-independent-maths-encyclopedia(9588c61f-5234-4f9d-a036-c7c3daac9307).bibtex?download=true
tex.oldkey: Corneli2017
tex.owner: Moritz},
keywords = {!ms, !ms\_author, !ms\_cv, !ms\_preprint, \#nosource, jabref\_imp2, old\_tex\_field\_preprint, ⛔ No {DOI} found},
biburl = {https://www.research.ed.ac.uk/portal/en/publications/mathwikipediaorg-a-vision-for-a-collaborative-semiformal-language-independent-maths-encyclopedia(9588c61f-5234-4f9d-a036-c7c3daac9307).bibtex?download=true},
oldkey = {Corneli2017},
owner = {Moritz},
}
% Cohl et al. (2017), paper in the CICM 2017 proceedings (Lecture Notes in Computer Science, volume 10383).
% Importer bookkeeping is stored only in the dedicated biburl, oldkey, owner and url_orig fields;
% the note field is left unset so that no bookkeeping text is printed in the bibliography.
@inproceedings{CohlSYG17,
title = {Semantic Preserving Bijective Mappings of Mathematical Formulae Between Document Preparation Systems and Computer Algebra Systems},
volume = {10383},
url = {http://hcohl.sdf.org/drmfcas.pdf},
doi = {10.1007/978-3-319-62075-6_9},
series = {Lecture Notes in Computer Science},
pages = {115--131},
booktitle = {Intelligent Computer Mathematics - 10th International Conference, {CICM} 2017, Edinburgh, {UK}, July 17--21, 2017, Proceedings},
publisher = {Springer},
author = {Cohl, Howard S. and Schubotz, Moritz and Youssef, Abdou and Greiner-Petter, André and Gerhard, Jürgen and Saunders, Bonita V. and {McClain}, Marjorie A. and Bang, Joon and Chen, Kevin},
editor = {Geuvers, Herman and England, Matthew and Hasan, Osman and Rabe, Florian and Teschke, Olaf},
date = {2017},
keywords = {!ms, !ms\_author, !ms\_cv, !ms\_preprint, \#nosource, jabref\_imp2, old\_tex\_field\_preprint},
biburl = {https://dblp.org/rec/bib/conf/mkm/CohlSYGGSMBC17},
oldkey = {Cohl2017},
owner = {Moritz},
url_orig = {https://doi.org/10.1007/978-3-319-62075-6_9},
}
% Book by Moritz Schubotz (2017).
% Publisher name and publisher city are kept in separate fields (publisher, location).
% Importer bookkeeping is stored only in the oldkey and owner fields, not in note.
@book{Schubotz17,
title = {Augmenting Mathematical Formulae for More Effective Querying \& Efficient Presentation},
isbn = {978-3-7450-6208-3},
url = {https://www.epubli.de/preview/publication/64471},
publisher = {Epubli Verlag},
location = {Berlin},
author = {Schubotz, Moritz},
date = {2017},
doi = {10.14279/depositonce-6034},
keywords = {!ms, !ms\_author, \#nosource, jabref\_imp2},
oldkey = {dis},
owner = {Moritz},
}
% Unpublished contribution by Moritz Schubotz to the OpenMath workshop named in the note field.
% The note is genuine bibliographic content here and must stay.
@unpublished{Schubotz16CICM,
  author        = {Schubotz, Moritz},
  title         = {Implicit Content Dictionaries in the {NIST} Digital Repository of Mathematical Formulae},
  note          = {{OpenMath} Workshop of the 9th Conference on Intelligent Computer Mathematics {CICM} 2016},
  location      = {Bialystok, Poland},
  date          = {2016-07-25},
  url           = {http://cicm-conference.org/2016/cicm.php?event=&menu=talks#O3},
  urldate       = {2016-10-03},
  keywords      = {!ms, !ms\_author, \#nosource, jabref\_imp2},
  jabref-groups = {phd-m},
  oldkey        = {schubotz16implCd},
  owner         = {Moritz},
}
% Gold-standard data record by Moritz Schubotz, hosted on the TU Berlin DepositOnce repository.
% 11303_6571 is kept as a citation-key alias; the entry's own key is not repeated in ids.
% institution mirrors the publisher value because report entries are rendered from institution.
% Importer bookkeeping is stored only in the oldkey and owner fields, not in note.
@report{Schubotz16NTCIRData,
title = {Identifier Gold Standard for {NTCIR} 11 Math Wikipedia Dataset},
url = {https://depositonce.tu-berlin.de/handle/11303/6571},
abstract = {Mathematical formulae are essential in science, but face challenges of ambiguity, due to the use of a small number of identifiers to represent an immense number of concepts. Corresponding to word sense disambiguation in Natural Language Processing, we disambiguate mathematical identifiers. By regarding formulae and natural text as one monolithic information source, we are able to extract the semantics of identifiers in a process we term Mathematical Language Processing ({MLP}). As scientific communities tend to establish standard (identifier) notations, we use the document domain to infer the actual meaning of an identifier. Therefore, we adapt the software development concept of namespaces to mathematical notation. Thus, we learn namespace definitions by clustering the {MLP} results and mapping those clusters to subject classification schemata. In addition, this gives fundamental insights into the usage of mathematical notations in science, technology, engineering and mathematics. Our gold standard based evaluation shows that {MLP} extracts relevant identifier-definitions. Moreover, we discover that identifier namespaces improve the performance of automated identifier-definition extraction, and elevate it to a level that cannot be achieved within the document context alone.},
author = {Schubotz, Moritz},
editora = {{Technische Universität Berlin} and Cohl, Howard S.},
editoratype = {collaborator},
urldate = {2020-04-11},
date = {2016-07-18},
langid = {english},
doi = {10.14279/depositonce-6064},
keywords = {!ms, !ms\_author, \#nosource, jabref\_imp2},
ids = {11303_6571},
oldkey = {dataIdentifierGold16},
owner = {Moritz},
institution = {Technische Universität Berlin},
publisher = {Technische Universität Berlin},
}
% Schubotz et al. (2016), paper in the 39th ACM SIGIR conference proceedings.
% location holds the value New York, NY, USA; venue holds Pisa, Italy, the place value recorded for this entry.
% SchubotzGLC16a and disSigir16 are citation-key aliases.
% Importer bookkeeping (core, jabref-groups, numpages, oldkey, owner, preprint, topic) is stored only in dedicated fields, not in note.
@inproceedings{SchubotzGLC16,
location = {New York, {NY}, {USA}},
venue = {Pisa, Italy},
title = {Semantification of Identifiers in Mathematics for Better Math Information Retrieval},
rights = {Creative Commons Attribution 4.0 International License ({CC}-{BY})},
isbn = {978-1-4503-4069-4},
doi = {10.1145/2911451.2911503},
series = {{SIGIR} '16},
shorttitle = {Semantification of Identifiers in Mathematics for {MIR}},
abstract = {Mathematical formulae are essential in science, but face challenges of ambiguity, due to the use of a small number of identifiers to represent an immense number of concepts. Corresponding to word sense disambiguation in Natural Language Processing, we disambiguate mathematical identifiers. By regarding formulae and natural text as one monolithic information source, we are able to extract the semantics of identifiers in a process we term Mathematical Language Processing ({MLP}). As scientific communities tend to establish standard (identifier) notations, we use the document domain to infer the actual meaning of an identifier. Therefore, we adapt the software development concept of namespaces to mathematical notation. Thus, we learn namespace definitions by clustering the {MLP} results and mapping those clusters to subject classification schemata. In addition, this gives fundamental insights into the usage of mathematical notations in science, technology, engineering and mathematics. Our gold standard based evaluation shows that {MLP} extracts relevant identifier-definitions. Moreover, we discover that identifier namespaces improve the performance of automated identifier-definition extraction, and elevate it to a level that cannot be achieved within the document context alone.},
pages = {135--144},
booktitle = {Proceedings of the 39th International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval},
publisher = {{ACM}},
author = {Schubotz, Moritz and Grigorev, Alexey and Leich, Marcus and Cohl, Howard S. and Meuschke, Norman and Gipp, Bela and Youssef, Abdou S. and Markl, Volker},
date = {2016-07},
keywords = {!bg, !bg\_author, !bg\_preprint, !ms, !ms\_author, !ms\_cv, !ms\_preprint, !nm, !nm\_author, !nm\_preprint, {MIR}, {MLP}, definitions, identifiers, jabref\_imp1\_clean, mathematical information retrieval, mathematical knowledge management, mathematical language processing, mathematics, mathoid, mathosphere, namespace discovery, old\_tex\_field\_preprint, wikipedia},
ids = {SchubotzGLC16a, disSigir16},
core = {A*},
jabref-groups = {phd-m},
numpages = {10},
oldkey = {Schubotz16},
owner = {Moritz},
preprint = {https://ag-gipp.github.io/bib/preprints/schubotz16.pdf},
topic = {mathir},
}
@inproceedings{SchwarzerSMB16,
location = {Newark, New Jersey, {USA}},
title = {Evaluating Link-based Recommendations for Wikipedia},
rights = {Creative Commons Attribution 4.0 International License ({CC}-{BY})},
isbn = {978-1-4503-4229-2},
doi = {10.1145/2910896.2910908},
abstract = {Literature recommender systems support users in filtering the vast and increasing number of documents in digital libraries and on the Web. For academic literature, research has proven the ability of citation-based document similarity measures, such as Co-Citation ({CoCit}), or Co-Citation Proximity Analysis ({CPA}) to improve recommendation quality. In this paper, we report on the first large-scale investigation of the performance of the {CPA} approach in generating literature recommendations for Wikipedia, which is fundamentally different from the academic literature domain. We analyze links instead of citations to generate article recommendations. We evaluate {CPA}, {CoCit}, and the Apache Lucene {MoreLikeThis} ({MLT}) function, which represents a traditional text-based similarity measure. We use two datasets of 779,716 and 2.57 million Wikipedia articles, the Big Data processing framework Apache Flink, and a ten-node computing cluster. To enable our large-scale evaluation, we derive two quasi-gold standards from the links in Wikipedia's "See also" sections and a comprehensive Wikipedia clickstream dataset.
Our results show that the citation-based measures {CPA} and {CoCit} have complementary strengths compared to the text-based {MLT} measure. While {MLT} performs well in identifying narrowly similar articles that share similar words and structure, the citation- based measures are better able to identify topically related information, such as information on the city of a certain university or other technical universities in the region. The {CPA} approach, which consistently outperformed {CoCit}, is better suited for identifying a broader spectrum of related articles, as well as popular articles that typically exhibit a higher quality. Additional benefits of the {CPA} approach are its lower runtime requirements and its language-independence that allows for a cross-language retrieval of articles. We present a manual analysis of exemplary articles to demonstrate and discuss our findings. The raw data and source code of our study, together with a manual on how to use them, are openly available at: https://github.com/wikimedia/citolytics},
pages = {191--200},
booktitle = {Proceedings of the 16th Annual International {ACM}/{IEEE} Joint Conference on Digital Libraries ({JCDL})},
publisher = {{ACM}},
author = {Schwarzer, Malte and Schubotz, Moritz and Meuschke, Norman and Breitinger, Corinna and Markl, Volker and Gipp, Bela},
date = {2016-06},
note = {tex.ids: {SchwarzerSMB}16a
tex.core: A*
tex.oldkey: Schwarzer2016
tex.preprint: https://ag-gipp.github.io/bib/preprints/schwarzer2016.pdf
tex.topic: rec},
keywords = {!bg, !bg\_author, !bg\_preprint, !cb, !cb\_author, !ms, !ms\_author, !ms\_cv, !ms\_preprint, !nm, !nm\_author, !nm\_preprint, jabref\_imp1\_clean, old\_tex\_field\_preprint},
ids = {SchwarzerSMB16a},
core = {A*},
oldkey = {Schwarzer2016},
preprint = {https://ag-gipp.github.io/bib/preprints/schwarzer2016.pdf},