-
Notifications
You must be signed in to change notification settings - Fork 0
/
bib.bib
executable file
·981 lines (881 loc) · 37.2 KB
/
bib.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
% LOGIC
@inproceedings{Pnueli:1977:TLP:1382431.1382534,
  author    = {Pnueli, Amir},
  title     = {The Temporal Logic of Programs},
  booktitle = {Proceedings of the 18th Annual Symposium on Foundations of Computer Science},
  series    = {SFCS '77},
  pages     = {46--57},
  numpages  = {12},
  year      = {1977},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  doi       = {10.1109/SFCS.1977.32},
  url       = {https://doi.org/10.1109/SFCS.1977.32},
  acmid     = {1382534},
}
% Scholar export cleaned: the title carries no trailing period (styles add their own)
% and the former volume 13 was dropped, since it only echoed the IJCAI-13 label.
@inproceedings{de2013linear,
  author    = {De Giacomo, Giuseppe and Vardi, Moshe Y.},
  title     = {Linear Temporal Logic and Linear Dynamic Logic on Finite Traces},
  booktitle = {Proceedings of the 23rd International Joint Conference on Artificial Intelligence (IJCAI)},
  pages     = {854--860},
  year      = {2013},
}
% The archive link is stored in url: howpublished is not read for incollection entries.
@incollection{sep-logic-modal,
  author    = {Garson, James},
  title     = {Modal Logic},
  booktitle = {The Stanford Encyclopedia of Philosophy},
  editor    = {Zalta, Edward N.},
  edition   = {Spring 2016},
  publisher = {Metaphysics Research Lab, Stanford University},
  year      = {2016},
  url       = {https://plato.stanford.edu/archives/spr2016/entries/logic-modal/},
}
% The archive link is stored in url: howpublished is not read for incollection entries.
@incollection{sep-logic-classical,
  author    = {Shapiro, Stewart and Kouri Kissel, Teresa},
  title     = {Classical Logic},
  booktitle = {The Stanford Encyclopedia of Philosophy},
  editor    = {Zalta, Edward N.},
  edition   = {Spring 2018},
  publisher = {Metaphysics Research Lab, Stanford University},
  year      = {2018},
  url       = {https://plato.stanford.edu/archives/spr2018/entries/logic-classical/},
}
% The archive link is stored in url: howpublished is not read for incollection entries.
@incollection{sep-logic-dynamic,
  author    = {Troquard, Nicolas and Balbiani, Philippe},
  title     = {Propositional Dynamic Logic},
  booktitle = {The Stanford Encyclopedia of Philosophy},
  editor    = {Zalta, Edward N.},
  edition   = {Spring 2015},
  publisher = {Metaphysics Research Lab, Stanford University},
  year      = {2015},
  url       = {https://plato.stanford.edu/archives/spr2015/entries/logic-dynamic/},
}
% The archive link is stored in url: howpublished is not read for incollection entries.
@incollection{sep-logic-temporal,
  author    = {Goranko, Valentin and Galton, Antony},
  title     = {Temporal Logic},
  booktitle = {The Stanford Encyclopedia of Philosophy},
  editor    = {Zalta, Edward N.},
  edition   = {Winter 2015},
  publisher = {Metaphysics Research Lab, Stanford University},
  year      = {2015},
  url       = {https://plato.stanford.edu/archives/win2015/entries/logic-temporal/},
}
@book{Clarke:2000:MC:332656,
  author    = {Clarke,Jr., Edmund M. and Grumberg, Orna and Peled, Doron A.},
  title     = {Model Checking},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  year      = {1999},
  isbn      = {0-262-03270-8},
}
% Author list and title corrected against the book itself: the ACM DL record
% lists a non-existent fourth author and misnames the title.
@book{Hopcroft:2000:IAT:557657,
  author    = {Hopcroft, John E. and Motwani, Rajeev and Ullman, Jeffrey D.},
  title     = {Introduction to Automata Theory, Languages, and Computation},
  edition   = {2nd},
  publisher = {Addison-Wesley Longman Publishing Co., Inc.},
  address   = {Boston, MA, USA},
  year      = {2000},
  isbn      = {0201441241},
}
@article{Sistla:1985:CPL:3828.3837,
  author     = {Sistla, A. P. and Clarke, E. M.},
  title      = {The Complexity of Propositional Linear Temporal Logics},
  journal    = {J. ACM},
  volume     = {32},
  number     = {3},
  pages      = {733--749},
  numpages   = {17},
  month      = jul,
  year       = {1985},
  issue_date = {July 1985},
  issn       = {0004-5411},
  doi        = {10.1145/3828.3837},
  url        = {http://doi.acm.org/10.1145/3828.3837},
  acmid      = {3837},
  publisher  = {ACM},
  address    = {New York, NY, USA},
}
% Page range uses an en-dash and the doi field holds the bare identifier.
@article{FISCHER1979194,
  author  = {Fischer, Michael J. and Ladner, Richard E.},
  title   = {Propositional dynamic logic of regular programs},
  journal = {Journal of Computer and System Sciences},
  volume  = {18},
  number  = {2},
  pages   = {194--211},
  year    = {1979},
  issn    = {0022-0000},
  doi     = {10.1016/0022-0000(79)90046-1},
  url     = {http://www.sciencedirect.com/science/article/pii/0022000079900461},
}
% Technical reports need an institution field; the report number is the one
% embedded in the source link (MIT/LCS/TR-168).
@techreport{Pratt:1976:SCF:889769,
  author      = {Pratt, V. R.},
  title       = {Semantical Considerations on {Floyd-Hoare} Logic},
  institution = {Massachusetts Institute of Technology},
  number      = {MIT/LCS/TR-168},
  address     = {Cambridge, MA, USA},
  year        = {1976},
  source      = {http://www.ncstrl.org:8900/ncstrl/servlet/search?formname=detail\&id=oai%3Ancstrlh%3Amitai%3AMIT-LCS%2F%2FMIT%2FLCS%2FTR-168},
}
% IEEE Xplore export cleaned: title typos fixed, empty fields removed,
% en-dash page range, month given as a macro.
@inproceedings{4567894,
  author    = {Pratt, V. R.},
  title     = {Semantical Considerations on {Floyd-Hoare} Logic},
  booktitle = {17th Annual Symposium on Foundations of Computer Science (sfcs 1976)},
  pages     = {109--121},
  month     = oct,
  year      = {1976},
  doi       = {10.1109/SFCS.1976.27},
  issn      = {0272-5428},
  keywords  = {Boolean functions;Calculus;Logic;Mathematics;Testing},
}
% Page range uses an en-dash and the doi field holds the bare identifier.
@article{PRATT1980231,
  author  = {Pratt, Vaughan R.},
  title   = {A near-optimal method for reasoning about action},
  journal = {Journal of Computer and System Sciences},
  volume  = {20},
  number  = {2},
  pages   = {231--254},
  year    = {1980},
  issn    = {0022-0000},
  doi     = {10.1016/0022-0000(80)90061-6},
  url     = {http://www.sciencedirect.com/science/article/pii/0022000080900616},
}
@article{deGiacomo:2000:CDM:359243.359271,
  author     = {De Giacomo, Giuseppe and Massacci, Fabio},
  title      = {Combining Deduction and Model Checking into Tableaux and Algorithms for converse-PDL},
  journal    = {Inf. Comput.},
  volume     = {162},
  number     = {1/2},
  pages      = {117--137},
  numpages   = {21},
  month      = oct,
  year       = {2000},
  issue_date = {Oct 2000},
  issn       = {0890-5401},
  url        = {http://dl.acm.org/citation.cfm?id=359243.359271},
  acmid      = {359271},
  publisher  = {Academic Press, Inc.},
  address    = {Duluth, MN, USA},
}
@inproceedings{Pesic:2006:DAF:2135571.2135592,
  author    = {Pesic, M. and van der Aalst, W. M. P.},
  title     = {A Declarative Approach for Flexible Business Processes Management},
  booktitle = {Proceedings of the 2006 International Conference on Business Process Management Workshops},
  series    = {BPM'06},
  location  = {Vienna, Austria},
  pages     = {169--180},
  numpages  = {12},
  year      = {2006},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  isbn      = {3-540-38444-8, 978-3-540-38444-1},
  doi       = {10.1007/11837862_18},
  url       = {http://dx.doi.org/10.1007/11837862_18},
  acmid     = {2135592},
  keywords  = {declarative model specification, dynamic workflow, flexibility, temporal logic, workflow management},
}
% The location used to contain escaped HTML entities; it now uses a LaTeX accent.
@inproceedings{DeGiacomo:2014:RLF:2893873.2894033,
  author    = {De Giacomo, Giuseppe and De Masellis, Riccardo and Montali, Marco},
  title     = {Reasoning on {LTL} on Finite Traces: Insensitivity to Infiniteness},
  booktitle = {Proceedings of the Twenty-Eighth AAAI Conference on Artificial Intelligence},
  series    = {AAAI'14},
  location  = {Qu{\'e}bec City, Qu{\'e}bec, Canada},
  pages     = {1027--1033},
  numpages  = {7},
  year      = {2014},
  publisher = {AAAI Press},
  url       = {http://dl.acm.org/citation.cfm?id=2893873.2894033},
  acmid     = {2894033},
}
% Technical reports need an institution field; the report number is the one
% embedded in the source link (CS97-13).
% NOTE(review): the institution is the former publisher value from the ACM DL
% record; confirm the issuing body.
@techreport{Gabbay:1997:TAF:903586,
  author      = {Gabbay, D. and Pnueli, A. and Shelah, S. and Stavi, J.},
  title       = {On the Temporal Analysis of Fairness},
  institution = {Weizmann Science Press of Israel},
  number      = {CS97-13},
  address     = {Jerusalem, Israel},
  year        = {1997},
  source      = {http://www.ncstrl.org:8900/ncstrl/servlet/search?formname=detail\&id=oai%3Ancstrlh%3Aweizmann_il%3Ancstrl.weizmann_il%2F%2FCS97-13},
}
% A symposium paper: the venue belongs in booktitle, not in a journal field.
@inproceedings{Wolper1981TemporalLC,
  author    = {Wolper, Pierre},
  title     = {Temporal logic can be more expressive},
  booktitle = {22nd Annual Symposium on Foundations of Computer Science (sfcs 1981)},
  pages     = {340--348},
  year      = {1981},
}
% ASCII hyphens and en-dashes replace the Unicode hyphens of the publisher export;
% the umlaut is written as a BibTeX special character so sorting and labels work.
@article{doi:10.1002/malq.19600060105,
  author  = {B{\"u}chi, J. Richard},
  title   = {Weak Second-Order Arithmetic and Finite Automata},
  journal = {Mathematical Logic Quarterly},
  volume  = {6},
  number  = {1--6},
  pages   = {66--92},
  year    = {1960},
  doi     = {10.1002/malq.19600060105},
  url     = {https://onlinelibrary.wiley.com/doi/abs/10.1002/malq.19600060105},
  eprint  = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/malq.19600060105},
}
@article{10.2307/1993511,
  author    = {Calvin C. Elgot},
  title     = {Decision Problems of Finite Automata Design and Related Arithmetics},
  journal   = {Transactions of the American Mathematical Society},
  volume    = {98},
  number    = {1},
  pages     = {21--51},
  year      = {1961},
  publisher = {American Mathematical Society},
  issn      = {00029947},
  url       = {http://www.jstor.org/stable/1993511},
}
% zbMATH export cleaned: initials are spaced, and the title is no longer
% wrapped in a second brace pair or terminated with a period.
@article{zbMATH03186872,
  author    = {Trakhtenbrot, B. A.},
  title     = {Finite Automata and the Logic of Single-Place Predicates},
  journal   = {{Sov. Phys., Dokl.}},
  fjournal  = {{Soviet Physics. Doklady}},
  volume    = {6},
  pages     = {753--755},
  year      = {1961},
  publisher = {Consultants Bureau, New York},
  issn      = {0038-5689},
  language  = {English},
  msc2010   = {03-XX 68Qxx},
  zbl       = {0115.00702},
}
% The omega is typeset in math mode and braced so classic BibTeX handles it;
% en-dash page range; bare identifier in doi.
@article{THOMAS1979148,
  author  = {Thomas, Wolfgang},
  title   = {Star-free regular sets of {$\omega$}-sequences},
  journal = {Information and Control},
  volume  = {42},
  number  = {2},
  pages   = {148--156},
  year    = {1979},
  issn    = {0019-9958},
  doi     = {10.1016/S0019-9958(79)90629-6},
  url     = {http://www.sciencedirect.com/science/article/pii/S0019995879906296},
}
% Publisher name spelled with its umlaut.
@book{Khoussainov:2001:ATA:558914,
  author    = {Khoussainov, Bakhadyr and Nerode, Anil},
  title     = {Automata Theory and Its Applications},
  publisher = {Birkh{\"a}user Boston, Inc.},
  address   = {Secaucus, NJ, USA},
  year      = {2001},
  isbn      = {3764342072},
}
@article{Rabin:1959:FAD:1661907.1661909,
  author     = {Rabin, M. O. and Scott, D.},
  title      = {Finite Automata and Their Decision Problems},
  journal    = {IBM J. Res. Dev.},
  volume     = {3},
  number     = {2},
  pages      = {114--125},
  numpages   = {12},
  month      = apr,
  year       = {1959},
  issue_date = {April 1959},
  issn       = {0018-8646},
  doi        = {10.1147/rd.32.0114},
  url        = {http://dx.doi.org/10.1147/rd.32.0114},
  acmid      = {1661909},
  publisher  = {IBM Corp.},
  address    = {Riverton, NJ, USA},
}
% IJCAI is the International Joint Conference; the ACM DL record drops "Joint".
@inproceedings{DeGiacomo:2015:SLL:2832415.2832466,
  author    = {De Giacomo, Giuseppe and Vardi, Moshe Y.},
  title     = {Synthesis for {LTL} and {LDL} on Finite Traces},
  booktitle = {Proceedings of the 24th International Joint Conference on Artificial Intelligence},
  series    = {IJCAI'15},
  location  = {Buenos Aires, Argentina},
  pages     = {1558--1564},
  numpages  = {7},
  year      = {2015},
  publisher = {AAAI Press},
  isbn      = {978-1-57735-738-4},
  url       = {http://dl.acm.org/citation.cfm?id=2832415.2832466},
  acmid     = {2832466},
}
% Second author written as surname-comma-given so the middle initial is not
% parsed as part of the surname.
@article{Kupferman:2001:MCS:569028.569032,
  author     = {Kupferman, Orna and Vardi, Moshe Y.},
  title      = {Model Checking of Safety Properties},
  journal    = {Form. Methods Syst. Des.},
  volume     = {19},
  number     = {3},
  pages      = {291--314},
  numpages   = {24},
  month      = oct,
  year       = {2001},
  issue_date = {November 2001},
  issn       = {0925-9856},
  doi        = {10.1023/A:1011254632723},
  url        = {https://doi.org/10.1023/A:1011254632723},
  acmid      = {569032},
  publisher  = {Kluwer Academic Publishers},
  address    = {Hingham, MA, USA},
  keywords   = {automata, model checking, safety properties},
}
% IJCAI is the International Joint Conference; the ACM DL record drops "Joint".
@inproceedings{Lacerda:2015:OPG:2832415.2832470,
  author    = {Lacerda, Bruno and Parker, David and Hawes, Nick},
  title     = {Optimal Policy Generation for Partially Satisfiable Co-safe {LTL} Specifications},
  booktitle = {Proceedings of the 24th International Joint Conference on Artificial Intelligence},
  series    = {IJCAI'15},
  location  = {Buenos Aires, Argentina},
  pages     = {1587--1593},
  numpages  = {7},
  year      = {2015},
  publisher = {AAAI Press},
  isbn      = {978-1-57735-738-4},
  url       = {http://dl.acm.org/citation.cfm?id=2832415.2832470},
  acmid     = {2832470},
}
% RL
% Month given as the standard macro so each style formats it itself.
@article{Sutton1988,
  author   = {Sutton, Richard S.},
  title    = {Learning to predict by the methods of temporal differences},
  journal  = {Machine Learning},
  volume   = {3},
  number   = {1},
  pages    = {9--44},
  day      = {01},
  month    = aug,
  year     = {1988},
  issn     = {1573-0565},
  doi      = {10.1007/BF00115009},
  url      = {https://doi.org/10.1007/BF00115009},
  abstract = {This article introduces a class of incremental learning procedures specialized for prediction-that is, for using past experience with an incompletely known system to predict its future behavior. Whereas conventional prediction-learning methods assign credit by means of the difference between predicted and actual outcomes, the new methods assign credit by means of the difference between temporally successive predictions. Although such temporal-difference methods have been used in Samuel's checker player, Holland's bucket brigade, and the author's Adaptive Heuristic Critic, they have remained poorly understood. Here we prove their convergence and optimality for special cases and relate them to supervised-learning methods. For most real-world prediction problems, temporal-difference methods require less memory and less peak computation than conventional methods and they produce more accurate predictions. We argue that most problems to which supervised learning is currently applied are really prediction problems of the sort to which temporal-difference methods can be applied to advantage.},
}
%REINFORCEMENT LEARNING
% Title corrected to the one printed on the book; the ACM DL record reorders it.
@book{Sutton:1998:IRL:551283,
  author    = {Sutton, Richard S. and Barto, Andrew G.},
  title     = {Reinforcement Learning: An Introduction},
  edition   = {1st},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  year      = {1998},
  isbn      = {0262193981},
}
@inproceedings{Ng:1999:PIU:645528.657613,
  author    = {Ng, Andrew Y. and Harada, Daishi and Russell, Stuart J.},
  title     = {Policy Invariance Under Reward Transformations: Theory and Application to Reward Shaping},
  booktitle = {Proceedings of the Sixteenth International Conference on Machine Learning},
  series    = {ICML '99},
  pages     = {278--287},
  numpages  = {10},
  year      = {1999},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  isbn      = {1-55860-612-2},
  url       = {http://dl.acm.org/citation.cfm?id=645528.657613},
  acmid     = {657613},
}
% The location used to contain escaped HTML entities; accents are now written
% as BibTeX special characters.
@inproceedings{Grzes:2017:RSE:3091125.3091208,
  author    = {Grze{\'s}, Marek},
  title     = {Reward Shaping in Episodic Reinforcement Learning},
  booktitle = {Proceedings of the 16th Conference on Autonomous Agents and MultiAgent Systems},
  series    = {AAMAS '17},
  location  = {S{\~a}o Paulo, Brazil},
  pages     = {565--573},
  numpages  = {9},
  year      = {2017},
  publisher = {International Foundation for Autonomous Agents and Multiagent Systems},
  address   = {Richland, SC},
  url       = {http://dl.acm.org/citation.cfm?id=3091125.3091208},
  acmid     = {3091208},
  keywords  = {multiagent learning, potential-based reward shaping, reinforcement learning, reward shaping, reward structures for learning},
}
@inproceedings{Devlin:2012:DPR:2343576.2343638,
  author    = {Devlin, Sam and Kudenko, Daniel},
  title     = {Dynamic Potential-based Reward Shaping},
  booktitle = {Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems - Volume 1},
  series    = {AAMAS '12},
  location  = {Valencia, Spain},
  pages     = {433--440},
  numpages  = {8},
  year      = {2012},
  publisher = {International Foundation for Autonomous Agents and Multiagent Systems},
  address   = {Richland, SC},
  isbn      = {0-9817381-1-7, 978-0-9817381-1-6},
  url       = {http://dl.acm.org/citation.cfm?id=2343576.2343638},
  acmid     = {2343638},
  keywords  = {reinforcement learning, reward shaping},
}
@phdthesis{grzes2010improving,
  author = {Grzes, Marek},
  title  = {Improving exploration in reinforcement learning through domain knowledge and parameter analysis},
  school = {University of York},
  year   = {2010},
}
% NOTE(review): no venue or year is recorded for this work, so it is typed as misc
% to avoid missing-field warnings; add booktitle and year if it was published.
@misc{GuptaPerformanceCO,
  author = {Gupta, Karan M.},
  title  = {Performance Comparison of {Sarsa($\lambda$)} and {Watkins's} {Q($\lambda$)} Algorithms},
}
@inproceedings{DBLP:conf/icmla/GrzesK09,
  author    = {Marek Grzes and Daniel Kudenko},
  title     = {Theoretical and Empirical Analysis of Reward Shaping in Reinforcement Learning},
  booktitle = {{ICMLA}},
  pages     = {337--344},
  publisher = {{IEEE} Computer Society},
  year      = {2009},
}
@article{Singh:1996:RLR:225667.225679,
  author     = {Singh, Satinder P. and Sutton, Richard S.},
  title      = {Reinforcement Learning with Replacing Eligibility Traces},
  journal    = {Mach. Learn.},
  volume     = {22},
  number     = {1-3},
  pages      = {123--158},
  numpages   = {36},
  month      = jan,
  year       = {1996},
  issue_date = {Jan./Feb./March 1996},
  issn       = {0885-6125},
  doi        = {10.1007/BF00114726},
  url        = {http://dx.doi.org/10.1007/BF00114726},
  acmid      = {225679},
  publisher  = {Kluwer Academic Publishers},
  address    = {Hingham, MA, USA},
  keywords   = {CMAC, Markov chain, Monte Carlo method, eligibility trace, reinforcement learning, temporal difference learning},
}
% A thesis entry needs its school field; without it BibTeX warns and prints no institution.
@phdthesis{watkins1989learning,
  author = {Watkins, Christopher John Cornish Hellaby},
  title  = {Learning from delayed rewards},
  school = {King's College, University of Cambridge},
  year   = {1989},
}
% Month given as the standard macro so each style formats it itself.
@article{Watkins1992,
  author   = {Watkins, Christopher J. C. H. and Dayan, Peter},
  title    = {Q-learning},
  journal  = {Machine Learning},
  volume   = {8},
  number   = {3},
  pages    = {279--292},
  day      = {01},
  month    = may,
  year     = {1992},
  issn     = {1573-0565},
  doi      = {10.1007/BF00992698},
  url      = {https://doi.org/10.1007/BF00992698},
  abstract = {Q-learning (Watkins, 1989) is a simple way for agents to learn how to act optimally in controlled Markovian domains. It amounts to an incremental method for dynamic programming which imposes limited computational demands. It works by successively improving its evaluations of the quality of particular actions at particular states.},
}
% Edition given as an ordinal word: classic styles print the field verbatim
% followed by "edition", so a bare digit reads badly.
@book{Bellman:1957,
  author           = {Bellman, Richard},
  title            = {Dynamic Programming},
  edition          = {First},
  publisher        = {Princeton University Press},
  address          = {Princeton, NJ, USA},
  year             = {1957},
  url              = {http://books.google.com/books?id=fyVtp3EMxasC&pg=PR5&dq=dynamic+programming+richard+e+bellman&client=firefox-a#v=onepage&q=dynamic%20programming%20richard%20e%20bellman&f=false},
  bib2html_rescat  = {General RL},
}
% arXiv preprint: the identifier from the CoRR volume is also exposed through
% the eprint fields so eprint-aware styles can link it.
@article{Brafman2017SpecifyingNR,
  author        = {Brafman, Ronen I. and De Giacomo, Giuseppe and Patrizi, Fabio},
  title         = {Specifying Non-Markovian Rewards in {MDPs} Using {LDL} on Finite Traces (Preliminary Version)},
  journal       = {CoRR},
  volume        = {abs/1706.08100},
  year          = {2017},
  eprint        = {1706.08100},
  archiveprefix = {arXiv},
}
% Typed as a conference paper with the venue in booktitle; the AAAI export used
% an entry type and a venue field that bibliography styles do not know.
@inproceedings{AAAI1817342,
  author    = {Brafman, Ronen and De Giacomo, Giuseppe and Patrizi, Fabio},
  title     = {{LTLf/LDLf} Non-Markovian Rewards},
  booktitle = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence},
  year      = {2018},
  url       = {https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/view/17342},
  keywords  = {MDPs; non-Markovian Rewards; LTLf/LDLf},
  abstract  = {In Markov Decision Processes (MDPs), the reward obtained in a state is Markovian, i.e., depends on the last state and action. This dependency makes it difficult to reward more interesting long-term behaviors, such as always closing a door after it has been opened, or providing coffee only following a request. Extending MDPs to handle non-Markovian reward functions was the subject of two previous lines of work. Both use LTL variants to specify the reward function and then compile the new model back into a Markovian model. Building on recent progress in temporal logics over finite traces, we adopt LDLf for specifying non-Markovian rewards and provide an elegant automata construction for building a Markovian model, which extends that of previous work and offers strong minimality and compositionality guarantees.},
}
@inproceedings{Baier:2008:BCP:1620270.1620321,
  author    = {Baier, Jorge A. and Fritz, Christian and Bienvenu, Meghyn and McIlraith, Sheila A},
  title     = {Beyond Classical Planning: Procedural Control Knowledge and Preferences in State-of-the-art Planners},
  booktitle = {Proceedings of the 23rd National Conference on Artificial Intelligence - Volume 3},
  series    = {AAAI'08},
  location  = {Chicago, Illinois},
  pages     = {1509--1512},
  numpages  = {4},
  year      = {2008},
  publisher = {AAAI Press},
  isbn      = {978-1-57735-368-3},
  url       = {http://dl.acm.org/citation.cfm?id=1620270.1620321},
  acmid     = {1620321},
}
% IEEE Xplore export cleaned: empty fields removed, en-dash page range,
% month given as a macro.
@inproceedings{5381523,
  author    = {Grzes, M. and Kudenko, D.},
  title     = {Theoretical and Empirical Analysis of Reward Shaping in Reinforcement Learning},
  booktitle = {2009 International Conference on Machine Learning and Applications},
  pages     = {337--344},
  month     = dec,
  year      = {2009},
  doi       = {10.1109/ICMLA.2009.33},
  keywords  = {Markov processes;decision theory;learning (artificial intelligence);Markov decision process discount factor;knowledge-based approaches;reinforcement learning;reward shaping;state space explosion;temporal credit assignment problem;Application software;Artificial intelligence;Computer science;Explosions;Machine learning;Optimal control;Performance analysis;Scalability;Shape control;State-space methods;heuristics;reinforcement learning;reward shaping},
}
% The booktitle no longer starts with "In": styles prepend that word themselves.
@inproceedings{Dietterich98themaxq,
  author    = {Dietterich, Thomas G.},
  title     = {The {MAXQ} Method for Hierarchical Reinforcement Learning},
  booktitle = {Proceedings of the Fifteenth International Conference on Machine Learning},
  pages     = {118--126},
  publisher = {Morgan Kaufmann},
  year      = {1998},
}
% state-of-the-art
% Conference paper (AAMAS 2018), so typed accordingly with its venue; the first
% author's compound surname is "Toro Icarte".
@inproceedings{icarte2018teaching,
  author    = {Toro Icarte, Rodrigo and Klassen, Toryn Q. and Valenzano, Richard and McIlraith, Sheila A.},
  title     = {Teaching Multiple Tasks to an {RL} Agent using {LTL}},
  booktitle = {Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems (AAMAS)},
  pages     = {452--461},
  year      = {2018},
}
% Scholar export cleaned: no trailing period in the title, full proceedings name.
@inproceedings{camacho2018ltl,
  author    = {Camacho, Alberto and Muise, Christian J. and Baier, Jorge A. and McIlraith, Sheila A.},
  title     = {{LTL} Realizability via Safety and Reachability Games},
  booktitle = {Proceedings of the 27th International Joint Conference on Artificial Intelligence (IJCAI)},
  pages     = {4683--4691},
  year      = {2018},
}
% Scholar export cleaned: no trailing period in the title, full proceedings name.
@inproceedings{camacho2018finite,
  author    = {Camacho, Alberto and Baier, Jorge A. and Muise, Christian J. and McIlraith, Sheila A.},
  title     = {Finite {LTL} Synthesis as Planning},
  booktitle = {Proceedings of the 28th International Conference on Automated Planning and Scheduling (ICAPS)},
  pages     = {29--38},
  year      = {2018},
}
% Booktitle stored in normal title case instead of the all-caps Scholar export.
@inproceedings{bacchus1996rewarding,
  author    = {Bacchus, Fahiem and Boutilier, Craig and Grove, Adam},
  title     = {Rewarding Behaviors},
  booktitle = {Proceedings of the National Conference on Artificial Intelligence},
  pages     = {1160--1167},
  year      = {1996},
}
@article{ThiebauxGSPK06,
  author  = {Sylvie Thi{\'{e}}baux and Charles Gretton and John K. Slaney and David Price and Froduald Kabanza},
  title   = {Decision-Theoretic Planning with non-Markovian Rewards},
  journal = {J. Artif. Intell. Res. {(JAIR)}},
  volume  = {25},
  pages   = {17--74},
  year    = {2006},
}
@inproceedings{gretton2014more,
  author       = {Gretton, Charles},
  title        = {A More Expressive Behavioral Logic for Decision-Theoretic Planning},
  booktitle    = {Pacific Rim International Conference on Artificial Intelligence},
  pages        = {13--25},
  organization = {Springer},
  year         = {2014},
}
@inproceedings{pmlr-v70-andreas17a,
  author    = {Jacob Andreas and Dan Klein and Sergey Levine},
  title     = {Modular Multitask Reinforcement Learning with Policy Sketches},
  booktitle = {Proceedings of the 34th International Conference on Machine Learning},
  editor    = {Doina Precup and Yee Whye Teh},
  series    = {Proceedings of Machine Learning Research},
  volume    = {70},
  pages     = {166--175},
  month     = {06--11 Aug},
  year      = {2017},
  publisher = {PMLR},
  address   = {International Convention Centre, Sydney, Australia},
  pdf       = {http://proceedings.mlr.press/v70/andreas17a/andreas17a.pdf},
  url       = {http://proceedings.mlr.press/v70/andreas17a.html},
  abstract  = {We describe a framework for multitask deep reinforcement learning guided by policy sketches. Sketches annotate tasks with sequences of named subtasks, providing information about high-level structural relationships among tasks but not how to implement them—specifically not providing the detailed guidance used by much previous work on learning policy abstractions for RL (e.g. intermediate rewards, subtask completion signals, or intrinsic motivations). To learn from sketches, we present a model that associates every subtask with a modular subpolicy, and jointly maximizes reward over full task-specific policies by tying parameters across shared subpolicies. Optimization is accomplished via a decoupled actor–critic training objective that facilitates learning common behaviors from multiple dissimilar reward functions. We evaluate the effectiveness of our approach in three environments featuring both discrete and continuous control, and with sparse rewards that can be obtained only after completing a number of high-level subgoals. Experiments show that using our approach to learn policies guided by sketches gives better performance than existing techniques for learning task-specific or shared policies, while naturally inducing a library of interpretable primitive behaviors that can be recombined to rapidly adapt to new tasks.},
}
@inproceedings{mona1998,
  author    = {Jacob Elgaard and Nils Klarlund and Anders M{\o}ller},
  title     = {{MONA} 1.x: new techniques for {WS1S} and {WS2S}},
  booktitle = {Proc. 10th International Conference on Computer-Aided Verification (CAV)},
  series    = {LNCS},
  volume    = {1427},
  pages     = {516--520},
  publisher = {Springer-Verlag},
  month     = {June/July},
  year      = {1998},
}
@mastersthesis{progress2000,
  author = {Anders M\o{}ller},
  title  = {{MONA}, {DSD}, and {\tt <bigwig>}},
  school = {Aarhus University},
  note   = {BRICS Ph.D.\ Progress Report},
  month  = {May},
  year   = {2000},
}
@manual{monamanual2001,
  author       = {Nils Klarlund and Anders M{\o}ller},
  title        = {{MONA Version 1.4 User Manual}},
  organization = {BRICS, Department of Computer Science, Aarhus University},
  note         = {Notes Series NS-01-1. Available from \texttt{\small http://www.brics.dk/mona/}. Revision of BRICS NS-98-3},
  month        = {January},
  year         = {2001},
}
@inproceedings{cecconi2018interestingness,
  author       = {Cecconi, Alessio and Di Ciccio, Claudio and De Giacomo, Giuseppe and Mendling, Jan},
  title        = {Interestingness of Traces in Declarative Process Mining: The Janus LTLpf Approach},
  booktitle    = {International Conference on Business Process Management},
  pages        = {121--138},
  organization = {Springer},
  year         = {2018},
}
@article{markey2003temporal,
  author    = {Markey, Nicolas},
  title     = {Temporal logic with past is exponentially more succinct},
  journal   = {EATCS Bulletin},
  volume    = {79},
  pages     = {122--128},
  publisher = {European Association for Theoretical Computer Science},
  year      = {2003},
}
% Every author is written surname-comma-given; without the comma the second
% author's given name was taken as the surname.
@misc{zpv2018,
  author       = {Zhu, Shufang and Pu, Geguang and Vardi, Moshe Y.},
  title        = {First-Order vs. Second-Order for {LTLf}-to-Automata: An Extended Abstract},
  howpublished = {Women in Logic},
  year         = {2018},
}
@inproceedings{gabbay1980temporal,
  author       = {Gabbay, Dov and Pnueli, Amir and Shelah, Saharon and Stavi, Jonathan},
  title        = {On the temporal analysis of fairness},
  booktitle    = {Proceedings of the 7th ACM SIGPLAN-SIGACT symposium on Principles of programming languages},
  pages        = {163--173},
  organization = {ACM},
  year         = {1980},
}
@phdthesis{Kamp1968,
  author = {Hans Kamp},
  title  = {Tense Logic and the Theory of Linear Order},
  school = {University of California Los Angeles},
  note   = {Published as Johan Anthony Willem Kamp},
  year   = {1968},
  url    = {http://www.ims.uni-stuttgart.de/archiv/kamp/files/1968.kamp.thesis.pdf},
}
@inproceedings{biehl1996algorithms,
  author       = {Biehl, Morten and Klarlund, Nils and Rauhe, Theis},
  title        = {Algorithms for guided tree automata},
  booktitle    = {International Workshop on Implementing Automata},
  pages        = {6--25},
  organization = {Springer},
  year         = {1996},
}
@inproceedings{henriksen1995mona,
  author       = {Henriksen, Jesper G and Jensen, Jakob and J{\o}rgensen, Michael and Klarlund, Nils and Paige, Robert and Rauhe, Theis and Sandholm, Anders},
  title        = {Mona: Monadic second-order logic in practice},
  booktitle    = {International Workshop on Tools and Algorithms for the Construction and Analysis of Systems},
  pages        = {89--110},
  organization = {Springer},
  year         = {1995},
}
@incollection{montali2010declarative,
  author    = {Montali, Marco},
  title     = {Declarative process mining},
  booktitle = {Specification and verification of declarative open interaction models},
  pages     = {343--365},
  publisher = {Springer},
  year      = {2010},
}
% PhD thesis. The school field names the degree-granting university together with
% the department, because thesis styles print school and ignore publisher.
@phdthesis{pesic2008constraint,
  author    = {Pesic, Maja},
  title     = {Constraint-based workflow management systems: shifting control to users},
  school    = {Department of Industrial Engineering \& Innovation Sciences, Technische Universiteit Eindhoven},
  year      = {2008},
  publisher = {Technische Universiteit Eindhoven},
  doi       = {10.6100/IR638413},
  isbn      = {978-90-386-1319-2},
  language  = {English},
  abstract  = {Many organizations use information technology to support various aspects of their business processes: the operational aspect, collaboration between employees, etc. Workflow management systems aim at supporting the operational aspect of complex business processes by using process models to automate the ordering of activities (i.e., flow of work). The term `support' here relates to the ability of workflow management systems to control the execution of business processes. Contemporary workflow management systems lack flexibility, i.e., the system controls in detail how employees should execute business processes. While workflow management systems deal well with predictable business processes, they are not able to handle unforeseen situations, which occur often in real-life business processes. Although employees mostly have the knowledge and experience that enables them to deal with exceptional situations, they are not able to apply the right action because the system enforces the standard procedure of work. This often has various undesired consequences: work is done `outside' the system, work cannot be done in the appropriate way, dissatisfaction of employees, resistance towards the system, etc. As a result, workflow management systems cannot be used properly if it is necessary that employees control the execution of business processes. This thesis proposes a new approach to workflow management systems that can facilitate contemporary business processes in a better way by enabling a better balance between flexibility and support. As opposed to traditional approaches which use procedural process models to explicitly (i.e., step-by-step) specify the execution procedure, the proposed approach aims at the specification of business processes using constraints, i.e., processes are modeled by rules that should be followed while executing business processes.
Constraint-based models implicitly specify the execution procedure by means of constraints: any execution that does not violate constraints is possible. In addition to proposing a constraint-based approach, a concrete language for specification of constraints is given and the proof-of-concept prototype declare is described. On the one hand, constraint-based management systems are flexible, which allows employees to deal with specific (e.g., unpredicted) situations in the most adequate way. On the other hand, constraint-based management systems can support employees when it comes to aspects of business processes that are too complex for humans to handle. There are several ways in which constraint-based management systems can provide both flexibility and support.},
}
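% Disabled duplicate of the pesic2008constraint thesis entry. The leading at-sign
% is removed on purpose: classic BibTeX does not treat a percent sign as a comment
% marker, so a commented-out entry header would still be parsed as an entry.
% article{pesic2008constraint,
%  title={Constraint-based workflow management systems: shifting control to users},
%  author={Pesic, Maja},
%  year={2008}
%}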
@inproceedings{dwyer1999patterns,
  author       = {Dwyer, Matthew B and Avrunin, George S and Corbett, James C},
  title        = {Patterns in property specifications for finite-state verification},
  booktitle    = {Proceedings of the 21st international conference on Software engineering},
  pages        = {411--420},
  year         = {1999},
  organization = {ACM},
}
% Initials are written separately so abbreviating styles keep every initial;
% Declare is a language name and is brace-protected against sentence casing.
@inproceedings{maggi2013knowledge,
  author       = {Maggi, Fabrizio M. and Bose, R. P. Jagadeesh Chandra and van der Aalst, Wil M. P.},
  title        = {A knowledge-based integrated approach for discovering and repairing {Declare} maps},
  booktitle    = {International Conference on Advanced Information Systems Engineering},
  pages        = {433--448},
  year         = {2013},
  organization = {Springer},
}
@inproceedings{gabbay1989declarative,
  author       = {Gabbay, Dov},
  title        = {The declarative past and imperative future},
  booktitle    = {Temporal logic in specification},
  pages        = {409--448},
  year         = {1989},
  organization = {Springer},
}
@inproceedings{DBLP:conf/bpm/ValdiviesoLMS18,
  title={OpyenXES: {A} Complete Python Library for the eXtensible Event Stream Standard},
  author={Hernan Valdivieso and Wai Lam Jonathan Lee and Jorge Munoz{-}Gama and Marcos Sep{\'{u}}lveda},
  booktitle={Proceedings of the Dissertation Award, Demonstration, and Industrial Track at {BPM} 2018 co-located with 16th International Conference on Business Process Management {(BPM} 2018), Sydney, Australia, September 9-14, 2018.},
  pages={71--75},
  year={2018},
  crossref={DBLP:conf/bpm/2018d},
  url={http://ceur-ws.org/Vol-2196/BPM\_2018\_paper\_15.pdf},
  biburl={https://dblp.org/rec/bib/conf/bpm/ValdiviesoLMS18},
  bibsource={dblp computer science bibliography, https://dblp.org},
  timestamp={Wed, 12 Sep 2018 09:54:38 +0200}
}
@article{bacchus1998planning,
  author    = {Bacchus, Fahiem and Kabanza, Froduald},
  title     = {Planning for temporally extended goals},
  journal   = {Annals of Mathematics and Artificial Intelligence},
  volume    = {22},
  number    = {1-2},
  pages     = {5--27},
  year      = {1998},
  publisher = {Springer},
}
% The second author's surname uses the BibTeX special-character form for the
% umlaut; TALplanner is brace-protected to keep its capitalization.
@article{doherty2001talplanner,
  author  = {Doherty, Patrick and Kvarnstr{\"o}m, Jonas},
  title   = {{TALplanner}: A temporal logic-based planner},
  journal = {AI Magazine},
  volume  = {22},
  number  = {3},
  pages   = {95--102},
  year    = {2001},
}
% LTL is brace-protected so sentence-casing styles do not lower-case it; the
% closing punctuation of the title is left to the bibliography style.
@inproceedings{camacho2017non,
  author    = {Camacho, Alberto and Triantafillou, Eleni and Muise, Christian J and Baier, Jorge A and McIlraith, Sheila A},
  title     = {Non-Deterministic Planning with Temporally Extended Goals: {LTL} over Finite and Infinite Traces},
  booktitle = {AAAI},
  pages     = {3716--3724},
  year      = {2017},
}
@book{ghallab2004automated,
  author    = {Ghallab, Malik and Nau, Dana and Traverso, Paolo},
  title     = {Automated Planning: theory and practice},
  publisher = {Elsevier},
  year      = {2004},
}
@article{geffner2013concise,
  author    = {Geffner, Hector and Bonet, Blai},
  title     = {A concise introduction to models and methods for automated planning},
  journal   = {Synthesis Lectures on Artificial Intelligence and Machine Learning},
  volume    = {8},
  number    = {1},
  pages     = {1--141},
  year      = {2013},
  publisher = {Morgan \& Claypool Publishers},
}
@article{cimatti2003weak,
  author    = {Cimatti, Alessandro and Pistore, Marco and Roveri, Marco and Traverso, Paolo},
  title     = {Weak, strong, and strong cyclic planning via symbolic model checking},
  journal   = {Artificial Intelligence},
  volume    = {147},
  number    = {1-2},
  pages     = {35--84},
  year      = {2003},
  publisher = {Elsevier},
}
@inproceedings{rintanen2004complexity,
  author    = {Rintanen, Jussi},
  title     = {Complexity of Planning with Partial Observability.},
  booktitle = {ICAPS},
  pages     = {345--354},
  year      = {2004},
}
@book{fagin2004reasoning,
  author    = {Fagin, Ronald and Halpern, Joseph Y and Moses, Yoram and Vardi, Moshe},
  title     = {Reasoning about knowledge},
  publisher = {MIT press},
  year      = {2004},
}
@inproceedings{lichtenstein1985glory,
  author       = {Lichtenstein, Orna and Pnueli, Amir and Zuck, Lenore},
  title        = {The glory of the past},
  booktitle    = {Workshop on Logic of Programs},
  pages        = {196--218},
  year         = {1985},
  organization = {Springer},
}
% arXiv preprint. The journal text is kept for styles that ignore eprint fields;
% the identifier is also stored in eprint/archiveprefix for eprint-aware styles.
@article{zhu2017symbolic,
  author        = {Zhu, Shufang and Tabajara, Lucas M and Li, Jianwen and Pu, Geguang and Vardi, Moshe Y},
  title         = {Symbolic {LTLf} Synthesis},
  journal       = {arXiv preprint arXiv:1705.08426},
  year          = {2017},
  eprint        = {1705.08426},
  archiveprefix = {arXiv},
}
% arXiv preprint. The journal text is kept for styles that ignore eprint fields;
% the identifier is also stored in eprint/archiveprefix for eprint-aware styles.
@article{geffner2018compact,
  author        = {Geffner, Tomas and Geffner, Hector},
  title         = {Compact Policies for Fully-Observable Non-Deterministic Planning as {SAT}},
  journal       = {arXiv preprint arXiv:1806.09455},
  year          = {2018},
  eprint        = {1806.09455},
  archiveprefix = {arXiv},
}
% Acronyms in the title are brace-protected against sentence casing; the closing
% punctuation of the title is left to the bibliography style.
@inproceedings{de2018automata,
  author    = {De Giacomo, Giuseppe and Rubin, Sasha},
  title     = {Automata-Theoretic Foundations of {FOND} Planning for {LTLf} and {LDLf} Goals},
  booktitle = {IJCAI},
  pages     = {4729--4735},
  year      = {2018},
}
@inproceedings{hanks1986default,
  author    = {Hanks, Steve and McDermott, Drew V},
  title     = {Default Reasoning, Nonmonotonic Logics, and the Frame Problem.},
  booktitle = {AAAI},
  volume    = {86},
  pages     = {328--333},
  year      = {1986},
}
@inproceedings{kissmann2009solving,
  author       = {Kissmann, Peter and Edelkamp, Stefan},
  title        = {Solving fully-observable non-deterministic planning problems via translation into a general game},
  booktitle    = {Annual Conference on Artificial Intelligence},
  pages        = {1--8},
  year         = {2009},
  organization = {Springer},
}
% NOTE(review): no venue is recorded for this item, so it is typed as misc rather
% than article (which requires a journal). Confirm the publication details and
% author list against the original source.
@misc{bercher2010pattern,
  author = {Bercher, Pascal},
  title  = {Pattern Database Heuristics for Fully Observable Nondeterministic Planning},
  year   = {2010},
}
@inproceedings{ramirez2014directed,
  author    = {Ramirez, Miquel and Sardina, Sebastian},
  title     = {Directed Fixed-Point Regression-Based Planning for Non-Deterministic Domains.},
  booktitle = {ICAPS},
  year      = {2014},
}
% The proceedings title is given once, without the duplicated
% "International Conference on" phrase produced by the citation export.
@inproceedings{kuter2008using,
  author       = {Kuter, Ugur and Nau, Dana and Reisner, Elnatan and Goldman, Robert P},
  title        = {Using classical planners to solve nondeterministic planning problems},
  booktitle    = {Proceedings of the Eighteenth International Conference on Automated Planning and Scheduling},
  pages        = {190--197},
  year         = {2008},
  organization = {AAAI Press},
}
@inproceedings{fu2011simple,
  author    = {Fu, Jicheng and Ng, Vincent and Bastani, Farokh B and Yen, I-Ling and others},
  title     = {Simple and fast strong cyclic planning for fully-observable nondeterministic planning problems},
  booktitle = {IJCAI Proceedings-International Joint Conference on Artificial Intelligence},
  volume    = {22},
  number    = {3},
  pages     = {1949},
  year      = {2011},
}
@inproceedings{muise2012improved,
  author    = {Muise, Christian J and McIlraith, Sheila A and Beck, J Christopher},
  title     = {Improved Non-Deterministic Planning by Exploiting State Relevance.},
  booktitle = {ICAPS},
  year      = {2012},
}
% Technical report, so it carries institution and report number instead of a
% journal. NOTE(review): verify the report number against the original document.
@techreport{mcdermott1998pddl,
  author      = {McDermott, Drew and Ghallab, Malik and Howe, Adele and Knoblock, Craig and Ram, Ashwin and Veloso, Manuela and Weld, Daniel and Wilkins, David},
  title       = {{PDDL}---The Planning Domain Definition Language},
  institution = {Yale Center for Computational Vision and Control},
  number      = {CVC TR-98-003/DCS TR-1165},
  year        = {1998},
}
% Journal article "Alternation". The citation key is kept unchanged so existing
% cite commands continue to resolve.
@article{chandra1981acm,
  author  = {Chandra, Ashok K. and Kozen, Dexter C. and Stockmeyer, Larry J.},
  title   = {Alternation},
  journal = {Journal of the ACM},
  volume  = {28},
  number  = {1},
  pages   = {114--133},
  month   = jan,
  year    = {1981},
  doi     = {10.1145/322234.322243},
}