bib.bib

% LOGIC

% Pnueli (1977), "The Temporal Logic of Programs".
@inproceedings{Pnueli:1977:TLP:1382431.1382534,
  author    = {Pnueli, Amir},
  title     = {The Temporal Logic of Programs},
  booktitle = {Proceedings of the 18th Annual Symposium on Foundations of Computer Science},
  series    = {SFCS '77},
  pages     = {46--57},
  year      = {1977},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  doi       = {10.1109/SFCS.1977.32},
  url       = {https://doi.org/10.1109/SFCS.1977.32},
  numpages  = {12},
  acmid     = {1382534},
}

% De Giacomo and Vardi, IJCAI 2013.
% Cleaned citation-export residue: dropped the trailing period in the title
% and the spurious volume number, and spelled out the proceedings title.
@inproceedings{de2013linear,
  author    = {De Giacomo, Giuseppe and Vardi, Moshe Y.},
  title     = {Linear Temporal Logic and Linear Dynamic Logic on Finite Traces},
  booktitle = {Proceedings of the 23rd International Joint Conference on Artificial Intelligence ({IJCAI})},
  pages     = {854--860},
  year      = {2013},
}

% Stanford Encyclopedia of Philosophy, Spring 2016 archive.
% The archive link is stored as a raw url field; howpublished is not a
% field of incollection entries.
@incollection{sep-logic-modal,
  author    = {Garson, James},
  title     = {Modal Logic},
  booktitle = {The Stanford Encyclopedia of Philosophy},
  editor    = {Zalta, Edward N.},
  edition   = {Spring 2016},
  publisher = {Metaphysics Research Lab, Stanford University},
  year      = {2016},
  url       = {https://plato.stanford.edu/archives/spr2016/entries/logic-modal/},
}

% Stanford Encyclopedia of Philosophy, Spring 2018 archive.
% The archive link is stored as a raw url field; howpublished is not a
% field of incollection entries.
@incollection{sep-logic-classical,
  author    = {Shapiro, Stewart and Kouri Kissel, Teresa},
  title     = {Classical Logic},
  booktitle = {The Stanford Encyclopedia of Philosophy},
  editor    = {Zalta, Edward N.},
  edition   = {Spring 2018},
  publisher = {Metaphysics Research Lab, Stanford University},
  year      = {2018},
  url       = {https://plato.stanford.edu/archives/spr2018/entries/logic-classical/},
}
% Stanford Encyclopedia of Philosophy, Spring 2015 archive.
% The archive link is stored as a raw url field; howpublished is not a
% field of incollection entries.
@incollection{sep-logic-dynamic,
  author    = {Troquard, Nicolas and Balbiani, Philippe},
  title     = {Propositional Dynamic Logic},
  booktitle = {The Stanford Encyclopedia of Philosophy},
  editor    = {Zalta, Edward N.},
  edition   = {Spring 2015},
  publisher = {Metaphysics Research Lab, Stanford University},
  year      = {2015},
  url       = {https://plato.stanford.edu/archives/spr2015/entries/logic-dynamic/},
}

% Stanford Encyclopedia of Philosophy, Winter 2015 archive.
% The archive link is stored as a raw url field; howpublished is not a
% field of incollection entries.
@incollection{sep-logic-temporal,
  author    = {Goranko, Valentin and Galton, Antony},
  title     = {Temporal Logic},
  booktitle = {The Stanford Encyclopedia of Philosophy},
  editor    = {Zalta, Edward N.},
  edition   = {Winter 2015},
  publisher = {Metaphysics Research Lab, Stanford University},
  year      = {2015},
  url       = {https://plato.stanford.edu/archives/win2015/entries/logic-temporal/},
}

% Clarke, Grumberg and Peled, "Model Checking" (MIT Press).
% The first author uses the "Last, Jr, First" name form.
@book{Clarke:2000:MC:332656,
  author    = {Clarke, Jr., Edmund M. and Grumberg, Orna and Peled, Doron A.},
  title     = {Model Checking},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  year      = {1999},
  isbn      = {0-262-03270-8},
}

% Hopcroft, Motwani and Ullman, second edition.
% Removed the phantom fourth author "Rotwani" and restored the book's actual
% title; both were artefacts of the digital-library export.
@book{Hopcroft:2000:IAT:557657,
  author    = {Hopcroft, John E. and Motwani, Rajeev and Ullman, Jeffrey D.},
  title     = {Introduction to Automata Theory, Languages, and Computation},
  edition   = {Second},
  publisher = {Addison-Wesley Longman Publishing Co., Inc.},
  address   = {Boston, MA, USA},
  year      = {2000},
  isbn      = {0201441241},
}

% Sistla and Clarke, J. ACM 32(3), 1985.
@article{Sistla:1985:CPL:3828.3837,
  author     = {Sistla, A. P. and Clarke, E. M.},
  title      = {The Complexity of Propositional Linear Temporal Logics},
  journal    = {J. ACM},
  volume     = {32},
  number     = {3},
  pages      = {733--749},
  month      = jul,
  year       = {1985},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0004-5411},
  doi        = {10.1145/3828.3837},
  url        = {http://doi.acm.org/10.1145/3828.3837},
  issue_date = {July 1985},
  numpages   = {17},
  acmid      = {3837},
}

% Fischer and Ladner, JCSS 18(2), 1979.
% The doi is stored bare (no resolver prefix) and the page range uses an
% en-dash without surrounding spaces.
@article{FISCHER1979194,
  author  = {Fischer, Michael J. and Ladner, Richard E.},
  title   = {Propositional Dynamic Logic of Regular Programs},
  journal = {Journal of Computer and System Sciences},
  volume  = {18},
  number  = {2},
  pages   = {194--211},
  year    = {1979},
  issn    = {0022-0000},
  doi     = {10.1016/0022-0000(79)90046-1},
  url     = {http://www.sciencedirect.com/science/article/pii/0022000079900461},
}

% Pratt's 1976 MIT technical report.
% techreport entries require an institution field, so the issuing body is
% stored there. The report number is the one embedded in the source link.
@techreport{Pratt:1976:SCF:889769,
  author      = {Pratt, V. R.},
  title       = {Semantical Considerations on {Floyd-Hoare} Logic},
  institution = {Massachusetts Institute of Technology},
  number      = {MIT/LCS/TR-168},
  address     = {Cambridge, MA, USA},
  year        = {1976},
  source      = {http://www.ncstrl.org:8900/ncstrl/servlet/search?formname=detail\&id=oai%3Ancstrlh%3Amitai%3AMIT-LCS%2F%2FMIT%2FLCS%2FTR-168},
}

% Conference version of Pratt's Floyd-Hoare logic paper.
% Fixed the export typos in the title ("consideration", "floyo-hoare"),
% removed the empty volume/number fields, and normalised pages and month.
@inproceedings{4567894,
  author    = {Pratt, V. R.},
  title     = {Semantical Considerations on {Floyd-Hoare} Logic},
  booktitle = {17th Annual Symposium on Foundations of Computer Science (sfcs 1976)},
  pages     = {109--121},
  month     = oct,
  year      = {1976},
  doi       = {10.1109/SFCS.1976.27},
  issn      = {0272-5428},
  keywords  = {Boolean functions;Calculus;Logic;Mathematics;Testing},
}

% Pratt, JCSS 20(2), 1980.
% The doi is stored bare (no resolver prefix) and the page range uses an
% en-dash without surrounding spaces.
@article{PRATT1980231,
  author  = {Pratt, Vaughan R.},
  title   = {A Near-Optimal Method for Reasoning about Action},
  journal = {Journal of Computer and System Sciences},
  volume  = {20},
  number  = {2},
  pages   = {231--254},
  year    = {1980},
  issn    = {0022-0000},
  doi     = {10.1016/0022-0000(80)90061-6},
  url     = {http://www.sciencedirect.com/science/article/pii/0022000080900616},
}

% De Giacomo and Massacci, Inf. Comput. 162, 2000.
% "converse-PDL" is brace-protected so sentence-casing styles keep the
% acronym; the double issue is written as a range.
@article{deGiacomo:2000:CDM:359243.359271,
  author     = {De Giacomo, Giuseppe and Massacci, Fabio},
  title      = {Combining Deduction and Model Checking into Tableaux and Algorithms for {converse-PDL}},
  journal    = {Inf. Comput.},
  volume     = {162},
  number     = {1--2},
  pages      = {117--137},
  month      = oct,
  year       = {2000},
  publisher  = {Academic Press, Inc.},
  address    = {Duluth, MN, USA},
  issn       = {0890-5401},
  url        = {http://dl.acm.org/citation.cfm?id=359243.359271},
  issue_date = {Oct 2000},
  numpages   = {21},
  acmid      = {359271},
}

% Pesic and van der Aalst, BPM 2006 workshops.
@inproceedings{Pesic:2006:DAF:2135571.2135592,
  author    = {Pesic, M. and van der Aalst, W. M. P.},
  title     = {A Declarative Approach for Flexible Business Processes Management},
  booktitle = {Proceedings of the 2006 International Conference on Business Process Management Workshops},
  series    = {BPM'06},
  pages     = {169--180},
  year      = {2006},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  location  = {Vienna, Austria},
  isbn      = {3-540-38444-8, 978-3-540-38444-1},
  doi       = {10.1007/11837862_18},
  url       = {http://dx.doi.org/10.1007/11837862_18},
  numpages  = {12},
  acmid     = {2135592},
  keywords  = {declarative model specification, dynamic workflow, flexibility, temporal logic, workflow management},
}

% De Giacomo, De Masellis and Montali, AAAI 2014.
% The location held escaped HTML entities instead of an accented letter;
% it now uses the LaTeX accent. LTL is brace-protected in the title.
@inproceedings{DeGiacomo:2014:RLF:2893873.2894033,
  author    = {De Giacomo, Giuseppe and De Masellis, Riccardo and Montali, Marco},
  title     = {Reasoning on {LTL} on Finite Traces: Insensitivity to Infiniteness},
  booktitle = {Proceedings of the Twenty-Eighth AAAI Conference on Artificial Intelligence},
  series    = {AAAI'14},
  pages     = {1027--1033},
  year      = {2014},
  publisher = {AAAI Press},
  location  = {Qu{\'e}bec City, Qu{\'e}bec, Canada},
  url       = {http://dl.acm.org/citation.cfm?id=2893873.2894033},
  numpages  = {7},
  acmid     = {2894033},
}

% Technical-report record of "On the Temporal Analysis of Fairness".
% techreport entries require an institution field, so the issuing body
% given by the export is stored there. The report number is the one embedded
% in the source link. The address no longer repeats the country.
% NOTE(review): gabbay1980temporal in this file is the conference version of
% the same title - confirm which one the text should cite.
@techreport{Gabbay:1997:TAF:903586,
  author      = {Gabbay, D. and Pnueli, A. and Shelah, S. and Stavi, J.},
  title       = {On the Temporal Analysis of Fairness},
  institution = {Weizmann Science Press of Israel},
  number      = {CS97-13},
  address     = {Jerusalem, Israel},
  year        = {1997},
  source      = {http://www.ncstrl.org:8900/ncstrl/servlet/search?formname=detail\&id=oai%3Ancstrlh%3Aweizmann_il%3Ancstrl.weizmann_il%2F%2FCS97-13},
}

% Wolper, 1981 symposium paper.
% This is a conference paper, so it is typed as inproceedings and the
% symposium name lives in booktitle rather than journal.
@inproceedings{Wolper1981TemporalLC,
  author    = {Wolper, Pierre},
  title     = {Temporal Logic Can Be More Expressive},
  booktitle = {22nd Annual Symposium on Foundations of Computer Science (sfcs 1981)},
  pages     = {340--348},
  year      = {1981},
}

% Buchi, Mathematical Logic Quarterly 6, 1960.
% The author is recorded as "J. Richard" in comma form with the umlaut
% written as a BibTeX special character. The non-ASCII hyphens in the title
% and issue number are replaced by ASCII, and pages use an en-dash.
@article{doi:10.1002/malq.19600060105,
  author  = {B{\"u}chi, J. Richard},
  title   = {Weak Second-Order Arithmetic and Finite Automata},
  journal = {Mathematical Logic Quarterly},
  volume  = {6},
  number  = {1--6},
  pages   = {66--92},
  year    = {1960},
  doi     = {10.1002/malq.19600060105},
  url     = {https://onlinelibrary.wiley.com/doi/abs/10.1002/malq.19600060105},
  eprint  = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/malq.19600060105},
}

% Elgot, Trans. AMS 98(1), 1961.
@article{10.2307/1993511,
  author    = {Elgot, Calvin C.},
  title     = {Decision Problems of Finite Automata Design and Related Arithmetics},
  journal   = {Transactions of the American Mathematical Society},
  volume    = {98},
  number    = {1},
  pages     = {21--51},
  year      = {1961},
  publisher = {American Mathematical Society},
  issn      = {00029947},
  url       = {http://www.jstor.org/stable/1993511},
}

% Trakhtenbrot, Sov. Phys., Dokl. 6, 1961 (zbMATH export).
% The title is no longer wrapped in a second pair of braces and has lost its
% trailing period; the author is in comma form with spaced initials.
@article{zbMATH03186872,
  author    = {Trakhtenbrot, B. A.},
  title     = {Finite automata and the logic of single-place predicates},
  journal   = {Sov. Phys., Dokl.},
  fjournal  = {Soviet Physics. Doklady},
  volume    = {6},
  pages     = {753--755},
  year      = {1961},
  publisher = {Consultants Bureau, New York},
  issn      = {0038-5689},
  language  = {English},
  msc2010   = {03-XX 68Qxx},
  zbl       = {0115.00702},
}

% Thomas, Information and Control 42(2), 1979.
% The Greek letter in the title is written in LaTeX math, since classic
% BibTeX is 8-bit only. The doi is bare and pages use an en-dash.
@article{THOMAS1979148,
  author  = {Thomas, Wolfgang},
  title   = {Star-Free Regular Sets of {$\omega$}-Sequences},
  journal = {Information and Control},
  volume  = {42},
  number  = {2},
  pages   = {148--156},
  year    = {1979},
  issn    = {0019-9958},
  doi     = {10.1016/S0019-9958(79)90629-6},
  url     = {http://www.sciencedirect.com/science/article/pii/S0019995879906296},
}

% Khoussainov and Nerode, "Automata Theory and Its Applications".
% The publisher name regains the umlaut that the export dropped.
@book{Khoussainov:2001:ATA:558914,
  author    = {Khoussainov, Bakhadyr and Nerode, Anil},
  title     = {Automata Theory and Its Applications},
  publisher = {Birkh{\"a}user Boston, Inc.},
  address   = {Secaucus, NJ, USA},
  year      = {2001},
  isbn      = {3764342072},
}

% Rabin and Scott, IBM J. Res. Dev. 3(2), 1959.
@article{Rabin:1959:FAD:1661907.1661909,
  author     = {Rabin, M. O. and Scott, D.},
  title      = {Finite Automata and Their Decision Problems},
  journal    = {IBM J. Res. Dev.},
  volume     = {3},
  number     = {2},
  pages      = {114--125},
  month      = apr,
  year       = {1959},
  publisher  = {IBM Corp.},
  address    = {Riverton, NJ, USA},
  issn       = {0018-8646},
  doi        = {10.1147/rd.32.0114},
  url        = {http://dx.doi.org/10.1147/rd.32.0114},
  issue_date = {April 1959},
  numpages   = {12},
  acmid      = {1661909},
}

% De Giacomo and Vardi, IJCAI 2015.
% LTL and LDL are brace-protected in the title, and the proceedings title
% names the International Joint Conference (the series field is IJCAI'15).
@inproceedings{DeGiacomo:2015:SLL:2832415.2832466,
  author    = {De Giacomo, Giuseppe and Vardi, Moshe Y.},
  title     = {Synthesis for {LTL} and {LDL} on Finite Traces},
  booktitle = {Proceedings of the 24th International Joint Conference on Artificial Intelligence},
  series    = {IJCAI'15},
  pages     = {1558--1564},
  year      = {2015},
  publisher = {AAAI Press},
  location  = {Buenos Aires, Argentina},
  isbn      = {978-1-57735-738-4},
  url       = {http://dl.acm.org/citation.cfm?id=2832415.2832466},
  numpages  = {7},
  acmid     = {2832466},
}

% Kupferman and Vardi, Form. Methods Syst. Des. 19(3), 2001.
% The second author was split as surname "Y. Vardi", given name "Moshe";
% the middle initial now belongs to the given names.
% NOTE(review): month is oct while issue_date says November 2001 - confirm.
@article{Kupferman:2001:MCS:569028.569032,
  author     = {Kupferman, Orna and Vardi, Moshe Y.},
  title      = {Model Checking of Safety Properties},
  journal    = {Form. Methods Syst. Des.},
  volume     = {19},
  number     = {3},
  pages      = {291--314},
  month      = oct,
  year       = {2001},
  publisher  = {Kluwer Academic Publishers},
  address    = {Hingham, MA, USA},
  issn       = {0925-9856},
  doi        = {10.1023/A:1011254632723},
  url        = {https://doi.org/10.1023/A:1011254632723},
  issue_date = {November 2001},
  numpages   = {24},
  acmid      = {569032},
  keywords   = {automata, model checking, safety properties},
}

% Lacerda, Parker and Hawes, IJCAI 2015.
% LTL is brace-protected in the title, and the proceedings title names the
% International Joint Conference (the series field is IJCAI'15).
@inproceedings{Lacerda:2015:OPG:2832415.2832470,
  author    = {Lacerda, Bruno and Parker, David and Hawes, Nick},
  title     = {Optimal Policy Generation for Partially Satisfiable Co-safe {LTL} Specifications},
  booktitle = {Proceedings of the 24th International Joint Conference on Artificial Intelligence},
  series    = {IJCAI'15},
  pages     = {1587--1593},
  year      = {2015},
  publisher = {AAAI Press},
  location  = {Buenos Aires, Argentina},
  isbn      = {978-1-57735-738-4},
  url       = {http://dl.acm.org/citation.cfm?id=2832415.2832470},
  numpages  = {7},
  acmid     = {2832470},
}


% RL

% Sutton, Machine Learning 3(1), 1988.
% month uses the unquoted three-letter macro so styles can localise it.
@article{Sutton1988,
  author   = {Sutton, Richard S.},
  title    = {Learning to Predict by the Methods of Temporal Differences},
  journal  = {Machine Learning},
  volume   = {3},
  number   = {1},
  pages    = {9--44},
  month    = aug,
  day      = {01},
  year     = {1988},
  issn     = {1573-0565},
  doi      = {10.1007/BF00115009},
  url      = {https://doi.org/10.1007/BF00115009},
  abstract = "This article introduces a class of incremental learning procedures specialized for prediction-that is, for using past experience with an incompletely known system to predict its future behavior. Whereas conventional prediction-learning methods assign credit by means of the difference between predicted and actual outcomes, the new methods assign credit by means of the difference between temporally successive predictions. Although such temporal-difference methods have been used in Samuel's checker player, Holland's bucket brigade, and the author's Adaptive Heuristic Critic, they have remained poorly understood. Here we prove their convergence and optimality for special cases and relate them to supervised-learning methods. For most real-world prediction problems, temporal-difference methods require less memory and less peak computation than conventional methods and they produce more accurate predictions. We argue that most problems to which supervised learning is currently applied are really prediction problems of the sort to which temporal-difference methods can be applied to advantage.",
}


%REINFORCEMENT LEARNING
% Sutton and Barto, MIT Press, 1998.
% The digital-library export carried a rearranged title; the book with this
% ISBN is titled "Reinforcement Learning: An Introduction".
@book{Sutton:1998:IRL:551283,
  author    = {Sutton, Richard S. and Barto, Andrew G.},
  title     = {Reinforcement Learning: An Introduction},
  edition   = {First},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  year      = {1998},
  isbn      = {0262193981},
}

% Ng, Harada and Russell, ICML 1999.
@inproceedings{Ng:1999:PIU:645528.657613,
  author    = {Ng, Andrew Y. and Harada, Daishi and Russell, Stuart J.},
  title     = {Policy Invariance Under Reward Transformations: Theory and Application to Reward Shaping},
  booktitle = {Proceedings of the Sixteenth International Conference on Machine Learning},
  series    = {ICML '99},
  pages     = {278--287},
  year      = {1999},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  isbn      = {1-55860-612-2},
  url       = {http://dl.acm.org/citation.cfm?id=645528.657613},
  numpages  = {10},
  acmid     = {657613},
}

% Grzes, AAMAS 2017.
% The location held escaped HTML entities instead of an accented letter;
% it now uses the LaTeX accent. The author's accent is written as a BibTeX
% special character so sorting and labels treat it as one letter.
@inproceedings{Grzes:2017:RSE:3091125.3091208,
  author    = {Grze{\'s}, Marek},
  title     = {Reward Shaping in Episodic Reinforcement Learning},
  booktitle = {Proceedings of the 16th Conference on Autonomous Agents and MultiAgent Systems},
  series    = {AAMAS '17},
  pages     = {565--573},
  year      = {2017},
  publisher = {International Foundation for Autonomous Agents and Multiagent Systems},
  address   = {Richland, SC},
  location  = {S{\~a}o Paulo, Brazil},
  url       = {http://dl.acm.org/citation.cfm?id=3091125.3091208},
  numpages  = {9},
  acmid     = {3091208},
  keywords  = {multiagent learning, potential-based reward shaping, reinforcement learning, reward shaping, reward structures for learning},
}


% Devlin and Kudenko, AAMAS 2012.
@inproceedings{Devlin:2012:DPR:2343576.2343638,
  author    = {Devlin, Sam and Kudenko, Daniel},
  title     = {Dynamic Potential-based Reward Shaping},
  booktitle = {Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems - Volume 1},
  series    = {AAMAS '12},
  pages     = {433--440},
  year      = {2012},
  publisher = {International Foundation for Autonomous Agents and Multiagent Systems},
  address   = {Richland, SC},
  location  = {Valencia, Spain},
  isbn      = {0-9817381-1-7, 978-0-9817381-1-6},
  url       = {http://dl.acm.org/citation.cfm?id=2343576.2343638},
  numpages  = {8},
  acmid     = {2343638},
  keywords  = {reinforcement learning, reward shaping},
}

% Grzes, PhD thesis, University of York, 2010.
% The surname carries its accent, matching the spelling used by entry
% Grzes:2017:RSE:3091125.3091208 in this file, so the same person is not
% listed under two spellings.
@phdthesis{grzes2010improving,
  author = {Grze{\'s}, Marek},
  title  = {Improving Exploration in Reinforcement Learning through Domain Knowledge and Parameter Analysis},
  school = {University of York},
  year   = {2010},
}

% Record has only a title and an author, so it is typed as misc;
% inproceedings requires booktitle and year, which are not known here.
% NOTE(review): venue and year are missing - fill in before citing.
% The Greek letters are written in LaTeX math and the apostrophe is ASCII.
@misc{GuptaPerformanceCO,
  author = {Gupta, Karan M.},
  title  = {Performance Comparison of {Sarsa($\lambda$)} and {Watkin's} {Q($\lambda$)} Algorithms},
}


% Grzes and Kudenko, ICMLA 2009 (DBLP record).
% NOTE(review): entry 5381523 in this file has the same title, year and
% pages - likely the same paper under a second key.
@inproceedings{DBLP:conf/icmla/GrzesK09,
  author    = {Grzes, Marek and Kudenko, Daniel},
  title     = {Theoretical and Empirical Analysis of Reward Shaping in Reinforcement Learning},
  booktitle = {{ICMLA}},
  pages     = {337--344},
  year      = {2009},
  publisher = {{IEEE} Computer Society},
}

% Singh and Sutton, Mach. Learn. 22, 1996.
@article{Singh:1996:RLR:225667.225679,
  author     = {Singh, Satinder P. and Sutton, Richard S.},
  title      = {Reinforcement Learning with Replacing Eligibility Traces},
  journal    = {Mach. Learn.},
  volume     = {22},
  number     = {1-3},
  pages      = {123--158},
  month      = jan,
  year       = {1996},
  publisher  = {Kluwer Academic Publishers},
  address    = {Hingham, MA, USA},
  issn       = {0885-6125},
  doi        = {10.1007/BF00114726},
  url        = {http://dx.doi.org/10.1007/BF00114726},
  issue_date = {Jan./Feb./March 1996},
  numpages   = {36},
  acmid      = {225679},
  keywords   = {CMAC, Markov chain, Monte Carlo method, eligibility trace, reinforcement learning, temporal difference learning},
}
% Watkins, PhD thesis, 1989.
% phdthesis entries require a school field, which was missing.
@phdthesis{watkins1989learning,
  author = {Watkins, Christopher John Cornish Hellaby},
  title  = {Learning from Delayed Rewards},
  school = {King's College, University of Cambridge},
  year   = {1989},
}

% Watkins and Dayan, Machine Learning 8(3), 1992.
% month uses the unquoted macro, and the title is brace-protected so
% sentence-casing styles keep the capital Q.
@article{Watkins1992,
  author   = {Watkins, Christopher J. C. H. and Dayan, Peter},
  title    = {{Q-learning}},
  journal  = {Machine Learning},
  volume   = {8},
  number   = {3},
  pages    = {279--292},
  month    = may,
  day      = {01},
  year     = {1992},
  issn     = {1573-0565},
  doi      = {10.1007/BF00992698},
  url      = {https://doi.org/10.1007/BF00992698},
  abstract = "Q-learning (Watkins, 1989) is a simple way for agents to learn how to act optimally in controlled Markovian domains. It amounts to an incremental method for dynamic programming which imposes limited computational demands. It works by successively improving its evaluations of the quality of particular actions at particular states.",
}

% Bellman, "Dynamic Programming", Princeton University Press, 1957.
% The link keeps only the volume identifier; the removed search-session
% parameters contained unescaped ampersand, hash and percent characters,
% which break styles that typeset the url as ordinary text.
@book{Bellman:1957,
  author          = {Bellman, Richard},
  title           = {Dynamic Programming},
  edition         = {First},
  publisher       = {Princeton University Press},
  address         = {Princeton, NJ, USA},
  year            = {1957},
  url             = {http://books.google.com/books?id=fyVtp3EMxasC},
  bib2html_rescat = {General RL},
}

% Brafman, De Giacomo and Patrizi, arXiv preprint 1706.08100.
% The arXiv identifier, previously only embedded in the volume string, is
% also exposed through the eprint fields. Acronyms are brace-protected.
@article{Brafman2017SpecifyingNR,
  author        = {Brafman, Ronen I. and De Giacomo, Giuseppe and Patrizi, Fabio},
  title         = {Specifying {Non-Markovian} Rewards in {MDPs} Using {LDL} on Finite Traces (Preliminary Version)},
  journal       = {CoRR},
  volume        = {abs/1706.08100},
  year          = {2017},
  eprint        = {1706.08100},
  archiveprefix = {arXiv},
}

% Brafman, De Giacomo and Patrizi, AAAI 2018.
% "paper" is not a BibTeX entry type and "conference" is not a field that
% styles read; this is a conference paper, so it is typed as inproceedings
% with the venue in booktitle.
@inproceedings{AAAI1817342,
  author    = {Brafman, Ronen and De Giacomo, Giuseppe and Patrizi, Fabio},
  title     = {{LTLf/LDLf} {Non-Markovian} Rewards},
  booktitle = {{AAAI} Conference on Artificial Intelligence},
  year      = {2018},
  url       = {https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/view/17342},
  keywords  = {MDPs; non-Markovian Rewards; LTLf/LDLf},
  abstract  = {In Markov Decision Processes (MDPs), the reward obtained in a state is Markovian, i.e., depends on the last state and action. This dependency makes it difficult to reward more interesting long-term behaviors, such as always closing a door after it has been opened, or providing coffee only following a request. Extending MDPs to handle non-Markovian reward functions was the subject of two previous lines of work. Both use LTL variants to specify the reward function and then compile the new model back into a Markovian model. Building on recent progress in temporal logics over finite traces, we adopt LDLf for specifying non-Markovian rewards and provide an elegant automata construction for building a Markovian model, which extends that of previous work and offers strong minimality and compositionality guarantees.},
}


% Baier, Fritz, Bienvenu and McIlraith, AAAI 2008.
@inproceedings{Baier:2008:BCP:1620270.1620321,
  author    = {Baier, Jorge A. and Fritz, Christian and Bienvenu, Meghyn and McIlraith, Sheila A},
  title     = {Beyond Classical Planning: Procedural Control Knowledge and Preferences in State-of-the-art Planners},
  booktitle = {Proceedings of the 23rd National Conference on Artificial Intelligence - Volume 3},
  series    = {AAAI'08},
  pages     = {1509--1512},
  year      = {2008},
  publisher = {AAAI Press},
  location  = {Chicago, Illinois},
  isbn      = {978-1-57735-368-3},
  url       = {http://dl.acm.org/citation.cfm?id=1620270.1620321},
  numpages  = {4},
  acmid     = {1620321},
}

% Grzes and Kudenko, ICMLA 2009 (IEEE record).
% Removed the empty volume/number/ISSN fields, switched the page range to an
% en-dash and the month to the unquoted macro.
% NOTE(review): entry DBLP:conf/icmla/GrzesK09 in this file has the same
% title, year and pages - likely the same paper under a second key.
@inproceedings{5381523,
  author    = {Grzes, M. and Kudenko, D.},
  title     = {Theoretical and Empirical Analysis of Reward Shaping in Reinforcement Learning},
  booktitle = {2009 International Conference on Machine Learning and Applications},
  pages     = {337--344},
  month     = dec,
  year      = {2009},
  doi       = {10.1109/ICMLA.2009.33},
  keywords  = {Markov processes;decision theory;learning (artificial intelligence);Markov decision process discount factor;knowledge-based approaches;reinforcement learning;reward shaping;state space explosion;temporal credit assignment problem;Application software;Artificial intelligence;Computer science;Explosions;Machine learning;Optimal control;Performance analysis;Scalability;Shape control;State-space methods;heuristics;reinforcement learning;reward shaping},
}

% Dietterich, ICML 1998.
% The booktitle no longer starts with "In": styles add that word themselves,
% which produced "In In Proceedings". MAXQ is brace-protected in the title.
@inproceedings{Dietterich98themaxq,
  author    = {Dietterich, Thomas G.},
  title     = {The {MAXQ} Method for Hierarchical Reinforcement Learning},
  booktitle = {Proceedings of the Fifteenth International Conference on Machine Learning},
  pages     = {118--126},
  year      = {1998},
  publisher = {Morgan Kaufmann},
}


% state-of-the-art

% Toro Icarte, Klassen, Valenzano and McIlraith, 2018.
% The record was typed as an article but had no journal. It is typed as a
% conference paper with its venue, and the compound surname "Toro Icarte"
% is kept together.
% NOTE(review): venue and pages were added from memory of the published
% AAMAS 2018 version - confirm against the proceedings.
@inproceedings{icarte2018teaching,
  author    = {Toro Icarte, Rodrigo and Klassen, Toryn Q. and Valenzano, Richard and McIlraith, Sheila A.},
  title     = {Teaching Multiple Tasks to an {RL} Agent using {LTL}},
  booktitle = {Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems ({AAMAS})},
  pages     = {452--461},
  year      = {2018},
}

% Camacho, Muise, Baier and McIlraith, IJCAI 2018.
% Dropped the trailing period in the title, protected the LTL acronym, and
% spelled out the proceedings title.
@inproceedings{camacho2018ltl,
  author    = {Camacho, Alberto and Muise, Christian J. and Baier, Jorge A. and McIlraith, Sheila A.},
  title     = {{LTL} Realizability via Safety and Reachability Games},
  booktitle = {Proceedings of the 27th International Joint Conference on Artificial Intelligence ({IJCAI})},
  pages     = {4683--4691},
  year      = {2018},
}

% Camacho, Baier, Muise and McIlraith, ICAPS 2018.
% Dropped the trailing period in the title, protected the LTL acronym, and
% spelled out the proceedings title.
@inproceedings{camacho2018finite,
  author    = {Camacho, Alberto and Baier, Jorge A. and Muise, Christian J. and McIlraith, Sheila A.},
  title     = {Finite {LTL} Synthesis as Planning},
  booktitle = {Proceedings of the 28th International Conference on Automated Planning and Scheduling ({ICAPS})},
  pages     = {29--38},
  year      = {2018},
}

% Bacchus, Boutilier and Grove, 1996.
% The proceedings title is stored in normal title case instead of all
% capitals; casing is a presentation choice that belongs to the style.
@inproceedings{bacchus1996rewarding,
  author    = {Bacchus, Fahiem and Boutilier, Craig and Grove, Adam},
  title     = {Rewarding Behaviors},
  booktitle = {Proceedings of the National Conference on Artificial Intelligence},
  pages     = {1160--1167},
  year      = {1996},
}

% Thiebaux, Gretton, Slaney, Price and Kabanza, JAIR 25, 2006.
@article{ThiebauxGSPK06,
  author  = {Thi{\'{e}}baux, Sylvie and Gretton, Charles and Slaney, John K. and Price, David and Kabanza, Froduald},
  title   = {Decision-Theoretic Planning with non-Markovian Rewards},
  journal = {J. Artif. Intell. Res. {(JAIR)}},
  volume  = {25},
  pages   = {17--74},
  year    = {2006},
}

% Gretton, PRICAI 2014.
% Springer is the publisher of the proceedings, so it is stored in
% publisher; organization is meant for a sponsoring body.
@inproceedings{gretton2014more,
  author    = {Gretton, Charles},
  title     = {A More Expressive Behavioral Logic for Decision-Theoretic Planning},
  booktitle = {Pacific Rim International Conference on Artificial Intelligence},
  pages     = {13--25},
  year      = {2014},
  publisher = {Springer},
}


% Andreas, Klein and Levine, ICML 2017 (PMLR volume 70).
% month uses the unquoted macro; the day range that was mixed into it is
% kept in a separate eventdate annotation.
@inproceedings{pmlr-v70-andreas17a,
  author    = {Andreas, Jacob and Klein, Dan and Levine, Sergey},
  title     = {Modular Multitask Reinforcement Learning with Policy Sketches},
  booktitle = {Proceedings of the 34th International Conference on Machine Learning},
  editor    = {Precup, Doina and Teh, Yee Whye},
  series    = {Proceedings of Machine Learning Research},
  volume    = {70},
  pages     = {166--175},
  address   = {International Convention Centre, Sydney, Australia},
  month     = aug,
  eventdate = {06--11 Aug},
  year      = {2017},
  publisher = {PMLR},
  pdf       = {http://proceedings.mlr.press/v70/andreas17a/andreas17a.pdf},
  url       = {http://proceedings.mlr.press/v70/andreas17a.html},
  abstract  = {We describe a framework for multitask deep reinforcement learning guided by policy sketches. Sketches annotate tasks with sequences of named subtasks, providing information about high-level structural relationships among tasks but not how to implement them—specifically not providing the detailed guidance used by much previous work on learning policy abstractions for RL (e.g. intermediate rewards, subtask completion signals, or intrinsic motivations). To learn from sketches, we present a model that associates every subtask with a modular subpolicy, and jointly maximizes reward over full task-specific policies by tying parameters across shared subpolicies. Optimization is accomplished via a decoupled actor–critic training objective that facilitates learning common behaviors from multiple dissimilar reward functions. We evaluate the effectiveness of our approach in three environments featuring both discrete and continuous control, and with sparse rewards that can be obtained only after completing a number of high-level subgoals. Experiments show that using our approach to learn policies guided by sketches gives better performance than existing techniques for learning task-specific or shared policies, while naturally inducing a library of interpretable primitive behaviors that can be recombined to rapidly adapt to new tasks.}
}

% Elgaard, Klarlund and Moller, CAV 1998.
@inproceedings{mona1998,
  author    = {Elgaard, Jacob and Klarlund, Nils and M{\o}ller, Anders},
  title     = {{MONA} 1.x: new techniques for {WS1S} and {WS2S}},
  booktitle = {Proc. 10th International Conference on Computer-Aided Verification (CAV)},
  series    = {LNCS},
  volume    = {1427},
  pages     = {516--520},
  publisher = {Springer-Verlag},
  month     = {June/July},
  year      = {1998},
}

% Moller, BRICS progress report, Aarhus University, 2000.
% month uses the unquoted macro, the deprecated font switch is replaced by
% its command form, and the author's accent is written as a BibTeX special
% character like the other entries by this author in this file.
@mastersthesis{progress2000,
  author = {M{\o}ller, Anders},
  title  = {{MONA}, {DSD}, and {\texttt{<bigwig>}}},
  school = {Aarhus University},
  month  = may,
  year   = {2000},
  note   = {BRICS Ph.D.\ Progress Report},
}

% Klarlund and Moller, MONA 1.4 user manual, BRICS, 2001.
% Only the acronym is brace-protected instead of the whole title, and month
% uses the unquoted macro.
@manual{monamanual2001,
  author       = {Klarlund, Nils and M{\o}ller, Anders},
  title        = {{MONA} Version 1.4 User Manual},
  organization = {BRICS, Department of Computer Science, Aarhus University},
  month        = jan,
  year         = {2001},
  note         = {Notes Series NS-01-1.
                  Available from \texttt{\small http://www.brics.dk/mona/}.
                  Revision of BRICS NS-98-3},
}

% Cecconi, Di Ciccio, De Giacomo and Mendling, BPM 2018.
% Springer is the publisher of the proceedings, so it is stored in
% publisher. "Janus" and "LTLpf" are brace-protected in the title.
@inproceedings{cecconi2018interestingness,
  author    = {Cecconi, Alessio and Di Ciccio, Claudio and De Giacomo, Giuseppe and Mendling, Jan},
  title     = {Interestingness of Traces in Declarative Process Mining: The {Janus} {LTLpf} Approach},
  booktitle = {International Conference on Business Process Management},
  pages     = {121--138},
  year      = {2018},
  publisher = {Springer},
}

% Markey, EATCS Bulletin 79, 2003.
@article{markey2003temporal,
  author    = {Markey, Nicolas},
  title     = {Temporal logic with past is exponentially more succinct},
  journal   = {EATCS Bulletin},
  volume    = {79},
  pages     = {122--128},
  publisher = {European Association for Theoretical Computer Science},
  year      = {2003},
}

% Zhu, Pu and Vardi, Women in Logic 2018.
% The second author lacked the comma after the surname, so BibTeX read
% "Geguang" as the family name. LTLf is brace-protected in the title.
@misc{zpv2018,
  author       = {Zhu, Shufang and Pu, Geguang and Vardi, Moshe Y.},
  title        = {First-Order vs. Second-Order for {LTLf}-to-Automata: An Extended Abstract},
  howpublished = {Women in Logic},
  year         = {2018},
}

% Gabbay, Pnueli, Shelah and Stavi, POPL 1980.
% ACM is the publisher of the proceedings, so it is stored in publisher;
% organization is meant for a sponsoring body.
@inproceedings{gabbay1980temporal,
  author    = {Gabbay, Dov and Pnueli, Amir and Shelah, Saharon and Stavi, Jonathan},
  title     = {On the Temporal Analysis of Fairness},
  booktitle = {Proceedings of the 7th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages},
  pages     = {163--173},
  year      = {1980},
  publisher = {ACM},
}

% Kamp, PhD thesis, UCLA, 1968.
@phdthesis{Kamp1968,
  author = {Kamp, Hans},
  title  = {Tense Logic and the Theory of Linear Order},
  school = {University of California Los Angeles},
  year   = {1968},
  note   = {Published as Johan Anthony Willem Kamp},
  url    = {http://www.ims.uni-stuttgart.de/archiv/kamp/files/1968.kamp.thesis.pdf},
}

% Biehl, Klarlund and Rauhe, Workshop on Implementing Automata, 1996.
% Springer is the publisher of the proceedings, so it is stored in
% publisher; organization is meant for a sponsoring body.
@inproceedings{biehl1996algorithms,
  author    = {Biehl, Morten and Klarlund, Nils and Rauhe, Theis},
  title     = {Algorithms for Guided Tree Automata},
  booktitle = {International Workshop on Implementing Automata},
  pages     = {6--25},
  year      = {1996},
  publisher = {Springer},
}

% Henriksen et al., TACAS 1995.
% Springer is the publisher of the proceedings, so it is stored in
% publisher. The tool name is brace-protected in the title.
@inproceedings{henriksen1995mona,
  author    = {Henriksen, Jesper G. and Jensen, Jakob and J{\o}rgensen, Michael and Klarlund, Nils and Paige, Robert and Rauhe, Theis and Sandholm, Anders},
  title     = {{Mona}: Monadic Second-Order Logic in Practice},
  booktitle = {International Workshop on Tools and Algorithms for the Construction and Analysis of Systems},
  pages     = {89--110},
  year      = {1995},
  publisher = {Springer},
}

% Montali, chapter in a 2010 Springer volume.
@incollection{montali2010declarative,
  author    = {Montali, Marco},
  title     = {Declarative process mining},
  booktitle = {Specification and verification of declarative open interaction models},
  pages     = {343--365},
  publisher = {Springer},
  year      = {2010},
}

% Pesic, PhD thesis, 2008.
% The school field now leads with the degree-granting university (taken from
% this record's publisher field) followed by the department, instead of the
% department alone.
@phdthesis{pesic2008constraint,
  author = "M. Pesic",
  title = "Constraint-based workflow management systems: shifting control to users",
  school = "Technische Universiteit Eindhoven, Department of Industrial Engineering \& Innovation Sciences",
  year = "2008",
  doi = "10.6100/IR638413",
  isbn = "978-90-386-1319-2",
  language = "English",
  publisher = "Technische Universiteit Eindhoven",
  abstract = "Many organizations use information technology to support various aspects of their business processes: the operational aspect, collaboration between employees, etc. Workow management systems aim at supporting the operational aspect of complex business processes by using process models to automate the ordering of activities (i.e., ow of work). The term `support' here relates to the ability of workow management systems to control the execution of business processes. Contemporary workow management systems lack exibility, i.e., the system controls in detail how employees should execute business processes. While workow management systems deal well with predictable business processes, they are not able to handle unforeseen situations, which occur often in real-life business processes. Although employees mostly have the knowledge and experience that enables them to deal with exceptional situations, they are not able to apply the right action because the system enforces the standard procedure of work. This often has various undesired consequences: work is done `outside' the system, work cannot be done in the appropriate way, dissatisfaction of employees, resistance towards the system, etc. As a result, workow management systems cannot be used properly if it is necessary that employees control the execution of business processes. This thesis proposes a new approach to workow management systems that can facilitate contemporary business processes in a better way by enabling a better balance between exibility and support. As opposed to traditional approaches which use procedural process models to explicitly (i.e., step-by-step) specify the execution procedure, the proposed approach aims at the specification of business processes using constraints, i.e., processes are modeled by rules that should be followed while executing business processes. 
Constraint-based models implicitly specify the execution procedure by means of constraints: any execution that does not violate constraints is possible. In addition to proposing a constrainbased approach, a concrete language for specification of constraints is given and the proof-of-concept prototype declare is described. On the one hand, constraint-based management systems are exible, which allows employees to deal with specific (e/g/. unpredicted) situations in the most adequate way. On the other hand, constraint-based management systems can support employees when it comes to aspects of business processes that are too complex for humans to handle. There are several ways in which constraintbased management systems can provide both exibility and support.",
}

% Disabled older record for key pesic2008constraint.
% The leading at-sign is removed on purpose: BibTeX has no comment character,
% so a percent sign does not stop an at-sign from opening an entry, and the
% parser would otherwise report a repeated key and a syntax error here.
%article{pesic2008constraint,
%  title={Constraint-based workflow management systems: shifting control to users},
%  author={Pesic, Maja},
%  year={2008}
%}

% Dwyer, Avrunin and Corbett, ICSE 1999.
% ACM is the publisher of the proceedings, so it is stored in publisher;
% organization is meant for a sponsoring body.
@inproceedings{dwyer1999patterns,
  author    = {Dwyer, Matthew B. and Avrunin, George S. and Corbett, James C.},
  title     = {Patterns in Property Specifications for Finite-State Verification},
  booktitle = {Proceedings of the 21st International Conference on Software Engineering},
  pages     = {411--420},
  year      = {1999},
  publisher = {ACM},
}

% Maggi, Bose and van der Aalst, CAiSE 2013.
% Glued initials ("RP", "MP") are separated so styles can abbreviate given
% names correctly; Springer is stored as publisher; the "Declare" language
% name is brace-protected in the title.
@inproceedings{maggi2013knowledge,
  author    = {Maggi, Fabrizio M. and Bose, R. P. Jagadeesh Chandra and van der Aalst, Wil M. P.},
  title     = {A Knowledge-Based Integrated Approach for Discovering and Repairing {Declare} Maps},
  booktitle = {International Conference on Advanced Information Systems Engineering},
  pages     = {433--448},
  year      = {2013},
  publisher = {Springer},
}

% Gabbay, in "Temporal Logic in Specification", 1989.
% Springer is the publisher of the volume, so it is stored in publisher;
% organization is meant for a sponsoring body.
@inproceedings{gabbay1989declarative,
  author    = {Gabbay, Dov},
  title     = {The Declarative Past and Imperative Future},
  booktitle = {Temporal Logic in Specification},
  pages     = {409--448},
  year      = {1989},
  publisher = {Springer},
}

% OpyenXES demo paper (BPM 2018 demo track), as exported from DBLP.
% Fields not given here are inherited from the crossref parent entry.
@inproceedings{DBLP:conf/bpm/ValdiviesoLMS18,
  title = {OpyenXES: {A} Complete Python Library for the eXtensible Event Stream
               Standard},
  author = {Hernan Valdivieso and
               Wai Lam Jonathan Lee and
               Jorge Munoz{-}Gama and
               Marcos Sep{\'{u}}lveda},
  booktitle = {Proceedings of the Dissertation Award, Demonstration, and Industrial
               Track at {BPM} 2018 co-located with 16th International Conference
               on Business Process Management {(BPM} 2018), Sydney, Australia, September
               9-14, 2018.},
  year = {2018},
  pages = {71--75},
  url = {http://ceur-ws.org/Vol-2196/BPM\_2018\_paper\_15.pdf},
  crossref = {DBLP:conf/bpm/2018d},
  biburl = {https://dblp.org/rec/bib/conf/bpm/ValdiviesoLMS18},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 12 Sep 2018 09:54:38 +0200},
}

% Planning with temporally extended goals (journal article, 1998).
% The double issue is written as a proper range (1--2).
@article{bacchus1998planning,
  author    = {Bacchus, Fahiem and Kabanza, Froduald},
  title     = {Planning for Temporally Extended Goals},
  journal   = {Annals of Mathematics and Artificial Intelligence},
  volume    = {22},
  number    = {1--2},
  pages     = {5--27},
  year      = {1998},
  publisher = {Springer}
}

% TALplanner overview article in AI Magazine (2001).
% The accented surname uses a BibTeX special character so it sorts correctly;
% the planner name is brace-protected to keep its capitalisation.
@article{doherty2001talplanner,
  author  = {Doherty, Patrick and Kvarnstr{\"o}m, Jonas},
  title   = {{TALplanner}: A Temporal Logic-Based Planner},
  journal = {AI Magazine},
  volume  = {22},
  number  = {3},
  pages   = {95--102},
  year    = {2001}
}

% Non-deterministic planning with LTL goals over finite and infinite traces (AAAI 2017).
% No trailing period in the title: the bibliography style adds punctuation itself.
@inproceedings{camacho2017non,
  author    = {Camacho, Alberto and Triantafillou, Eleni and Muise, Christian J. and Baier, Jorge A. and McIlraith, Sheila A.},
  title     = {Non-Deterministic Planning with Temporally Extended Goals: {LTL} over Finite and Infinite Traces},
  booktitle = {AAAI},
  pages     = {3716--3724},
  year      = {2017}
}

% Textbook on automated planning (2004).
@book{ghallab2004automated,
  author    = {Ghallab, Malik and Nau, Dana and Traverso, Paolo},
  title     = {Automated Planning: Theory and Practice},
  year      = {2004},
  publisher = {Elsevier}
}

% Synthesis lecture giving a concise introduction to planning models and methods.
@article{geffner2013concise,
  author    = {Geffner, Hector and Bonet, Blai},
  title     = {A concise introduction to models and methods for automated planning},
  journal   = {Synthesis Lectures on Artificial Intelligence and Machine Learning},
  publisher = {Morgan \& Claypool Publishers},
  year      = {2013},
  volume    = {8},
  number    = {1},
  pages     = {1--141},
}

% Weak / strong / strong cyclic planning via symbolic model checking (journal article, 2003).
% The double issue is written as a proper range (1--2).
@article{cimatti2003weak,
  author    = {Cimatti, Alessandro and Pistore, Marco and Roveri, Marco and Traverso, Paolo},
  title     = {Weak, Strong, and Strong Cyclic Planning via Symbolic Model Checking},
  journal   = {Artificial Intelligence},
  volume    = {147},
  number    = {1--2},
  pages     = {35--84},
  year      = {2003},
  publisher = {Elsevier}
}

% Complexity results for planning under partial observability (ICAPS 2004).
% No trailing period in the title: the bibliography style adds punctuation itself.
@inproceedings{rintanen2004complexity,
  author    = {Rintanen, Jussi},
  title     = {Complexity of Planning with Partial Observability},
  booktitle = {ICAPS},
  pages     = {345--354},
  year      = {2004}
}

% Book on reasoning about knowledge (2004 edition listed here).
@book{fagin2004reasoning,
  author    = {Fagin, Ronald and Halpern, Joseph Y. and Moses, Yoram and Vardi, Moshe Y.},
  title     = {Reasoning About Knowledge},
  year      = {2004},
  publisher = {MIT Press}
}

% Past-time temporal logic paper from the Workshop on Logic of Programs (1985).
% Springer is the publisher of the proceedings, not a sponsoring organization.
@inproceedings{lichtenstein1985glory,
  author    = {Lichtenstein, Orna and Pnueli, Amir and Zuck, Lenore},
  title     = {The Glory of the Past},
  booktitle = {Workshop on Logic of Programs},
  pages     = {196--218},
  year      = {1985},
  publisher = {Springer}
}

% arXiv preprint on symbolic LTLf synthesis (2017).
% The journal field is kept so classic styles still print the arXiv reference;
% eprint/archiveprefix expose the identifier to styles that understand them.
@article{zhu2017symbolic,
  author        = {Zhu, Shufang and Tabajara, Lucas M. and Li, Jianwen and Pu, Geguang and Vardi, Moshe Y.},
  title         = {Symbolic {LTLf} Synthesis},
  journal       = {arXiv preprint arXiv:1705.08426},
  year          = {2017},
  eprint        = {1705.08426},
  archiveprefix = {arXiv}
}

% arXiv preprint on SAT-based compact policies for FOND planning (2018).
% The journal field is kept so classic styles still print the arXiv reference;
% eprint/archiveprefix expose the identifier to styles that understand them.
@article{geffner2018compact,
  author        = {Geffner, Tomas and Geffner, Hector},
  title         = {Compact Policies for Fully-Observable Non-Deterministic Planning as {SAT}},
  journal       = {arXiv preprint arXiv:1806.09455},
  year          = {2018},
  eprint        = {1806.09455},
  archiveprefix = {arXiv}
}

% Automata-theoretic treatment of FOND planning for LTLf/LDLf goals (IJCAI 2018).
% Acronyms are brace-protected; the style supplies the final period.
@inproceedings{de2018automata,
  author    = {De Giacomo, Giuseppe and Rubin, Sasha},
  title     = {Automata-Theoretic Foundations of {FOND} Planning for {LTLf} and {LDLf} Goals},
  booktitle = {IJCAI},
  pages     = {4729--4735},
  year      = {2018}
}

% Default reasoning and the frame problem (AAAI 1986).
% No trailing period in the title: the bibliography style adds punctuation itself.
@inproceedings{hanks1986default,
  author    = {Hanks, Steve and McDermott, Drew V.},
  title     = {Default Reasoning, Nonmonotonic Logics, and the Frame Problem},
  booktitle = {AAAI},
  volume    = {86},
  pages     = {328--333},
  year      = {1986}
}

% FOND planning via translation into a general game (2009 conference paper).
% Springer is the publisher of the proceedings, not a sponsoring organization.
@inproceedings{kissmann2009solving,
  author    = {Kissmann, Peter and Edelkamp, Stefan},
  title     = {Solving Fully-Observable Non-Deterministic Planning Problems via Translation into a General Game},
  booktitle = {Annual Conference on Artificial Intelligence},
  pages     = {1--8},
  year      = {2009},
  publisher = {Springer}
}

% Pattern database heuristics for FOND planning (2010).
% Typed as misc because no journal is recorded; an article entry without a
% journal triggers a missing-field warning.
% NOTE(review): venue/type of this work is not recorded here - TODO confirm and
% switch to the matching entry type (thesis, inproceedings, ...) once known.
@misc{bercher2010pattern,
  author = {Bercher, Pascal},
  title  = {Pattern Database Heuristics for Fully Observable Nondeterministic Planning},
  year   = {2010}
}

% Regression-based planning for non-deterministic domains (ICAPS 2014).
% No trailing period in the title: the bibliography style adds punctuation itself.
@inproceedings{ramirez2014directed,
  author    = {Ramirez, Miquel and Sardina, Sebastian},
  title     = {Directed Fixed-Point Regression-Based Planning for Non-Deterministic Domains},
  booktitle = {ICAPS},
  year      = {2014}
}

% Using classical planners for nondeterministic planning problems (2008 conference paper).
% Booktitle no longer repeats "International Conference on"; AAAI Press is the publisher.
@inproceedings{kuter2008using,
  author    = {Kuter, Ugur and Nau, Dana and Reisner, Elnatan and Goldman, Robert P.},
  title     = {Using Classical Planners to Solve Nondeterministic Planning Problems},
  booktitle = {Proceedings of the Eighteenth International Conference on Automated Planning and Scheduling},
  pages     = {190--197},
  year      = {2008},
  publisher = {AAAI Press}
}

% Strong cyclic planning for FOND problems (IJCAI 2011).
% volume/number are not used: standard styles reject both together on a
% conference paper, and the conference ordinal belongs in the booktitle.
@inproceedings{fu2011simple,
  author    = {Fu, Jicheng and Ng, Vincent and Bastani, Farokh B. and Yen, I-Ling},
  title     = {Simple and Fast Strong Cyclic Planning for Fully-Observable Nondeterministic Planning Problems},
  booktitle = {Proceedings of the Twenty-Second International Joint Conference on Artificial Intelligence ({IJCAI})},
  pages     = {1949--1954},
  year      = {2011}
}

% Non-deterministic planning exploiting state relevance (ICAPS 2012).
% No trailing period in the title: the bibliography style adds punctuation itself.
@inproceedings{muise2012improved,
  author    = {Muise, Christian J. and McIlraith, Sheila A. and Beck, J. Christopher},
  title     = {Improved Non-Deterministic Planning by Exploiting State Relevance},
  booktitle = {ICAPS},
  year      = {2012}
}

% Original PDDL language definition (1998).
% Typed as a technical report: an article entry requires a journal, which this work does not have.
% NOTE(review): institution and report number are filled in from the commonly
% cited form of this report - confirm against the report itself.
@techreport{mcdermott1998pddl,
  author      = {McDermott, Drew and Ghallab, Malik and Howe, Adele and Knoblock, Craig and Ram, Ashwin and Veloso, Manuela and Weld, Daniel and Wilkins, David},
  title       = {{PDDL}---The Planning Domain Definition Language},
  institution = {Yale Center for Computational Vision and Control},
  number      = {CVC TR-98-003/DCS TR-1165},
  year        = {1998}
}

% Chandra, Kozen and Stockmeyer's "Alternation" paper (Journal of the ACM, 1981).
% Reconstructed from a mangled export in which the title had been parsed as a
% fourth author and the journal/volume/month were spread over title and publisher.
% NOTE(review): pages and doi were added from memory of the published record - confirm.
@article{chandra1981acm,
  author  = {Chandra, Ashok K. and Kozen, Dexter C. and Stockmeyer, Larry J.},
  title   = {Alternation},
  journal = {Journal of the ACM},
  volume  = {28},
  number  = {1},
  pages   = {114--133},
  month   = jan,
  year    = {1981},
  doi     = {10.1145/322234.322243}
}