diff --git a/docs/programmers/tutorials/workflows/downloads/simAsset.R b/docs/programmers/tutorials/workflows/downloads/simAsset.R index 1dc7610d..f760866f 100755 --- a/docs/programmers/tutorials/workflows/downloads/simAsset.R +++ b/docs/programmers/tutorials/workflows/downloads/simAsset.R @@ -112,5 +112,5 @@ paste("DEBUG: nsims=", nsims); S <- assetPaths(S0, mu, sig, dt, etime, nsims); ## write out results -write.csv(S, "results.csv", row.names=F); +write.table(t(S), "results.csv", row.names=FALSE, col.names=FALSE, sep=","); plotPaths(S, "results.pdf"); diff --git a/docs/programmers/tutorials/workflows/part06.pdf b/docs/programmers/tutorials/workflows/part06.pdf index ececeb16..73cb658b 100644 Binary files a/docs/programmers/tutorials/workflows/part06.pdf and b/docs/programmers/tutorials/workflows/part06.pdf differ diff --git a/docs/programmers/tutorials/workflows/part06.tex b/docs/programmers/tutorials/workflows/part06.tex index c93695c9..668239ed 100644 --- a/docs/programmers/tutorials/workflows/part06.tex +++ b/docs/programmers/tutorials/workflows/part06.tex @@ -17,7 +17,7 @@ \\[1ex] University of Zurich } -\date{November~14--17, 2016} +\date{January~23--27, 2017} \begin{document} @@ -224,7 +224,10 @@ \part{Post-processing} \+ The path to the output directory is available as - \lstinline|self.output_dir|; if \texttt{stdout} and \texttt{stderr} + \lstinline|self.output_dir|. + + \+ + If \texttt{stdout} and \texttt{stderr} have been captured, the \textbf{relative} paths to the capture files are available as \lstinline|self.stdout| and \lstinline|self.stderr|. 
@@ -249,6 +252,32 @@ \part{Post-processing} \end{frame} +\begin{frame}[fragile] + \frametitle{Useful in post-processing}\small + + These attributes are available in the \texttt{terminated()} method: + + \+ + \begin{describe}{\lstinline|self.inputs|} + Python dictionary, mapping local (absolute) paths to remote paths (relative + to execution directory) + \end{describe} + + \+ + \begin{describe}{\lstinline|self.outputs|} + Python dictionary, mapping remote paths (relative to execution directory) to + \emph{URLs} where they have been copied. In particular, + \lstinline|self.outputs.keys()| is the list of output file names. + \end{describe} + + \+ + \begin{describe}{\lstinline|self.output_dir|} + Path to the local directory where output files have been downloaded. + \end{describe} + +\end{frame} + + \begin{frame} \begin{exercise*}[6.A] @@ -308,7 +337,7 @@ \part{Termination status} 7 signal}} for a list of OS signals and their numeric values. \+ - {\bfseries Note that GC3Pie overloads some signal codes (unused + {\bfseries Note that GC3Pie uses some signal codes (not used by the OS) to represent its own specific errors.} \+ @@ -402,7 +431,7 @@ \part{Termination status} \begin{frame}[fragile] - \begin{exercise*}[6.C] \emph{(Difficult)} \small + \begin{exercise*}[6.C] \emph{(Difficult)} \footnotesize MATLAB has the annoying habit of exiting with code 0 even when some error occurred. @@ -413,13 +442,14 @@ \part{Termination status} script file, like this: \texttt{app = MatlabApp("\href{https://github.com/uzh/gc3pie/blob/master/docs/programmers/tutorials/workflows/downloads/ra.m}{ra.m}")}; \item Runs the following command: \begin{semiverbatim} -matlab -nodesktop -nojvm \emph{file.m} +matlab -nodesktop -nojvm -r \emph{file} \end{semiverbatim} where \emph{file.m} is the file given to the \texttt{MatlabApp()} constructor. 
- \item captures the standard error output (\texttt{stderr}) of the - MATLAB script and, if the string ``\texttt{Out of memory.}'' - occurs in it, sets the application exitcode to 11. + \item captures the standard error output (\texttt{stderr}) of the MATLAB + script and, if one of the strings ``\texttt{Out of memory.}'' or + ``\texttt{exceeds maximum array size}'' occurs in it, sets the application + exitcode to 11. \end{itemize} Verify that it works by running MATLAB script @@ -430,59 +460,76 @@ \part{Termination status} \end{frame} -% \begin{frame}[fragile] -% \frametitle{Global post-processing} -% To add some code which will be executed \emph{just before the script -% exits,} add a \lstinline|after_main_loop| method: - -% \begin{python} -% def after_main_loop(self): -% model_names = {} -% for app in ~\HL{self.session.tasks.values()}~: -% if app.execution.state != Run.State.TERMINATED: -% return -% if app.model_name in model_names: -% model_names[app.model_name] += 1 -% else: -% model_names[app.model_name] = 1 -% \end{python} - -% \begin{itemize} -% \item \lstinline|self.session.tasks| is a map -% \lstinline|JobID|~$\Rightarrow$~\lstinline|Application| object -% \item \lstinline|self.session.tasks.values()| thus contains a list -% of all the \textbf{Application}s created by the \lstinline|new_tasks| -% \end{itemize} -% \end{frame} - - \begin{frame} - \frametitle{Global post-processing} + \frametitle{Global post-processing, I} Further options for customizing a session-based script: -\begin{description} -\item [\texttt{before\_main\_loop(self)}] to execute some code - \emph{before} the submission of the jobs. -\item [\texttt{after\_main\_loop(self)}] to execute some code - \emph{after} the main loop. A list of all Application objects is - available in the \lstinline|self.session.tasks.values()| list. -\end{description} + +\+ +\begin{describe}{\lstinline|before_main_loop(self)|} + to execute some code \emph{before} the main loop starts. 
+\end{describe} + +\+ +\begin{describe}{\lstinline|after_main_loop(self)|} + to execute some code \emph{after} the main loop, i.e., before the script + quits. A list of all Application objects is available in the + \lstinline|self.session.tasks.values()| list. +\end{describe} +\end{frame} + + +\begin{frame}[fragile] + \frametitle{Global post-processing, II} + Example: compute statistical distribution of termination statuses: + + \begin{python} +def after_main_loop(self): + # check that all tasks are terminated + can_postprocess = True + for task in self.session.tasks.values(): + if task.execution.state != 'TERMINATED': + can_postprocess = False + break + if can_postprocess: + # do stuff... (see next slide) + \end{python} +\end{frame} + + +\begin{frame}[fragile] + \frametitle{Global post-processing, III} + Example: compute statistical distribution of termination statuses (cont'd): + + \begin{python} +def after_main_loop(self): + # ... (see prev slide) + if can_postprocess: + status_counts = defaultdict(int) + for app in self.session.tasks.values(): + termstatus = app.execution.returncode + status_counts[termstatus] += 1 + \end{python} + + \+\small Variable \lstinline|self.session.tasks| holds a mapping + \lstinline|JobID|~$\Rightarrow$~\lstinline|Application|; thus + \lstinline|self.session.tasks.values()| is a list of all the + \texttt{Application} instances returned by \lstinline|new_tasks| \end{frame} \begin{frame} - \frametitle{Detour: Asian ``put'' option pricing, I} - \small - The script \texttt{simAsset.R} simulates pricing - \href{https://en.wikipedia.org/wiki/Asian_option}{Asian ``put'' - options} over a certain amount of time. Different pricing paths - are generated, all starting from the same initial price. + \frametitle{Detour: asset pricing, I} + \small The script \texttt{simAsset.R} simulates asset pricing over a certain + amount of time. 
Different pricing paths are generated using a + \href{https://en.wikipedia.org/wiki/Wiener_process}{1D Brownian motion}, + all starting from the same initial price. \begin{center} \includegraphics[width=0.75\linewidth]{fig/simAsset.pdf} \end{center} \end{frame} \begin{frame}[fragile] - \frametitle{Detour: Asian ``put'' option pricing, II} + \frametitle{Detour: asset pricing, II} \small You can run the \texttt{simAsset.R} script with these positional parameters: \begin{description} @@ -516,7 +563,7 @@ \part{Termination status} \item takes the same command-line positional arguments as \texttt{simAsset.R}, \emph{plus} an additional integer trailing parameter $P$; \item runs \texttt{simAsset.R} (in parallel) $P$ times with the given arguments (so, effectively simulates $N \cdot P$ price paths); \item reads all the generated \texttt{results.csv} files, and - \item computes and prints the average value of the option at the end of the simulated time, across all $N \cdot P$ price paths. + \item computes and prints the average value of the asset at the end of the simulated time, across all $N \cdot P$ price paths. \end{itemize} \+ {\footnotesize (For easier reading CSV files, you can use the standard diff --git a/docs/programmers/tutorials/workflows/solutions/ex6a.py b/docs/programmers/tutorials/workflows/solutions/ex6a.py new file mode 100755 index 00000000..f417a74a --- /dev/null +++ b/docs/programmers/tutorials/workflows/solutions/ex6a.py @@ -0,0 +1,79 @@ +#! /usr/bin/env python + +""" +Exercise 6.A: In the ``colorize.py`` script from Exercise 4.A, +modify the ColorizeApp application to move the output picture file +into directory ``/home/ubuntu/pictures``. You might need to store the +output file name to have it available when the application has +terminated running. 
+""" + +import os +from os.path import abspath, basename, exists, join +import sys + +from gc3libs import Application, log +from gc3libs.cmdline import SessionBasedScript + + +if __name__ == '__main__': + from ex6a import ColorizeScript + ColorizeScript().run() + + +class ColorizeScript(SessionBasedScript): + """ + Colorize multiple images and collect results + into directory ``./pictures`` + """ + def __init__(self): + super(ColorizeScript, self).__init__(version='1.0') + def setup_args(self): + self.add_param('colors', nargs=3, help="Three colors") + self.add_param('images', nargs='+', help="Images to colorize") + def new_tasks(self, extra): + col1, col2, col3 = self.params.colors + apps_to_run = [] + for input_file in self.params.images: + input_file = abspath(input_file) + apps_to_run.append(ColorizeApp(input_file, col1, col2, col3)) + return apps_to_run + + +from shutil import move + +from gc3libs.quantity import GB + +class ColorizeApp(Application): + """Add colors to a grayscale image.""" + def __init__(self, img, col1, col2, col3): + inp = basename(img) + # need to save this for later reference in ``terminated()`` + self.output_file_name = "color-" + inp + Application.__init__( + self, + arguments=[ + "convert", inp, + "(", "xc:"+col1, "xc:"+col2, "xc:"+col3, "+append", ")", "-clut", + self.output_file_name], + inputs=[img], + outputs=[self.output_file_name], + output_dir="colorized-" + inp + ".d", + stdout="stdout.txt", + stderr="stderr.txt", + # required for running on the cloud, see GC3Pie issue #559 + requested_memory=1*GB) + def terminated(self): + # full path to output file on local filesystem + output_file = join(self.output_dir, self.output_file_name) + # if the output file is not there, log an error and exit + if not exists(output_file): + log.error("Expected output file `%s` from %s does not exists!", + output_file, self) + return + # ensure destination directory exists + if not exists('pictures'): + os.mkdir('pictures') + # the trailing slash 
ensures `shutil.move` raises an error if + # the destination exists but is not a directory + move(output_file, 'pictures/') diff --git a/docs/programmers/tutorials/workflows/solutions/ex6b.py b/docs/programmers/tutorials/workflows/solutions/ex6b.py new file mode 100755 index 00000000..8cb7170e --- /dev/null +++ b/docs/programmers/tutorials/workflows/solutions/ex6b.py @@ -0,0 +1,57 @@ +#! /usr/bin/env python + +""" +Exercise 6.B: Modify the grayscaling script ex2c (or the code it +depends upon) so that, when a ``GrayscaleApp`` task has terminated +execution, it prints: + +* whether the program has been killed by a signal, and the signal number; +* whether the program has terminated by exiting, and the exit code. +""" + +import os +from os.path import abspath, basename +import sys + +from gc3libs import Application, log +from gc3libs.cmdline import SessionBasedScript +from gc3libs.quantity import GB + + +if __name__ == '__main__': + from ex6b import GrayscaleScript + GrayscaleScript().run() + + +class GrayscaleScript(SessionBasedScript): + """ + Convert images to grayscale. + """ + def __init__(self): + super(GrayscaleScript, self).__init__(version='1.0') + def new_tasks(self, extra): + # since `self.params.args` is already a list of file names, + # just iterate over it to build the list of apps to run... 
+ apps_to_run = [] + for input_file in self.params.args: + input_file = abspath(input_file) + apps_to_run.append(VerboseGrayscaleApp(input_file)) + return apps_to_run + + +# alternately, one could just copy code from `grayscale_app.py` here, +# and append the `terminated()` method to the definition + +from grayscale_app import GrayscaleApp + +class VerboseGrayscaleApp(GrayscaleApp): + """Convert a single image file to grayscale and log termination status.""" + def terminated(self): + if self.execution.signal != 0: + log.info("Task %s killed by signal %d", self, self.execution.signal) + else: + # self.execution.signal == 0, hence normal termination + if self.execution.exitcode == 0: + log.info("Task %s exited successfully!", self) + else: + log.info("Task %s exited with error code %d", self, self.execution.exitcode) diff --git a/docs/programmers/tutorials/workflows/solutions/ex6bplus.py b/docs/programmers/tutorials/workflows/solutions/ex6bplus.py new file mode 100755 index 00000000..a2e000e2 --- /dev/null +++ b/docs/programmers/tutorials/workflows/solutions/ex6bplus.py @@ -0,0 +1,66 @@ +#! /usr/bin/env python + +""" +Exercise 6.B: Modify the grayscaling script ex2c (or the code it +depends upon) so that, when a ``GrayscaleApp`` task has terminated +execution, it prints: + +* whether the program has been killed by a signal, and the signal number; +* whether the program has terminated by exiting, and the exit code. +""" + +import os +from os.path import abspath, basename +import sys + +from gc3libs import Application, log +from gc3libs.cmdline import SessionBasedScript +from gc3libs.quantity import GB + + +if __name__ == '__main__': + from ex6bplus import GrayscaleScript + GrayscaleScript().run() + + +class GrayscaleScript(SessionBasedScript): + """ + Convert images to grayscale. 
+ """ + def __init__(self): + super(GrayscaleScript, self).__init__(version='1.0') + def new_tasks(self, extra): + # since `self.params.args` is already a list of file names, + # just iterate over it to build the list of apps to run... + apps_to_run = [] + for input_file in self.params.args: + input_file = abspath(input_file) + apps_to_run.append(GrayscaleApp(input_file)) + return apps_to_run + + +class GrayscaleApp(Application): + """Convert a single image file to grayscale.""" + def __init__(self, img): + inp = basename(img) + out = "gray-" + inp + Application.__init__( + self, + arguments=[ + "convert", inp, "-colorspace", "gray", out], + inputs=[img], + outputs=[out], + output_dir=("gray-" + inp + ".d"), + stdout="stdout.txt", + stderr="stderr.txt", + # this is needed to circumvent GC3Pie issue #559 + requested_memory=1*GB) + def terminated(self): + if self.execution.signal != 0: + log.info("Task %s killed by signal %d", self, self.execution.signal) + else: + # self.execution.signal == 0, hence normal termination + if self.execution.exitcode == 0: + log.info("Task %s exited successfully!", self) + else: + log.info("Task %s exited with error code %d", self, self.execution.exitcode) diff --git a/docs/programmers/tutorials/workflows/solutions/ex6c.py b/docs/programmers/tutorials/workflows/solutions/ex6c.py new file mode 100755 index 00000000..3380e286 --- /dev/null +++ b/docs/programmers/tutorials/workflows/solutions/ex6c.py @@ -0,0 +1,73 @@ +#! /usr/bin/env python + +""" +Exercise 6.C: Write a ``MatlabApp`` application that runs a MATLAB +script with ``matlab -nodesktop -nojvm -r``, captures its standard +error output and, if one of the strings ``Out of memory.`` or +``exceeds maximum array size`` occurs in it, sets the application +exit code to 11 (since MATLAB exits with code 0 even when some +error occurred). 
+""" + +import os +from os.path import abspath, basename +import sys + +from gc3libs import Application, log +from gc3libs.cmdline import SessionBasedScript +from gc3libs.quantity import GB + + +if __name__ == '__main__': + from ex6c import OomScript + OomScript().run() + + +class OomScript(SessionBasedScript): + """ + Run a MATLAB script, flagging out-of-memory errors. + """ + def __init__(self): + super(OomScript, self).__init__(version='1.0') + def new_tasks(self, extra): + # since `self.params.args` is already a list of file names, + # just iterate over it to build the list of apps to run... + apps_to_run = [] + for input_file in self.params.args: + input_file = abspath(input_file) + apps_to_run.append(MatlabApp('downloads/ra.m')) + return apps_to_run + + +class MatlabApp(Application): + """Run a MATLAB source file.""" + application_name = 'matlab' + + def __init__(self, code_file_path): + code_file_name = basename(code_file_path) + code_func_name = code_file_name[:-len('.m')] # remove `.m` extension + Application.__init__( + self, + arguments=["matlab", "-nodesktop", "-nojvm", "-r", code_func_name], + inputs=[code_file_path], + outputs=[], + output_dir=("matlab.out.d"), + stdout="matlab.log", + stderr="matlab.log", + # this is needed to circumvent GC3Pie issue #559 + requested_memory=1*GB) + def terminated(self): + err_file_path = os.path.join(self.output_dir, self.stderr) + with open(err_file_path, 'r') as err_file: + errors = err_file.read() + if 'Out of memory' in errors or 'exceeds maximum array size' in errors: + self.execution.exitcode = 11 + # verbosely notify user + if self.execution.signal != 0: + log.info("Task %s killed by signal %d", self, self.execution.signal) + else: + # self.execution.signal == 0, hence normal termination + if self.execution.exitcode == 0: + log.info("Task %s exited successfully!", self) + else: + log.info("Task %s exited with error code %d", self, self.execution.exitcode) diff --git a/docs/programmers/tutorials/workflows/solutions/ex6d.py 
b/docs/programmers/tutorials/workflows/solutions/ex6d.py new file mode 100755 index 00000000..e468168e --- /dev/null +++ b/docs/programmers/tutorials/workflows/solutions/ex6d.py @@ -0,0 +1,88 @@ +#! /usr/bin/env python + +""" +Write a ``sim_asset.py`` program that: + +* takes the same command-line positional arguments as ``simAsset.R``, plus an + additional integer trailing parameter P; + +* runs ``simAsset.R`` (in parallel) P times with the given arguments (so, + effectively simulates N x P price paths); + +* reads all the generated ``results.csv`` files, and computes and prints the + average value of the asset at the end of the simulated time, across all N x + P price paths. +""" + +import csv +import os +import sys + +from gc3libs import Application +from gc3libs.cmdline import SessionBasedScript + + +if __name__ == '__main__': + from ex6d import SimAssetScript + SimAssetScript().run() + + +class SimAssetScript(SessionBasedScript): + """ + Simulate asset pricing via Monte-Carlo methods. 
+ """ + + def __init__(self): + super(SimAssetScript, self).__init__(version='1.0') + + def setup_args(self): + self.add_param('S0', type=float, help="stock price today (e.g., 50)") + self.add_param('mu', type=float, help="expected return (e.g., 0.04)") + self.add_param('sigma', type=float, help="volatility (e.g., 0.1)") + self.add_param('dt', type=float, help="size of time steps (e.g., 0.273)") + self.add_param('etime', type=int, help="days to expiry (e.g., 1000)") + self.add_param('nsims', type=int, help="number of simulation paths per task") + self.add_param('P', type=int, help="number of task to run") + + def new_tasks(self, extra): + apps_to_run = [] + for seqnr in range(self.params.P): + app = SimAssetApp(self.params.S0, self.params.mu, self.params.sigma, + self.params.dt, self.params.etime, self.params.nsims, seqnr) + apps_to_run.append(app) + return apps_to_run + + def after_main_loop(self): + # check that all tasks are terminated + can_postprocess = True + for task in self.session.tasks.values(): + if task.execution.state != 'TERMINATED': + can_postprocess = False + break + if can_postprocess: + final_prices = [] + for task in self.session.tasks.values(): + result_path = os.path.join(task.output_dir, 'results.csv') + with open(result_path, 'r') as result_file: + result_csv = csv.reader(result_file) + for row in result_csv: + final_prices.append(float(row[-1])) + # now compute average + if final_prices: + average = sum(final_prices) / len(final_prices) + print("==> Average final price is: {average}".format(average=average)) + else: + print("==> No data to compute average!") + + +class SimAssetApp(Application): + def __init__(self, S0, mu, sigma, delta, etime, nsims, seqnr): + Application.__init__( + self, + ['/usr/bin/Rscript', 'simAsset.R', S0, mu, sigma, delta, etime, nsims], + inputs=['downloads/simAsset.R'], + outputs=['results.csv'], + output_dir=('simAsset-%d.d' % seqnr), + stdout="simAsset.log", + stderr="simAsset.log" + ) diff --git 
a/docs/programmers/tutorials/workflows/solutions/sim_asset.py b/docs/programmers/tutorials/workflows/solutions/sim_asset.py new file mode 120000 index 00000000..54eda2dc --- /dev/null +++ b/docs/programmers/tutorials/workflows/solutions/sim_asset.py @@ -0,0 +1 @@ +ex6d.py \ No newline at end of file