From 2f62bf256d3e6a3e2af19fb32a6461997bbbcae3 Mon Sep 17 00:00:00 2001 From: Philip Keicher Date: Wed, 5 Jun 2024 14:17:55 +0200 Subject: [PATCH 01/14] exchange m4l plot with official pdf version --- images/CMS-HIG-19-001_Figure_004-a.pdf | Bin 0 -> 130 bytes images/plot.png | 3 --- 2 files changed, 3 deletions(-) create mode 100644 images/CMS-HIG-19-001_Figure_004-a.pdf delete mode 100644 images/plot.png diff --git a/images/CMS-HIG-19-001_Figure_004-a.pdf b/images/CMS-HIG-19-001_Figure_004-a.pdf new file mode 100644 index 0000000000000000000000000000000000000000..25979ee53e5531d2a83318cbf3ffdb02f4d32b33 GIT binary patch literal 130 zcmWl~OA^8$3;@u5Pr(H&e4vyVbwE LNdK_1XiZqY>j5X_ literal 0 HcmV?d00001 diff --git a/images/plot.png b/images/plot.png deleted file mode 100644 index d6dfb7a..0000000 --- a/images/plot.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a940d640d61af81f171eaa2ac4cb9ab1df6ad2e524032fee3866a0d59728bd6 -size 218302 From 595a9b5f5413955e84464eb6e804bdd7fcb88af8 Mon Sep 17 00:00:00 2001 From: Philip Keicher Date: Wed, 5 Jun 2024 15:28:14 +0200 Subject: [PATCH 02/14] bib file for cf demo --- references.bib | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 references.bib diff --git a/references.bib b/references.bib new file mode 100644 index 0000000..bcf5fd2 --- /dev/null +++ b/references.bib @@ -0,0 +1,24 @@ +@software{cf_repo, + author = {The ColumnFlow Team}, + title = {The ColumnFlow project}, + year = {2024}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/columnflow/columnflow}, + version = {3b8e0f3}, + note = {Documentation available at \url{https://columnflow.readthedocs.io/en/latest/}} +} + +@published{h4l_analysis, + title = {Measurements of production cross sections of the Higgs boson in the four-lepton final state in proton–proton collisions at $\sqrt{s} = 13\, \text{TeV}$}, + volume = {81}, + ISSN = {1434-6052}, + url = 
{http://dx.doi.org/10.1140/epjc/s10052-021-09200-x}, + DOI = {10.1140/epjc/s10052-021-09200-x}, + number = {6}, + journal = {The European Physical Journal C}, + publisher = {Springer Science and Business Media LLC}, + author = {The CMS Collaboration}, + year = {2021}, + month = jun +} \ No newline at end of file From c1c2cc05b3853682904039dbfd8e154ded7ccd39 Mon Sep 17 00:00:00 2001 From: Philip Keicher Date: Wed, 5 Jun 2024 15:28:59 +0200 Subject: [PATCH 03/14] add more intermediary files from lates --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index d6abe19..8ffb2e5 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ *.xml *.DS_STORE *.txss2 +*.lof +*.lot From 960f5c039097feb800b198b90dcbf51bd6d46a86 Mon Sep 17 00:00:00 2001 From: Philip Keicher Date: Wed, 5 Jun 2024 15:29:55 +0200 Subject: [PATCH 04/14] restructured text --- chapters/exercise.tex | 21 ++------------------- chapters/intro.tex | 22 ++++------------------ sections/goal.tex | 17 +++++++++++++++++ sections/setup.tex | 2 +- 4 files changed, 24 insertions(+), 38 deletions(-) create mode 100644 sections/goal.tex diff --git a/chapters/exercise.tex b/chapters/exercise.tex index 2ce07e8..35df7b5 100644 --- a/chapters/exercise.tex +++ b/chapters/exercise.tex @@ -1,21 +1,4 @@ -\chapter{$H \rightarrow ZZ \rightarrow 4l$} -\justifying -\paragraph{} -The goal of this exercise is to reconstruct the Standard Model (SM) Higgs boson mass, using a selection targeting the four-lepton final state. 
This is considered a \textit{golden} channel to rediscovered the Higgs because: -\begin{itemize} - \item there is a \textbf{\underline{ large signal to background ratio}} -- it is easy to discriminate between the peak of the reconstructed four-lepton mass ($m_{4l}$) and the overall flat background shape; - \item we have excellent \textbf{\underline{ mass resolution}} -- thanks to the great resolution power of CMS, we have optimal shape reconstruction of $m_{4l}$; - \item it is a \textbf{\underline{ resolved final state}} -- detection of the four leptons in the final state ensures good discrimination of signal and background. -\end{itemize} - -\begin{figure}[!h] - \centering - \includegraphics[scale=0.35]{images/plot.png} - \caption{\justifying{Reconstructed four-lepton invariant mass $m_{4l}$ with 2018 data. The SM Higgs boson signal with $m_H = 125$ GeV, denoted as $H(125)$, and the $ZZ$ backgrounds are normalized to the SM expectation. The $Z+X$ background is normalized to the estimation from data.}} - \label{higgs_plot} -\end{figure} - -\include{sections/setup} -\include{sections/strategy} +\chapter{Basic Functionalities} +\label{chap:basics} \include{sections/calibrator} \include{sections/selector} diff --git a/chapters/intro.tex b/chapters/intro.tex index ec5c3bc..d6a897e 100644 --- a/chapters/intro.tex +++ b/chapters/intro.tex @@ -1,19 +1,5 @@ -\chapter{Introduction to ColumnFlow} +\include{sections/general_intro} +\include{sections/goal} +\include{sections/setup} +\include{sections/strategy} -ColumnFlow is intended as a back-end for analyses in order to facilitate processing large amounts of data. -It is purely python-based and employs multiple packages that are well-received and {-maintained} in the HEP community. -At the time of writing these instructions, the team of developer's purely consists of data analysts at the CMS experiment. -Therefore, this exercise is structured accordingly. 
-Please note that ColumnFlow is in principle designed in an experiment-agnostic way, such that it can also be extended to other use cases. - -Additionally, please note that this hands-on exercise is not meant to fully document all available functionalities. -The purpose of this exercise is to give an overview of the most fundamental aspects and concepts that are available at the time of writing. -For a more comprehensive overview, please visit the \href{https://columnflow.readthedocs.io/en/latest/}{official documentation}. % might want to put this as a proper reference -In case of any questions are comments, feel free to contact the maintainers for example via the \href{https://github.com/columnflow/columnflow}{git repository}. - -\begin{figure}[p] - \centering - \includegraphics[scale=0.8]{images/CF_tasks.png} - \caption{\justifying{ColumnFlow task graph hierarchy}} - \label{fig:task_graph} -\end{figure} \ No newline at end of file diff --git a/sections/goal.tex b/sections/goal.tex new file mode 100644 index 0000000..281fb8c --- /dev/null +++ b/sections/goal.tex @@ -0,0 +1,17 @@ +\section{Physics example: $H \rightarrow ZZ \rightarrow 4l$} +\justifying +\paragraph{} +The goal of this exercise is to reconstruct the Standard Model (SM) Higgs boson mass, using a selection targeting the four-lepton final state. This is considered a \textit{golden} channel to rediscover the Higgs because: +\begin{itemize} + \item there is a \textbf{\underline{ large signal to background ratio}} -- it is easy to discriminate between the peak of the reconstructed four-lepton mass ($m_{4l}$) and the overall flat background shape; + \item we have excellent \textbf{\underline{ mass resolution}} -- thanks to the great resolution power of CMS, we have optimal shape reconstruction of $m_{4l}$; + \item it is a \textbf{\underline{ resolved final state}} -- detection of the four leptons in the final state ensures good discrimination of signal and background.
+\end{itemize} + +\begin{figure}[t] + \centering + \includegraphics[width=\textwidth]{images/CMS-HIG-19-001_Figure_004-a.pdf} + \Caption{Reconstructed four-lepton invariant mass $m_{4l}$ with full Run2 data}{The SM Higgs boson signal with $m_H = 125\,\text{GeV}$, denoted as $H(125)$, and the $ZZ$ backgrounds are normalized to the SM expectation. The $Z+X$ background is normalized to the estimation from data. + Figure taken from ref.~\cite{h4l_analysis}.} + \label{higgs_plot} +\end{figure} \ No newline at end of file diff --git a/sections/setup.tex b/sections/setup.tex index e78fa05..a72f773 100644 --- a/sections/setup.tex +++ b/sections/setup.tex @@ -72,7 +72,7 @@ \section{Installation \& Setup} \includegraphics[scale=0.62]{images/CF_demo.png} \end{figure} -\subsection{ColumnFlow Tasks} +%\subsection{ColumnFlow Tasks} This exercise is organized in the form of \code{law} tasks, where different tasks create some form of output. By default, these tasks will save their output on a remote file system (e.g. \texttt{WLGC}), for which you will require a \code{voms-proxy}. If you would like to save certain/all outputs locally, we recommend to create a directory on a system with a larger amount of disk space (e.g. \texttt{EOS}). For such cases, you will need to update the \code{law.cfg} file accordingly. 
You can view the available tasks by running: \begin{lstlisting}[language=bash] From b3c6eab48e8edbf854323b48430ba0f63c8ec7a8 Mon Sep 17 00:00:00 2001 From: Philip Keicher Date: Wed, 5 Jun 2024 15:30:18 +0200 Subject: [PATCH 05/14] define custom commands for columnflow name and caption with proper short captions --- style_declarations.tex | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/style_declarations.tex b/style_declarations.tex index 7c8a05a..c331c66 100644 --- a/style_declarations.tex +++ b/style_declarations.tex @@ -1 +1,5 @@ -\newcommand{\CCSPStlye}[1]{\texttt{\textcolor{LimeGreen}{#1}}} \ No newline at end of file +\newcommand{\CCSPStlye}[1]{\texttt{\textcolor{LimeGreen}{#1}}} + +\newcommand{\columnflow}{\text{ColumnFlow}\xspace} + +\newcommand{\Caption}[2]{\caption[#1]{\textbf{#1}. #2}} \ No newline at end of file From 2aa54d5dd9ef7ddaccdcbd22992ebc4ac8147e99 Mon Sep 17 00:00:00 2001 From: Philip Keicher Date: Wed, 5 Jun 2024 15:31:09 +0200 Subject: [PATCH 06/14] switched to book to use front/mainmatter and appendix properly, resolved minor issues --- main.tex | 80 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/main.tex b/main.tex index be7b0c0..d2f4b55 100644 --- a/main.tex +++ b/main.tex @@ -1,4 +1,4 @@ -\documentclass[12pt,twoside]{report} +\documentclass[12pt,twoside]{book} \usepackage[utf8]{inputenc} \usepackage[export]{adjustbox} \usepackage[a4paper, top=25mm, bottom=25mm, inner=20mm, outer=30mm]{geometry} @@ -6,6 +6,7 @@ \usepackage[dvipsnames]{xcolor} \usepackage{newtxtext} \usepackage{xcolor} +\usepackage{xspace} \usepackage[explicit]{titlesec} \usepackage{lipsum, caption} \usepackage{type1cm} @@ -19,7 +20,7 @@ \newcommand{\asc}[1]{\textsc{{\MakeLowercase{#1}}}} \usepackage{pdfpages} \usepackage{bbold} -\usepackage{appendix} +\usepackage[title]{appendix} \usepackage{slashed} \usepackage{datetime} \usepackage{graphicx} @@ -31,6 +32,7 @@ 
\usepackage{tabularray} \usepackage[most]{tcolorbox} + \NewTblrTheme{longtable1}{ \DefTblrTemplate{conthead-text}{fancy}{} \SetTblrTemplate{conthead-text}{fancy} @@ -53,24 +55,25 @@ \newdateformat{monthyeardate}{% \monthname[\THEMONTH], \THEYEAR} -\titlespacing{\section}{0pt}{0}{0} -\titlespacing{\subsection}{0pt}{0}{0} - -\makeatletter -\def\ttl@mkchap@i#1#2#3#4#5#6#7{% - \ttl@assign\@tempskipa#3\relax\beforetitleunit - \vspace{\@tempskipa}%<<<<<< REMOVE THE * AFTER \vspace - \global\@afterindenttrue - \ifcase#5 \global\@afterindentfalse\fi - \ttl@assign\@tempskipb#4\relax\aftertitleunit - \ttl@topmode{\@tempskipb}{% - \ttl@select{#6}{#1}{#2}{#7}}% - \ttl@finmarks % Outside the box! - \@ifundefined{ttlp@#6}{}{\ttlp@write{#6}}} -\makeatother - -\renewcommand{\chaptermark}[1]{\markboth{{\chaptername\ \thechapter.\ #1}}{}} -\renewcommand{\sectionmark}[1]{\markright{{\sectionname\ \thesection.\ #1}}{}} +\titlespacing{\section}{0pt}{0pt}{0pt} +\titlespacing{\subsection}{0pt}{0pt}{0pt} + +%\makeatletter +%\def\ttl@mkchap@i#1#2#3#4#5#6#7{% +% \ttl@assign\@tempskipa#3\relax\beforetitleunit +% \vspace{\@tempskipa}%<<<<<< REMOVE THE * AFTER \vspace +% \global\@afterindenttrue +% \ifcase#5 \global\@afterindentfalse\fi +% \ttl@assign\@tempskipb#4\relax\aftertitleunit +% \ttl@topmode{\@tempskipb}{% +% \ttl@select{#6}{#1}{#2}{#7}}% +% \ttl@finmarks % Outside the box! 
+% \@ifundefined{ttlp@#6}{}{\ttlp@write{#6}}} +%\makeatother +% +%\renewcommand{\chaptermark}[1]{\markboth{{\chaptername\ \thechapter.\ #1}}{}} +% Following line generates undefined control sequence: section for me, so comment it out for now +%\renewcommand{\sectionmark}[1]{\markright{{\sectionname\ \thesection.\ #1}}{}} \newcommand*\NewPage{\newpage\null\thispagestyle{empty}\newpage} @@ -147,7 +150,7 @@ \lstset{style=mystyle} \usepackage{enumitem} \usepackage{xcolor} -%\usepackage[title]{appendix} + \definecolor{codegreen}{rgb}{0,0.6,0} @@ -178,16 +181,19 @@ \definecolor{codegray}{gray}{0.9} \newcommand{\code}[1]{\colorbox{codegray}{\texttt{#1}}} -\include{style_declarations} - +\input{style_declarations} +% define which files to consider for compilation. +% Nice to test compile smaller parts of the document without changing the toc/page numbering \includeonly{% chapters/intro, -% chapters/exercise, -% sections/setup, -% sections/strategy, -% sections/calibrator, -% sections/selector, + sections/general_intro, + chapters/exercise, + sections/goal, + sections/setup, + sections/strategy, + sections/calibrator, + sections/selector, } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -195,6 +201,7 @@ \pagenumbering{roman} \begin{titlepage} + \frontmatter \newgeometry{top=5mm, bottom=5mm, inner=5mm, outer=5mm} \begin{center} \scshape @@ -221,22 +228,19 @@ \newgeometry{top=25mm, bottom=25mm, inner=20mm, outer=30mm} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\newpage -\thispagestyle{empty} -\vspace*{1cm} - \begin{center}\textbf{}\end{center} +\tableofcontents +\listoffigures +\listoftables -\newpage -\thispagestyle{plain} +\mainmatter \thispagestyle{empty} -\tableofcontents -\pagenumbering{arabic} + \pagestyle{fancy} \captionsetup{justification=raggedright,singlelinecheck=false} \input{chapters/intro} \input{chapters/exercise} - -%\printbibliography +\appendix +\printbibliography \end{document} From 213074b971e6d8cb8faa86e39cb53a1126ba9d39 
Mon Sep 17 00:00:00 2001 From: Philip Keicher Date: Wed, 5 Jun 2024 15:31:33 +0200 Subject: [PATCH 07/14] wrote a little about the general structure of cf --- sections/general_intro.tex | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 sections/general_intro.tex diff --git a/sections/general_intro.tex b/sections/general_intro.tex new file mode 100644 index 0000000..f9ed4e5 --- /dev/null +++ b/sections/general_intro.tex @@ -0,0 +1,30 @@ +\chapter{Introduction to \columnflow} +\columnflow is intended as a back-end for analyses in order to facilitate processing large amounts of data. +It is purely python-based and employs multiple packages that are well-received and {-maintained} in the HEP community. +At the time of writing these instructions, the team of developers consists purely of data analysts at the CMS experiment. +Therefore, this exercise is structured accordingly. +Please note that \columnflow is in principle designed in an experiment-agnostic way, such that it can also be extended to other use cases. + +Additionally, please note that this hands-on exercise is not meant to fully document all available functionalities. +The purpose of this exercise is to give an overview of the most fundamental aspects and concepts that are available at the time of writing. +For a more comprehensive overview, please visit the official documentation~\cite{cf_repo}. % might want to put this as a proper reference +In case of any questions or comments, feel free to contact the maintainers for example via the git repository~\cite{cf_repo}. + +\section{General Structure} +\begin{figure}[p] + \centering + \includegraphics[width=\textwidth]{images/CF_tasks.png} + \Caption{\columnflow task graph hierarchy}{The tasks are arranged in three sections that correspond to general work packages when analysing data. + The line strengths and styles indicate the behaviour when propagating information between tasks.
+ For more information, please consider ref.~\cite{cf_repo}. +} + \label{fig:task_graph} +\end{figure} + +The guiding principle of \columnflow is that all analyses share basic work packages that need to be done when processing data. +Examples for such packages could be the calibration of relevant objects, applying selections to define a fiducial phase space for the analysis or the calculation of some sensitive observables, which are discussed in more detail in later chapters of this document. +\columnflow defines these work packages as law tasks, which can define dependencies amongst each other and will only run necessary tasks to obtain the requested output. + + +Figure~\ref{fig:task_graph} depicts an overview of the available tasks and their dependencies. +The highlighted regions indicate use cases that are discussed in chapter~\ref{chap:basics}. \ No newline at end of file From 6447e39bcadaf93232a8a923a74274abcdb0c781 Mon Sep 17 00:00:00 2001 From: Philip Keicher Date: Wed, 5 Jun 2024 15:31:46 +0200 Subject: [PATCH 08/14] updated version of demo docs --- main.pdf | Bin 131 -> 132 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/main.pdf b/main.pdf index c27791f1df7ca4208949ef4153e7f0e80e5b4cc0..47901b55e816f129f82738effb2cae218063bc15 100644 GIT binary patch delta 85 zcmV~$%MpMe3fqZDf9*~#Jc=qO13$SE&pY*0rcBes0hE8Z<1gj~Usl#fw f5)uSOuYw@+M(_x9or~77cuvmOdHY7Csc_3b31}DA delta 84 zcmWN_u@QhE3Ta#E@B0}U1z(f|Me From ae7be80e7bb882ed6706394ebc6058dc9e74df6a Mon Sep 17 00:00:00 2001 From: Philip Keicher Date: Thu, 6 Jun 2024 15:00:13 +0200 Subject: [PATCH 09/14] added more general infos to introduction --- sections/general_intro.tex | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/sections/general_intro.tex b/sections/general_intro.tex index f9ed4e5..81d89f1 100644 --- a/sections/general_intro.tex +++ b/sections/general_intro.tex @@ -27,4 +27,22 @@ \section{General Structure} Figure~\ref{fig:task_graph} depicts an 
overview of the available tasks and their dependencies. -The highlighted regions indicate use cases that are discussed in chapter~\ref{chap:basics}. \ No newline at end of file +The highlighted regions indicate use cases that are discussed in chapter~\ref{chap:basics}. +This chain of jobs starts with obtaining the list of logical file names (LFNs) that contain the events to be analysed in a flat tuple format (e.g.\ the nanoAOD format within CMS). +The first block is dedicated to prepare these events for further analysis. +Such a preparation can entail different things, such as a calibration of the relevant objects in an analysis or the application of selection criteria to define a relevant work space. +In order to facilitate a more efficient calculation in later parts of this workflow, the amount of data is reduced as a last step of the first plot. + +The second block illustrated in fig.~\ref{fig:task_graph} is dedicated to the calculation of different observables and metrics. +At the time of writing these instructions, this blocks offers metrics such as a summary of efficiencies for different stages of the selection(s) and their effect on observables, the calculation of completely new variables and also more complex calculations based on machine learning. +Moreover, it offers the functionality to collect all information of the workflow and save it as a flat tuple in the e.g.\ ROOT or parquet format. +The modular structure of the individual tasks allows for an easy extensions to calculate a variety of observables. + +Finally, the last block is dedicated to the final quantities that are needed for the analysis. +Most of these endpoints of the workflow aim to facilitate a data analysis in a binned format, though this is not a hard criterion. 
+This includes producing figures illustrating one- or two-dimensional distributions of multiple physics processes under consideration of a wide variety of systematic uncertainties, as well as the input needed for a statistical inference based on the data (e.g.\ datacards for the Combine tool within CMS). + +This structure allows for a full end-to-end analysis. +The explicit definition of dependencies in the code and the implicit check for existing outputs provided by luigi and law result in a sustainable and reproducible workflow that is easily triggered with a single command. +In the following, these capabilities are illustrated using an example that is based on the $H\rightarrow4l$ analysis, for which we will build a selection of the aforementioned modules. +Please note that this example is by no means as complex and sophisticated as the real CMS analysis, and should therefore not be expected to yield the same results. \ No newline at end of file From 787985c0ffc1a68ea63abbba39b63e11dc9ba337 Mon Sep 17 00:00:00 2001 From: Philip Keicher Date: Thu, 6 Jun 2024 15:00:47 +0200 Subject: [PATCH 10/14] further developed the general outline of the text --- chapters/exercise.tex | 6 ++++-- main.pdf | Bin 132 -> 132 bytes main.tex | 6 +++++- sections/categorizer.tex | 2 ++ sections/inference.tex | 1 + sections/producer.tex | 2 ++ sections/taskarrayfunctions.tex | 3 +++ 7 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 sections/categorizer.tex create mode 100644 sections/inference.tex create mode 100644 sections/producer.tex create mode 100644 sections/taskarrayfunctions.tex diff --git a/chapters/exercise.tex b/chapters/exercise.tex index 35df7b5..3c8355b 100644 --- a/chapters/exercise.tex +++ b/chapters/exercise.tex @@ -1,4 +1,6 @@ -\chapter{Basic Functionalities} -\label{chap:basics} +\include{sections/taskarrayfunctions} \include{sections/calibrator} \include{sections/selector} +\include{sections/producer} +\include{sections/categorizer} 
+\include{sections/inference} \ No newline at end of file diff --git a/main.pdf b/main.pdf index 47901b55e816f129f82738effb2cae218063bc15..b75444b50baec4c545b7ef9baf21dfdcedd8de87 100644 GIT binary patch delta 85 zcmV~$u@QhE34r0QNw*>nhGY72lBMj=Mp e**YSo3rhjo3egCiATwvWrsU^-*HK7H#mGP6v=-L@ delta 85 zcmV~$%MpMe3fqZDf9*~#Jc=qO*0xa3xop$= Date: Thu, 6 Jun 2024 15:27:24 +0200 Subject: [PATCH 11/14] implemented linting workflow --- .chktexrc | 16 ++++++++++++++++ .github/workflows/lint.yaml | 22 ++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 .chktexrc create mode 100644 .github/workflows/lint.yaml diff --git a/.chktexrc b/.chktexrc new file mode 100644 index 0000000..c8abb92 --- /dev/null +++ b/.chktexrc @@ -0,0 +1,16 @@ + +# in the base directory of your project + +QuoteStyle = Traditional + +VerbEnvir +{ + verbatim comment listing verbatimtab rawhtml errexam picture texdraw + filecontents pgfpicture tikzpicture minted lstlisting texttt + solution +} + +CmdLine +{ + --nowarn 1 --nowarn 8 --nowarn 13 +} diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 0000000..7a7671a --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,22 @@ +# in .github/workflows/lint.yml + +name: Lint + +on: + push: + branches: + - main + pull_request: + branches: + - main + + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: LaTeX linter (chktex) + uses: j2kun/chktex-action@main + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 9a738c79f2d5073607114750d46de98c7110cb97 Mon Sep 17 00:00:00 2001 From: Philip Keicher Date: Thu, 6 Jun 2024 15:27:38 +0200 Subject: [PATCH 12/14] applied new linting rules --- sections/calibrator.tex | 2 +- sections/categorizer.tex | 4 ++-- sections/general_intro.tex | 7 +++---- sections/goal.tex | 3 +-- sections/inference.tex | 2 +- sections/producer.tex | 3 +-- sections/selector.tex | 2 +- sections/setup.tex | 4 ++-- sections/strategy.tex | 2 
+- sections/taskarrayfunctions.tex | 5 ++--- 10 files changed, 15 insertions(+), 19 deletions(-) diff --git a/sections/calibrator.tex b/sections/calibrator.tex index 964ced5..d9fa3a0 100644 --- a/sections/calibrator.tex +++ b/sections/calibrator.tex @@ -1 +1 @@ -\section{Writing a Calibrator} +\section{Writing a Calibrator}\label{sec:calibrator} \ No newline at end of file diff --git a/sections/categorizer.tex b/sections/categorizer.tex index a7f7eb6..82e138e 100644 --- a/sections/categorizer.tex +++ b/sections/categorizer.tex @@ -1,2 +1,2 @@ -\chapter{Advanced Topics} -\section{Defining Categories} \ No newline at end of file +\chapter{Advanced Topics}\label{chap:advanced} +\section{Defining Categories}\label{sec:categories} \ No newline at end of file diff --git a/sections/general_intro.tex b/sections/general_intro.tex index 81d89f1..62ad022 100644 --- a/sections/general_intro.tex +++ b/sections/general_intro.tex @@ -17,8 +17,7 @@ \section{General Structure} \Caption{\columnflow task graph hierarchy}{The tasks are arranged in three sections that correspond to general work packages when analysing data. The line strengths and styles indicate the behaviour when propagating information between tasks. For more information, please consider ref.~\cite{cf_repo}. -} - \label{fig:task_graph} +}\label{fig:task_graph} \end{figure} The guiding principle of \columnflow is that all analyses share basic work packages that need to be done when processing data. @@ -27,14 +26,14 @@ \section{General Structure} Figure~\ref{fig:task_graph} depicts an overview of the available tasks and their dependencies. -The highlighted regions indicate use cases that are discussed in chapter~\ref{chap:basics}. +The highlighted regions indicate use cases that are discussed in Chapter~\ref{chap:basics}. This chain of jobs starts with obtaining the list of logical file names (LFNs) that contain the events to be analysed in a flat tuple format (e.g.\ the nanoAOD format within CMS). 
The first block is dedicated to prepare these events for further analysis. Such a preparation can entail different things, such as a calibration of the relevant objects in an analysis or the application of selection criteria to define a relevant work space. In order to facilitate a more efficient calculation in later parts of this workflow, the amount of data is reduced as a last step of the first plot. The second block illustrated in fig.~\ref{fig:task_graph} is dedicated to the calculation of different observables and metrics. -At the time of writing these instructions, this blocks offers metrics such as a summary of efficiencies for different stages of the selection(s) and their effect on observables, the calculation of completely new variables and also more complex calculations based on machine learning. +At the time of writing these instructions, this block offers metrics such as a summary of efficiencies for different stages of the selections and their effect on observables, the calculation of completely new variables and also more complex calculations based on machine learning. Moreover, it offers the functionality to collect all information of the workflow and save it as a flat tuple in the e.g.\ ROOT or parquet format. The modular structure of the individual tasks allows for an easy extensions to calculate a variety of observables. diff --git a/sections/goal.tex b/sections/goal.tex index 281fb8c..a54584c 100644 --- a/sections/goal.tex +++ b/sections/goal.tex @@ -12,6 +12,5 @@ \section{Physics example: $H \rightarrow ZZ \rightarrow 4l$} \centering \includegraphics[width=\textwidth]{images/CMS-HIG-19-001_Figure_004-a.pdf} \Caption{Reconstructed four-lepton invariant mass $m_{4l}$ with full Run2 data}{The SM Higgs boson signal with $m_H = 125\,\text{GeV}$, denoted as $H(125)$, and the $ZZ$ backgrounds are normalized to the SM expectation. The $Z+X$ background is normalized to the estimation from data.
- Figure taken from ref.~\cite{h4l_analysis}.} - \label{higgs_plot} + Figure taken from ref.~\cite{h4l_analysis}.}\label{higgs_plot} \end{figure} \ No newline at end of file diff --git a/sections/inference.tex b/sections/inference.tex index 50b8298..69b6e3d 100644 --- a/sections/inference.tex +++ b/sections/inference.tex @@ -1 +1 @@ -\section{Writing datacards} \ No newline at end of file +\section{Writing datacards}\label{sec:inference} \ No newline at end of file diff --git a/sections/producer.tex b/sections/producer.tex index 84d0d13..c4ed2c3 100644 --- a/sections/producer.tex +++ b/sections/producer.tex @@ -1,2 +1 @@ -\section{Writing a Producer} -\label{sec:producer} \ No newline at end of file +\section{Writing a Producer}\label{sec:producer} \ No newline at end of file diff --git a/sections/selector.tex b/sections/selector.tex index 60c78f2..a8ea820 100644 --- a/sections/selector.tex +++ b/sections/selector.tex @@ -1,4 +1,4 @@ -\section{Writing a Selector}\label{selector} +\section{Writing a Selector}\label{sec:selector} The lepton selection we want to implement in this exercise is the following: diff --git a/sections/setup.tex b/sections/setup.tex index a72f773..50d9741 100644 --- a/sections/setup.tex +++ b/sections/setup.tex @@ -57,7 +57,7 @@ \section{Installation \& Setup} Note that the first installation of the software can take \underline{up to several minutes}. -Every time you want to work with ColumnFlow (e.g. if you open a new terminal window), you will need to source the \code{setup.sh} script again. +Every time you want to work with ColumnFlow (e.g.\ if you open a new terminal window), you will need to source the \code{setup.sh} script again. Once the installation is complete you should see a line of green text stating that the analysis has been successfully set up. You are now ready to start working with ColumnFlow! 
@@ -74,7 +74,7 @@ \section{Installation \& Setup} %\subsection{ColumnFlow Tasks} -This exercise is organized in the form of \code{law} tasks, where different tasks create some form of output. By default, these tasks will save their output on a remote file system (e.g. \texttt{WLGC}), for which you will require a \code{voms-proxy}. If you would like to save certain/all outputs locally, we recommend to create a directory on a system with a larger amount of disk space (e.g. \texttt{EOS}). For such cases, you will need to update the \code{law.cfg} file accordingly. You can view the available tasks by running: +This exercise is organized in the form of \code{law} tasks, where different tasks create some form of output. By default, these tasks will save their output on a remote file system (e.g.\ \texttt{WLGC}), for which you will require a \code{voms-proxy}. If you would like to save certain/all outputs locally, we recommend to create a directory on a system with a larger amount of disk space (e.g.\ \texttt{EOS}). For such cases, you will need to update the \code{law.cfg} file accordingly. You can view the available tasks by running: \begin{lstlisting}[language=bash] law index --verbose \end{lstlisting} diff --git a/sections/strategy.tex b/sections/strategy.tex index 7ea8957..21f27cb 100644 --- a/sections/strategy.tex +++ b/sections/strategy.tex @@ -1,6 +1,6 @@ \section{Analysis strategy} -In order to find Higgs boson candidates we will first need to reconstruct the four leptons in the final state. We will do this by writing a \CCSPStlye{Selector} (section \ref{selector}). +In order to find Higgs boson candidates we will first need to reconstruct the four leptons in the final state. We will do this by writing a \CCSPStlye{Selector} (Section~\ref{sec:selector}). %, which will filter out all physics objects that do not fulfill the selection criteria. 
In this selector, we will implement \underline{kinematic cuts}, \underline{vertex cuts} and \underline{Isolation \& ID} criteria. We will also implement \underline{trigger selections} such that only events interesting for the $ZZ$ analysis are selected. diff --git a/sections/taskarrayfunctions.tex b/sections/taskarrayfunctions.tex index f504eb3..a214b85 100644 --- a/sections/taskarrayfunctions.tex +++ b/sections/taskarrayfunctions.tex @@ -1,3 +1,2 @@ -\chapter{Basic Functionalities} -\label{chap:basics} -\section{The Mother of all: TaskArrayFunctions} \ No newline at end of file +\chapter{Basic Functionalities}\label{chap:basics} +\section{The Mother of all: TaskArrayFunctions}\label{sec:taskarrayfunc} \ No newline at end of file From d4bb334d2f0c68fecf3b3af2c780239ce50078a1 Mon Sep 17 00:00:00 2001 From: Philip Keicher Date: Thu, 6 Jun 2024 15:39:28 +0200 Subject: [PATCH 13/14] fix format of workflow file --- .github/workflows/{lint.yaml => lint.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{lint.yaml => lint.yml} (100%) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yml similarity index 100% rename from .github/workflows/lint.yaml rename to .github/workflows/lint.yml From 0de98ccedd8975d8fb53a211f87e50e62f9a618d Mon Sep 17 00:00:00 2001 From: Ana A Date: Mon, 10 Jun 2024 14:19:00 +0200 Subject: [PATCH 14/14] implementing suggestions from mschrode --- .vscode/settings.json | 22 ++++++++++ images/QU_logo.jpg | 3 ++ images/logos.png | 4 +- main.tex | 72 ++++++++++++++++----------------- references.bib | 16 +++++++- sections/categorizer.tex | 2 +- sections/general_intro.tex | 38 +++++++++-------- sections/goal.tex | 14 +++---- sections/producer.tex | 2 +- sections/selector.tex | 4 +- sections/setup.tex | 18 ++++----- sections/strategy.tex | 4 +- sections/taskarrayfunctions.tex | 2 +- 13 files changed, 118 insertions(+), 83 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 images/QU_logo.jpg 
diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..d7db2a7 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,22 @@ +{ + "workbench.colorCustomizations": { + "activityBar.activeBackground": "#f181b2", + "activityBar.background": "#f181b2", + "activityBar.foreground": "#15202b", + "activityBar.inactiveForeground": "#15202b99", + "activityBarBadge.background": "#caf5a9", + "activityBarBadge.foreground": "#15202b", + "commandCenter.border": "#15202b99", + "sash.hoverBorder": "#f181b2", + "statusBar.background": "#ec5396", + "statusBar.foreground": "#15202b", + "statusBarItem.hoverBackground": "#e7257a", + "statusBarItem.remoteBackground": "#ec5396", + "statusBarItem.remoteForeground": "#15202b", + "titleBar.activeBackground": "#ec5396", + "titleBar.activeForeground": "#15202b", + "titleBar.inactiveBackground": "#ec539699", + "titleBar.inactiveForeground": "#15202b99" + }, + "peacock.color": "#ec5396" +} diff --git a/images/QU_logo.jpg b/images/QU_logo.jpg new file mode 100644 index 0000000..a9c2391 --- /dev/null +++ b/images/QU_logo.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f48199304381bdfa77f90f90b668cb00b7351621bee1c0701811f7d83ef5a468 +size 23069 diff --git a/images/logos.png b/images/logos.png index 5119678..49cdd32 100644 --- a/images/logos.png +++ b/images/logos.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7964f6c5f083c7e7a05cda0e519c3603efb2070913e625f9c586c71681f0e528 -size 118651 +oid sha256:959a1ba488136907771fa84ef3d1d1eef95b66061a23f95371cecf9a56098ccc +size 135961 diff --git a/main.tex b/main.tex index c201d29..2fc29e0 100644 --- a/main.tex +++ b/main.tex @@ -4,7 +4,7 @@ \usepackage[a4paper, top=25mm, bottom=25mm, inner=20mm, outer=30mm]{geometry} \usepackage{afterpage,blindtext,color,hyperref,ragged2e,graphicx,fancyhdr} \usepackage[dvipsnames]{xcolor} -\usepackage{newtxtext} +\usepackage{newtxtext} \usepackage{xcolor} \usepackage{xspace} 
\usepackage[explicit]{titlesec} @@ -12,7 +12,7 @@ \usepackage{type1cm} \usepackage[nopostdot,toc,acronym,nomain,nonumberlist]{glossaries} \usepackage[backend=biber,style=numeric-comp -,sorting=none]{biblatex} +,sorting=none]{biblatex} \addbibresource{references.bib} \usepackage{multirow} \usepackage{mfirstuc} @@ -105,13 +105,13 @@ {\normalfont\Huge\sffamily\bfseries} {\sffamily\flushright\fontsize{70}{0}\textbf{\textcolor{LimeGreen}{{\Huge\chaptername}~\thechapter\vskip0pt\rule{\textwidth}{5pt}}}}{0pt} {\flushleft\textcolor{LimeGreen}{\fontsize{40}{0}{#1}}\titlespacing*{\chapter}{0pt}{0pt}{-40pt}} - + \titleformat{\section}[display] {\normalfont\large\sffamily\bfseries} {\textbf{\textcolor{LimeGreen}{\large}}}{0pt} {\flushleft\textcolor{LimeGreen}{\fontsize{40}{0}{~\thesection \ #1}}\titlespacing*{\section}{0pt}{0pt}{-40pt}} - + \titleformat{\subsection}[display] {\normalfont\normal\sffamily\bfseries} {\textbf{\textcolor{LimeGreen}{\normal}}}{0pt} @@ -129,21 +129,21 @@ \definecolor{backcolour}{rgb}{0.95,0.95,0.92} \lstdefinestyle{mystyle}{ - backgroundcolor=\color{backcolour}, + backgroundcolor=\color{backcolour}, commentstyle=\color{codeLimeGreen}, keywordstyle=\color{magenta}, numberstyle=\tiny\color{codegray}, stringstyle=\color{codepurple}, basicstyle=\ttfamily\footnotesize, - breakatwhitespace=false, - breaklines=true, - captionpos=b, - keepspaces=true, - numbers=left, - numbersep=5pt, - showspaces=false, + breakatwhitespace=false, + breaklines=true, + captionpos=b, + keepspaces=true, + numbers=left, + numbersep=5pt, + showspaces=false, showstringspaces=false, - showtabs=false, + showtabs=false, tabsize=2 } @@ -159,21 +159,21 @@ \definecolor{backcolour}{rgb}{0.95,0.95,0.92} \lstdefinestyle{mystyle}{ - backgroundcolor=\color{backcolour}, + backgroundcolor=\color{backcolour}, commentstyle=\color{codegreen}, keywordstyle=\color{magenta}, numberstyle=\tiny\color{codegray}, stringstyle=\color{codepurple}, basicstyle=\ttfamily\footnotesize, - 
breakatwhitespace=false, - breaklines=true, - captionpos=b, - keepspaces=true, - numbers=left, - numbersep=5pt, - showspaces=false, + breakatwhitespace=false, + breaklines=true, + captionpos=b, + keepspaces=true, + numbers=left, + numbersep=5pt, + showspaces=false, showstringspaces=false, - showtabs=false, + showtabs=false, tabsize=2 } @@ -202,29 +202,29 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{document} -\pagenumbering{roman} +\pagenumbering{roman} \begin{titlepage} \frontmatter \newgeometry{top=5mm, bottom=5mm, inner=5mm, outer=5mm} \begin{center} - \scshape - + \scshape + \includegraphics[scale=0.7]{images/logos.png} \\ \vspace{100pt} - + \begin{Huge} {\sffamily{Hands-on Analysis Exercise\\ \vspace{20pt} $H \rightarrow ZZ \rightarrow 4l$ }} \end{Huge} \\\vspace{20pt} \begin{Large} {\sffamily{with}} \end{Large} \\ \vspace{20pt} - + \includegraphics[scale=0.1]{images/cf_logo.png} \\ \vspace{100pt} - + \begin{large} {\sffamily{Authors}} \end{large} \\ \vspace{5pt} - - \begin{Large} {\sffamily{Matteo Bonanomi, Philip Keicher \\ \vspace{10pt} Daniel Savoiu, Ana Andrade}} \end{Large} \\\vspace{80pt} - + + \begin{Large} {\sffamily{Matteo Bonanomi, Philip Keicher \\ \vspace{10pt} Daniel Savoiu, Ana Andrade}} \end{Large} \\\vspace{80pt} + \begin{large} {\sffamily{June 2024}} \end{large} \\ \vspace{50pt} - + \begin{small} {\sffamily{This exercise was originally created for the Higgs PAG exercise at the \\ CMS Physics Objects \& Data Analysis School held in Hamburg in October 2023}} \end{small} \end{center} @@ -233,8 +233,8 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \tableofcontents -\listoffigures -\listoftables +%\listoffigures +%\listoftables \mainmatter \thispagestyle{empty} @@ -242,8 +242,8 @@ \pagestyle{fancy} \captionsetup{justification=raggedright,singlelinecheck=false} -\input{chapters/intro} -\input{chapters/exercise} +\input{chapters/intro} +\input{chapters/exercise} \appendix \printbibliography diff --git
a/references.bib b/references.bib index bcf5fd2..af9630a 100644 --- a/references.bib +++ b/references.bib @@ -20,5 +20,17 @@ @published{h4l_analysis publisher = {Springer Science and Business Media LLC}, author = {The CMS Collaboration}, year = {2021}, - month = jun -} \ No newline at end of file + month = jun +} + +@article{combine, + author = "Hayrapetyan, Aram and others", + collaboration = "CMS", + title = "{The CMS Statistical Analysis and Combination Tool: COMBINE}", + eprint = "2404.06614", + archivePrefix = "arXiv", + primaryClass = "physics.data-an", + reportNumber = "CMS-CAT-23-001, CERN-EP-2024-078", + month = "4", + year = "2024" +} diff --git a/sections/categorizer.tex b/sections/categorizer.tex index 82e138e..71b4461 100644 --- a/sections/categorizer.tex +++ b/sections/categorizer.tex @@ -1,2 +1,2 @@ \chapter{Advanced Topics}\label{chap:advanced} -\section{Defining Categories}\label{sec:categories} \ No newline at end of file +\section{Defining categories}\label{sec:categories} diff --git a/sections/general_intro.tex b/sections/general_intro.tex index 62ad022..e929813 100644 --- a/sections/general_intro.tex +++ b/sections/general_intro.tex @@ -1,28 +1,26 @@ \chapter{Introduction to \columnflow} -\columnflow is intended as a back-end for analyses in order to facilitate processing large amounts of data. -It is purely python-based and employs multiple packages that are well-received and {-maintained} in the HEP community. -At the time of writing these instructions, the team of developer's purely consists of data analysts at the CMS experiment. -Therefore, this exercise is structured accordingly. -Please note that \columnflow is in principle designed in an experiment-agnostic way, such that it can also be extended to other use cases. +\columnflow is a back-end for analyses in order to facilitate processing large amounts of data. +It is purely python-based and employs multiple packages that are common in the HEP community and well-maintained. 
+At the time of writing these instructions, the team of developers purely consists of data analysts at the CMS experiment. +Therefore, this exercise is structured accordingly. However, ColumnFlow is designed in an experiment-agnostic way and it can be extended to other use cases. Additionally, please note that this hands-on exercise is not meant to fully document all available functionalities. The purpose of this exercise is to give an overview of the most fundamental aspects and concepts that are available at the time of writing. -For a more comprehensive overview, please visit the official documentation~\cite{cf_repo}. % might want to put this as a proper reference -In case of any questions are comments, feel free to contact the maintainers for example via the git repository~\cite{cf_repo}. +For a more comprehensive overview, please visit the documentation~\cite{cf_repo}. % might want to put this as a proper reference +In case of any questions or comments, feel free to contact the maintainers for example via the git repository~\cite{cf_repo}. -\section{General Structure} +\section{General structure} \begin{figure}[p] \centering \includegraphics[width=\textwidth]{images/CF_tasks.png} - \Caption{\columnflow task graph hierarchy}{The tasks are arranged in three sections that correspond to general work packages when analysing data. - The line strengths and styles indicate the behaviour when propagating information between tasks. - For more information, please consider ref.~\cite{cf_repo}. + \Caption{\columnflow task graph hierarchy}{The tasks are arranged in three sections that correspond to general work packages in a data analysis. + The line widths and styles indicate the behaviour when propagating information between tasks, as illustrated in the GitHub Wiki~\cite{cf_repo}. }\label{fig:task_graph} \end{figure} The guiding principle of \columnflow is that all analyses share basic work packages that need to be done when processing data.
Examples for such packages could be the calibration of relevant objects, applying selections to define a fiducial phase space for the analysis or the calculation of some sensitive observables, which are discussed in more detail in later chapters of this document. -\columnflow defines these work packages as law tasks, which can define dependencies amongst each other and will only run necessary tasks to obtain the requested output. +\columnflow defines the work packages as \texttt{law} tasks, which can define dependencies amongst each other and will only run necessary tasks to obtain the requested output. Figure~\ref{fig:task_graph} depicts an overview of the available tasks and their dependencies. @@ -32,16 +30,16 @@ \section{General Structure} Such a preparation can entail different things, such as a calibration of the relevant objects in an analysis or the application of selection criteria to define a relevant work space. In order to facilitate a more efficient calculation in later parts of this workflow, the amount of data is reduced as a last step of the first plot. -The second block illustrated in fig.~\ref{fig:task_graph} is dedicated to the calculation of different observables and metrics. +The second block in Fig.~\ref{fig:task_graph} is dedicated to the calculation of different observables and metrics. At the time of writing these instructions, this blocks offers metrics such as a summary of efficiencies for different stages of the selections and their effect on observables, the calculation of completely new variables and also more complex calculations based on machine learning. -Moreover, it offers the functionality to collect all information of the workflow and save it as a flat tuple in the e.g.\ ROOT or parquet format. +Moreover, it offers the functionality to collect all information of the workflow and save it as a flat tuple in, e.g., the ROOT or PARQUET format.
The modular structure of the individual tasks allows for an easy extensions to calculate a variety of observables. -Finally, the last block is dedicated to the final quantities that are needed for the analysis. +Finally, the last block is dedicated to the final observables that are needed for the analysis. Most of these endpoints of the workflow aim to facilitate a data analysis in a binned format, though this is not a hard criterion. -This includes producing figures illustrating one- or two-dimensional distributions of multiple physics processes under consideration of a wide variety of systematic uncertainties, as well as the input needed for a statistical inference based on the data (e.g.\ datacards for the Combine tool within CMS). +This includes producing figures illustrating one- or two-dimensional distributions of multiple physics processes under consideration of a wide variety of systematic uncertainties, as well as the input needed for a statistical inference based on the data (e.g.\ datacards for the Combine tool within CMS~\cite{combine}). -This structure allows for a full end-to-end analysis. -The explicit definition of dependencies in the code and the implicit check for existing outputs provided by luigi and law result in a sustainable and reproducible workflow that is easily triggered with a single command. -In the following, these capabilities are illustrated using an example that is based on the $H\rightarrow4l$ analysis, for which we will build a selection of the aforementioned modules. -Please note that this example is by no means as complex and sophisticated as the real CMS analysis, and should therefore not be expected to yield the same results. \ No newline at end of file +This structure allows a full end-to-end analysis. 
+The explicit definition of dependencies in the code and the implicit check for existing outputs provided by \texttt{luigi} and \texttt{law} result in an automatically organised and reproducible workflow that is easily triggered with a single command. +In the following, these capabilities are illustrated using an example that is based on the $H\rightarrow4l$ analysis~\cite{h4l_analysis}, for which we will build a selection of the aforementioned modules. +Please note that this example is by no means as complex and sophisticated as the real CMS analysis, and should therefore not be expected to yield the same results. diff --git a/sections/goal.tex b/sections/goal.tex index a54584c..6f4adaa 100644 --- a/sections/goal.tex +++ b/sections/goal.tex @@ -1,16 +1,16 @@ \section{Physics example: $H \rightarrow ZZ \rightarrow 4l$} \justifying \paragraph{} -The goal of this exercise is to reconstruct the Standard Model (SM) Higgs boson mass, using a selection targeting the four-lepton final state. This is considered a \textit{golden} channel to rediscovered the Higgs because: +The goal of this exercise is to reconstruct the standard model (SM) Higgs boson mass, using a selection targeting the four-lepton final state. This is considered a \textit{golden} channel to measure the properties of the Higgs boson because: \begin{itemize} - \item there is a \textbf{\underline{ large signal to background ratio}} -- it is easy to discriminate between the peak of the reconstructed four-lepton mass ($m_{4l}$) and the overall flat background shape; - \item we have excellent \textbf{\underline{ mass resolution}} -- thanks to the great resolution power of CMS, we have optimal shape reconstruction of $m_{4l}$; - \item it is a \textbf{\underline{ resolved final state}} -- detection of the four leptons in the final state ensures good discrimination of signal and background. 
+ \item it is a \textbf{\underline{ fully resolved final state}} -- the Higgs boson can be reconstructed from the reconstructed particles; + \item we have an excellent \textbf{\underline{ mass resolution}} -- due to the high lepton-$p_T$ resolution, we have optimal shape reconstruction of $m_{4l}$; + \item there is a \textbf{\underline{ large signal to background ratio}} -- it is easy to discriminate between the peak of the reconstructed four-lepton mass ($m_{4l}$) and the overall flat background shape. \end{itemize} \begin{figure}[t] \centering \includegraphics[width=\textwidth]{images/CMS-HIG-19-001_Figure_004-a.pdf} - \Caption{Reconstructed four-lepton invariant mass $m_{4l}$ with full Run2 data}{The SM Higgs boson signal with $m_H = 125\,\text{GeV}$, denoted as $H(125)$, and the $ZZ$ backgrounds are normalized to the SM expectation. The $Z+X$ background is normalized to the estimation from data. - Figure taken from ref.~\cite{h4l_analysis}.}\label{higgs_plot} -\end{figure} \ No newline at end of file + \Caption{Reconstructed four-lepton invariant mass $m_{4l}$ with full Run 2 dataset}{The SM Higgs boson signal with $m_H = 125\,\text{GeV}$, denoted as H(125), and the ZZ backgrounds are normalized to the SM expectation. The $Z+X$ background is normalized to the estimation from data. 
+ Figure taken from Ref.~\cite{h4l_analysis}.}\label{higgs_plot} +\end{figure} diff --git a/sections/producer.tex b/sections/producer.tex index c4ed2c3..e9932f2 100644 --- a/sections/producer.tex +++ b/sections/producer.tex @@ -1 +1 @@ -\section{Writing a Producer}\label{sec:producer} \ No newline at end of file +\section{Writing a Producer}\label{sec:producer} diff --git a/sections/selector.tex b/sections/selector.tex index a8ea820..d46ffb6 100644 --- a/sections/selector.tex +++ b/sections/selector.tex @@ -2,7 +2,7 @@ \section{Writing a Selector}\label{sec:selector} The lepton selection we want to implement in this exercise is the following: -\textbf{\underline{Loose Electrons}} +\textbf{\underline{Loose Electrons}} \begin{itemize} - \item + \item \end{itemize} diff --git a/sections/setup.tex b/sections/setup.tex index 50d9741..a8f0cd0 100644 --- a/sections/setup.tex +++ b/sections/setup.tex @@ -1,15 +1,15 @@ -\section{Installation \& Setup} +\section{Installation \& setup} \justifying \begin{tcolorbox}[colback=green!5!white,colframe=green!75!black,width=\textwidth] Note: ColumnFlown only runs on Linux and may require up to 4 GB of disc space. \tcblower Also, the machine where you run this exercise must be mounted with CERN AFS. \end{tcolorbox} -Start by going to the GitLab repository of this exercise: +Start by going to the GitLab repository of this exercise: \texttt{\textcolor{LimeGreen}{\href{https://gitlab.cern.ch/cms-analysis/analysisexamples/columnflow-demo}{\underline{https://gitlab.cern.ch/cms-analysis/analysisexamples/columnflow-demo}}}} -To have your own copy of the code, fork the repository into your personal area. You can do this by clicking the \code{Fork} button on the upper right corner of the page. To set your Project URL please type your CERN username in the \code{Select a namespace} option. +To have your own copy of the code, fork the repository into your personal area. 
You can do this by clicking the \code{Fork} button on the upper right corner of the page. To set your Project URL please type your CERN username in the \code{Select a namespace} option. \begin{figure}[!h] \centering @@ -25,7 +25,7 @@ \section{Installation \& Setup} \texttt{https://gitlab.cern.ch//columnflow-demo} \newpage -In your forked project, go to the \code{Code} button on the right hand side of the page and copy the address under the \code{Clone with HTTPS} option. If you have an SSH key registered on GitLab prior to this exercise, you can also use the \code{Clone with SSH} option. +In your forked project, go to the \code{Code} button on the right hand side of the page and copy the address under the \code{Clone with HTTPS} option. If you have an SSH key registered on GitLab prior to this exercise, you can also use the \code{Clone with SSH} option. \begin{figure}[!h] \centering @@ -48,18 +48,18 @@ \section{Installation \& Setup} source setup.sh dev \end{lstlisting} -You will be asked to define a series of variables, the first of which is your CERN username. For all other variables you can keep the default name by just pressing \code{Enter}. Variables specific to this exercise will start with \code{H4L\_}, while ColumnFlow specific variables start with \code{CF\_}. You can find all variables in the \code{.setups/dev.sh} bash file. We invite you to check out this file and familiarize yourself with these variables. +You will be asked to define a series of variables, the first of which is your CERN username. For all other variables you can keep the default value by just pressing \code{Enter}. Variables specific to this exercise will start with \code{H4L\_}, while ColumnFlow specific variables start with \code{CF\_}. You can find all variables in the \code{.setups/dev.sh} bash file. We invite you to check out this file and familiarize yourself with these variables. 
\begin{figure}[!h] \centering \includegraphics[scale=0.62]{images/setup.png} \end{figure} -Note that the first installation of the software can take \underline{up to several minutes}. +Note that the first installation of the software can take \underline{up to several minutes}. Every time you want to work with ColumnFlow (e.g.\ if you open a new terminal window), you will need to source the \code{setup.sh} script again. -Once the installation is complete you should see a line of green text stating that the analysis has been successfully set up. You are now ready to start working with ColumnFlow! +Once the installation is complete you should see a line of green text stating that the analysis has been successfully set up. You are now ready to start working with ColumnFlow! \begin{figure}[!h] \centering @@ -74,7 +74,7 @@ \section{Installation \& Setup} %\subsection{ColumnFlow Tasks} -This exercise is organized in the form of \code{law} tasks, where different tasks create some form of output. By default, these tasks will save their output on a remote file system (e.g.\ \texttt{WLGC}), for which you will require a \code{voms-proxy}. If you would like to save certain/all outputs locally, we recommend to create a directory on a system with a larger amount of disk space (e.g.\ \texttt{EOS}). For such cases, you will need to update the \code{law.cfg} file accordingly. You can view the available tasks by running: +This exercise is organized in the form of \code{law} tasks, where different tasks create some form of output. You can view the available tasks by running: \begin{lstlisting}[language=bash] law index --verbose \end{lstlisting} @@ -89,5 +89,5 @@ \section{Installation \& Setup} \item \texttt{\textcolor{LimeGreen}{cf.CreateDatacards}} \end{itemize} - +By default, these tasks will save their output on a remote file system (e.g.\ \texttt{WLCG}), for which you will require a \code{voms-proxy}.
If you would like to save certain/all outputs locally, we recommend creating a directory on a system with a larger amount of disk space (e.g.\ \texttt{EOS}). For such cases, you will need to update the \code{law.cfg} file accordingly. diff --git a/sections/strategy.tex b/sections/strategy.tex index 21f27cb..50e00da 100644 --- a/sections/strategy.tex +++ b/sections/strategy.tex @@ -1,10 +1,10 @@ \section{Analysis strategy} -In order to find Higgs boson candidates we will first need to reconstruct the four leptons in the final state. We will do this by writing a \CCSPStlye{Selector} (Section~\ref{sec:selector}). +In order to find Higgs boson candidates, we need to reconstruct the four leptons in the final state. To select the four lepton candidates in the first place, we will need to write a \CCSPStlye{Selector} (Section~\ref{sec:selector}). %, which will filter out all physics objects that do not fulfill the selection criteria. In this selector, we will implement \underline{kinematic cuts}, \underline{vertex cuts} and \underline{Isolation \& ID} criteria. We will also implement \underline{trigger selections} such that only events interesting for the $ZZ$ analysis are selected. \begin{figure}[!h] \centering \includegraphics[scale=0.62]{images/strategy.png} -\end{figure} \ No newline at end of file +\end{figure} diff --git a/sections/taskarrayfunctions.tex b/sections/taskarrayfunctions.tex index a214b85..6ba175d 100644 --- a/sections/taskarrayfunctions.tex +++ b/sections/taskarrayfunctions.tex @@ -1,2 +1,2 @@ \chapter{Basic Functionalities}\label{chap:basics} -\section{The Mother of all: TaskArrayFunctions}\label{sec:taskarrayfunc} \ No newline at end of file +\section{The mother of all: TaskArrayFunctions}\label{sec:taskarrayfunc}