diff --git a/.gitignore b/.gitignore index b5e675a4..e99a905f 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,8 @@ src/main/python/test.pdf src/main/python/experiment_table.tex src/main/python/result_analysis/experiment_table.tex src/main/python/output.txt +kls_database.db +bin +dataset/mined-cherries +dataset/repo-sample.yaml +dataset/unsplit-mined-cherries.zip diff --git a/Dockerfile b/Dockerfile index 05cfcc18..dc79028e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,7 @@ -FROM openjdk:19-alpine +FROM alpine:latest +RUN apk update +RUN apk add --no-cache --upgrade openjdk21 # Build the jar files WORKDIR /home/user COPY src ./src @@ -12,15 +14,30 @@ COPY gradle gradle WORKDIR /home/user RUN ./gradlew Cherries || exit -FROM openjdk:19-alpine +FROM alpine:latest RUN apk update # Install dependencies for unix patch -RUN apk add --no-cache --upgrade bash diffutils patch git python3 py3-matplotlib unzip +RUN apk add --no-cache --upgrade bash diffutils patch git python3 py3-matplotlib unzip openjdk21 # Install dependencies for patching with matching RUN apk add --no-cache curl bash gcc musl-dev +RUN apk add --no-cache python3 poetry + +RUN apk add --no-cache zip unzip +COPY dataset /home/user/dataset +WORKDIR /home/user/dataset +RUN zip -s 0 mined-cherries.zip --out unsplit-mined-cherries.zip +RUN unzip unsplit-mined-cherries +RUN unzip repo-sample.zip +WORKDIR /home/user + +RUN apk add --no-cache texlive-most + +COPY src/main/python ./analysis +WORKDIR /home/user/analysis + ARG GROUP_ID ARG USER_ID @@ -57,3 +74,8 @@ RUN rustup default stable # RUN rustup default nightly RUN cargo install --path /home/user/mpatch + + +WORKDIR /home/user/analysis +RUN poetry install +WORKDIR /home/user/ diff --git a/INSTALL.md b/INSTALL.md new file mode 100644 index 00000000..a6e947eb --- /dev/null +++ b/INSTALL.md @@ -0,0 +1,82 @@ +# Installation +## Installation Instructions +In the following, we describe how to setup the evaluation of our paper step-by-step. 
+The instructions explain how to build the Docker image and run the validation in a Docker container. +For __Windows__ users, we recommend the use of WSL2 or a similar Linux environment. + +### 1. Install Docker (if required) +How to install Docker depends on your operating system: + +- _Windows or Mac_: You can find download and installation instructions [here](https://www.docker.com/get-started). +- _Linux Distributions_: How to install Docker on your system depends on your distribution. The chances are high that Docker is part of your distribution's package database. +Docker's [documentation](https://docs.docker.com/engine/install/) contains instructions for common distributions. + +Then, start the Docker daemon. + +### 2. Open a Suitable Terminal +``` +# Windows Command Prompt: + - Press 'Windows Key + R' on your keyboard + - Type in 'cmd' + - Click 'OK' or press 'Enter' on your keyboard + +# Windows PowerShell: + - Open the search bar (Default: 'Windows Key') and search for 'PowerShell' + - Start the PowerShell + +# Linux: + - Press 'ctrl + alt + T' on your keyboard +``` + +Clone this repository to a directory of your choice using git: +```shell +git clone https://github.com/VariantSync/patching-with-matching-eval.git +``` + +### 3. Build the Docker Container +To build the Docker container you can run the `build` script: +``` +# Linux/Mac/WSL2 (bash): + ./build.sh +``` + +## 4. Verification & Replication + +### Running the Replication or Verification +To execute the replication you can run the `execute` script corresponding to your operating system with `reproduction` as first argument. + +`./execute.sh reproduction` + +> WARNING! +> TODO: RUNTIME WARNING +> TODO: DISK USAGE WARNING +> Therefore, we offer a short verification (TODO minutes) ... +> You can run it by providing "verification" as argument instead of "reproduction" (i.e., `./execute.sh verification`). 
+> If you want to stop the execution, you can call the provided script for stopping the container in a separate terminal. +> When restarted, the execution will continue processing by restarting at the last unfinished state. +> `./stop-execution.sh` + +You might see warnings or errors reported from SLF4J like `Failed to load class "org.slf4j.impl.StaticLoggerBinder"` which you can safely ignore. +Further troubleshooting advice can be found at the bottom of this file. + +The results of the verification will be stored in the [evaluation-workdir](evaluation-workdir) directory. + +### Expected Output of the Verification +TODO + +## Troubleshooting + +### 'Got permission denied while trying to connect to the Docker daemon socket' +`Problem:` This is a common problem under Linux, if the user trying to execute Docker commands does not have the permissions to do so. + +`Fix:` You can fix this problem by either following the [post-installation instructions](https://docs.docker.com/engine/install/linux-postinstall/), or by executing the scripts in the replication package with elevated permissions (i.e., `sudo`). + +### 'Unable to find image 'mpatch-reproduction:latest' locally' +`Problem:` The Docker container could not be found. This either means that the name of the container that was built does not fit the name of the container that is being executed (this only happens if you changed the provided scripts), or that the Docker container was not built yet. + +`Fix:` Follow the instructions described above in the section `Build the Docker Container`. + +### Failed to load class "org.slf4j.impl.StaticLoggerBinder" +`Problem:` An operation within the initialization phase of the logger library we use (tinylog) failed. + +`Fix:` Please ignore this warning. Tinylog will fall back onto a default implementation (`Defaulting to no-operation (NOP) logger implementation`) and logging will work as expected. 
diff --git a/LICENSE.LGPL3 b/LICENSE.LGPL3 deleted file mode 100644 index 153d416d..00000000 --- a/LICENSE.LGPL3 +++ /dev/null @@ -1,165 +0,0 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - - This version of the GNU Lesser General Public License incorporates -the terms and conditions of version 3 of the GNU General Public -License, supplemented by the additional permissions listed below. - - 0. Additional Definitions. - - As used herein, "this License" refers to version 3 of the GNU Lesser -General Public License, and the "GNU GPL" refers to version 3 of the GNU -General Public License. - - "The Library" refers to a covered work governed by this License, -other than an Application or a Combined Work as defined below. - - An "Application" is any work that makes use of an interface provided -by the Library, but which is not otherwise based on the Library. -Defining a subclass of a class defined by the Library is deemed a mode -of using an interface provided by the Library. - - A "Combined Work" is a work produced by combining or linking an -Application with the Library. The particular version of the Library -with which the Combined Work was made is also called the "Linked -Version". - - The "Minimal Corresponding Source" for a Combined Work means the -Corresponding Source for the Combined Work, excluding any source code -for portions of the Combined Work that, considered in isolation, are -based on the Application, and not on the Linked Version. - - The "Corresponding Application Code" for a Combined Work means the -object code and/or source code for the Application, including any data -and utility programs needed for reproducing the Combined Work from the -Application, but excluding the System Libraries of the Combined Work. - - 1. Exception to Section 3 of the GNU GPL. 
- - You may convey a covered work under sections 3 and 4 of this License -without being bound by section 3 of the GNU GPL. - - 2. Conveying Modified Versions. - - If you modify a copy of the Library, and, in your modifications, a -facility refers to a function or data to be supplied by an Application -that uses the facility (other than as an argument passed when the -facility is invoked), then you may convey a copy of the modified -version: - - a) under this License, provided that you make a good faith effort to - ensure that, in the event an Application does not supply the - function or data, the facility still operates, and performs - whatever part of its purpose remains meaningful, or - - b) under the GNU GPL, with none of the additional permissions of - this License applicable to that copy. - - 3. Object Code Incorporating Material from Library Header Files. - - The object code form of an Application may incorporate material from -a header file that is part of the Library. You may convey such object -code under terms of your choice, provided that, if the incorporated -material is not limited to numerical parameters, data structure -layouts and accessors, or small macros, inline functions and templates -(ten or fewer lines in length), you do both of the following: - - a) Give prominent notice with each copy of the object code that the - Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the object code with a copy of the GNU GPL and this license - document. - - 4. Combined Works. 
- - You may convey a Combined Work under terms of your choice that, -taken together, effectively do not restrict modification of the -portions of the Library contained in the Combined Work and reverse -engineering for debugging such modifications, if you also do each of -the following: - - a) Give prominent notice with each copy of the Combined Work that - the Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the Combined Work with a copy of the GNU GPL and this license - document. - - c) For a Combined Work that displays copyright notices during - execution, include the copyright notice for the Library among - these notices, as well as a reference directing the user to the - copies of the GNU GPL and this license document. - - d) Do one of the following: - - 0) Convey the Minimal Corresponding Source under the terms of this - License, and the Corresponding Application Code in a form - suitable for, and under terms that permit, the user to - recombine or relink the Application with a modified version of - the Linked Version to produce a modified Combined Work, in the - manner specified by section 6 of the GNU GPL for conveying - Corresponding Source. - - 1) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (a) uses at run time - a copy of the Library already present on the user's computer - system, and (b) will operate properly with a modified version - of the Library that is interface-compatible with the Linked - Version. - - e) Provide Installation Information, but only if you would otherwise - be required to provide such information under section 6 of the - GNU GPL, and only to the extent that such information is - necessary to install and execute a modified version of the - Combined Work produced by recombining or relinking the - Application with a modified version of the Linked Version. 
(If - you use option 4d0, the Installation Information must accompany - the Minimal Corresponding Source and Corresponding Application - Code. If you use option 4d1, you must provide the Installation - Information in the manner specified by section 6 of the GNU GPL - for conveying Corresponding Source.) - - 5. Combined Libraries. - - You may place library facilities that are a work based on the -Library side by side in a single library together with other library -facilities that are not Applications and are not covered by this -License, and convey such a combined library under terms of your -choice, if you do both of the following: - - a) Accompany the combined library with a copy of the same work based - on the Library, uncombined with any other library facilities, - conveyed under the terms of this License. - - b) Give prominent notice with the combined library that part of it - is a work based on the Library, and explaining where to find the - accompanying uncombined form of the same work. - - 6. Revised Versions of the GNU Lesser General Public License. - - The Free Software Foundation may publish revised and/or new versions -of the GNU Lesser General Public License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the -Library as you received it specifies that a certain numbered version -of the GNU Lesser General Public License "or any later version" -applies to it, you have the option of following the terms and -conditions either of that published version or of any later version -published by the Free Software Foundation. If the Library as you -received it does not specify a version number of the GNU Lesser -General Public License, you may choose any version of the GNU Lesser -General Public License ever published by the Free Software Foundation. 
- - If the Library as you received it specifies that a proxy can decide -whether future versions of the GNU Lesser General Public License shall -apply, that proxy's public statement of acceptance of any version is -permanent authorization for you to choose that version for the -Library. \ No newline at end of file diff --git a/LICENSE_APACHE b/LICENSE_APACHE new file mode 100644 index 00000000..4864f51e --- /dev/null +++ b/LICENSE_APACHE @@ -0,0 +1,190 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2025 Alexander Schultheiß + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/LICENSE_MIT b/LICENSE_MIT new file mode 100644 index 00000000..0669b4d3 --- /dev/null +++ b/LICENSE_MIT @@ -0,0 +1,7 @@ +Copyright 2025 Alexander Schultheiß + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md index 6715a9d8..f76479da 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,207 @@ -# Reproduction Package for Pushing the Boundaries of Patch Automation +# Decades of GNU Patch and Git Cherry-Pick: Can We Do Better? -This artifact comprises the files and data to reproduce our evaluation of various patchers, including mpatch. +This is the reproduction package for our paper _Decades of GNU Patch and Git Cherry-Pick: Can We Do Better?_ which has been accepted to the 48th International Conference on Software Engineering (ICSE 2026). ## Content -Our sample of GitHub repositories and our dataset of mined patch scenarios is located in the _evaluation-workdir/data_ directory of the evaluation's working directory. 
-The sample and dataset are compressed as zip archives that have to be unpacked before they can be used. +The reproduction package consists of three main parts: -Our implementation of mpatch was written in Rust and can be found under _mpatch_. In its _mpatch/README.md_, you can also find instructions on -how to generate the documentation for mpatch. +1. [__mpatch__](/mpatch/README.md): The implementation of our novel match-based patcher, written in Rust. +2. [__Mined cherries__](dataset/): Our dataset of cherry picks mined from 5,000 GitHub repositories. +3. [__Empirical evaluation__](src/main/kotlin/org/variantsync/evaluation/PatcherEvaluationMain.kt): Our empirical evaluation of different language-agnostic patchers. -The implementation of our evaluation setup can be found in the Java and Kotlin sources in the _src/main_ folder. -The main file of the evaluation is _src/main/kotlin/org/anon/evaluation/PatcherEvaluationMain.kt_. +## Requirements +Software Requirements +- [Docker](https://www.docker.com/) -Our scripts for applying the various metrics to the different patchers and analyzing the statistics can be found under _src/main/python/result_analysis_. -The raw results of our evaluation are archived under _evaluation-workdir/results_ +Hardware Requirements +- We recommend running the evaluation on a system with at least __64GB__ of primary memory (RAM). +- 100GB—2TB of free drive space, depending on the configuration of the Docker image. -## Results without outliers -In our paper, we mention that we re-analyzed our results after excluding outliers. -Specifically, we removed the top 0.05% of results with the highest number of required fixes for each patcher; thus, treating patchers equally in this regard. -The updated version of Table IV is shown below. +> [!WARNING] +> The used storage medium should be very fast, e.g., M.2 NVMe SSD with 5000 MB/s, otherwise the evaluation may take an extremely long time. + +Other Requirements +- A stable internet connection. 
+ + +## Installation + +### [Optional] Configuration +Before building the Docker image, you may __optionally__ configure how the evaluation is executed. +To this end, we provide two configuration files: [config-reproduction.properties](docker/config-reproduction.properties) for the configuration of the reproduction of the entire evaluation, and [config-verification.properties](docker/config-verification.properties) for the verification of the correct installation of the reproduction package. + +Depending on the available hardware, you may need to adjust the following settings: + +- The number of threads used (i.e., how many repositories are processed in parallel). Please note that each thread requires an additional `40GB` of free space on your drive. +- Whether all repositories should be cloned before the evaluation. This eliminates the need for a stable internet connection once all repositories have been cloned. +- Whether repositories should be deleted after they have been evaluated. This significantly reduces the amount of required free space on your drive (around 100GB should be enough). + +> [!WARNING] +> The entire set of repositories considered by our evaluation requires about 600 GB of free space on your drive, if `clean-repositories` is set to `false`. + +> [!NOTE] +> Every change in the configuration must be followed by rebuilding the Docker image. + + +### Building the Docker image +The reproduction package is meant to be run in the Docker image that can be built using the provided Dockerfile. + +#### Linux +On Linux, you can execute the provided `build.sh` script to build the Docker image. + +> **Note:** The build process may take a while. (~5 minutes) + +> **Note:** The build process may require sudo privileges. + +```shell +./build.sh +``` + +#### Other OS +On other machines, you may call Docker directly. 
+In this case, you have to provide a USER_ID and GROUP_ID for the user in the Docker container: +```bash +# For example, under Linux, both variables are set as follows: +# USER_ID=$(id -u ${SUDO_USER:-$(whoami)}) +# GROUP_ID=$(id -g ${SUDO_USER:-$(whoami)}) + +docker build --network=host --build-arg USER_ID=$USER_ID --build-arg GROUP_ID=$GROUP_ID -t mpatch-reproduction . +``` +Ideally, the `USER_ID` and `GROUP_ID` match the ids of the user running the command (not root!). +Under Windows, you may provide any suitable id (e.g., `1000` for both) + +```shell +docker build --network=host --build-arg USER_ID=1000 --build-arg GROUP_ID=1000 -t mpatch-reproduction . +``` + + +### Verifying the correct installation +Once the building of the Docker image has completed, you can verify its correct installation. +By default, the verification will be executed within the [evaluation-workdir](evaluation-workdir) directory. + +#### Starting the verification +On Linux, you can execute the provided `execute.sh` script with the `verification` argument: +```shell +./execute.sh verification +``` + +On other machines, you may start a Docker container from the Docker image with the following command: +```bash +# Depending on your OS, you may have to change how the first path to evaluation-workdir is defined +docker run --rm -v "./evaluation-workdir/":"/home/user/evaluation-workdir" mpatch-reproduction verification +``` + +> [!NOTE] +> Depending on your hardware, the verification should require 5-30 minutes. + +#### Verification in a custom directory +> [!NOTE] +> You may provide any directory as first argument for `-v`, either by altering the `execute.sh` script or changing the command above. +> The `evaluation-workdir` is where the evaluation stores all its data while processing the repositories and evaluating patchers. +> The results will also be saved to this directory, once the evaluation or verification finishes. 
+ + For example, you may start the evaluation with +```shell + docker run --rm -v "/home/YOUR_USERNAME/ICSE-reproduction/":"/home/user/evaluation-workdir" mpatch-reproduction verification +``` + +#### Expected outcome + +> [!NOTE] +> If you executed the evaluation in a custom directory, all mentioned files will be located there. + + +The verification should begin with output that looks similar to the following: +```shell +2025-08-21 14:36:30 [main] org.variantsync.evaluation.PatcherEvaluationMainKt.main() +INFO: Starting experiment initialization. +2025-08-21 14:36:30 [main] org.variantsync.evaluation.execution.EvalUtilsKt.createOrLoadSamples() +INFO: Loading dataset for C with 1 usable repositories +2025-08-21 14:36:30 [main] org.variantsync.evaluation.execution.EvalUtilsKt.createOrLoadSamples() +... +INFO: Loading dataset for TypeScript with 1 usable repositories +2025-08-21 14:36:30 [main] org.variantsync.evaluation.execution.EvalUtilsKt.createOrLoadSamples() +INFO: Done. + +2025-08-21 14:36:30 [main] org.variantsync.evaluation.PatcherEvaluationMainKt.main() +INFO: Processing 5 repos in parallel +2025-08-21 14:36:30 [main] org.variantsync.evaluation.PatcherEvaluationMainKt.main() +INFO: Already considered 0 repos. +2025-08-21 14:36:30 [main] org.variantsync.evaluation.PatcherEvaluationMainKt.main() +INFO: Already processed a total of 0 evaluation runs. + +2025-08-21 14:36:35 [main] org.variantsync.evaluation.PatcherEvaluationMainKt.main() +INFO: Considering a total of 85 cherry-picks for repetition 1 +2025-08-21 14:36:35 [pool-1-thread-3] org.variantsync.evaluation.execution.EvalUtilsKt.cloneGitHubRepo() +INFO: cloning https://github.com/tensorflow/serving.git into /home/user/evaluation-workdir/REPOS/tensorflow_serving +... +``` +The output shows that the dataset used for verification contains one repository for each project language. The projects are cloned into the `evaluation-workdir`. 
+Once a project has been cloned, the patchers are evaluated on the cherry picks (i.e., patches) that have been found for that repository. + +The verification should complete with the following output: +```shell +Latexmk: All targets (metrics-verification.pdf) are up-to-date + +++++++++++++++++++++++++++++++++++++ + Analysis done +++++++++++++++++++++++++++++++++++++ + +The result table can be found under evaluation-workdir/metrics-verification.pdf +``` + +After all repositories have been considered, the result analysis is executed. +The raw results can be found in the `evaluation-workdir/results` directory. + + + +In addition, the script generates a PDF file with a result table similar to the one presented in our paper. +This table can be found under `evaluation-workdir/metrics-verification.pdf`. +It should look similar to this: +![](misc/verification-results.png) + +> [!NOTE] +> The verification results shown are based on only a tiny portion of our dataset and are therefore not representative. + +# Starting the reproduction +Once you have verified the correct installation, you can start the reproduction similar to how you started the verification. +You may also change the working directory to a custom directory as described for the verification. + +On Linux, you can execute the provided `execute.sh` script with the `reproduction` argument: +```shell +./execute.sh reproduction +``` + +On other machines, you may start a Docker container from the Docker image with the following command: +```bash +# Depending on your OS, you may have to change how the first path to evaluation-workdir is defined +docker run --rm -v "./evaluation-workdir/":"/home/user/evaluation-workdir" mpatch-reproduction reproduction +``` + +> [!NOTE] +> The results of the reproduction will be stored in the same manner as the results of the verification. + +> [!NOTE] +> Our evaluation processes large amounts of data. 
+> The main bottleneck is not the available CPU but the speed of the drive in which the `evaluation-workdir` is located. +> Depending on your hardware, the full reproduction may require a very long time. The expected runtime are 5-10 days, but the reproduction may also require several weeks if the drive is too slow. + + + +## Troubleshooting + +### 'Got permission denied while trying to connect to the Docker daemon socket' +`Problem:` This is a common problem under Linux, if the user trying to execute Docker commands does not have the permissions to do so. + +`Fix:` You can fix this problem by either following the [post-installation instructions](https://docs.docker.com/engine/install/linux-postinstall/), or by executing the scripts in the replication package with elevated permissions (i.e., `sudo`). + +### 'Unable to find image 'mpatch-reproduction:latest' locally' +`Problem:` The Docker container could not be found. This either means that the name of the container that was built does not fit the name of the container that is being executed (this only happens if you changed the provided scripts), or that the Docker container was not built yet. + +`Fix:` Follow the instructions described above in the section `Build the Docker Container`. + +### Failed to load class "org.slf4j.impl.StaticLoggerBinder" +`Problem:` An operation within the initialization phase of the logger library we use (tinylog) failed. + +`Fix:` Please ignore this warning. Tinylog will fall back onto a default implementation (`Defaulting to no-operation (NOP) logger implementation`) and logging will work as expected. -![results-without-outliers.png](results/results-without-outliers.png) diff --git a/REQUIREMENTS.md b/REQUIREMENTS.md index d156e4ec..75be79c6 100644 --- a/REQUIREMENTS.md +++ b/REQUIREMENTS.md @@ -2,7 +2,7 @@ There are no special requirements regarding the CPU or GPU. ### Primary Memory -We recommend to run the evaluation on a system with at least __64GB__ of primary memory (RAM). 
+We recommend running the evaluation on a system with at least __64GB__ of primary memory (RAM). ### Secondary Memory I/O operations have a considerable impact on the total runtime of the evaluation. @@ -10,8 +10,8 @@ Therefore, we strongly recommend storing the repository on an SSD (M2 technology and to configure Docker to store its data (e.g., images and containers) on this SSD as well. Using an HDD can lead to severe runtime problems and thereby timeouts that threaten the validity of the results. -The evaluation requires about __2TB__ of space as it clones hundreds of repositories, which in turn are copied dozens of times for multi-threading. -The space requirement can be considerably reduced by changing the number of used threads (e.g., to __20GB__), but then the evaluation will require considerably more time as well. +The evaluation requires about __1TB__ of space as it considers hundreds of repositories, which in turn are copied dozens of times for multi-threaded patcher evaluation. +The space requirement can be considerably reduced by changing the number of used threads in the [reproduction config](docker/config-reproduction.properties) (e.g., to __20GB__), but then the evaluation will require considerably more time as well. ## Software Requirements The study does not require a certain operating system or prepared environment. diff --git a/STATUS.md b/STATUS.md new file mode 100644 index 00000000..171d3c14 --- /dev/null +++ b/STATUS.md @@ -0,0 +1,29 @@ +# STATUS +## Overview +The reproduction package for our paper _Decades of GNU Patch and Git Cherry-Pick: Can We Do Better?_ consists of three parts: + +1. [__mpatch__](/mpatch/README.md): The implementation of our novel match-based patcher, written in Rust. +2. [__Mined cherries__](evaluation-workdir/data): Our large dataset of cherry picks mined from 5,000 GitHub repositories. +3. 
[__Empirical evaluation__](src/main/kotlin/org/variantsync/evaluation/PatcherEvaluationMain.kt): Our empirical evaluation of different language-agnostic patchers.
+
+## Purpose
+Our artifact has the following purposes:
+
+### **Reproducibility**
+We provide replication instructions that allow you to replicate the evaluation presented in Sections 4 through 6 in our paper.
+The replication is executed in a Docker container. To replicate our results, we also provide a dataset of the relevant open-source repositories in the very state we performed our validation on.
+
+### **Reusability**
+Our evaluation can be extended and reused to evaluate and compare additional patchers with the patchers considered in our paper.
+To do so, the [Patcher](src/main/kotlin/org/variantsync/evaluation/patching/Patcher.kt) interface has to be implemented for each additional patcher.
+Then, an instance of the implementing class can be added to the list of patchers during [evaluation initialization](src/main/kotlin/org/variantsync/evaluation/execution/EvalOperations.kt).
+
+Our novel patcher _mpatch_ is a fully functional tool that can be used as a patcher alternative to git cherry-pick or GNU patch.
+It can also be integrated into other evaluation setups using its [library](mpatch/src/lib.rs) or [CLI](/mpatch/README.md).
+
+## Claims
+We claim the _Artifacts Available_ badge as we made our artifacts publicly available on [Github](TODO) and [Zenodo](TODO) with an open-source license.
+Our dataset and the repositories from which we mined it are also publicly available.
+
+We claim the _Artifacts Evaluated Reusable_ badge as our evaluation and our novel patcher can be reused by other researchers and practitioners. 
+ diff --git a/build.gradle.kts b/build.gradle.kts index ec00c947..4f59fea0 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,12 +1,13 @@ import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar plugins { - kotlin("jvm") version "1.9.0" + kotlin("jvm") version "2.1.10" id("com.github.johnrengelman.shadow") version "8.1.1" application } group = "org.variantsync.core" + version = "0.2.0" repositories { @@ -19,17 +20,16 @@ dependencies { implementation("org.variantsync.vevos:simulation:2.0.0") // https://mvnrepository.com/artifact/org.apache.commons/commons-configuration2 implementation("org.apache.commons:commons-configuration2:2.9.0") + implementation("commons-io:commons-io:2.20.0") implementation("commons-logging:commons-logging:1.3.1") // https://mvnrepository.com/artifact/commons-beanutils/commons-beanutils implementation("commons-beanutils:commons-beanutils:1.9.4") implementation("org.tinylog:tinylog-api-kotlin:2.6.2") implementation("org.tinylog:tinylog-impl:2.6.2") - implementation("de.ovgu:featureide.lib.fm:3.7.2") // https://mvnrepository.com/artifact/org.eclipse.jgit/org.eclipse.jgit implementation("org.eclipse.jgit:org.eclipse.jgit:6.8.0.202311291450-r") implementation("org.sat4j:core:2.3.5") implementation("org.jetbrains:annotations:24.0.1") - implementation("net.ssehub:kernel_haven:1.0.0") implementation("net.lingala.zip4j:zip4j:2.11.4") implementation("org.variantsync:diffdetective:1.0.0") implementation("org.slf4j:slf4j-api:2.0.5") @@ -56,15 +56,9 @@ tasks.create("Cherries") { configurations = listOf(project.configurations.runtimeClasspath.get()) - manifest { - attributes["Main-Class"] = "org.variantsync.evaluation.PatcherEvaluationMainKt" - } + manifest { attributes["Main-Class"] = "org.variantsync.evaluation.PatcherEvaluationMainKt" } } -tasks.test { - useJUnitPlatform() -} +tasks.test { useJUnitPlatform() } -application { - mainClass.set("Main") -} +application { mainClass.set("Main") } diff --git a/build.sh b/build.sh index 
db1022f0..ace24fc4 100755 --- a/build.sh +++ b/build.sh @@ -2,4 +2,4 @@ USER_ID=$(id -u ${SUDO_USER:-$(whoami)}) GROUP_ID=$(id -g ${SUDO_USER:-$(whoami)}) -docker build --network=host --build-arg USER_ID=$USER_ID --build-arg GROUP_ID=$GROUP_ID -t pwm-eval . +docker build --network=host --build-arg USER_ID=$USER_ID --build-arg GROUP_ID=$GROUP_ID -t mpatch-reproduction . diff --git a/clean-docker.sh b/clean-docker.sh index f3700666..588afa6b 100755 --- a/clean-docker.sh +++ b/clean-docker.sh @@ -1,9 +1,9 @@ #! /bin/bash echo "Cleaning all related Docker data. This may take a moment..." echo "Trying to stop running containers..." -docker stop "$(docker ps -a -q --filter "ancestor=pwm-eval")" -echo "Removing pwm-eval image..." -docker image rm pwm-eval -echo "Removing pwm-eval containers..." -docker container rm "$(docker ps -a -q --filter "ancestor=pwm-eval")" +docker stop "$(docker ps -a -q --filter "ancestor=mpatch-reproduction")" +echo "Removing mpatch image..." +docker image rm mpatch-reproduction +echo "Removing mpatch containers..." +docker container rm "$(docker ps -a -q --filter "ancestor=mpatch-reproduction")" echo "...done." 
diff --git a/evaluation-workdir/data/.gitignore b/dataset/.gitignore similarity index 91% rename from evaluation-workdir/data/.gitignore rename to dataset/.gitignore index d98d7412..6973f7d4 100644 --- a/evaluation-workdir/data/.gitignore +++ b/dataset/.gitignore @@ -14,3 +14,4 @@ !mined-cherries.z11 !mined-cherries.z12 !repo-sample.zip +!mined-cherries-verification/ diff --git a/dataset/mined-cherries-verification/C#_Unity-Technologies_ml-agents.yaml b/dataset/mined-cherries-verification/C#_Unity-Technologies_ml-agents.yaml new file mode 100644 index 00000000..8a545d76 --- /dev/null +++ b/dataset/mined-cherries-verification/C#_Unity-Technologies_ml-agents.yaml @@ -0,0 +1,347 @@ +- language: C# + total_number_of_results: '13' + repo_name: Unity-Technologies/ml-agents + total_number_of_commits: '7096' + total_number_of_branches: '426' + total_number_of_committers: '70' +- - search_method: MessageScan + cherry_and_target: + cherry: + id: 707730256a6797336ba749f05f7dbf10dadd8126 + parent_ids: + - 02b77dd7ac05c386004845f30c9a623845bfbe91 + message: "[bug-fix] Use correct memories for LSTM SAC (#5228)\n\n* Use correct memories for LSTM SAC\r\n\r\n* Add some comments" + author: Ervin T + committer: GitHub + time: 'Time { raw: git_time { time: 1617908980, offset: -240, sign: 45 } }' + target: + id: bae46c274b743900cdb87b654181e7b83ab8535c + parent_ids: + - efa8f344c1d6bd2c59c8b8df3989df1f1b791f61 + message: | + [bug-fix] Use correct memories for LSTM SAC (#5228) + + * Use correct memories for LSTM SAC + + * Add some comments + + (cherry picked from commit 707730256a6797336ba749f05f7dbf10dadd8126) + author: Ervin T + committer: Ervin Teng + time: 'Time { raw: git_time { time: 1617917937, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 2c03d2b544d0c615e7b60d939f01532674d80753 + parent_ids: + - fce4ad3bdae981dc8f03f0e2d239792e61006ea8 + message: | + Buffer fixes + author: Ervin Teng + committer: Ervin Teng + 
time: 'Time { raw: git_time { time: 1614122747, offset: -300, sign: 45 } }' + target: + id: f879b616a797bf54cf19b1a6c616aa7cd5f72389 + parent_ids: + - 9a0005314f2116def4c6232c72fa9fdd1f6d7fe0 + message: | + Buffer fixes + + (cherry picked from commit 2c03d2b544d0c615e7b60d939f01532674d80753) + author: Ervin Teng + committer: Ervin Teng + time: 'Time { raw: git_time { time: 1614184238, offset: -300, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 8457c5bf87dcd943eb200a68735d48f84864448c + parent_ids: + - 2ca6a87c508b97f74ca5845a6eedca52429581a3 + message: |+ + Fixing the GymWrapper Logging issue (#5201) + + author: Vincent-Pierre BERGES + committer: GitHub + time: 'Time { raw: git_time { time: 1617159000, offset: -420, sign: 45 } }' + target: + id: cf18e0e4035cfc699531eadd2c38513848bc5a83 + parent_ids: + - 5d1c1e1f5fa70e7f44c5bcf6ce026a7520d47cde + message: | + Fixing the GymWrapper Logging issue (#5201) + + (cherry picked from commit 8457c5bf87dcd943eb200a68735d48f84864448c) + author: Vincent-Pierre BERGES + committer: Ervin Teng + time: 'Time { raw: git_time { time: 1617917860, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: ac4f43cf18b98d0fc7063b9b831e07429f7ea39e + parent_ids: + - 30fde2dffe2edd2562b24d1051f257b39d20a837 + message: "Load individual elements if state dict load fails (#5213)\n\n\r\nCo-authored-by: Vincent-Pierre BERGES \r\nCo-authored-by: Ervin T. " + author: andrewcoh <54679309+andrewcoh@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1617729234, offset: -240, sign: 45 } }' + target: + id: 5e4f279376616ab3096a819db0bed663483ddc73 + parent_ids: + - bae46c274b743900cdb87b654181e7b83ab8535c + message: | + Load individual elements if state dict load fails (#5213) + + Co-authored-by: Vincent-Pierre BERGES + Co-authored-by: Ervin T. 
+ (cherry picked from commit ac4f43cf18b98d0fc7063b9b831e07429f7ea39e) + author: andrewcoh <54679309+andrewcoh@users.noreply.github.com> + committer: Ervin Teng + time: 'Time { raw: git_time { time: 1617918137, offset: -240, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: aac2ee6cb650e6969a6d8b9f7c966f69b9e2df04 + parent_ids: + - 21548e02e83a563c75a0b6bae0cbbe1ceb6552e0 + message: "[🐛 🔨 ] set_action_for_agent expects a ActionTuple with batch size 1. (#5208)\n\n* [Bug Fix] set_action_for_agent expects a ActionTuple with batch size 1.\r\n\r\n* moving a line around" + author: Vincent-Pierre BERGES + committer: GitHub + time: 'Time { raw: git_time { time: 1617222987, offset: -420, sign: 45 } }' + target: + id: 2aaf326bf8ff13d11590fc8ee33f7e862503fb08 + parent_ids: + - 7fae8734459a4ecd232bb8c63bfada7bed52ede3 + message: | + [🐛 🔨 ] set_action_for_agent expects a ActionTuple with batch size 1. (#5208) + + * [Bug Fix] set_action_for_agent expects a ActionTuple with batch size 1. 
+ + * moving a line around + + (cherry picked from commit aac2ee6cb650e6969a6d8b9f7c966f69b9e2df04) + author: Vincent-Pierre BERGES + committer: Ervin Teng + time: 'Time { raw: git_time { time: 1617916611, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 272899a8a100b4ff1bbcaa575d2bc46965fc5938 + parent_ids: + - 18ff7bc85cc504296f5d6586afe261d490f39e53 + message: |+ + [bug fix] Fix warning using demo recorder (#5216) + + author: Vincent-Pierre BERGES + committer: GitHub + time: 'Time { raw: git_time { time: 1617309115, offset: -420, sign: 45 } }' + target: + id: f699d8db1a65e7a1435df819b7e3b3dee53b2593 + parent_ids: + - 7b800d15aee4750c084f97b1db95acd32f6b56c3 + message: | + [bug fix] Fix warning using demo recorder (#5216) + + (cherry picked from commit 272899a8a100b4ff1bbcaa575d2bc46965fc5938) + author: Vincent-Pierre BERGES + committer: Ervin Teng + time: 'Time { raw: git_time { time: 1617917324, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 875feb01509d8191fc3cc60d6a08c931546fb6dc + parent_ids: + - 41818c5e42f35e68d74e43385228a6bfd3278aa0 + message: |+ + Fix path to PushBlock demo (#5198) + + author: andrewcoh <54679309+andrewcoh@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1617122444, offset: -240, sign: 45 } }' + target: + id: deebc3dc904b8e66f37bb2e57186227c6ce51f06 + parent_ids: + - cf18e0e4035cfc699531eadd2c38513848bc5a83 + message: | + Fix path to PushBlock demo (#5198) + + (cherry picked from commit 875feb01509d8191fc3cc60d6a08c931546fb6dc) + author: andrewcoh <54679309+andrewcoh@users.noreply.github.com> + committer: Ervin Teng + time: 'Time { raw: git_time { time: 1617917880, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 63e7ad44d96b7663b91f005ca1d88f4f3b11dd2a + parent_ids: + - 
d17b735ea90616ad06ac536fd6e9de95ff398872 + message: "[bug-fix] When agent isn't training, don't clear update buffer (#5205)\n\n* Don't clear update buffer, but don't append to it either\r\n\r\n* Update changelog\r\n\r\n* Address comments\r\n\r\n* Make experience replay buffer saving more verbose" + author: Ervin T + committer: GitHub + time: 'Time { raw: git_time { time: 1617288882, offset: -240, sign: 45 } }' + target: + id: 7b800d15aee4750c084f97b1db95acd32f6b56c3 + parent_ids: + - 2aaf326bf8ff13d11590fc8ee33f7e862503fb08 + message: | + [bug-fix] When agent isn't training, don't clear update buffer (#5205) + + * Don't clear update buffer, but don't append to it either + + * Update changelog + + * Address comments + + * Make experience replay buffer saving more verbose + + (cherry picked from commit 63e7ad44d96b7663b91f005ca1d88f4f3b11dd2a) + author: Ervin T + committer: Ervin Teng + time: 'Time { raw: git_time { time: 1617917070, offset: -240, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 9c3dc4542ce2fa35f25dfa7809a7e1c2ee06001e + parent_ids: + - 73ba867a9a441585a00b3aa03de5c4cd1c72fc81 + message: "[🐛 🔨 ]Adding the ELO to the GlobalTrainingStatus (#5202)\n\n* Adding the ELO to the GlobalTrainingStatus\r\n\r\n* Update ml-agents/mlagents/trainers/ghost/trainer.py\r\n\r\nCo-authored-by: andrewcoh <54679309+andrewcoh@users.noreply.github.com>\r\n\r\nCo-authored-by: andrewcoh <54679309+andrewcoh@users.noreply.github.com>" + author: Vincent-Pierre BERGES + committer: GitHub + time: 'Time { raw: git_time { time: 1617222527, offset: -420, sign: 45 } }' + target: + id: 7fae8734459a4ecd232bb8c63bfada7bed52ede3 + parent_ids: + - 65c1550cfaee89c980a7b9f722e8925363507834 + message: | + [🐛 🔨 ]Adding the ELO to the GlobalTrainingStatus (#5202) + + * Adding the ELO to the GlobalTrainingStatus + + * Update ml-agents/mlagents/trainers/ghost/trainer.py + + Co-authored-by: andrewcoh <54679309+andrewcoh@users.noreply.github.com> 
+ + Co-authored-by: andrewcoh <54679309+andrewcoh@users.noreply.github.com> + (cherry picked from commit 9c3dc4542ce2fa35f25dfa7809a7e1c2ee06001e) + author: Vincent-Pierre BERGES + committer: Ervin Teng + time: 'Time { raw: git_time { time: 1617916603, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 86a4070bad4f5bca201db57f29117362c62617d0 + parent_ids: + - 31e26e769cb286ef08c924a2320e5ec9202eab98 + message: "[debug] Require all behavior names to have a matching YAML entry (#5210)\n\n* Add strict check to settings.py\r\n\r\n* Remove warning from trainer factory, add test\r\n\r\n* Add changelog\r\n\r\n* Fix test\r\n\r\n* Update changelog\r\n\r\n* Remove strict CLI options\r\n\r\n* Remove strict option, rename, make strict default\r\n\r\n* Remove newline\r\n\r\n* Update comments\r\n\r\n* Set default dict to actually default to a default dict\r\n\r\n* Fix tests\r\n\r\n* Fix tests again\r\n\r\n* Default trainer dict to requiring all fields\r\n\r\n* Fix settings typing\r\n\r\n* Use logger\r\n\r\n* Add default_settings to error" + author: Ervin T + committer: GitHub + time: 'Time { raw: git_time { time: 1618868475, offset: -240, sign: 45 } }' + target: + id: 06488b0635ae0e8d85846e76b693844010b7154d + parent_ids: + - ee8036815fd34513c6e44f75b8e3ef950bb7eae6 + message: "[debug] Require all behavior names to have a matching YAML entry (#5210) (#5296)\n\n* Add strict check to settings.py\r\n\r\n* Remove warning from trainer factory, add test\r\n\r\n* Add changelog\r\n\r\n* Fix test\r\n\r\n* Update changelog\r\n\r\n* Remove strict CLI options\r\n\r\n* Remove strict option, rename, make strict default\r\n\r\n* Remove newline\r\n\r\n* Update comments\r\n\r\n* Set default dict to actually default to a default dict\r\n\r\n* Fix tests\r\n\r\n* Fix tests again\r\n\r\n* Default trainer dict to requiring all fields\r\n\r\n* Fix settings typing\r\n\r\n* Use logger\r\n\r\n* Add default_settings to error\r\n\r\n(cherry 
picked from commit 86a4070bad4f5bca201db57f29117362c62617d0)" + author: Ervin T + committer: GitHub + time: 'Time { raw: git_time { time: 1619118102, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 2ce6810846ba9268e4fb5fb082fa54e90414c980 + parent_ids: + - 314f9e7e3355f6e5e1c6b017a39d156f1c01774b + message: "[bug-fix] Fix POCA LSTM, pad sequences in the back (#5206)\n\n* Pad buffer at the end\r\n\r\n* Fix padding in optimizer value estimate\r\n\r\n* Fix additional bugs and POCA\r\n\r\n* Fix groupmate obs, add tests\r\n\r\n* Update changelog\r\n\r\n* Improve tests\r\n\r\n* Address comments\r\n\r\n* Fix poca test\r\n\r\n* Fix buffer test\r\n\r\n* Increase entropy for Hallway\r\n\r\n* Add EOF newline\r\n\r\n* Fix Behavior Name\r\n\r\n* Address comments" + author: Ervin T + committer: GitHub + time: 'Time { raw: git_time { time: 1617662534, offset: -240, sign: 45 } }' + target: + id: efa8f344c1d6bd2c59c8b8df3989df1f1b791f61 + parent_ids: + - deebc3dc904b8e66f37bb2e57186227c6ce51f06 + message: | + [bug-fix] Fix POCA LSTM, pad sequences in the back (#5206) + + * Pad buffer at the end + + * Fix padding in optimizer value estimate + + * Fix additional bugs and POCA + + * Fix groupmate obs, add tests + + * Update changelog + + * Improve tests + + * Address comments + + * Fix poca test + + * Fix buffer test + + * Increase entropy for Hallway + + * Add EOF newline + + * Fix Behavior Name + + * Address comments + + (cherry picked from commit 2ce6810846ba9268e4fb5fb082fa54e90414c980) + author: Ervin T + committer: Ervin Teng + time: 'Time { raw: git_time { time: 1617917908, offset: -240, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 2c03d2b544d0c615e7b60d939f01532674d80753 + parent_ids: + - fce4ad3bdae981dc8f03f0e2d239792e61006ea8 + message: | + Buffer fixes + author: Ervin Teng + committer: Ervin Teng + time: 'Time { raw: git_time { time: 1614122747, 
offset: -300, sign: 45 } }' + target: + id: 2a26887b42ac2d514fc7cd54187402581a054275 + parent_ids: + - a5b324a30ad3162cbf078eb37a6602fd9c2c4127 + message: "Python Dataflow for Group Manager (#4926)\n\n* Make buffer type-agnostic\r\n\r\n* Edit types of Apped method\r\n\r\n* Change comment\r\n\r\n* Collaborative walljump\r\n\r\n* Make collab env harder\r\n\r\n* Add group ID\r\n\r\n* Add collab obs to trajectory\r\n\r\n* Fix bug; add critic_obs to buffer\r\n\r\n* Set group ids for some envs\r\n\r\n* Pretty broken\r\n\r\n* Less broken PPO\r\n\r\n* Update SAC, fix PPO batching\r\n\r\n* Fix SAC interrupted condition and typing\r\n\r\n* Fix SAC interrupted again\r\n\r\n* Remove erroneous file\r\n\r\n* Fix multiple obs\r\n\r\n* Update curiosity reward provider\r\n\r\n* Update GAIL and BC\r\n\r\n* Multi-input network\r\n\r\n* Some minor tweaks but still broken\r\n\r\n* Get next critic observations into value estimate\r\n\r\n* Temporarily disable exporting\r\n\r\n* Use Vince's ONNX export code\r\n\r\n* Cleanup\r\n\r\n* Add walljump collab YAML\r\n\r\n* Lower max height\r\n\r\n* Update prefab\r\n\r\n* Update prefab\r\n\r\n* Collaborative Hallway\r\n\r\n* Set num teammates to 2\r\n\r\n* Add config and group ids to HallwayCollab\r\n\r\n* Fix bug with hallway collab\r\n\r\n* Edits to HallwayCollab\r\n\r\n* Update onnx file meta\r\n\r\n* Make the env easier\r\n\r\n* Remove prints\r\n\r\n* Make Collab env harder\r\n\r\n* Fix group ID\r\n\r\n* Add cc to ghost trainer\r\n\r\n* Add comment to ghost trainer\r\n\r\n* Revert \"Add comment to ghost trainer\"\r\n\r\nThis reverts commit 292b6ce672a7a23ebfdd76bf07b11c50a825dcf4.\r\n\r\n* Actually add comment to ghosttrainer\r\n\r\n* Scale size of CC network\r\n\r\n* Scale value network based on num agents\r\n\r\n* Add 3rd symbol to hallway collab\r\n\r\n* Make comms one-hot\r\n\r\n* Fix S tag\r\n\r\n* Additional changes\r\n\r\n* Some more fixes\r\n\r\n* Self-attention Centralized Critic\r\n\r\n* separate entity encoder and RSA\r\n\r\n* 
clean up args in mha\r\n\r\n* more cleanups\r\n\r\n* fixed tests\r\n\r\n* entity embeddings work with no max\r\nIntegrate into CC\r\n\r\n* remove group id\r\n\r\n* very rough sketch for TeamManager interface\r\n\r\n* One layer for entity embed\r\n\r\n* Use 4 heads\r\n\r\n* add defaults to linear encoder, initialize ent encoders\r\n\r\n* add team manager id to proto\r\n\r\n* team manager for hallway\r\n\r\n* add manager to hallway\r\n\r\n* send and process team manager id\r\n\r\n* remove print\r\n\r\n* small cleanup\r\n\r\n* default behavior for baseTeamManager\r\n\r\n* add back statsrecorder\r\n\r\n* update\r\n\r\n* Team manager prototype (#4850)\r\n\r\n* remove group id\r\n\r\n* very rough sketch for TeamManager interface\r\n\r\n* add team manager id to proto\r\n\r\n* team manager for hallway\r\n\r\n* add manager to hallway\r\n\r\n* send and process team manager id\r\n\r\n* remove print\r\n\r\n* small cleanup\r\n\r\nCo-authored-by: Chris Elion \r\n\r\n* Remove statsrecorder\r\n\r\n* Fix AgentProcessor for TeamManager\r\nShould work for variable decision frequencies (untested)\r\n\r\n* team manager\r\n\r\n* New buffer layout, TeamObsUtil, pad dead agents\r\n\r\n* Use NaNs to get masks for attention\r\n\r\n* Add team reward to buffer\r\n\r\n* Try subtract marginalized value\r\n\r\n* Add Q function with attention\r\n\r\n* Some more progress - still broken\r\n\r\n* use singular entity embedding (#4873)\r\n\r\n* I think it's running\r\n\r\n* Actions added but untested\r\n\r\n* Fix issue with team_actions\r\n\r\n* Add next action and next team obs\r\n\r\n* separate forward into q_net and baseline\r\n\r\n* might be right\r\n\r\n* forcing this to work\r\n\r\n* buffer error\r\n\r\n* COMAA runs\r\n\r\n* add lambda return and target network\r\n\r\n* no target net\r\n\r\n* remove normalize advantages\r\n\r\n* add target network back\r\n\r\n* value estimator\r\n\r\n* update coma config\r\n\r\n* add target net\r\n\r\n* no target, increase lambda\r\n\r\n* remove prints\r\n\r\n* 
cloud config\r\n\r\n* use v return\r\n\r\n* use target net\r\n\r\n* adding zombie to coma2 brnch\r\n\r\n* add callbacks\r\n\r\n* cloud run with coma2 of held out zombie test env\r\n\r\n* target of baseline is returns_v\r\n\r\n* remove target update\r\n\r\n* Add team dones\r\n\r\n* ntegrate teammate dones\r\n\r\n* add value clipping\r\n\r\n* try again on cloud\r\n\r\n* clipping values and updated zombie\r\n\r\n* update configs\r\n\r\n* remove value head clipping\r\n\r\n* update zombie config\r\n\r\n* Add trust region to COMA updates\r\n\r\n* Remove Q-net for perf\r\n\r\n* Weight decay, regularizaton loss\r\n\r\n* Use same network\r\n\r\n* add base team manager\r\n\r\n* Remove reg loss, still stable\r\n\r\n* Black format\r\n\r\n* add team reward field to agent and proto\r\n\r\n* set team reward\r\n\r\n* add maxstep to teammanager and hook to academy\r\n\r\n* check agent by agent.enabled\r\n\r\n* remove manager from academy when dispose\r\n\r\n* move manager\r\n\r\n* put team reward in decision steps\r\n\r\n* use 0 as default manager id\r\n\r\n* fix setTeamReward\r\n\r\nCo-authored-by: Vincent-Pierre BERGES \r\n\r\n* change method name to GetRegisteredAgents\r\n\r\n* address comments\r\n\r\n* Revert C# env changes\r\n\r\n* Remove a bunch of stuff from envs\r\n\r\n* Remove a bunch of extra files\r\n\r\n* Remove changes from base-teammanager\r\n\r\n* Remove remaining files\r\n\r\n* Remove some unneeded changes\r\n\r\n* Make buffer typing neater\r\n\r\n* AgentProcessor fixes\r\n\r\n* Back out trainer changes\r\n\r\n* use delegate to avoid agent-manager cyclic reference\r\n\r\n* put team reward in decision steps\r\n\r\n* fix unregister agents\r\n\r\n* add teamreward to decision step\r\n\r\n* typo\r\n\r\n* unregister on disabled\r\n\r\n* remove OnTeamEpisodeBegin\r\n\r\n* change name TeamManager to MultiAgentGroup\r\n\r\n* more team -> group\r\n\r\n* fix tests\r\n\r\n* fix tests\r\n\r\n* Use attention tests from master\r\n\r\n* Revert \"Use attention tests from 
master\"\r\n\r\nThis reverts commit 78e052be8f36381bb6857817ff0f505716be83b9.\r\n\r\n* Use attention from master\r\n\r\n* Renaming fest\r\n\r\n* Use NamedTuples instead of attrs classes\r\n\r\n* Bug fixes\r\n\r\n* remove GroupMaxStep\r\n\r\n* add some doc\r\n\r\n* Fix mock brain\r\n\r\n* np float32 fixes\r\n\r\n* more renaming\r\n\r\n* Test for team obs in agentprocessor\r\n\r\n* Test for group and add team reward\r\n\r\n* doc improve\r\n\r\nCo-authored-by: Ervin T. \r\n\r\n* store registered agents in set\r\n\r\n* remove unused step counts\r\n\r\n* Global group ids\r\n\r\n* Fix Trajectory test\r\n\r\n* Remove duplicated files\r\n\r\n* Add team methods to AgentAction\r\n\r\n* Buffer fixes\r\n\r\n(cherry picked from commit 2c03d2b544d0c615e7b60d939f01532674d80753)\r\n\r\n* Add test for GroupObs\r\n\r\n* Change AgentAction back to 0 pad and add tests\r\n\r\n* Addressed some comments\r\n\r\n* Address some comments\r\n\r\n* Add more comments\r\n\r\n* Rename internal function\r\n\r\n* Move padding method to AgentBufferField\r\n\r\n* Fix slicing typing and string printing in AgentBufferField\r\n\r\n* Fix to-flat and add tests\r\n\r\n* Rename GroupmateStatus to AgentStatus\r\n\r\n* Update comments\r\n\r\n* Added GroupId, GlobalGroupId, GlobalAgentId types\r\n\r\n* Update comment\r\n\r\n* Make some agent processor properties internal\r\n\r\n* Rename add_group_status\r\n\r\n* Rename store_group_status, fix test\r\n\r\n* Rename clear_group_obs\r\n\r\nCo-authored-by: Andrew Cohen \r\nCo-authored-by: Ruo-Ping Dong \r\nCo-authored-by: Chris Elion \r\nCo-authored-by: andrewcoh <54679309+andrewcoh@users.noreply.github.com>\r\nCo-authored-by: Vincent-Pierre BERGES " + author: Ervin T + committer: GitHub + time: 'Time { raw: git_time { time: 1614885920, offset: -300, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 21548e02e83a563c75a0b6bae0cbbe1ceb6552e0 + parent_ids: + - 9c3dc4542ce2fa35f25dfa7809a7e1c2ee06001e + message: |+ + 
[Bug Fix] Issue 5204 : GymToUnityWrapper must call reset if done (#5207) + + author: Vincent-Pierre BERGES + committer: GitHub + time: 'Time { raw: git_time { time: 1617222539, offset: -420, sign: 45 } }' + target: + id: 5d1c1e1f5fa70e7f44c5bcf6ce026a7520d47cde + parent_ids: + - f699d8db1a65e7a1435df819b7e3b3dee53b2593 + message: | + [Bug Fix] Issue 5204 : GymToUnityWrapper must call reset if done (#5207) + + (cherry picked from commit 21548e02e83a563c75a0b6bae0cbbe1ceb6552e0) + author: Vincent-Pierre BERGES + committer: Ervin Teng + time: 'Time { raw: git_time { time: 1617917838, offset: -240, sign: 45 } }' + is_trivial: true diff --git a/dataset/mined-cherries-verification/C++_tensorflow_serving.yaml b/dataset/mined-cherries-verification/C++_tensorflow_serving.yaml new file mode 100644 index 00000000..25bad63e --- /dev/null +++ b/dataset/mined-cherries-verification/C++_tensorflow_serving.yaml @@ -0,0 +1,1370 @@ +- repo_name: tensorflow/serving + total_number_of_branches: '75' + total_number_of_commits: '8639' + language: C++ + total_number_of_committers: '91' + total_number_of_results: '49' +- - search_method: MessageScan + cherry_and_target: + cherry: + id: fdf23de9993c0fef83ce5a4029cd2ed8582039a8 + parent_ids: + - 281c665b2eaed9836da313b720611019a1a12c31 + message: | + Add missing include. + + PiperOrigin-RevId: 212981021 + author: awk + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1536936279, offset: -420, sign: 45 } }' + target: + id: 84ef62d579863d4f1b2fdc7151cd7b35136e89d3 + parent_ids: + - c81b2c26d5965d20d7b40c109a01ef53b226caf6 + message: | + Add missing include. 
+ + PiperOrigin-RevId: 212981021 + (cherry picked from commit fdf23de9993c0fef83ce5a4029cd2ed8582039a8) + author: awk + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1536944261, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5c7740fc3d8d5c017643a8cc40a7202717b10dd6 + parent_ids: + - 831434474b34e0067a18b3b5bc0161b9e1c7b08b + message: | + Add git hash for version metadata of model server and add tags for dev and nightly builds. + + PiperOrigin-RevId: 217403451 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1539730648, offset: -420, sign: 45 } }' + target: + id: ff018e22b0fe1f4b98ae4b7a778b7194db2e2d9f + parent_ids: + - 02a6fc10216aed5644badac31f35cef3243688fa + message: | + Add git hash for version metadata of model server and add tags for dev and nightly builds. + + PiperOrigin-RevId: 217403451 + (cherry picked from commit 5c7740fc3d8d5c017643a8cc40a7202717b10dd6) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1540319108, offset: -240, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 6ff5d9f80ef4ae7b69af2e114fed969c163d6c47 + parent_ids: + - 72edc451d4166b4e3e022b1e242046e2bfdd8c4c + message: | + Fix erroneous formatting of numbers that are larger than 6 digits. + + Such numbers are converted to scientific notation (by StrCat()). + The code suffixed such numbers with '.0' yielding invalid numbers. + + As an example before this fix, 9000000 would get string converted + (incorrectly) to 9e+06.0 instead of 9e+06 -- the latter is correct. 
+ Similarly .00003 gets converted incorrectly to 3e-5.0 instead of + 3e-5 + + Fixes https://github.com/tensorflow/serving/issues/989 + + PiperOrigin-RevId: 204536301 + author: awk + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1531520095, offset: -420, sign: 45 } }' + target: + id: fca72ce414b809dfacdc958e5988f98bad7e0dd7 + parent_ids: + - 3d2db30e3b413764c8d3fba04ea5e3a08261d4ee + message: | + Fix erroneous formatting of numbers that are larger than 6 digits. + + Such numbers are converted to scientific notation (by StrCat()). + The code suffixed such numbers with '.0' yielding invalid numbers. + + As an example before this fix, 9000000 would get string converted + (incorrectly) to 9e+06.0 instead of 9e+06 -- the latter is correct. + Similarly .00003 gets converted incorrectly to 3e-5.0 instead of + 3e-5 + + Fixes https://github.com/tensorflow/serving/issues/989 + + PiperOrigin-RevId: 204536301 + (cherry picked from commit 6ff5d9f80ef4ae7b69af2e114fed969c163d6c47) + author: awk + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533180860, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 3a0bdbe5c216e4eeab8110cad8d959d21a2e376a + parent_ids: + - 8b8434240e688ac78f1132f19c86951f99ed2abe + message: | + Add Python 3.7 as supported for tensorflow-serving-api package. + + The API package itself is a collection of .proto files, and should be + compatible with Python 3.7. Underlying dependencies: protobuf and grpcio + both support Python 3.7, so we can too. + + Fixes #1640 + + PiperOrigin-RevId: 313979135 + author: Abhijit Karmarkar + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1590891783, offset: -420, sign: 45 } }' + target: + id: f775bb25e80a6c7b3c66842eb9085d44d9752ec2 + parent_ids: + - 43fec910a84f5883f569e61a32d2392f2c648570 + message: | + Add Python 3.7 as supported for tensorflow-serving-api package. 
+ + The API package itself is a collection of .proto files, and should be + compatible with Python 3.7. Underlying dependencies: protobuf and grpcio + both support Python 3.7, so we can too. + + Fixes #1640 + + PiperOrigin-RevId: 313979135 + (cherry picked from commit 3a0bdbe5c216e4eeab8110cad8d959d21a2e376a) + author: Abhijit Karmarkar + committer: netfs + time: 'Time { raw: git_time { time: 1590892674, offset: -420, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 739b2065a4bd7f08b6f4b61ba6604124c1513db0 + parent_ids: + - dba55c6f89c6478f775e8bb72b7f8682bb99c06c + message: | + Fix missing NCCL header path + + PiperOrigin-RevId: 215463591 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1538517343, offset: -420, sign: 45 } }' + target: + id: 1ac5f399de11f7abc1e0e114c36e5154dbd012e0 + parent_ids: + - 62e06f742a8adb6719ac6017dc4be7882d3289bf + message: | + Fix missing NCCL header path + + PiperOrigin-RevId: 215463591 + (cherry picked from commit 739b2065a4bd7f08b6f4b61ba6604124c1513db0) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1538596178, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: be7c70d779a39fad73a535185a4f4f991c1d859a + parent_ids: + - 99d32fc8379cfb1082894b1ac4e4602784aa1264 + message: | + Fix GPU build + + Fixes https://github.com/tensorflow/serving/issues/1150 + + PiperOrigin-RevId: 217890974 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1539970179, offset: -420, sign: 45 } }' + target: + id: a313d904abe7b298a9c0c3f6cb5fc80d0cba6bf5 + parent_ids: + - ff018e22b0fe1f4b98ae4b7a778b7194db2e2d9f + message: | + Fix GPU build + + Fixes https://github.com/tensorflow/serving/issues/1150 + + PiperOrigin-RevId: 217890974 + (cherry picked from commit be7c70d779a39fad73a535185a4f4f991c1d859a) + author: 
gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1540319144, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: e1ceccbde64272fbe09915cc34053ad44648447e + parent_ids: + - 83533141aa62f1e08237c5b0f6380819a20c67a0 + message: | + Return error strings that conform to JSON specs, to + prevent malformed JSON responses that fail to parse. + + Fixes #1600 + + PiperOrigin-RevId: 310921928 + author: Abhijit Karmarkar + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1589212514, offset: -420, sign: 45 } }' + target: + id: 1ff4d31cd9a0a736162813c149139cce0ccaaa2c + parent_ids: + - dae2bc6bcb622055bcf86f2f2b464d4950deff9f + message: | + Return error strings that conform to JSON specs, to + prevent malformed JSON responses that fail to parse. + + Fixes #1600 + + PiperOrigin-RevId: 310921928 + (cherry picked from commit e1ceccbde64272fbe09915cc34053ad44648447e) + author: Abhijit Karmarkar + committer: netfs + time: 'Time { raw: git_time { time: 1589828887, offset: -420, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 88ad2c62de3eb63846289534dfab500db1b2fb6b + parent_ids: + - e805d1b1fa2dc7eb47e913d3f35f391946102e01 + message: | + Internal change. + + PiperOrigin-RevId: 206635850 + author: awk + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1532982326, offset: -420, sign: 45 } }' + target: + id: 10a67ef61d44635bcc17341dac970107e8f55495 + parent_ids: + - 4383edee344026a8d5a111344c9cce8b426fce01 + message: | + Internal change. 
+ + PiperOrigin-RevId: 206635850 + (cherry picked from commit 88ad2c62de3eb63846289534dfab500db1b2fb6b) + author: awk + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533156883, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: fb7c5defe70a5f454809dc46a947fd470294b67d + parent_ids: + - b16d34129468e30571bb725dd8c9a66cb905f8ea + message: | + Add util for get model status + author: ynqa + committer: ynqa + time: 'Time { raw: git_time { time: 1531906826, offset: 540, sign: 43 } }' + target: + id: af72316214f857f50bdf62dac36f79a6659bfeff + parent_ids: + - 79f621daa9642f011a8234a55cb7f9252f89e45a + message: | + Add util for get model status + + (cherry picked from commit fb7c5defe70a5f454809dc46a947fd470294b67d) + author: ynqa + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533156827, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: a173809f40b81299a79663d757a02ba201381e97 + parent_ids: + - f8a607a11035732c3a3287cdd1ce2cb46856459b + message: | + Add old/beta gRPC API signature and route it to GA gRPC API. + This prevents breaking existing clients, along with a warning + that in (near) furture the old API will be removed. + + PiperOrigin-RevId: 207878049 + author: awk + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1533736870, offset: -420, sign: 45 } }' + target: + id: b7179d8e426ecbd0f0937e53c73b0ed7c7a6529d + parent_ids: + - c4f4bbe5484779ed277d240c73c32045d85e719f + message: | + Add old/beta gRPC API signature and route it to GA gRPC API. + This prevents breaking existing clients, along with a warning + that in (near) furture the old API will be removed. 
+ + PiperOrigin-RevId: 207878049 + (cherry picked from commit a173809f40b81299a79663d757a02ba201381e97) + author: awk + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533759581, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 1277991345f2144f194c6d5b87dc862a02068c5d + parent_ids: + - fd0a3d0bd8d0adc7ce2e52268d11189c4be126fc + message: | + Install TF Serving API in development Docker images + + PiperOrigin-RevId: 213676487 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1537386297, offset: -420, sign: 45 } }' + target: + id: defd37d1383739335983ebf00dd0899d0c0d3a2a + parent_ids: + - dc8d417c884d5af0d9a25f6a14d954d96756fb2a + message: | + Install TF Serving API in development Docker images + + PiperOrigin-RevId: 213676487 + (cherry picked from commit 1277991345f2144f194c6d5b87dc862a02068c5d) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1538596178, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 844b751d6325ef294b806d7549eb79eddafeb132 + parent_ids: + - 5b78ebf5ecb123574d7414822088ce5998b6b0f8 + message: Update build_pip_package.sh + author: gautamvasudevan <12356580+gautamvasudevan@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1532452971, offset: -420, sign: 45 } }' + target: + id: 71127a782ab0113a4ec9776c29e7857c31fa4123 + parent_ids: + - f34e8440208629f94a41ca08d0b9cbee2a607d01 + message: | + Update build_pip_package.sh + + (cherry picked from commit 844b751d6325ef294b806d7549eb79eddafeb132) + author: gautamvasudevan <12356580+gautamvasudevan@users.noreply.github.com> + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533156883, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 
2b276fc328e47c51e5c1d14d0e64ba2009773eca + parent_ids: + - b6810971dad0401b5457f4aa7b6eb6c22e7e5f13 + message: | + Adds LICENSE file to tensorflow-serving-api python package. + + It is recommended to include LICENSE file in the package.[1] But tensorflow-serving-api doesn't have one in it. + + We need LICENSE file and recent versions of `wheel` will automatically pickup LICENSE file in local directory. + + [1] https://packaging.python.org/tutorials/packaging-projects/#creating-a-license + + PiperOrigin-RevId: 343562364 + author: Chao Xie + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1605910310, offset: -480, sign: 45 } }' + target: + id: 93668685296f7ab70b8ebdc477684e808806bec9 + parent_ids: + - c48d83c55b0268334a4ae4b528bf32de5857f2df + message: | + Adds LICENSE file to tensorflow-serving-api python package. + + It is recommended to include LICENSE file in the package.[1] But tensorflow-serving-api doesn't have one in it. + + We need LICENSE file and recent versions of `wheel` will automatically pickup LICENSE file in local directory. + + [1] https://packaging.python.org/tutorials/packaging-projects/#creating-a-license + + PiperOrigin-RevId: 343562364 + (cherry picked from commit 2b276fc328e47c51e5c1d14d0e64ba2009773eca) + author: Chao Xie + committer: Chao Xie + time: 'Time { raw: git_time { time: 1605910557, offset: -480, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 01c2fb804161125bebda0b5880657ad046ab2da7 + parent_ids: + - 1cceb78579f51a20d07447436180e55ef769774d + message: | + Handle range checking of floating point numbers correctly. + + Underlying rapidjson::IsLosslessFloat() does not work correctly + (e.g. IsLosslessFloat(0.2) returns false!). We now split the + range check into two parts: one to check if a integral value + can fit in float/double without loss and other part to do usual + decimal compare against the defined limits. The latter is used + for non-integer values. 
+ + Fixes https://github.com/tensorflow/serving/issues/1136 + + PiperOrigin-RevId: 217166796 + author: awk + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1539624903, offset: -420, sign: 45 } }' + target: + id: 02a6fc10216aed5644badac31f35cef3243688fa + parent_ids: + - 7c9a8f6e46227d0f7e74d489d91f169ce86bc0af + message: | + Handle range checking of floating point numbers correctly. + + Underlying rapidjson::IsLosslessFloat() does not work correctly + (e.g. IsLosslessFloat(0.2) returns false!). We now split the + range check into two parts: one to check if a integral value + can fit in float/double without loss and other part to do usual + decimal compare against the defined limits. The latter is used + for non-integer values. + + Fixes https://github.com/tensorflow/serving/issues/1136 + + PiperOrigin-RevId: 217166796 + (cherry picked from commit 01c2fb804161125bebda0b5880657ad046ab2da7) + author: awk + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1540318779, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 140bbe7d4f6440449b61b360f227da6851b6332b + parent_ids: + - 7a2fecf1695cfb4fc743adbde0be35c2dd31e82e + message: | + Fix the bug running get-pip.py on python3.6. + + PiperOrigin-RevId: 425544723 + author: Sungsoon Cho + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1643700923, offset: -480, sign: 45 } }' + target: + id: cc66d958ded8aaaa568dbf4aedd6d0a2d2ca19c9 + parent_ids: + - d69e89280c94d986e6cfd51e395b694b1a3afd14 + message: | + Fix the bug running get-pip.py on python3.6. 
+ + PiperOrigin-RevId: 425544723 + (cherry picked from commit 140bbe7d4f6440449b61b360f227da6851b6332b) + author: Sungsoon Cho + committer: RaviTeja Gorijala + time: 'Time { raw: git_time { time: 1644633204, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: fc8c5301125903a3e2ddc8b743f78f0d539bf61b + parent_ids: + - 2a3f866d21a2883b4a25fb7153b9847bcc9c39ef + message: | + Keep pypi package from overwriting TF package + + PiperOrigin-RevId: 208718607 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1534283662, offset: -420, sign: 45 } }' + target: + id: f66b9fda721008f06eb0739eba1491220136647f + parent_ids: + - 7fb55e4aaf0a1caf3d0726b744fa351ff4289555 + message: | + Keep pypi package from overwriting TF package + + PiperOrigin-RevId: 208718607 + (cherry picked from commit fc8c5301125903a3e2ddc8b743f78f0d539bf61b) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1534284820, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: f92ac94bf5d597582065198e9ff50e6eeb50c078 + parent_ids: + - 5fb57805b9987d1727c7326718d2c0c2bcfc7be1 + message: | + Make Internal Change + + PiperOrigin-RevId: 355546760 + author: Chao Xie + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1612416042, offset: -480, sign: 45 } }' + target: + id: 644baeb482741e96d2ca0a5acdc2d7d2ab038710 + parent_ids: + - 8300bd1e8878b7fd8b6cbf604d7feef45eb42ab7 + message: | + Make Internal Change + + PiperOrigin-RevId: 355546760 + (cherry picked from commit f92ac94bf5d597582065198e9ff50e6eeb50c078) + author: Chao Xie + committer: Chao Xie + time: 'Time { raw: git_time { time: 1612467092, offset: -480, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 52ff24de9a0da84aba6065ca41c9fc060a301391 + parent_ids: + - 
ba1539e3afa14784345e233f0660b07a8536e453 + message: | + Add GPU serving Docker image, change source of binary + + PiperOrigin-RevId: 206344761 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1532715585, offset: -420, sign: 45 } }' + target: + id: 4383edee344026a8d5a111344c9cce8b426fce01 + parent_ids: + - f7d69e1430048e71d7b5b523bddd83cd379af967 + message: | + Add GPU serving Docker image, change source of binary + + PiperOrigin-RevId: 206344761 + (cherry picked from commit 52ff24de9a0da84aba6065ca41c9fc060a301391) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533156883, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 74ea413db4407c1affe2b9fa69dc53ecdba61fa6 + parent_ids: + - baab6180231009e9ebd86d5d0df301ce360e445e + message: | + Install cuda-nvrtc 10.1 into release tf serving gpu docker image. + + PiperOrigin-RevId: 314861537 + author: Haiming Bao + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1591331416, offset: -420, sign: 45 } }' + target: + id: 9b74939d6bd45486a3e10168f9863a33e6a488eb + parent_ids: + - 997223c8329a17df4308d96d15d67eeda08dc9a9 + message: | + Install cuda-nvrtc 10.1 into release tf serving gpu docker image. 
+ + PiperOrigin-RevId: 314861537 + (cherry picked from commit 74ea413db4407c1affe2b9fa69dc53ecdba61fa6) + author: Haiming Bao + committer: Haiming Bao + time: 'Time { raw: git_time { time: 1591379545, offset: -420, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 012e0b992d4b6ff003e0bc02226681ae526c9059 + parent_ids: + - b153d43f956a468bf6a72ca6bd708cdb257f2d08 + message: | + Remove old bazel workarounds + + PiperOrigin-RevId: 205460242 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1532124413, offset: -420, sign: 45 } }' + target: + id: 79f621daa9642f011a8234a55cb7f9252f89e45a + parent_ids: + - 58dee83387647bf1b144b3114292edeab0ac5b59 + message: | + Remove old bazel workarounds + + PiperOrigin-RevId: 205460242 + (cherry picked from commit 012e0b992d4b6ff003e0bc02226681ae526c9059) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533156818, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: a09b6e7850f47121df0b9df01e720ea2b8dde331 + parent_ids: + - 6169bd7d3e27d6b04ee871138815e6c150a81059 + message: | + Update Dockerfile.devel* with py3.7 installed. + + PiperOrigin-RevId: 403429336 + author: Sanjay Kumar Chotakur + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1634323588, offset: -420, sign: 45 } }' + target: + id: 498196b1869d84e2f0c5a4a206545084916d2b0a + parent_ids: + - 1fc1b44105a329294c4552c6d2c83857e560daf0 + message: | + Update Dockerfile.devel* with py3.7 installed. 
+ + PiperOrigin-RevId: 403429336 + (cherry picked from commit a09b6e7850f47121df0b9df01e720ea2b8dde331) + author: Sanjay Kumar Chotakur + committer: jay90099 + time: 'Time { raw: git_time { time: 1634324862, offset: -420, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 01ea36b337073ed3e6949d078c7e742373321d70 + parent_ids: + - 3fdc9d312c1ab5a62d1ceacdfc435a6789582be6 + message: | + Minor documentation updates and fix docker gpu build + Fixes #987 + + PiperOrigin-RevId: 204508857 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1531509188, offset: -420, sign: 45 } }' + target: + id: 58dee83387647bf1b144b3114292edeab0ac5b59 + parent_ids: + - d90e0ae0dfab4886da4fe8b519c97418afb88f9b + message: | + Minor documentation updates and fix docker gpu build + Fixes #987 + + PiperOrigin-RevId: 204508857 + (cherry picked from commit 01ea36b337073ed3e6949d078c7e742373321d70) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533156805, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: d8555724ed54f3102952f9eca5a7d2b695899e20 + parent_ids: + - cda26f6065753167ac83e3b1aad7485d3d1d6db0 + message: | + Upgrade to ubuntu20.04 for docker images, which defaults to gcc-9. + + PiperOrigin-RevId: 488422181 + author: Dero Gharibian + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1668454092, offset: -480, sign: 45 } }' + target: + id: 32631bc7a524dec7b38c5671ea181961f7a3b72a + parent_ids: + - 4a824caf4cb63f68071c3cdb20247612f58ed910 + message: | + Upgrade to ubuntu20.04 for docker images, which defaults to gcc-9. 
+ + PiperOrigin-RevId: 488422181 + (cherry picked from commit d8555724ed54f3102952f9eca5a7d2b695899e20) + author: Dero Gharibian + committer: Shawn Lu + time: 'Time { raw: git_time { time: 1680035108, offset: -420, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 64469a0da89e314680509df21eb8b2f7efd91060 + parent_ids: + - 7105d7affbe752e9d577082d564617fa9b5725a6 + message: | + add flags to config max_num_load_retries and load_retry_interval_micros + author: Yu Zheng + committer: Yu Zheng + time: 'Time { raw: git_time { time: 1537377866, offset: -240, sign: 45 } }' + target: + id: dc8d417c884d5af0d9a25f6a14d954d96756fb2a + parent_ids: + - a278817d0f826030fe3e47110ef1d970340b0323 + message: | + add flags to config max_num_load_retries and load_retry_interval_micros + + (cherry picked from commit 64469a0da89e314680509df21eb8b2f7efd91060) + author: Yu Zheng + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1538596178, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 140bbe7d4f6440449b61b360f227da6851b6332b + parent_ids: + - 7a2fecf1695cfb4fc743adbde0be35c2dd31e82e + message: | + Fix the bug running get-pip.py on python3.6. + + PiperOrigin-RevId: 425544723 + author: Sungsoon Cho + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1643700923, offset: -480, sign: 45 } }' + target: + id: 2202d1183c44b68b0f142bce774f6e1e92c897a9 + parent_ids: + - 0e8f0cd5932e8b8c435938d7b0713647c333fe36 + message: | + Fix the bug running get-pip.py on python3.6. 
+ + PiperOrigin-RevId: 425544723 + (cherry picked from commit 140bbe7d4f6440449b61b360f227da6851b6332b) + author: Sungsoon Cho + committer: RaviTeja Gorijala + time: 'Time { raw: git_time { time: 1644368692, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: c1ce0752f1076bd6f92e1af5f73e3a3c552f4691 + parent_ids: + - 6ad81047a282f0e67ceebe9e25d7f0201e636a84 + message: | + Fix broken GetModelMetadata request processing. + + Fixes #1612 + + PiperOrigin-RevId: 312723939 + author: Abhijit Karmarkar + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1590091029, offset: -420, sign: 45 } }' + target: + id: 55c40374b548b89e8de6d899ef2b0b355c0fa9e5 + parent_ids: + - dae2bc6bcb622055bcf86f2f2b464d4950deff9f + message: | + Fix broken GetModelMetadata request processing. + + Fixes #1612 + + PiperOrigin-RevId: 312723939 + (cherry picked from commit c1ce0752f1076bd6f92e1af5f73e3a3c552f4691) + author: Abhijit Karmarkar + committer: netfs + time: 'Time { raw: git_time { time: 1590093828, offset: -420, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 140bbe7d4f6440449b61b360f227da6851b6332b + parent_ids: + - 7a2fecf1695cfb4fc743adbde0be35c2dd31e82e + message: | + Fix the bug running get-pip.py on python3.6. + + PiperOrigin-RevId: 425544723 + author: Sungsoon Cho + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1643700923, offset: -480, sign: 45 } }' + target: + id: 34f4d3c932d5c8785a84bde965db5d8cc15d6ce0 + parent_ids: + - fd5650a50f9287eab0d39f6b8df9285e8671b3ce + message: | + Fix the bug running get-pip.py on python3.6. 
+ + PiperOrigin-RevId: 425544723 + (cherry picked from commit 140bbe7d4f6440449b61b360f227da6851b6332b) + author: Sungsoon Cho + committer: RaviTeja Gorijala + time: 'Time { raw: git_time { time: 1644428421, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 48b71603188701bcfcba24fc3fc2ba86f15f2a0b + parent_ids: + - df5ad1540993bda08303436f9b72d8b2547ff1e3 + message: | + Update to cuDNN 7.2 in Docker images + + PiperOrigin-RevId: 215313756 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1538441397, offset: -420, sign: 45 } }' + target: + id: 62e06f742a8adb6719ac6017dc4be7882d3289bf + parent_ids: + - dceb6e4e3854dd1fc5ff17dc32ee37969c3f7f29 + message: | + Update to cuDNN 7.2 in Docker images + + PiperOrigin-RevId: 215313756 + (cherry picked from commit 48b71603188701bcfcba24fc3fc2ba86f15f2a0b) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1538596178, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5b78ebf5ecb123574d7414822088ce5998b6b0f8 + parent_ids: + - fb7c5defe70a5f454809dc46a947fd470294b67d + message: | + Add config, sources + author: ynqa + committer: ynqa + time: 'Time { raw: git_time { time: 1531934960, offset: 540, sign: 43 } }' + target: + id: f34e8440208629f94a41ca08d0b9cbee2a607d01 + parent_ids: + - af72316214f857f50bdf62dac36f79a6659bfeff + message: | + Add config, sources + + (cherry picked from commit 5b78ebf5ecb123574d7414822088ce5998b6b0f8) + author: ynqa + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533156836, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: d6914ba17ff2557ad91809807d61699db18255cd + parent_ids: + - 1a18b724dd24a1bf325182e423efe85cc19db184 + message: | + Accept integers in input requests where float/doubles are allowed, + 
converting them to corresponding decimal type. The conversion fails + (and so does the request) if the integer is too big to fit in the + decimal type (and incurring a precision loss). + + PiperOrigin-RevId: 215515883 + author: awk + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1538545554, offset: -420, sign: 45 } }' + target: + id: bd3c1a0a4e268fe2ceffb9ae59655e252c3be412 + parent_ids: + - 1ac5f399de11f7abc1e0e114c36e5154dbd012e0 + message: | + Accept integers in input requests where float/doubles are allowed, + converting them to corresponding decimal type. The conversion fails + (and so does the request) if the integer is too big to fit in the + decimal type (and incurring a precision loss). + + PiperOrigin-RevId: 215515883 + (cherry picked from commit d6914ba17ff2557ad91809807d61699db18255cd) + author: awk + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1538596178, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 46694d03ef9287d72f856b2a4a159381ff645815 + parent_ids: + - 134361f9c9d0fe83ecfcffa0447d5f70a37c3cc2 + message: | + Fix broken GPU build by add TF cuda options: + + https://github.com/tensorflow/tensorflow/blob/aa44812e3e40b1c95586466bc177b07525b787ea/.bazelrc#L227 + + to TF Serving. This causes TF sources to be built with correct + options (bazel options are not picked from relevant dependent + repo (TF in this case), when building code from that repo). + + PiperOrigin-RevId: 369022405 + author: Abhijit Karmarkar + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1618682627, offset: -420, sign: 45 } }' + target: + id: 05377a99a61217976cb4ddd2ea2c7f1ce71ae0eb + parent_ids: + - f144361e527dcbebd6a97dca244412581d24e43c + message: | + Fix broken GPU build by add TF cuda options: + + https://github.com/tensorflow/tensorflow/blob/aa44812e3e40b1c95586466bc177b07525b787ea/.bazelrc#L227 + + to TF Serving. 
This causes TF sources to be built with correct + options (bazel options are not picked from relevant dependent + repo (TF in this case), when building code from that repo). + + PiperOrigin-RevId: 369022405 + (cherry picked from commit 46694d03ef9287d72f856b2a4a159381ff645815) + author: Abhijit Karmarkar + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1618696951, offset: -420, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5bd88e01b357fd874b00b13726e45d4c03a09959 + parent_ids: + - f1e33715403d2674d1a968181275c5b4db2f6338 + message: | + Internal change + + PiperOrigin-RevId: 207179369 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1533250004, offset: -420, sign: 45 } }' + target: + id: 5b16f90875e54107d1355eafe32694f59cd266e8 + parent_ids: + - 08edcf95c6aaebcc4190fcc82a973be942ed5a98 + message: | + Internal change + + PiperOrigin-RevId: 207179369 + (cherry picked from commit 5bd88e01b357fd874b00b13726e45d4c03a09959) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533759581, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 97978adfc4fa982d2448a9fc358dffe7a2945c32 + parent_ids: + - aee599e833ba2a6b0b2994985f66e9ff68dacab9 + message: "Add `-N` to useradd. \n\nFixes #1137" + author: vfdev + committer: GitHub + time: 'Time { raw: git_time { time: 1539327159, offset: 120, sign: 43 } }' + target: + id: 7c9a8f6e46227d0f7e74d489d91f169ce86bc0af + parent_ids: + - a3eef519d1f6ca843daafac94e1c68ac9cece69c + message: "Add `-N` to useradd. 
\n\nFixes #1137\n\n(cherry picked from commit 97978adfc4fa982d2448a9fc358dffe7a2945c32)\n" + author: vfdev + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1540318754, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 140bbe7d4f6440449b61b360f227da6851b6332b + parent_ids: + - 7a2fecf1695cfb4fc743adbde0be35c2dd31e82e + message: | + Fix the bug running get-pip.py on python3.6. + + PiperOrigin-RevId: 425544723 + author: Sungsoon Cho + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1643700923, offset: -480, sign: 45 } }' + target: + id: 6f38402a82225b13713dbe81ab5ca2f325e6efaf + parent_ids: + - 55f8331c8e4f614fc08378231ac80a3cdd90e232 + message: | + Fix the bug running get-pip.py on python3.6. + + PiperOrigin-RevId: 425544723 + (cherry picked from commit 140bbe7d4f6440449b61b360f227da6851b6332b) + author: Sungsoon Cho + committer: RaviTeja Gorijala + time: 'Time { raw: git_time { time: 1644368181, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5adb5ed5c061cd48ae2f448eb83c64c1b165d0fd + parent_ids: + - b8564c0a6a9c3145b555d8be25823f40d3322c09 + message: | + Update docker instructions to simplify process using Docker Hub + + PiperOrigin-RevId: 203004750 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1530665082, offset: -420, sign: 45 } }' + target: + id: 26539333978b5e7ec95a32357a2f720c6438d58d + parent_ids: + - 0d219b72c01d45af8c5d8079950b09df839d9e15 + message: | + Update docker instructions to simplify process using Docker Hub + + PiperOrigin-RevId: 203004750 + (cherry picked from commit 5adb5ed5c061cd48ae2f448eb83c64c1b165d0fd) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533156555, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 
9186b1a465211fc7f334025a922914949c5e2471 + parent_ids: + - 42afdc98579e43522a66682713bcf3dfaedddcb9 + message: | + Set cuda compute capabilities for `cuda` build config. + + PiperOrigin-RevId: 325552145 + author: Abhijit Karmarkar + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1596854221, offset: -420, sign: 45 } }' + target: + id: 731a34f0b3f43a6f7a8da85655d3a4a5c72d066a + parent_ids: + - f57632facd4f74644b36e81cadac5bf4fee43424 + message: | + Set cuda compute capabilities for `cuda` build config. + + PiperOrigin-RevId: 325552145 + (cherry picked from commit 9186b1a465211fc7f334025a922914949c5e2471) + author: Abhijit Karmarkar + committer: netfs + time: 'Time { raw: git_time { time: 1597085969, offset: -420, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: a40cf2176dbdc0454fa9c32e7ab3c0d6da0c738e + parent_ids: + - 6bb32ee0ced800c9afa155594335719df1b1a830 + message: | + Install TensorRT in TF Serving's docker build. + + PiperOrigin-RevId: 215241014 + author: laigd + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1538416167, offset: -420, sign: 45 } }' + target: + id: dceb6e4e3854dd1fc5ff17dc32ee37969c3f7f29 + parent_ids: + - defd37d1383739335983ebf00dd0899d0c0d3a2a + message: | + Install TensorRT in TF Serving's docker build. 
+ + PiperOrigin-RevId: 215241014 + (cherry picked from commit a40cf2176dbdc0454fa9c32e7ab3c0d6da0c738e) + author: laigd + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1538596178, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 4cbac38c307ea11527d0e45a3b18fd41f1b67601 + parent_ids: + - ce382aeb104b5ed61e9ee8a1c452fe32be357dd0 + message: | + Update docker files + + PiperOrigin-RevId: 206050867 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1532552656, offset: -420, sign: 45 } }' + target: + id: f7d69e1430048e71d7b5b523bddd83cd379af967 + parent_ids: + - 71127a782ab0113a4ec9776c29e7857c31fa4123 + message: | + Update docker files + + PiperOrigin-RevId: 206050867 + (cherry picked from commit 4cbac38c307ea11527d0e45a3b18fd41f1b67601) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533156883, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: b2d2b87d67ec065ae1e4df4ca5b36c151d6457f6 + parent_ids: + - 153e75ab838c252b3a8f66824754dd4f30519601 + message: | + Update docker builds + + PiperOrigin-RevId: 208535314 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1534192897, offset: -420, sign: 45 } }' + target: + id: 129fc505b7d053f8c5e51f80bbac015493cc3bc2 + parent_ids: + - f344c92e50189dc4d336c21086992f98e04e6e9f + message: | + Update docker builds + + PiperOrigin-RevId: 208535314 + (cherry picked from commit b2d2b87d67ec065ae1e4df4ca5b36c151d6457f6) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1534201258, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: aed1ae7e406d511e648709239411fcd0681b5ac0 + parent_ids: + - 5a6936324059fcdfedeb2ac9f5f1d53544bbc8ef + message: | + Update docker 
images + + PiperOrigin-RevId: 208891157 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1534371083, offset: -420, sign: 45 } }' + target: + id: 17bec134e3e62a2e97cb5211fa630a7a81acf5f9 + parent_ids: + - 274e97a4b473c4ecbfb7da670dd1a0ad07d2b836 + message: | + Update docker images + + PiperOrigin-RevId: 208891157 + (cherry picked from commit aed1ae7e406d511e648709239411fcd0681b5ac0) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1534438551, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 68d92ff3fdca0641f465cc3ba3858a619c8b82a6 + parent_ids: + - 77807bf783a3a18a68c0260b27444c2c93a6dc3d + message: | + Update Dockerfile.devel* with py3.8 installed. + + PiperOrigin-RevId: 519799120 + author: Shawn Lu + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1679945804, offset: -420, sign: 45 } }' + target: + id: c688ac1d81e02c80f16a7f8fed241b2180a76170 + parent_ids: + - 4a824caf4cb63f68071c3cdb20247612f58ed910 + message: | + Update Dockerfile.devel* with py3.8 installed. + + PiperOrigin-RevId: 519799120 + (cherry picked from commit 68d92ff3fdca0641f465cc3ba3858a619c8b82a6) + author: Shawn Lu + committer: Shawn Lu + time: 'Time { raw: git_time { time: 1680040207, offset: -420, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: ab1372b0a2537cde1d4597f7468dce9bcbdc0ee5 + parent_ids: + - 110845a40a6670067aa7cc24ed889ea829999063 + message: | + o Restructure test to start one model server for every unique + variation of model server flags (model name, path and other + params). This results in majority of the tests (all predict/ + classify/regress over grpc and rest) sharing the same server + and executing faster. This change shaves off ~15-20 seconds + in overall test run. + + o Add retry for REST API calls. 
+ + PiperOrigin-RevId: 207606701 + author: awk + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1533590364, offset: -420, sign: 45 } }' + target: + id: c4f4bbe5484779ed277d240c73c32045d85e719f + parent_ids: + - b5a84e39c48f9ca2724344dcda0f706532285696 + message: | + o Restructure test to start one model server for every unique + variation of model server flags (model name, path and other + params). This results in majority of the tests (all predict/ + classify/regress over grpc and rest) sharing the same server + and executing faster. This change shaves off ~15-20 seconds + in overall test run. + + o Add retry for REST API calls. + + PiperOrigin-RevId: 207606701 + (cherry picked from commit ab1372b0a2537cde1d4597f7468dce9bcbdc0ee5) + author: awk + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533759581, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 1bcc01132df0cc835a9d3388ee6fce65d2f2fbe9 + parent_ids: + - 101730f8ea8218955a02e0474d1d7e643cba1263 + message: | + Internal change + + PiperOrigin-RevId: 206864789 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1533087941, offset: -420, sign: 45 } }' + target: + id: 3d2db30e3b413764c8d3fba04ea5e3a08261d4ee + parent_ids: + - 10a67ef61d44635bcc17341dac970107e8f55495 + message: | + Internal change + + PiperOrigin-RevId: 206864789 + (cherry picked from commit 1bcc01132df0cc835a9d3388ee6fce65d2f2fbe9) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533156883, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 1bcc01132df0cc835a9d3388ee6fce65d2f2fbe9 + parent_ids: + - 101730f8ea8218955a02e0474d1d7e643cba1263 + message: | + Internal change + + PiperOrigin-RevId: 206864789 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 
1533087941, offset: -420, sign: 45 } }' + target: + id: 08edcf95c6aaebcc4190fcc82a973be942ed5a98 + parent_ids: + - e16a67da804053d78bdcc57b6b7fdba2dc1a017f + message: | + Internal change + + PiperOrigin-RevId: 206864789 + (cherry picked from commit 1bcc01132df0cc835a9d3388ee6fce65d2f2fbe9) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533759581, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 07f9ba50ebf75991ef27541c8a318de76886c5f6 + parent_ids: + - 793304c0e9d6dd7d3d031e21c32aeea3d7336a03 + message: | + Update Dockerfile.devel* with py3.9 installed. + + PiperOrigin-RevId: 559891667 + author: Shawn Lu + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1692916877, offset: -420, sign: 45 } }' + target: + id: 89e76e785ebe458264e6a40b76b8258e32377103 + parent_ids: + - f620b474db18ecd9dbcabcaed0d541fc7f275c40 + message: "Update Dockerfile.devel* with py3.9 installed. (#2178)\n\nPiperOrigin-RevId: 559891667\r\n(cherry picked from commit 07f9ba50ebf75991ef27541c8a318de76886c5f6)\r\n\r\nCo-authored-by: Shawn Lu " + author: Raviteja Gorijala <36429068+rtg0795@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1692993729, offset: -420, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: ebbbc87b925008da25bfb53f8fb0d8398c6767a7 + parent_ids: + - c304f2ca35894fe7cbb6905d987a9f013987abda + message: | + Enable download of TF Serving sources at arbitrary commit for GPU/MKL docker images. + + PiperOrigin-RevId: 345132448 + author: Abhijit Karmarkar + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1606872119, offset: -480, sign: 45 } }' + target: + id: 4725d9095919c64bb3f6c60b55bfbb103fe6a52e + parent_ids: + - ac3a2eb190bbe1dd9054391b293a2172a8b07bec + message: | + Enable download of TF Serving sources at arbitrary commit for GPU/MKL docker images. 
+ + PiperOrigin-RevId: 345132448 + (cherry picked from commit ebbbc87b925008da25bfb53f8fb0d8398c6767a7) + author: Abhijit Karmarkar + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1613521210, offset: -480, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 535821151b9ea03464050e6f19690ece5a26be81 + parent_ids: + - 1b62e2e5d1800a0e1ce856e96ef29a5e80e0962b + message: | + Internal change + + PiperOrigin-RevId: 207300092 + author: gvasudevan + committer: TensorFlower Gardener + time: 'Time { raw: git_time { time: 1533322101, offset: -420, sign: 45 } }' + target: + id: b5a84e39c48f9ca2724344dcda0f706532285696 + parent_ids: + - 5b16f90875e54107d1355eafe32694f59cd266e8 + message: | + Internal change + + PiperOrigin-RevId: 207300092 + (cherry picked from commit 535821151b9ea03464050e6f19690ece5a26be81) + author: gvasudevan + committer: Abhijit Karmarkar + time: 'Time { raw: git_time { time: 1533759581, offset: -240, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 7359725873a4f5356893309b9075da09b1fb23f2 + parent_ids: + - 10ef8d4cebf2ee2cf9a77066208f1ada2016336e + message: | + Updates the test model for TF Text v2.6.0. + + PiperOrigin-RevId: 427871547 + author: Robert Neale + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1644539347, offset: -480, sign: 45 } }' + target: + id: 86e35d7c52259d6ab1ef2a34ccc98fba998d28a2 + parent_ids: + - 462a36f9d606d6959683a0070eecb38ae207865b + message: | + Updates the test model for TF Text v2.6.0. 
+ + PiperOrigin-RevId: 427871547 + (cherry picked from commit 7359725873a4f5356893309b9075da09b1fb23f2) + author: Robert Neale + committer: RaviTeja Gorijala + time: 'Time { raw: git_time { time: 1644618708, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 4aa0cfc24098000163fdfe270c4eb205e98790b1 + parent_ids: + - 5a3c268d3fe508466b8ccc7c7785032a1f1b5c1d + message: | + Remove hyphen from version numbers to conform with PIP. + + PiperOrigin-RevId: 250520004 + author: Abhijit Karmarkar + committer: tensorflow-copybara + time: 'Time { raw: git_time { time: 1559151471, offset: -420, sign: 45 } }' + target: + id: 7074db64efdfb2d0cd9250aa1854717778933307 + parent_ids: + - d1ff44989d50c3e61cb10f68cd3a9b3aab67323b + message: | + Remove hyphen from version numbers to conform with PIP. + + PiperOrigin-RevId: 250520004 + (cherry picked from commit 4aa0cfc24098000163fdfe270c4eb205e98790b1) + author: Abhijit Karmarkar + committer: netfs + time: 'Time { raw: git_time { time: 1559153066, offset: -420, sign: 45 } }' + is_trivial: false diff --git a/dataset/mined-cherries-verification/C_yshui_picom.yaml b/dataset/mined-cherries-verification/C_yshui_picom.yaml new file mode 100644 index 00000000..63aeba2f --- /dev/null +++ b/dataset/mined-cherries-verification/C_yshui_picom.yaml @@ -0,0 +1,736 @@ +- language: C + total_number_of_commits: '2559' + total_number_of_branches: '47' + total_number_of_committers: '78' + total_number_of_results: '22' + repo_name: yshui/picom +- - search_method: MessageScan + cherry_and_target: + cherry: + id: 642a43acbb5a92f00c31bd7487c07fab59ab18a9 + parent_ids: + - 755996a42c88483b452e7b9133c8b69c808d3b7a + message: | + Update README.md and CHANGELOG.md + + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707617227, offset: 0, sign: 43 } }' + target: + id: 5984747912daf94dbc5ab417b8da270151e9e93d + parent_ids: + - 
5db2ae08e1d7539271ecf41e61909d55f33e64c2 + message: | + Update README.md + + (cherry picked from commit 642a43acbb5a92f00c31bd7487c07fab59ab18a9) + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819989, offset: 0, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 1bfb1299857dea9b058ba6934529c6e62a59ea29 + parent_ids: + - f3bdd01dc8afcc1e311ec6e81d999f2ec2b9965a + message: | + backend: don't choose SGI_video_sync vblank if opengl is not enabled + + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1706562156, offset: 0, sign: 43 } }' + target: + id: d457348972e56719ddbc077c5b6d77686f8c0c48 + parent_ids: + - c4139a94cac06b9a7517fcfb6384fa877e5718cc + message: | + backend: don't choose SGI_video_sync vblank if opengl is not enabled + + (cherry picked from commit 1bfb1299857dea9b058ba6934529c6e62a59ea29) + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819708, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 4a79e7b7779297db9a622523953a6f207fe38cde + parent_ids: + - a655730e490a57f53bb38a740aa6dbaf8f872ecc + message: | + render: fix binding the root background pixmap in case of depth mismatch + + fix the same issue in the legacy backends, see the previous commit for + details. this commit also removes the x_validate_pixmap function because + it was used only in the get_root_tile function and the fix pretty much + implies and embeds it. 
+ author: Maxim Solovyov + committer: Maxim Solovyov + time: 'Time { raw: git_time { time: 1706828685, offset: 180, sign: 43 } }' + target: + id: 96399a01d1ac5626e8065461b1085363a99b996d + parent_ids: + - 6d44ef41a7201ef8ebd157e15ecc0db58d0205d7 + message: | + render: fix binding the root background pixmap in case of depth mismatch + + fix the same issue in the legacy backends, see the previous commit for + details. this commit also removes the x_validate_pixmap function because + it was used only in the get_root_tile function and the fix pretty much + implies and embeds it. + + (cherry picked from commit 4a79e7b7779297db9a622523953a6f207fe38cde) + Signed-off-by: Yuxuan Shui + author: Maxim Solovyov + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819734, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5fba210ad64b6a15e415c872e552c3b158adc7bc + parent_ids: + - 1bfb1299857dea9b058ba6934529c6e62a59ea29 + message: | + vblank: make schedule fallible + + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1706566024, offset: 0, sign: 43 } }' + target: + id: 2f79a5d75a451fd0ac0bb02d68b45c94ae2b3a21 + parent_ids: + - d457348972e56719ddbc077c5b6d77686f8c0c48 + message: | + vblank: make schedule fallible + + (cherry picked from commit 5fba210ad64b6a15e415c872e552c3b158adc7bc) + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819717, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: a655730e490a57f53bb38a740aa6dbaf8f872ecc + parent_ids: + - 4401666cfb06f9d76a1bf109feda42730a6da9aa + message: | + picom: fix binding the root background pixmap in case of depth mismatch + + if the root background pixmap's depth doesn't match the root window's + depth, find a suitable visual for the root background pixmap's depth 
and + use it instead of the root window's visual + author: Maxim Solovyov + committer: Maxim Solovyov + time: 'Time { raw: git_time { time: 1706828675, offset: 180, sign: 43 } }' + target: + id: 6d44ef41a7201ef8ebd157e15ecc0db58d0205d7 + parent_ids: + - d3a108b3db8732e01bfb3970f7e5772fc5d0008d + message: | + picom: fix binding the root background pixmap in case of depth mismatch + + if the root background pixmap's depth doesn't match the root window's + depth, find a suitable visual for the root background pixmap's depth and + use it instead of the root window's visual + + (cherry picked from commit a655730e490a57f53bb38a740aa6dbaf8f872ecc) + Signed-off-by: Yuxuan Shui + author: Maxim Solovyov + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819732, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: f3bdd01dc8afcc1e311ec6e81d999f2ec2b9965a + parent_ids: + - 71e29c4128725856ab1546a2501823bdac32d819 + message: | + vblank: don't use symbols from backend/gl/glx.h + + sgi video sync runs in a separate thread, we don't want to have + potential races between threads. + + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1706550764, offset: 0, sign: 43 } }' + target: + id: c4139a94cac06b9a7517fcfb6384fa877e5718cc + parent_ids: + - fc1d1d40e5ffe73cb35a6b09b3cd713a14d242f6 + message: | + vblank: don't use symbols from backend/gl/glx.h + + sgi video sync runs in a separate thread, we don't want to have + potential races between threads. 
+ + (cherry picked from commit f3bdd01dc8afcc1e311ec6e81d999f2ec2b9965a) + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819698, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 14a345a81755943f85e7e1fa76f2d341633ad2ba + parent_ids: + - 5fba210ad64b6a15e415c872e552c3b158adc7bc + message: | + vblank: make init fallible + + Improve error handling in sgi_video_sync_scheduler_init a bit. + + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1706566261, offset: 0, sign: 43 } }' + target: + id: 4e6dc55ec84c871f9954391506e18eca0acd98f6 + parent_ids: + - 2f79a5d75a451fd0ac0bb02d68b45c94ae2b3a21 + message: | + vblank: make init fallible + + Improve error handling in sgi_video_sync_scheduler_init a bit. + + (cherry picked from commit 14a345a81755943f85e7e1fa76f2d341633ad2ba) + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819724, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: eb39426b08a77563fbbe8e74320a3799663b8c47 + parent_ids: + - bbc657e4bc099ec2f958a222b658ab5a5345c055 + message: | + backend: gl: inherit image's inner properties in the gl_image_decouple function + + Image decouple should keep all the image properies from the source + image, so shader must be copied. And there are also some internal + properties what should be inherited but wasn't. + + In particular this prevents images from losing their shaders when + alpha is applied. 
+ + Fixes #1174 + author: Maxim Solovyov + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707224099, offset: 0, sign: 43 } }' + target: + id: baf78e733f05fb3257398193a6374693baded93e + parent_ids: + - d27d3773ce5deb24de6072c49052e31ac8e9d4e0 + message: | + backend: gl: inherit image's inner properties in the gl_image_decouple function + + Image decouple should keep all the image properies from the source + image, so shader must be copied. And there are also some internal + properties what should be inherited but wasn't. + + In particular this prevents images from losing their shaders when + alpha is applied. + + Fixes #1174 + + (cherry picked from commit eb39426b08a77563fbbe8e74320a3799663b8c47) + Signed-off-by: Yuxuan Shui + author: Maxim Solovyov + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819738, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 4401666cfb06f9d76a1bf109feda42730a6da9aa + parent_ids: + - 90f5f4ca2944a7705e43b31b680fcf311efb3b4d + message: | + x: add the x_get_visual_for_depth function + + it returns the first found visual for the given depth + author: Maxim Solovyov + committer: Maxim Solovyov + time: 'Time { raw: git_time { time: 1706743889, offset: 180, sign: 43 } }' + target: + id: d3a108b3db8732e01bfb3970f7e5772fc5d0008d + parent_ids: + - e0aa6f9107f4df1b54e27dc90c7f5e8503ef05bd + message: | + x: add the x_get_visual_for_depth function + + it returns the first found visual for the given depth + + (cherry picked from commit 4401666cfb06f9d76a1bf109feda42730a6da9aa) + Signed-off-by: Yuxuan Shui + author: Maxim Solovyov + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819731, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 755996a42c88483b452e7b9133c8b69c808d3b7a + parent_ids: + - eb723eee296e4095cd4f03de5be3ed7048762851 + message: | + backend: gl: use libepoxy's 
has_*_extension + + So we don't need maintain our own version. + + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707617225, offset: 0, sign: 43 } }' + target: + id: 0d5afb16d4f8cac7d1b016de93a289827722eee0 + parent_ids: + - 0bb467b61fae5b9c723df334cf33758bd899569f + message: | + backend: gl: use libepoxy's has_*_extension + + So we don't need maintain our own version. + + (cherry picked from commit 755996a42c88483b452e7b9133c8b69c808d3b7a) + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819841, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: baeafb3a3b4ad3de3690929e68245b5cd54b9366 + parent_ids: + - 037be5cca20366a8bd26fe90865f225a471859b1 + message: | + event: tweak ev_reparent_notify + + Instead of change window attributes back and forth, calculate the evmask + and set it just once. And also make sure the request is flushed. + + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707686417, offset: 0, sign: 43 } }' + target: + id: 5f20ee0e9ba1ea2096173e96d5439fcd32a8e9a3 + parent_ids: + - 5984747912daf94dbc5ab417b8da270151e9e93d + message: | + event: tweak ev_reparent_notify + + Instead of change window attributes back and forth, calculate the evmask + and set it just once. And also make sure the request is flushed. 
+ + (cherry picked from commit baeafb3a3b4ad3de3690929e68245b5cd54b9366) + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707820488, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: bdc0943399584275c5bfcf01e84e4c38ecb2d515 + parent_ids: + - baeafb3a3b4ad3de3690929e68245b5cd54b9366 + message: | + event: make sure ev_property_notify flushes its requests + + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707686419, offset: 0, sign: 43 } }' + target: + id: de9724f81434589e614d772f10d3a2f0f691b7ae + parent_ids: + - 5f20ee0e9ba1ea2096173e96d5439fcd32a8e9a3 + message: | + event: make sure ev_property_notify flushes its requests + + (cherry picked from commit bdc0943399584275c5bfcf01e84e4c38ecb2d515) + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707820511, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: efb7a1430f2c530c7b9cc0cb6d6d6cff95d8a4d9 + parent_ids: + - d111e1640a7262f5476bce1d4e8fe0080b711545 + message: | + x: add the x_set_region function + + it sets an x region to a pixman region + author: Maxim Solovyov + committer: Maxim Solovyov + time: 'Time { raw: git_time { time: 1706463312, offset: 180, sign: 43 } }' + target: + id: a4ec70982c824c172f2958bfe26827be3a68783c + parent_ids: + - 0ab3e0740e61849bb230e5f60290905eecfd0c43 + message: | + x: add the x_set_region function + + it sets an x region to a pixman region + + (cherry picked from commit efb7a1430f2c530c7b9cc0cb6d6d6cff95d8a4d9) + Signed-off-by: Yuxuan Shui + author: Maxim Solovyov + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707820061, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 
a5826b6fb0dc4888ce922df5d82151972ff4b247 + parent_ids: + - bdc0943399584275c5bfcf01e84e4c38ecb2d515 + message: | + event: fix dumb bug in repair_win + + Basically we won't call xcb_damage_subtract if show_all_xerrors is set, + which is very bad. + + Fixes that, and also make sure the damage subtract request is flushed in + all branches. + + Fixes: 1307d9ec709c9fbbe99939d46ad04c57d5e4b501 + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707692753, offset: 0, sign: 43 } }' + target: + id: 41f9a5816c1a74d41e18cf273e6e76f96dba267a + parent_ids: + - de9724f81434589e614d772f10d3a2f0f691b7ae + message: | + event: fix dumb bug in repair_win + + Basically we won't call xcb_damage_subtract if show_all_xerrors is set, + which is very bad. + + Fixes that, and also make sure the damage subtract request is flushed in + all branches. + + (cherry picked from commit a5826b6fb0dc4888ce922df5d82151972ff4b247) + Fixes: 1307d9ec709c9fbbe99939d46ad04c57d5e4b501 + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707820544, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: eb723eee296e4095cd4f03de5be3ed7048762851 + parent_ids: + - fcd51e7373d14881e13ad086304d3511355b5f1b + message: | + backend: gl: use libepoxy + + There is actually no specification what symbols are exported from a + libGL implementation. The (extremely outdated) OpenGL ABI specification + says only GL 1.2 functions are guaranteed. Don't know how relevant that + is now, but different libGL implementations do export different set of + symbols. On Linux we are most likely to be linked with libglvnd, which + has everything we need. But on other platforms this is not necessarily + the case, for example on OpenBSD we are missing glGetQueryObjectui64v. + + Use libepoxy so we can outsource this problem and never worry about it + ever again. 
Plus it also saves us from calling GetProcAddress ourselves. + + Changes other than trivial build fixes I have to make: + + 1. Can't use eglCreatePlatformWindowSurface/eglGetPlatformDisplay. + libepoxy checks for EGL 1.5 when resolving these functions. But + without a current context, libepoxy assumes we only have EGL 1.4. + This creates a chicken and egg problem - we need a display to call + eglGetPlatformDisplay. We have to use the *EXT version instead. + + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707597362, offset: 0, sign: 43 } }' + target: + id: 0bb467b61fae5b9c723df334cf33758bd899569f + parent_ids: + - 71f0d5b92383b24e84988a4ac6c937936ad8340b + message: | + backend: gl: use libepoxy + + There is actually no specification what symbols are exported from a + libGL implementation. The (extremely outdated) OpenGL ABI specification + says only GL 1.2 functions are guaranteed. Don't know how relevant that + is now, but different libGL implementations do export different set of + symbols. On Linux we are most likely to be linked with libglvnd, which + has everything we need. But on other platforms this is not necessarily + the case, for example on OpenBSD we are missing glGetQueryObjectui64v. + + Use libepoxy so we can outsource this problem and never worry about it + ever again. Plus it also saves us from calling GetProcAddress ourselves. + + Changes other than trivial build fixes I have to make: + + 1. Can't use eglCreatePlatformWindowSurface/eglGetPlatformDisplay. + libepoxy checks for EGL 1.5 when resolving these functions. But + without a current context, libepoxy assumes we only have EGL 1.4. + This creates a chicken and egg problem - we need a display to call + eglGetPlatformDisplay. We have to use the *EXT version instead. 
+ + (cherry picked from commit eb723eee296e4095cd4f03de5be3ed7048762851) + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819837, offset: 0, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 023103c620749fd5eb1956c255a9cc5fcd6903a4 + parent_ids: + - 0f22b70705eece1ec534663af515b3219f01ed38 + message: | + core: use pthread_setschedparam on OpenBSD + + OpenBSD don't have support for sched_getparam(), sched_setparam(), or + sched_setscheduler() functions (yet). In this case, we need use + pthead-equivalents for real-time sched for picom. Theses changes add + this support. + + Authored-by: Jose Maldonado aka Yukiteru + Signed-off-by: Yuxuan Shui + author: Jose Maldonado aka Yukiteru + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707560308, offset: 0, sign: 43 } }' + target: + id: 33c20c57c115f7ee9408c0929c3e5af7e8e60a91 + parent_ids: + - baf78e733f05fb3257398193a6374693baded93e + message: | + core: use pthread_setschedparam on OpenBSD + + OpenBSD don't have support for sched_getparam(), sched_setparam(), or + sched_setscheduler() functions (yet). In this case, we need use + pthead-equivalents for real-time sched for picom. Theses changes add + this support. + + (cherry picked from commit 023103c620749fd5eb1956c255a9cc5fcd6903a4) + Authored-by: Jose Maldonado aka Yukiteru + Signed-off-by: Yuxuan Shui + author: Jose Maldonado aka Yukiteru + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819741, offset: 0, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: fcd51e7373d14881e13ad086304d3511355b5f1b + parent_ids: + - dff77aae27cf064941798c066a248283b1996318 + message: | + build: add libepoxy + + Add libepoxy dependency to CI manifest and Nix. 
+ + For Nix, we need to set shellHook to workaround a NixOS limitation, see: + + https://github.com/NixOS/nixpkgs/issues/287763 + + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707597324, offset: 0, sign: 43 } }' + target: + id: 71f0d5b92383b24e84988a4ac6c937936ad8340b + parent_ids: + - 33c20c57c115f7ee9408c0929c3e5af7e8e60a91 + message: | + build: add libepoxy + + Add libepoxy dependency to CI manifest and Nix. + + For Nix, we need to set shellHook to workaround a NixOS limitation, see: + + https://github.com/NixOS/nixpkgs/issues/287763 + + (cherry picked from commit fcd51e7373d14881e13ad086304d3511355b5f1b) + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819743, offset: 0, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5a1990b236c85f1222098ef147398855cbb3af69 + parent_ids: + - efb7a1430f2c530c7b9cc0cb6d6d6cff95d8a4d9 + message: | + backend: xrender: cache the present region + + to avoid creating and destroying it every frame + author: Maxim Solovyov + committer: Maxim Solovyov + time: 'Time { raw: git_time { time: 1706463312, offset: 180, sign: 43 } }' + target: + id: 9392829d84d9b63f411e2af8191d1161e90cb984 + parent_ids: + - a4ec70982c824c172f2958bfe26827be3a68783c + message: | + backend: xrender: cache the present region + + to avoid creating and destroying it every frame + + (cherry picked from commit 5a1990b236c85f1222098ef147398855cbb3af69) + Signed-off-by: Yuxuan Shui + author: Maxim Solovyov + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707820071, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 709f0168d96fa42e40261c65b345c5fa14418300 + parent_ids: + - 726b8d0e285e7d7e7809df4f0ada4c4943fc8cea + message: | + ci: build on OpenBSD + + Building on OpenBSD fails currently. 
+ + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707560536, offset: 0, sign: 43 } }' + target: + id: 826e1d72f59372c2f15ce4b3b2333bc30b4be417 + parent_ids: + - eaf72a7a9354322b36f77ac5afdd451734f98dc2 + message: | + ci: build on OpenBSD + + Building on OpenBSD fails currently. + + (cherry picked from commit 709f0168d96fa42e40261c65b345c5fa14418300) + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707821410, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 238c3cc8336fe24370a3ca44ff0d891ca8907c94 + parent_ids: + - 0638de5c568dab4215140a37bb7be702301a038b + message: | + vblank: reset SGI_video_sync scheduler if it's getting nonsense + + If resetting fails, set a flag so all future schedule calls will fail. + + Fixes #1168 + + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1706654463, offset: 0, sign: 43 } }' + target: + id: e0aa6f9107f4df1b54e27dc90c7f5e8503ef05bd + parent_ids: + - 4e6dc55ec84c871f9954391506e18eca0acd98f6 + message: | + vblank: reset SGI_video_sync scheduler if it's getting nonsense + + If resetting fails, set a flag so all future schedule calls will fail. + + Fixes #1168 + + (cherry picked from commit 238c3cc8336fe24370a3ca44ff0d891ca8907c94) + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819729, offset: 0, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 75d0b7ba1e84e5057efaa1e61398f003a8e9e246 + parent_ids: + - a5826b6fb0dc4888ce922df5d82151972ff4b247 + message: | + core: don't flush X connection before go to sleep + + See the added comments for details. + + Fixes #1145 + Fixes #1166 + Fixes #1040? 
+ + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707693070, offset: 0, sign: 43 } }' + target: + id: 89c2c8530319f72394522336e483b31275d1ef7c + parent_ids: + - 41f9a5816c1a74d41e18cf273e6e76f96dba267a + message: | + core: don't flush X connection before go to sleep + + See the added comments for details. + + Fixes #1145 + Fixes #1166 + Fixes #1040? + + (cherry picked from commit 75d0b7ba1e84e5057efaa1e61398f003a8e9e246) + Signed-off-by: Yuxuan Shui + author: Yuxuan Shui + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707820562, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 0ed8d0cadfb6fc60ee77cd2c2e24e153175e5547 + parent_ids: + - bbc657e4bc099ec2f958a222b658ab5a5345c055 + message: | + picom: post-process and free the corner radius rules list + + to make conditions based on non-standard atoms in this list work. + author: Maxim Solovyov + committer: Maxim Solovyov + time: 'Time { raw: git_time { time: 1706882778, offset: 180, sign: 43 } }' + target: + id: d27d3773ce5deb24de6072c49052e31ac8e9d4e0 + parent_ids: + - 96399a01d1ac5626e8065461b1085363a99b996d + message: | + picom: post-process and free the corner radius rules list + + to make conditions based on non-standard atoms in this list work. 
+ + (cherry picked from commit 0ed8d0cadfb6fc60ee77cd2c2e24e153175e5547) + Signed-off-by: Yuxuan Shui + author: Maxim Solovyov + committer: Yuxuan Shui + time: 'Time { raw: git_time { time: 1707819736, offset: 0, sign: 43 } }' + is_trivial: true diff --git a/dataset/mined-cherries-verification/Go_quay_clair.yaml b/dataset/mined-cherries-verification/Go_quay_clair.yaml new file mode 100644 index 00000000..466313d7 --- /dev/null +++ b/dataset/mined-cherries-verification/Go_quay_clair.yaml @@ -0,0 +1,658 @@ +- total_number_of_branches: '16' + repo_name: quay/clair + total_number_of_committers: '121' + total_number_of_commits: '1988' + total_number_of_results: '22' + language: Go +- - search_method: MessageScan + cherry_and_target: + cherry: + id: 5446e49ff9de13a0d95ce5937f1ce722e59304f5 + parent_ids: + - a433c93c349f63e7b8cc6f4d5a95a2394fe1dd31 + message: | + notifier: Avoid double reference + + I guess the pgx lib changed to not account for this. + + Signed-off-by: crozzy + author: crozzy + committer: Joseph Crosland + time: 'Time { raw: git_time { time: 1680014860, offset: -420, sign: 45 } }' + target: + id: ffa4556d0f251cc984ed34594356625b9b747744 + parent_ids: + - e02aba27de01cb461f79bee9644aac80c2f9bd65 + message: | + notifier: Avoid double reference + + I guess the pgx lib changed to not account for this. + + Signed-off-by: crozzy + (cherry picked from commit 5446e49ff9de13a0d95ce5937f1ce722e59304f5) + author: crozzy + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1680703562, offset: -300, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 94757c7d8cda907902d1020a5c2fe74b2e5ccba9 + parent_ids: + - d726e15796882e6f2adba6a84a0aef419bc59849 + message: | + airgap: Remove libindex Airgap option + + This option no longer exists in claircore and is handled by clair + when creating the client that is passed to claircore. 
+ + Signed-off-by: crozzy + author: crozzy + committer: Joseph Crosland + time: 'Time { raw: git_time { time: 1679673855, offset: -420, sign: 45 } }' + target: + id: e02aba27de01cb461f79bee9644aac80c2f9bd65 + parent_ids: + - 9e8eacf51b2a45f967036396b3dc14a52edc480d + message: | + airgap: Remove libindex Airgap option + + This option no longer exists in claircore and is handled by clair + when creating the client that is passed to claircore. + + Signed-off-by: crozzy + (cherry picked from commit 94757c7d8cda907902d1020a5c2fe74b2e5ccba9) + author: crozzy + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1680703562, offset: -300, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5faf0fc9edba86cef87bff4e9941fd2a93a2889a + parent_ids: + - 5446e49ff9de13a0d95ce5937f1ce722e59304f5 + message: | + chore: Bump Claircore to v1.4.21 + + Bump Claircore to the latest tag. + + Signed-off-by: crozzy + author: crozzy + committer: Joseph Crosland + time: 'Time { raw: git_time { time: 1680124140, offset: -420, sign: 45 } }' + target: + id: e676671c17d2612470cd8de05aa668312fbb3036 + parent_ids: + - ffa4556d0f251cc984ed34594356625b9b747744 + message: | + chore: Bump Claircore to v1.4.21 + + Bump Claircore to the latest tag. + + Signed-off-by: crozzy + (cherry picked from commit 5faf0fc9edba86cef87bff4e9941fd2a93a2889a) + author: crozzy + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1680703626, offset: -300, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: eb54b8896f6fd93bd67d6b6c0875cfff426698d7 + parent_ids: + - 6969e003d05dfb7777afa0d5a628f8d445dfa2d3 + message: | + docs: add dropins to prose documentation + + This change explains how to use the dropins and updates the local-dev + config to do so. 
+ + Closes: #1783 + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1695153440, offset: -300, sign: 45 } }' + target: + id: 8a2d99f4da70de6bdf14858c6163a2e3b8042782 + parent_ids: + - 0729ad2a36721f9fe99196370a9f3cd31fdbd4b7 + message: | + docs: add dropins to prose documentation + + This change explains how to use the dropins and updates the local-dev + config to do so. + + Closes: #1783 + See-also: #1806 + (cherry picked from commit eb54b8896f6fd93bd67d6b6c0875cfff426698d7) + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1695154148, offset: -300, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: ce462ea41a81be2c13f9a4a85847b7f93570db48 + parent_ids: + - ae7675af1e68c4e5cf2301480b7a3b99ae6faf89 + message: | + httptransport: handle no notifier in "combo" mode + + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1647885591, offset: -300, sign: 45 } }' + target: + id: d314e41234292084dc125c3c9489f3958ca772ae + parent_ids: + - cc5a916ef11f5de53af0b87b9ad75d940a615beb + message: | + httptransport: handle no notifier in "combo" mode + + Signed-off-by: Hank Donnay + Backports: #1531 + (cherry picked from commit ce462ea41a81be2c13f9a4a85847b7f93570db48) + Signed-off-by: test + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1649098199, offset: -300, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: ab3a754e1408d8fed0160ce999536c8fd2f452f7 + parent_ids: + - f783b356ce3c7903641913ee25d0a384c19602db + message: | + 'chore: update claircore to v1.5.19 + + Update claircore to latest release. 
+ ' + + Signed-off-by: Claircore-CI + author: crozzy + committer: Joseph Crosland + time: 'Time { raw: git_time { time: 1696363450, offset: -420, sign: 45 } }' + target: + id: 9a3cde3b16bb0c0e02fb7e128ff86e255ec6112f + parent_ids: + - 04f36991e19cfb44423cbb2e96ccfcd786dd85c5 + message: | + 'chore: update claircore to v1.5.19 + + Update claircore to latest release. + ' + + Signed-off-by: Claircore-CI + (cherry picked from commit ab3a754e1408d8fed0160ce999536c8fd2f452f7) + Signed-off-by: crozzy + author: crozzy + committer: Joseph Crosland + time: 'Time { raw: git_time { time: 1696881603, offset: -420, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5c44e70c92331f4150b8c2474043e6d92f125d31 + parent_ids: + - c473c92c31d3e38027b35395fb951524e03f84c7 + message: | + chore: update claircore version + + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1636125871, offset: -300, sign: 45 } }' + target: + id: 5e060135111e07fcc3d0c2fc6c6570a98021dde7 + parent_ids: + - bfd971861e88bb21e4480ac98b8b0b7e1abf1501 + message: | + chore: update claircore version + + Backports: #1418 + Signed-off-by: Hank Donnay + (cherry picked from commit 5c44e70c92331f4150b8c2474043e6d92f125d31) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1636126021, offset: -300, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: e1833161315d45484f433fdb3a3692a9a617f3bd + parent_ids: + - e5cb6a91484254ab647989ab7761ae4d0f85a5f4 + message: | + go.mod: update claircore version + + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1653511970, offset: -300, sign: 45 } }' + target: + id: 67f32bff3f3ef655ff24313ccc7905d6d2a0a719 + parent_ids: + - 363dca4d771d7e36e2925552cce102e458193c4f + message: | + go.mod: update claircore version + + Backports: #1571 
+ Signed-off-by: Hank Donnay + (cherry picked from commit e1833161315d45484f433fdb3a3692a9a617f3bd) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1653512650, offset: -300, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: b18f989c7869a480b7bbfc8181d515227e701a39 + parent_ids: + - 12f38e45cec579f92438059702884fa4284bb93c + message: | + httputil: fix ParseIP usage + + The string fed into the ParseIP function needs to not have a port. + This does that and adds a test to check the desired behavior. + + Closes: #1689 + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1676386605, offset: -360, sign: 45 } }' + target: + id: 9e8eacf51b2a45f967036396b3dc14a52edc480d + parent_ids: + - 36de97ccf619113b1ef4dff6bfd0e0c692252544 + message: | + httputil: fix ParseIP usage + + The string fed into the ParseIP function needs to not have a port. + This does that and adds a test to check the desired behavior. + + Closes: #1689 + Signed-off-by: Hank Donnay + (cherry picked from commit b18f989c7869a480b7bbfc8181d515227e701a39) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1680703562, offset: -300, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 378a4b5f35ceb452654e7d9dca12fc455ac1697c + parent_ids: + - 5fd265634d162dc0acba6c28e36d35dd0a90aec0 + message: | + httptransport: fix request_id logging + + The final HTTP status message was getting an earlier Context which was + missing the request_id key. 
+ + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1674771374, offset: -360, sign: 45 } }' + target: + id: 922f33d18919578049fbf2ccb756e6990b66f280 + parent_ids: + - 5fd265634d162dc0acba6c28e36d35dd0a90aec0 + message: | + httptransport: fix request_id logging + + The final HTTP status message was getting an earlier Context which was + missing the request_id key. + + Signed-off-by: Hank Donnay + (cherry picked from commit 378a4b5f35ceb452654e7d9dca12fc455ac1697c) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1680703562, offset: -300, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: b1fe4db1d1b295b5377a0a70dbe5ae977e393eb5 + parent_ids: + - 422d6b4a60fcb3ae7c059b97d89dcd456ed01d28 + message: | + go.mod: update claircore + + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1642179260, offset: -360, sign: 45 } }' + target: + id: 8562653ac42933bbf0e73d82abde09c2a337ea22 + parent_ids: + - 844bfd2436fb5acb1d08975acad152d78367364a + message: | + go.mod: update claircore + + Closes: PROJQUAY-3037 + Signed-off-by: Hank Donnay + (cherry picked from commit b1fe4db1d1b295b5377a0a70dbe5ae977e393eb5) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1642181328, offset: -360, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 670376a29dad524e8ebea8f2acd22220053e6ec9 + parent_ids: + - 72417962880862b986872709317a39fa0582f143 + message: | + go.mod: update json (de)serializer + + This project notes that there's a fix for go1.20 support in the latest + version. 
+ + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1675880410, offset: -360, sign: 45 } }' + target: + id: 36de97ccf619113b1ef4dff6bfd0e0c692252544 + parent_ids: + - 922f33d18919578049fbf2ccb756e6990b66f280 + message: | + go.mod: update json (de)serializer + + This project notes that there's a fix for go1.20 support in the latest + version. + + Signed-off-by: Hank Donnay + (cherry picked from commit 670376a29dad524e8ebea8f2acd22220053e6ec9) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1680703562, offset: -300, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 3273a96981b007d8c4e271aec0371cb7e4f45baf + parent_ids: + - eea6fea1966b4d599e8eb900150c2b90bff47e37 + message: | + chore: bump claircore to v1.3.2 + + Signed-off-by: crozzy + author: crozzy + committer: Joseph Crosland + time: 'Time { raw: git_time { time: 1647902124, offset: -420, sign: 45 } }' + target: + id: cc5a916ef11f5de53af0b87b9ad75d940a615beb + parent_ids: + - c7075aa46dfffbbd9b09393d5db42938cda2a615 + message: | + chore: bump claircore to v1.3.2 + + Backports: #1537 + Signed-off-by: crozzy + (cherry picked from commit 3273a96981b007d8c4e271aec0371cb7e4f45baf) + author: crozzy + committer: Joseph Crosland + time: 'Time { raw: git_time { time: 1648054630, offset: -420, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 953fa97b70878dfbe2ccf0b1183c902b8618119b + parent_ids: + - b2d68b39a8ae3095d2cbcc471f8efcd30be43ea9 + message: | + chore: update claircore version + + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1637355167, offset: -360, sign: 45 } }' + target: + id: 8849c61360520230b6c987bdc243db03b7340c9f + parent_ids: + - dddb910b6f51e7b69042ba3db98c2a0d6cc1caa2 + message: | + chore: update claircore version + + Backports: #1437 + 
Signed-off-by: Hank Donnay + (cherry picked from commit 953fa97b70878dfbe2ccf0b1183c902b8618119b) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1637355563, offset: -360, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5354f1073cf073202c7c98388c13abc6b6743d08 + parent_ids: + - af6a1f49b35f10faff1102a5d776050eb74cd0d0 + message: | + chore: bump claircore version + + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1623769007, offset: -300, sign: 45 } }' + target: + id: a3a8020c2225de42e248352503b06704d7167839 + parent_ids: + - ad9eccf9c93a00bda4300ae783b42753d2c10d35 + message: | + chore: bump claircore version + + Signed-off-by: Hank Donnay + (cherry picked from commit 5354f1073cf073202c7c98388c13abc6b6743d08) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1623770892, offset: -300, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 28647ba13fd38162ebdbc2f878db2691431f197a + parent_ids: + - 7c258d395ab5474696f50a1785ff51eabb69272e + message: | + chore: update claircore version + + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1635444119, offset: -300, sign: 45 } }' + target: + id: ec26f33a54fb2995a5898f1bd42484bf90da14fd + parent_ids: + - ce63ff2615cef36804346acc016c625d2fcfd630 + message: | + chore: update claircore version + + Backports: #1404 + Signed-off-by: Hank Donnay + (cherry picked from commit 28647ba13fd38162ebdbc2f878db2691431f197a) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1635445063, offset: -300, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: d583395ec2a03655f44c8eea7e1052e04d6ff889 + parent_ids: + - 12b676d4862a97629acf5bc501189da9728d690d + message: | 
+ go.mod: update claircore version + + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1654537951, offset: -300, sign: 45 } }' + target: + id: 51c63e323cc27824cdb59b942b66af110400d5b3 + parent_ids: + - edc65d667261fbc08d54bbc4057151f47ce6d4b7 + message: | + go.mod: update claircore version + + Backports: #1580 + Signed-off-by: Hank Donnay + (cherry picked from commit d583395ec2a03655f44c8eea7e1052e04d6ff889) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1654538744, offset: -300, sign: 45 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 74210ca1bea67f0369bc9b26d40354410317627b + parent_ids: + - eddad2e61984defca5b0e9f842d2df44c1ea11fc + message: | + chore: update changelog to cope with submodule tags + + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1636134677, offset: -300, sign: 45 } }' + target: + id: 41d25933ad25b8ae5deb4b6da84336c2a4d400fe + parent_ids: + - 4aca7b5a8bd82826d4c6e41dcafcb52d72696d3a + message: | + chore: update changelog to cope with submodule tags + + Backports: #1421 + Signed-off-by: Hank Donnay + (cherry picked from commit 74210ca1bea67f0369bc9b26d40354410317627b) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1636134774, offset: -300, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 180fa4f444ff0affcc6ee9dd4436f97ea75c2dab + parent_ids: + - a478ce918566144d58b50ffb892cb33a11992036 + message: | + chore: bump claircore to v1.5.16 + + Update claircore dep to latest tag. 
+ + Signed-off-by: crozzy + author: crozzy + committer: Joseph Crosland + time: 'Time { raw: git_time { time: 1692045611, offset: -420, sign: 45 } }' + target: + id: 0729ad2a36721f9fe99196370a9f3cd31fdbd4b7 + parent_ids: + - 25ab0f4e01ac08870b6e8fe9cccc134a011a4f4f + message: | + chore: bump claircore to v1.5.16 + + Update claircore dep to latest tag. + + Signed-off-by: crozzy + (cherry picked from commit 180fa4f444ff0affcc6ee9dd4436f97ea75c2dab) + author: crozzy + committer: Joseph Crosland + time: 'Time { raw: git_time { time: 1692048676, offset: -420, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: bd8160c908a6ec27420144c5c39397c8e866bd97 + parent_ids: + - 28647ba13fd38162ebdbc2f878db2691431f197a + message: | + go.mod: update go-containerregistry + + The previous version was causing MVS to pick incompatible versions of + github.com/docker/docker and github.com/opencontainers/runc. + + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1635518424, offset: -300, sign: 45 } }' + target: + id: f8dff8b8691ad58508457b2022425a7c533fca3c + parent_ids: + - 6ddf86205e558df705e0f21dd12c582a67566b3d + message: | + go.mod: update go-containerregistry + + The previous version was causing MVS to pick incompatible versions of + github.com/docker/docker and github.com/opencontainers/runc. + + Backports: #1407 + Signed-off-by: Hank Donnay + (cherry picked from commit bd8160c908a6ec27420144c5c39397c8e866bd97) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1635520706, offset: -300, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: b3e490dbc1022a5d08f7ceef8565928fdd19be98 + parent_ids: + - 5354f1073cf073202c7c98388c13abc6b6743d08 + message: | + services: disable transport compression in matcher + + Some servers (e.g. 
Github) return weak HTTP validators when the + Content-Encoding is not "identity". This disables automatically + negotiating compression, which breaks Updaters' conditional requests. + + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1623779760, offset: -300, sign: 45 } }' + target: + id: bc60dcc29d4440636f68911e79b08b188ac8e81e + parent_ids: + - 6528f738a2a0cf303625040459d5590050f75294 + message: | + services: disable transport compression in matcher + + Some servers (e.g. Github) return weak HTTP validators when the + Content-Encoding is not "identity". This disables automatically + negotiating compression, which breaks Updaters' conditional requests. + + Signed-off-by: Hank Donnay + (cherry picked from commit b3e490dbc1022a5d08f7ceef8565928fdd19be98) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1623787777, offset: -300, sign: 45 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 472e70b6c4c6a65f7afd5ef4ecd2c4d722578ba5 + parent_ids: + - f0d6a35763d5589f17c1a40dca5e155188a79b1e + message: | + webhook: clone headers on request + + Signed-off-by: Hank Donnay + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1651671969, offset: -300, sign: 45 } }' + target: + id: edc65d667261fbc08d54bbc4057151f47ce6d4b7 + parent_ids: + - 2a4694bff671a9e41c3c5c5c77eb1a53afebf971 + message: | + webhook: clone headers on request + + Backports: #1557 + Signed-off-by: Hank Donnay + (cherry picked from commit 472e70b6c4c6a65f7afd5ef4ecd2c4d722578ba5) + author: Hank Donnay + committer: Hank Donnay + time: 'Time { raw: git_time { time: 1654200331, offset: -300, sign: 45 } }' + is_trivial: true diff --git a/dataset/mined-cherries-verification/JavaScript_knex_knex.yaml b/dataset/mined-cherries-verification/JavaScript_knex_knex.yaml new file mode 100644 index 00000000..bb60c850 --- /dev/null +++ 
b/dataset/mined-cherries-verification/JavaScript_knex_knex.yaml @@ -0,0 +1,465 @@ +- total_number_of_results: '19' + language: JavaScript + total_number_of_branches: '31' + repo_name: knex/knex + total_number_of_committers: '182' + total_number_of_commits: '2787' +- - search_method: MessageScan + cherry_and_target: + cherry: + id: a8a074bed46cf5c3779e7e08885355ec2ffa70a7 + parent_ids: + - 82361d12c1383fdb5ecd8ff9fe72d2b9f1d8763c + message: |+ + Add types for `.distinctOn` (#3784) + + author: Victor Andrée + committer: GitHub + time: 'Time { raw: git_time { time: 1586970102, offset: 120, sign: 43 } }' + target: + id: ef3ba0170403795fe93abf3a5fa386fe1b7f6aaa + parent_ids: + - ce20afdf18e30eb8fea998dda44e56b6d69020b5 + message: | + Add types for `.distinctOn` (#3784) + + (cherry picked from commit a8a074bed46cf5c3779e7e08885355ec2ffa70a7) + author: Victor Andrée + committer: Igor Savin + time: 'Time { raw: git_time { time: 1586990911, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: e552fbda78ba9e84c58ca15e5901f0941a15b087 + parent_ids: + - 078b749892f30d445292257bc6ecc61ae6abf7fc + message: |+ + Make protocol length check more defensive (#3744) + + author: Igor Savin + committer: GitHub + time: 'Time { raw: git_time { time: 1584917502, offset: 60, sign: 43 } }' + target: + id: 7c38d2be7d2451cd84b5b0e251149f30e47966d4 + parent_ids: + - 73cd4344c6a3d5f166934701c9997937b6486de9 + message: | + Make protocol length check more defensive (#3744) + + (cherry picked from commit e552fbda78ba9e84c58ca15e5901f0941a15b087) + author: Igor Savin + committer: Igor Savin + time: 'Time { raw: git_time { time: 1584917647, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 540ab8c020e572581b1db9898b1e21ba1da32923 + parent_ids: + - f2f858111c6f5641ee2cc5e94d380454e418a1aa + message: |+ + Insert lock row fix during migration (#4865) + + author: Olivier Cavadenti + 
committer: GitHub + time: 'Time { raw: git_time { time: 1638315678, offset: 60, sign: 43 } }' + target: + id: 6ef46450f69b291eff6b26bca1427f8f4c12f817 + parent_ids: + - 54934babbee0031485d8c7543080b9bfb6b46500 + message: |- + Insert lock row fix during migration (#4865) + + (cherry picked from commit 540ab8c020e572581b1db9898b1e21ba1da32923) + author: Olivier Cavadenti + committer: GitHub + time: 'Time { raw: git_time { time: 1638523326, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 1fcc40d86860844075408622bde7bac07af437bd + parent_ids: + - 7568ab51c4d6cc176cc9dcf86ae616545f705ac6 + message: |+ + `interface Knex` and `function Knex` should have the same types (#3787) + + author: Tanguy Krotoff + committer: GitHub + time: 'Time { raw: git_time { time: 1586715147, offset: 120, sign: 43 } }' + target: + id: 142807c973390f581f136e74f8d883a32c1d31eb + parent_ids: + - 8961b21b0598bfc35ac3d8cf19bd348411ba8c26 + message: | + `interface Knex` and `function Knex` should have the same types (#3787) + + (cherry picked from commit 1fcc40d86860844075408622bde7bac07af437bd) + author: Tanguy Krotoff + committer: Igor Savin + time: 'Time { raw: git_time { time: 1586792209, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 336b41e148c07174b68c2f2d81097628872ab400 + parent_ids: + - ea92e94f023e27cb8674ad8aaa4d13687277c011 + message: |+ + Add clearGroup method (#3771) + + author: Edvaldo Szymonek + committer: GitHub + time: 'Time { raw: git_time { time: 1586732991, offset: 120, sign: 43 } }' + target: + id: 6111ccccce7bb4caaa045d266d4e31840594f404 + parent_ids: + - 142807c973390f581f136e74f8d883a32c1d31eb + message: | + Add clearGroup method (#3771) + + (cherry picked from commit 336b41e148c07174b68c2f2d81097628872ab400) + author: Edvaldo Szymonek + committer: Igor Savin + time: 'Time { raw: git_time { time: 1586792209, offset: 120, sign: 43 } }' + 
is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 89bd0a034fc22413a1f8328bb09193d0a1b5d113 + parent_ids: + - fdad316f01b15a25276effb30c304abdbd4e88ee + message: |+ + Mysql2 validate connection fix #4794 (#4812) + + author: Olivier Cavadenti + committer: GitHub + time: 'Time { raw: git_time { time: 1636405954, offset: 60, sign: 43 } }' + target: + id: 337178fbff78bfd8a138138a93266d88edab6c5e + parent_ids: + - 29ac476b2bfe7593118da2884fae23a2e6004414 + message: | + Mysql2 validate connection fix #4794 (#4812) + + (cherry picked from commit 89bd0a034fc22413a1f8328bb09193d0a1b5d113) + author: Olivier Cavadenti + committer: Olivier Cavadenti + time: 'Time { raw: git_time { time: 1636406208, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 82361d12c1383fdb5ecd8ff9fe72d2b9f1d8763c + parent_ids: + - 336b41e148c07174b68c2f2d81097628872ab400 + message: |+ + Fix minor issues around typings (#3765) + + author: Lorefnon + committer: GitHub + time: 'Time { raw: git_time { time: 1586733096, offset: 120, sign: 43 } }' + target: + id: c4c527a4408d0716c5a32c004b0b9605b646154b + parent_ids: + - 6111ccccce7bb4caaa045d266d4e31840594f404 + message: | + Fix minor issues around typings (#3765) + + (cherry picked from commit 82361d12c1383fdb5ecd8ff9fe72d2b9f1d8763c) + author: Lorefnon + committer: Igor Savin + time: 'Time { raw: git_time { time: 1586792209, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: ec2351b7a89ec39e8ff2112bc608ccc84d186a7a + parent_ids: + - 74501e612ad7434cc4e6020a07e71a74f75ec155 + message: |+ + Minor test internal enhancements (#3747) + + author: Kabir Baidhya + committer: GitHub + time: 'Time { raw: git_time { time: 1584997741, offset: 60, sign: 43 } }' + target: + id: f2d513edba19f9b74ac8dfded045e79a2b48416d + parent_ids: + - a81f40af83737df40ea3bed8d54e0e47bf4e4464 + message: | + Minor test internal 
enhancements (#3747) + + (cherry picked from commit ec2351b7a89ec39e8ff2112bc608ccc84d186a7a) + author: Kabir Baidhya + committer: Igor Savin + time: 'Time { raw: git_time { time: 1586732941, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 078b749892f30d445292257bc6ecc61ae6abf7fc + parent_ids: + - a7031c3cef8910daa7e7f2ae03ae6ce527f76057 + message: |+ + Add unit tests for escape utility functions (#3742) + + author: Kabir Baidhya + committer: GitHub + time: 'Time { raw: git_time { time: 1584899091, offset: 60, sign: 43 } }' + target: + id: 73cd4344c6a3d5f166934701c9997937b6486de9 + parent_ids: + - 935687ef65f05075e974f8cd599df67e88ac59d8 + message: | + Add unit tests for escape utility functions (#3742) + + (cherry picked from commit 078b749892f30d445292257bc6ecc61ae6abf7fc) + author: Kabir Baidhya + committer: Igor Savin + time: 'Time { raw: git_time { time: 1584917647, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: c35a66d31bcd32e301b3a15d55a1f38e5fc7d0aa + parent_ids: + - 2fbe91da1a12c2db89def8a223ab59f06b3a33f3 + message: |- + Update signature of orderBy to support QueryBuilder inside array (#3757) + + Fixes #3738 + author: Lorefnon + committer: GitHub + time: 'Time { raw: git_time { time: 1585170758, offset: 60, sign: 43 } }' + target: + id: 1888a7c40cb0bf6b8bee7fc1c1577fbdc6593697 + parent_ids: + - 474968bfb7437e8ae6f7dafb01afb2315fb4f63e + message: | + Update signature of orderBy to support QueryBuilder inside array (#3757) + + Fixes #3738 + + (cherry picked from commit c35a66d31bcd32e301b3a15d55a1f38e5fc7d0aa) + author: Lorefnon + committer: Igor Savin + time: 'Time { raw: git_time { time: 1586732941, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 998c92ca97c323c7e7c1cb8788dd4c5e4ac3e6bd + parent_ids: + - e7e7a07ce90f73a29faabc6c094cd0b114c32622 + message: |+ + 
Typescript: Makes the ChainableInterface conform to Promise (#3724) + + author: Florent Vilmart <364568+flovilmart@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1584656397, offset: 60, sign: 43 } }' + target: + id: 935687ef65f05075e974f8cd599df67e88ac59d8 + parent_ids: + - 90eac8f0da1e71041b9bf98fa1a3c6bba852d521 + message: | + Typescript: Makes the ChainableInterface conform to Promise (#3724) + + (cherry picked from commit 998c92ca97c323c7e7c1cb8788dd4c5e4ac3e6bd) + author: Florent Vilmart <364568+flovilmart@users.noreply.github.com> + committer: Igor Savin + time: 'Time { raw: git_time { time: 1584917646, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 8d5715a03accdd9331584c336663eb6489f1cdf4 + parent_ids: + - a8a074bed46cf5c3779e7e08885355ec2ffa70a7 + message: |+ + Fix: Support for `.finally(..)` on knex's Promise-alikes (#3800) + + author: Brian Lauber + committer: GitHub + time: 'Time { raw: git_time { time: 1586977540, offset: 120, sign: 43 } }' + target: + id: 46b6f5c5d46df7a10bdbe7bd498cde8dc9c6ab79 + parent_ids: + - ef3ba0170403795fe93abf3a5fa386fe1b7f6aaa + message: | + Fix: Support for `.finally(..)` on knex's Promise-alikes (#3800) + + (cherry picked from commit 8d5715a03accdd9331584c336663eb6489f1cdf4) + author: Brian Lauber + committer: Igor Savin + time: 'Time { raw: git_time { time: 1586990911, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 2fbe91da1a12c2db89def8a223ab59f06b3a33f3 + parent_ids: + - 3df39aa48ecf7f3e0cf1d70838735d7fefd2dbad + message: |+ + Add typings for MigrationSource (#3756) + + author: Lorefnon + committer: GitHub + time: 'Time { raw: git_time { time: 1585156958, offset: 60, sign: 43 } }' + target: + id: 474968bfb7437e8ae6f7dafb01afb2315fb4f63e + parent_ids: + - efca60515474bc8d6d9ea2ea5c74f3ddbd5ce7a0 + message: | + Add typings for MigrationSource (#3756) + + 
(cherry picked from commit 2fbe91da1a12c2db89def8a223ab59f06b3a33f3) + author: Lorefnon + committer: Igor Savin + time: 'Time { raw: git_time { time: 1586732941, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 74501e612ad7434cc4e6020a07e71a74f75ec155 + parent_ids: + - e552fbda78ba9e84c58ca15e5901f0941a15b087 + message: |+ + typings: support Raw types for insert, where, update (#3730) + + author: maximelkin + committer: GitHub + time: 'Time { raw: git_time { time: 1584995174, offset: 60, sign: 43 } }' + target: + id: a81f40af83737df40ea3bed8d54e0e47bf4e4464 + parent_ids: + - 707749851bccbeaf4d24eed95198f093b970a93b + message: | + typings: support Raw types for insert, where, update (#3730) + + (cherry picked from commit 74501e612ad7434cc4e6020a07e71a74f75ec155) + author: maximelkin + committer: Igor Savin + time: 'Time { raw: git_time { time: 1586732941, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 41d02ba5c1a3cde7eebb9442e30d4a75abe2df34 + parent_ids: + - a6551559f07bcb91be7a66fd8d3ae3ede3ba15bf + message: |+ + CLI: adds support for asynchronous knexfile loading (#3748) + + author: Florent Vilmart <364568+flovilmart@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1585066025, offset: 60, sign: 43 } }' + target: + id: efca60515474bc8d6d9ea2ea5c74f3ddbd5ce7a0 + parent_ids: + - 7575946a1066f71c5a4d84cb983cc07b05407394 + message: | + CLI: adds support for asynchronous knexfile loading (#3748) + + (cherry picked from commit 41d02ba5c1a3cde7eebb9442e30d4a75abe2df34) + author: Florent Vilmart <364568+flovilmart@users.noreply.github.com> + committer: Igor Savin + time: 'Time { raw: git_time { time: 1586732941, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 72aaf6c6e176e74cf445c722364a23d9f255569e + parent_ids: + - 
c35a66d31bcd32e301b3a15d55a1f38e5fc7d0aa + message: |- + Add toSQL and toString to SchemaBuilder (#3758) + + Fixes #3751 + author: Lorefnon + committer: GitHub + time: 'Time { raw: git_time { time: 1585170816, offset: 60, sign: 43 } }' + target: + id: 9e9d3c5162d041e43b3f0290b0a70158fb5bfafc + parent_ids: + - 1888a7c40cb0bf6b8bee7fc1c1577fbdc6593697 + message: | + Add toSQL and toString to SchemaBuilder (#3758) + + Fixes #3751 + + (cherry picked from commit 72aaf6c6e176e74cf445c722364a23d9f255569e) + author: Lorefnon + committer: Igor Savin + time: 'Time { raw: git_time { time: 1586732941, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: a6551559f07bcb91be7a66fd8d3ae3ede3ba15bf + parent_ids: + - ec2351b7a89ec39e8ff2112bc608ccc84d186a7a + message: |+ + Minor improvements on the usage of fs utilities - unify all the fs functions into the same util/fs to simplify things (#3749) + + author: Kabir Baidhya + committer: GitHub + time: 'Time { raw: git_time { time: 1585061447, offset: 60, sign: 43 } }' + target: + id: 7575946a1066f71c5a4d84cb983cc07b05407394 + parent_ids: + - f2d513edba19f9b74ac8dfded045e79a2b48416d + message: | + Minor improvements on the usage of fs utilities - unify all the fs functions into the same util/fs to simplify things (#3749) + + (cherry picked from commit a6551559f07bcb91be7a66fd8d3ae3ede3ba15bf) + author: Kabir Baidhya + committer: Igor Savin + time: 'Time { raw: git_time { time: 1586732941, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 7568ab51c4d6cc176cc9dcf86ae616545f705ac6 + parent_ids: + - 72aaf6c6e176e74cf445c722364a23d9f255569e + message: |+ + Split tests in groups (#3785) + + author: Igor Savin + committer: GitHub + time: 'Time { raw: git_time { time: 1586287660, offset: 120, sign: 43 } }' + target: + id: 8961b21b0598bfc35ac3d8cf19bd348411ba8c26 + parent_ids: + - 9e9d3c5162d041e43b3f0290b0a70158fb5bfafc 
+ message: | + Split tests in groups (#3785) + + (cherry picked from commit 7568ab51c4d6cc176cc9dcf86ae616545f705ac6) + author: Igor Savin + committer: Igor Savin + time: 'Time { raw: git_time { time: 1586792209, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 998c92ca97c323c7e7c1cb8788dd4c5e4ac3e6bd + parent_ids: + - e7e7a07ce90f73a29faabc6c094cd0b114c32622 + message: |+ + Typescript: Makes the ChainableInterface conform to Promise (#3724) + + author: Florent Vilmart <364568+flovilmart@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1584656397, offset: 60, sign: 43 } }' + target: + id: 0d775cff2980257fe3419545ec81d80355214e0a + parent_ids: + - 90eac8f0da1e71041b9bf98fa1a3c6bba852d521 + message: "Cherry-pick fixes from master (#3745)\n\n* Typescript: Makes the ChainableInterface conform to Promise (#3724)\r\n\r\n(cherry picked from commit 998c92ca97c323c7e7c1cb8788dd4c5e4ac3e6bd)\r\n\r\n* Add unit tests for escape utility functions (#3742)\r\n\r\n(cherry picked from commit 078b749892f30d445292257bc6ecc61ae6abf7fc)\r\n\r\n* Make protocol length check more defensive (#3744)\r\n\r\n(cherry picked from commit e552fbda78ba9e84c58ca15e5901f0941a15b087)\r\n\r\nCo-authored-by: Florent Vilmart <364568+flovilmart@users.noreply.github.com>\r\nCo-authored-by: Kabir Baidhya " + author: Igor Savin + committer: GitHub + time: 'Time { raw: git_time { time: 1584921238, offset: 60, sign: 43 } }' + is_trivial: false diff --git a/dataset/mined-cherries-verification/Java_neo4j_neo4j.yaml b/dataset/mined-cherries-verification/Java_neo4j_neo4j.yaml new file mode 100644 index 00000000..674aab3f --- /dev/null +++ b/dataset/mined-cherries-verification/Java_neo4j_neo4j.yaml @@ -0,0 +1,1213 @@ +- total_number_of_results: '44' + repo_name: neo4j/neo4j + language: Java + total_number_of_commits: '69684' + total_number_of_branches: '52' + total_number_of_committers: '342' +- - search_method: 
MessageScan + cherry_and_target: + cherry: + id: 291f7be09ccfc561a1de3dbda4d18ecc43ce8094 + parent_ids: + - 1e38511eda0ae9f77a67495d8278d50ff6d73adf + message: | + Uses a DEV_NULL StringLogger instead of SYSTEM + + due to, most likely, a problem with the test or even SubProcess that the + piping of System.out/System.err from the sub process where those buffers + fill up in the sub process since the parent process doesn't pull that + data. When those buffers have filled up, then any write to its System.xxx + will block awaiting the buffer to drain, which it doesn't. + + The current fix is just to not use System.out in this test, however a + future fix should be to look at and fix the issue in SubProcess itself. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1391165395, offset: 60, sign: 43 } }' + target: + id: 2b4e821f2a6a8ceed225ef13d7dde517dbae3dca + parent_ids: + - d9a5529a68a2ec455d76afbc1a116ed16ec4559c + message: | + Uses a DEV_NULL StringLogger instead of SYSTEM + + due to, most likely, a problem with the test or even SubProcess that the + piping of System.out/System.err from the sub process where those buffers + fill up in the sub process since the parent process doesn't pull that + data. When those buffers have filled up, then any write to its System.xxx + will block awaiting the buffer to drain, which it doesn't. + + The current fix is just to not use System.out in this test, however a + future fix should be to look at and fix the issue in SubProcess itself. 
+ + (cherry picked from commit 291f7be09ccfc561a1de3dbda4d18ecc43ce8094) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1391165869, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 6e351f7318cb75d6a17b1d7f7cd1d401cd0ca551 + parent_ids: + - 69ec08cc8ca2188348a06e17297705351d31b58a + message: | + Parallelizes calculation of dense nodes in import tool + + by radix splitting batches by node id so that each batch (i.e. each + thread processing it) is guaranteed to not touch any same item in cache as + any other thread. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1426628203, offset: 60, sign: 43 } }' + target: + id: d2bcc565c2e658ee88a9dd27280ed9fcbefe0ef0 + parent_ids: + - e3d2a0734ff581cf81b5229a6a0b8956fc566925 + message: | + Parallelizes calculation of dense nodes in import tool + + by radix splitting batches by node id so that each batch (i.e. each + thread processing it) is guaranteed to not touch any same item in cache as + any other thread. + + (cherry picked from commit 6e351f7318cb75d6a17b1d7f7cd1d401cd0ca551) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1428572970, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 39be5a3a284c8be60cf058482863b7436d09351b + parent_ids: + - ab2f26ebd31d522c705ee68ea6d5a2f515513b09 + message: | + Prevents local slave lock acquisition waiting after granted on master + + A slave in an HA cluster acquires locks by first acquiring it on its + master, and if granted go and acquire it locally. Given that the master is + the authorative instances w/ regards to locks in the cluster it is + expected that acquiring a lock that the master just granted should not + have to await any condition - it's expected to be free to acquire. 
However + that is apparently not the case since sometimes the slave comes back from + the master having just acquired it and it goes and blocks, awaiting the + lock to be free since some other transactions holds it presumably. + + The problem described above in many cases results in such a lock being + stuck forever on the master, where the only way to release it would be to + restart that database. + + This commit doesn't fix the issue that is the confusion that the master is + not the single authorative entity of locks. The fix here is to prevent a + local lock acquisition, after master acquired such, to go and wait for that + lock to be free locally. If that would happen then it will not wait, but + instead throw a DeadlockDetectedException (actually a subclass thereof) + containing diagnostics about which locks are present in the local lock + manager. These deadlock detected exceptions will walk and quack the same + way a "real" such deadlock and a normal retry of the transaction will put + things right again. + + The root cause will have to be fixed, but having a few more deadlock + detected exceptions thrown to the client instead of leaving unreleasable + locks on the master behind is a massive improvement. + + For reference, this issue caused many unexpected BlockingReadTimeoutExceptions + to be thrown, exceptions of which there should be much less of after this commit. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1384961606, offset: 60, sign: 43 } }' + target: + id: d3bf6bb2308bf4e209f55ad301a3c4a197887e77 + parent_ids: + - e8b6ea169ad850f4fd84259413cec6addd678ef7 + message: "Prevents local slave lock acquisition waiting after granted on master\n\nA slave in an HA cluster acquires locks by first acquiring it on its\nmaster, and if granted go and acquire it locally. 
Given that the master is\nthe authorative instances w/ regards to locks in the cluster it is\nexpected that acquiring a lock that the master just granted should not\nhave to await any condition - it's expected to be free to acquire. However\nthat is apparently not the case since sometimes the slave comes back from\nthe master having just acquired it and it goes and blocks, awaiting the\nlock to be free since some other transactions holds it presumably.\n\nThe problem described above in many cases results in such a lock being\nstuck forever on the master, where the only way to release it would be to\nrestart that database.\n\nThis commit doesn't fix the issue that is the confusion that the master is\nnot the single authorative entity of locks. The fix here is to prevent a\nlocal lock acquisition, after master acquired such, to go and wait for that\nlock to be free locally. If that would happen then it will not wait, but\ninstead throw a DeadlockDetectedException (actually a subclass thereof)\ncontaining diagnostics about which locks are present in the local lock\nmanager. 
These deadlock detected exceptions will walk and quack the same\nway a \"real\" such deadlock and a normal retry of the transaction will put\nthings right again.\n\nThe root cause will have to be fixed, but having a few more deadlock\ndetected exceptions thrown to the client instead of leaving unreleasable\nlocks on the master behind is a massive improvement.\n\nFor reference, this issue caused many unexpected BlockingReadTimeoutExceptions\nto be thrown, exceptions of which there should be much less of after this commit.\n\n(cherry picked from commit 39be5a3a284c8be60cf058482863b7436d09351b)\n\nConflicts:\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/transaction/LockManager.java\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/transaction/LockManagerImpl.java\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/transaction/RWLock.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/com/master/MasterImpl.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/LockManagerModeSwitcher.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/SlaveLockManager.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1385985427, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 44c3e320330ecd71b687268c35ea7b436b553fbb + parent_ids: + - 4eb84e8507fe6e663132b5d4cfc0c6258ebeece4 + message: | + Cope with version check of very short store files. + + Fixes a bug where the server would be unable to start if specific store files were + very short and where the database had not been shutdown cleanly. 
+ author: Mark Needham + committer: Mark Needham + time: 'Time { raw: git_time { time: 1386863628, offset: 0, sign: 43 } }' + target: + id: d9451eebf53ea3820514c2111003c5059b089d84 + parent_ids: + - a773a802681dcda5d059c17db520a52d66d140f0 + message: "Cope with version check of very short store files.\n\nFixes a bug where the server would be unable to start if specific store files were\nvery short and where the database had not been shutdown cleanly.\n\n(cherry picked from commit 44c3e320330ecd71b687268c35ea7b436b553fbb)\n\nConflicts:\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/nioneo/store/StoreFactory.java\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/storemigration/CurrentDatabase.java\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/storemigration/UpgradableDatabase.java\n\tcommunity/kernel/src/test/java/org/neo4j/kernel/impl/storemigration/StoreUpgraderInterruptionTestIT.java\n\tcommunity/kernel/src/test/java/org/neo4j/kernel/impl/storemigration/StoreUpgraderTestIT.java\n" + author: Mark Needham + committer: Chris Leishman + time: 'Time { raw: git_time { time: 1390314578, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 0a5eb2aba58c9caf5061d88d20818157011a9add + parent_ids: + - 153ad6293a0879b61aafbf1b6542b4d32c450e0c + message: | + Completes the change where FileLock throws IOException or returns an instance, i.e. stops using null for failure signal + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1386064496, offset: 60, sign: 43 } }' + target: + id: 0d4213e93567ee41db650eacda3a4cab666c4661 + parent_ids: + - fcecb93836f0c7754bfb085a0da98ab91833a4bf + message: "Completes the change where FileLock throws IOException or returns an instance, i.e. 
stops using null for failure signal\n\n(cherry picked from commit 0a5eb2aba58c9caf5061d88d20818157011a9add)\n\nConflicts:\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/nioneo/store/CommonAbstractStore.java\n\tcommunity/kernel/src/test/java/org/neo4j/kernel/StoreLockerTest.java\n\tcommunity/kernel/src/test/java/org/neo4j/kernel/impl/nioneo/store/TestOsSpecificLocks.java\n\tcommunity/kernel/src/test/java/org/neo4j/unsafe/batchinsert/BatchInserterImplTest.java\n\tenterprise/cluster/src/test/java/org/neo4j/cluster/protocol/heartbeat/HeartbeatContextTest.java\n\tenterprise/cluster/src/test/java/org/neo4j/cluster/protocol/heartbeat/HeartbeatStateTest.java\n\tenterprise/ha/src/test/java/org/neo4j/kernel/ha/cluster/HighAvailabilityModeSwitcherTest.java\n" + author: Mattias Persson + committer: Chris Leishman + time: 'Time { raw: git_time { time: 1390314578, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: e87c2a855d5439659c0b1951cc79c7d3c1749001 + parent_ids: + - 90d130a08e1c68297cf1a9224805810d3cc423cb + message: | + Uses a snapshot of the reference to Master instead of a proxy + + so that the master cannot change all of a sudden in the middle of + operations. This fixes a problem where a service which was instantiated + for one specific master continued to run for a different master after a + sudden master switch. Specifically this resulted in misuse of id ranges, + potentially received from a different master than expected. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387187205, offset: 60, sign: 43 } }' + target: + id: 4252a4057d96c21ac4946c2884c96261b9abc627 + parent_ids: + - 317d80f8d562328f3dddb3fb6f79e22b8f038b6d + message: "Uses a snapshot of the reference to Master instead of a proxy\n\nso that the master cannot change all of a sudden in the middle of\noperations. 
This fixes a problem where a service which was instantiated\nfor one specific master continued to run for a different master after a\nsudden master switch. Specifically this resulted in misuse of id ranges,\npotentially received from a different master than expected.\n\n(cherry picked from commit e87c2a855d5439659c0b1951cc79c7d3c1749001)\n\nConflicts:\n\tcommunity/kernel/src/test/java/org/neo4j/test/OtherThreadExecutor.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/HighlyAvailableGraphDatabase.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/LabelTokenCreatorModeSwitcher.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/PropertyKeyCreatorModeSwitcher.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/RelationshipTypeCreatorModeSwitcher.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/cluster/HighAvailabilityModeSwitcher.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/LockManagerModeSwitcher.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/transaction/TxHookModeSwitcher.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/transaction/TxIdGeneratorModeSwitcher.java\n\tenterprise/ha/src/test/java/org/neo4j/ha/TransactionConstraintsIT.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387487218, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 45a16ed812a2694b1b5dba589c630425f8b3db57 + parent_ids: + - 5ecf9c66676d513a3ae6b320cfc41f5bbfe9f360 + message: | + Fixes #1697 + + Fixes #1697 which is an issue where replacing a property on an entity + using batch inserter where the property would be bigger than the one it + overwrote so that it would need to go into a new property record. There + would be a NullPointerException due to removing that property block from + the property record two times. 
+ author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387199272, offset: 60, sign: 43 } }' + target: + id: c100d9cd72f118270a4e3f3e8085e6b2858a0966 + parent_ids: + - a2cccfb63704ff40d72c5316efba6fbdeb638571 + message: "Fixes #1697\n\nFixes #1697 which is an issue where replacing a property on an entity\nusing batch inserter where the property would be bigger than the one it\noverwrote so that it would need to go into a new property record. There\nwould be a NullPointerException due to removing that property block from\nthe property record two times.\n\n(cherry picked from commit 45a16ed812a2694b1b5dba589c630425f8b3db57)\n\nConflicts:\n\tcommunity/kernel/src/main/java/org/neo4j/unsafe/batchinsert/BatchInserterImpl.java\n\tcommunity/kernel/src/test/java/org/neo4j/unsafe/batchinsert/TestBatchInsert.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387499346, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 8b01cb463967388a1d69bc726a45b2e754a9956e + parent_ids: + - f4ab0f8d42f3dc6cd594b43b48baff8ca831461a + message: | + This will use the embedded jgit instead of whatever git happens to be available in your environment. + + "As rule of thumb - stay on jgit (keep this false) until you notice performance problems." + author: Mark Needham + committer: Mark Needham + time: 'Time { raw: git_time { time: 1448311548, offset: 0, sign: 43 } }' + target: + id: 6bcc2e5ad97a37a72804220619950648c0439994 + parent_ids: + - 5a42c855c92fc3de58c06a9757265e3f6d8be877 + message: | + This will use the embedded jgit instead of whatever git happens to be available in your environment. + + "As rule of thumb - stay on jgit (keep this false) until you notice performance problems." 
+ + (cherry picked from commit 8b01cb463967388a1d69bc726a45b2e754a9956e) + author: Mark Needham + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1449213835, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 9c34b48a20d34c2aaf837d5e6394e547130b39f0 + parent_ids: + - dcc72fa83e0f3742c9ddebaad625254b9f3c9404 + message: | + Removes a test that isn't complete and has been ignored over a year + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1386776951, offset: 60, sign: 43 } }' + target: + id: 9c846b5c82f269a824ee97409f80ac2ff6a43267 + parent_ids: + - 4203296c55ad2a639baffe6329b55b1a811b5614 + message: | + Removes a test that isn't complete and has been ignored over a year + + (cherry picked from commit 9c34b48a20d34c2aaf837d5e6394e547130b39f0) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387467809, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: a56eb15e7b59f2fdb838d9e783f283ccb87dae6c + parent_ids: + - b46f14326a056c8a4350023a545f6301402e0d34 + message: | + Import tool has ability to skip duplicate nodes + + rather nodes that have the same input ids in the same group. This ability + is controlled with `--skip-duplicate-nodes<=true/false>`, whereas the + skipping of bad relationships `--skip-bad-relationships<=true/false>`. All + and any allowed bad entities (both relationships and duplicate nodes) are + collected and controlled using `--bad` and `--bad-tolerance`. + + Duplicate nodes are detected after node stage, when preparing IdMapper + and deleted as a side-effect in node->relationship stage. For this the + handling of not-in-use records is controlled using inUse record flag as + opposed to null record due to a detail in the BatchingPageCache. 
It's + generally better to not leave a record to chance, rather write it as inUse + or not. + author: Mattias Persson + committer: Mark Needham + time: 'Time { raw: git_time { time: 1427962805, offset: 60, sign: 43 } }' + target: + id: f79caebfa4a848c0cdfc9d7703ae3a2424e744dd + parent_ids: + - e38dab9af179f6b519d2a6f2f74cdb62575a8dde + message: "Import tool has ability to skip duplicate nodes\n\nrather nodes that have the same input ids in the same group. This ability\nis controlled with `--skip-duplicate-nodes<=true/false>`, whereas the\nskipping of bad relationships `--skip-bad-relationships<=true/false>`. All\nand any allowed bad entities (both relationships and duplicate nodes) are\ncollected and controlled using `--bad` and `--bad-tolerance`.\n\nDuplicate nodes are detected after node stage, when preparing IdMapper\nand deleted as a side-effect in node->relationship stage. For this the\nhandling of not-in-use records is controlled using inUse record flag as\nopposed to null record due to a detail in the BatchingPageCache. 
It's\ngenerally better to not leave a record to chance, rather write it as inUse\nor not.\n\n(cherry picked from commit a56eb15e7b59f2fdb838d9e783f283ccb87dae6c)\n\nConflicts:\n\tcommunity/import-tool/src/main/java/org/neo4j/tooling/ImportTool.java\n\tcommunity/import-tool/src/test/java/org/neo4j/tooling/ImportToolDocIT.java\n\tcommunity/import-tool/src/test/java/org/neo4j/tooling/ImportToolTest.java\n\tcommunity/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/ParallelBatchImporter.java\n\tcommunity/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/RelationshipEncoderStep.java\n\tcommunity/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/cache/idmapping/string/EncodingIdMapper.java\n\tcommunity/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/input/Group.java\n\tcommunity/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/input/Inputs.java\n\tcommunity/kernel/src/test/java/org/neo4j/unsafe/impl/batchimport/cache/idmapping/string/EncodingIdMapperTest.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1429620117, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 06da3315ccfa8aec43266234db2fede07a8424ae + parent_ids: + - 4304c9fc628648ee326c85a01a5e045b0e0d263a + message: | + Enables sending batches at different pace than processing + + ProcessorStep used to have the invariant that one received/processed + batch resulted in one batch sent downstream. This made certain processing + impossible where the items in each batch were split up by a certain + criteria and sent downstream when batch size reached per part. + + This commit changes that so that instead of returning a batch object to + send downstream as part of processing it, processing has access to a + batch sender allowing for more flexible sending downstream. 
+ + As part of doing this, TaskExecutor got a generic parameter for a + thread-local state and using its own Task instead of Callable. + + Also cleaning up configuration of steps into a Configuration instead of + passing in the individual configuration into all step constructors. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1426628061, offset: 60, sign: 43 } }' + target: + id: f9ff718dd908a83219f0f8bf4a7bc656bfdc7ab6 + parent_ids: + - aafdd8bb8d5be4b928edd8d672d54097f3f8dda9 + message: | + Enables sending batches at different pace than processing + + ProcessorStep used to have the invariant that one received/processed + batch resulted in one batch sent downstream. This made certain processing + impossible where the items in each batch were split up by a certain + criteria and sent downstream when batch size reached per part. + + This commit changes that so that instead of returning a batch object to + send downstream as part of processing it, processing has access to a + batch sender allowing for more flexible sending downstream. + + As part of doing this, TaskExecutor got a generic parameter for a + thread-local state and using its own Task instead of Callable. + + Also cleaning up configuration of steps into a Configuration instead of + passing in the individual configuration into all step constructors. 
+ + (cherry picked from commit 06da3315ccfa8aec43266234db2fede07a8424ae) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1428572840, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5581112812f76e96bf9f683d13f323e997c09b0e + parent_ids: + - 24260e7aff65a705aa2d22801ca871ee25e152bd + message: | + Throws more specific exceptions from TxManager#resume + + There are two possible failures that can happen in #resume: + (1) The current thread has already got another transaction associated with it + (2) The transaction to be resumed is already associated with another thread + + These two failures got their own exceptions: ThreadAssociatedWithOtherTransactionException and + TransactionAlreadyActiveException so that callers can react appropriately + to the two scenarios. Typically there's special handling of (2). Doing + this made UnableToResumeTransactionException redundant, and it was too + generic to begin with as it represented both (1) and (2). + + This commit also tidies up and corrects logging regarding old transactions + on the master which are to reaped, so less logging and more relevant + logging there. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1385042342, offset: 60, sign: 43 } }' + target: + id: 92919e65808099e711a032c48106da15dc6f99df + parent_ids: + - 5419c20cc12751478a396b6d6c8a502708f9ea3d + message: "Throws more specific exceptions from TxManager#resume\n\nThere are two possible failures that can happen in #resume:\n (1) The current thread has already got another transaction associated with it\n (2) The transaction to be resumed is already associated with another thread\n\nThese two failures got their own exceptions: ThreadAssociatedWithOtherTransactionException and\nTransactionAlreadyActiveException so that callers can react appropriately\nto the two scenarios. 
Typically there's special handling of (2). Doing\nthis made UnableToResumeTransactionException redundant, and it was too\ngeneric to begin with as it represented both (1) and (2).\n\nThis commit also tidies up and corrects logging regarding old transactions\non the master which are to reaped, so less logging and more relevant\nlogging there.\n\n(cherry picked from commit 5581112812f76e96bf9f683d13f323e997c09b0e)\n\nConflicts:\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/transaction/TxManager.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/MasterClient20.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/cluster/DefaultMasterImplSPI.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/com/master/MasterImpl.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/com/master/MasterServer.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1386009470, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: b76f8e34f7184e1457e37f6da6137bfbde79c3e5 + parent_ids: + - 6deaa8d1740e012916c9f4d8cafe8dbaf6d68e38 + message: | + Master generates and propagates an epoch + + that slaves must use for all further communication with it, or fail with + InvalidEpochException. This is to prevent the scenario where one part of a + transaction or commit communicates with one master, there's a master + switch where the new master instance uses the same host and port as the old one. + If a transaction would continue through such a scenario then unexpected + things and bad things might happen. 
+ author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1389181738, offset: 60, sign: 43 } }' + target: + id: e47e18cacb66a950f4ceafaa6cde0486e62690a0 + parent_ids: + - f8eb0c8dba4b2d52cb76081ee8a335eb4443c4a0 + message: "Master generates and propagates an epoch\n\nthat slaves must use for all further communication with it, or fail with\nInvalidEpochException. This is to prevent the scenario where one part of a\ntransaction or commit communicates with one master, there's a master\nswitch where the new master instance uses the same host and port as the old one.\nIf a transaction would continue through such a scenario then unexpected\nthings and bad things might happen.\n\n(cherry picked from commit b76f8e34f7184e1457e37f6da6137bfbde79c3e5)\n\nConflicts:\n\tcommunity/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneDocumentStructure.java\n\tcommunity/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/LuceneDocumentStructureTest.java\n\tenterprise/com/src/test/java/org/neo4j/com/MadeUpClient.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/HaRequestType20.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/HighlyAvailableGraphDatabase.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/MasterClient20.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/cluster/HighAvailabilityModeSwitcher.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/com/RequestContextFactory.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/com/master/MasterImpl.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/com/master/MasterServer.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/com/slave/MasterClient153.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/com/slave/MasterClientResolver.java\n\tenterprise/ha/src/test/java/org/neo4j/kernel/ha/cluster/HighAvailabilityModeSwitcherTest.java\n\tenterprise/ha/src/test/java/org/neo4j/kernel/ha/com/master/MasterImplTest.java\n\tenterprise/ha/src/
test/java/org/neo4j/kernel/ha/id/HaIdGeneratorFactoryTest.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1389366377, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: ea33317a4c9cce0b4a7945a920806333e864827d + parent_ids: + - 54592b9b558a322c51bb4250882cbd10343a1717 + message: | + Ability to fixate dynamic NumberArray + + when it has done all it's dynamic growing. Fixating a dynamic NumberArray + returns a new instance with better performance, but one which cannot + dynamically grow anymore. + + ParallelBatchImporter now fixates the node->relationship cache after + relationship import for faster relationship->relationship stage. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1427184571, offset: 60, sign: 43 } }' + target: + id: 1eb61315d43f24f43740c36dacd6643ae49a653a + parent_ids: + - 7b29a29e473ea3df951d6ba066a1e4e9f80fb417 + message: | + Ability to fixate dynamic NumberArray + + when it has done all it's dynamic growing. Fixating a dynamic NumberArray + returns a new instance with better performance, but one which cannot + dynamically grow anymore. + + ParallelBatchImporter now fixates the node->relationship cache after + relationship import for faster relationship->relationship stage. 
+ + (cherry picked from commit ea33317a4c9cce0b4a7945a920806333e864827d) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1428573055, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: d3da2c164fd1c8d22e3a5d499c4775dad85bdb28 + parent_ids: + - 4203296c55ad2a639baffe6329b55b1a811b5614 + message: | + added status endpoint to indicate ability to process transactions, additional to the existing master/ slave endpoints + author: Lasse Westh-Nielsen + committer: Lasse Westh-Nielsen + time: 'Time { raw: git_time { time: 1387353704, offset: 60, sign: 43 } }' + target: + id: 2cf0b4661d99bb1c4860d93258d29207cfaa8e8f + parent_ids: + - d9451eebf53ea3820514c2111003c5059b089d84 + message: | + added status endpoint to indicate ability to process transactions, additional to the existing master/ slave endpoints + + (cherry picked from commit d3da2c164fd1c8d22e3a5d499c4775dad85bdb28) + author: Lasse Westh-Nielsen + committer: Chris Leishman + time: 'Time { raw: git_time { time: 1390314578, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: c8d4c5f5cb10c31c41d8046e38f35c9f97ed101a + parent_ids: + - abc75138c1643e66f462543b68528780eda27675 + message: | + Re-adds I/O monitor to writer step in importer + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1428322413, offset: 120, sign: 43 } }' + target: + id: 95a4798ff8e157cd51c630b79725f4dbe3791f20 + parent_ids: + - c33520cebdd3ec6547aa55225b358701281706a5 + message: | + Re-adds I/O monitor to writer step in importer + + (cherry picked from commit c8d4c5f5cb10c31c41d8046e38f35c9f97ed101a) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1428573268, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 
6d39a48b91da5939e780a30d72db20ec20661ef9 + parent_ids: + - 218e00b1eebc4f8018e18907c92865b9a5606926 + message: | + Uses the full 16 bits for keeping the type id value + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1355841599, offset: 60, sign: 43 } }' + target: + id: 5daeba96757c7ec8e702d47ab8daf33feaae4522 + parent_ids: + - c44f040ffdc44dc21b14024ce74535dad0ac9b44 + message: | + Uses the full 16 bits for keeping the type id value + (cherry picked from commit 6d39a48b91da5939e780a30d72db20ec20661ef9) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1355860274, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 3bc742f2664c381d3076840e1f9ee53dcb922483 + parent_ids: + - c768496aaaba1c1fe9249f38e056baa5ff027dbd + message: | + Make sure multiline fields are enabled by default for LOAD CSV + author: Davide Grohmann + committer: Davide Grohmann + time: 'Time { raw: git_time { time: 1432740574, offset: 120, sign: 43 } }' + target: + id: 0e68a0393f87a55f17ff1e756ac6d5bca2617e43 + parent_ids: + - 85df2250786b7e615e69e3a09b6e1825443f52bc + message: | + Make sure multiline fields are enabled by default for LOAD CSV + + (cherry picked from commit 3bc742f2664c381d3076840e1f9ee53dcb922483) + author: Davide Grohmann + committer: Davide Grohmann + time: 'Time { raw: git_time { time: 1438604992, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: efda6177fd0596377758d465b2e1a134777af66f + parent_ids: + - 45293a05ca15b807f28db6bf019bc3be6be4c83d + message: | + Fixed a counting issues for trailing batches + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1427039204, offset: 60, sign: 43 } }' + target: + id: 7b29a29e473ea3df951d6ba066a1e4e9f80fb417 + parent_ids: + - d2bcc565c2e658ee88a9dd27280ed9fcbefe0ef0 + message: | + Fixed a 
counting issues for trailing batches + + (cherry picked from commit efda6177fd0596377758d465b2e1a134777af66f) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1428573034, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 69ec08cc8ca2188348a06e17297705351d31b58a + parent_ids: + - 06da3315ccfa8aec43266234db2fede07a8424ae + message: | + Synchronized adding of chunks in DynamicNumberArray + + this to support concurrent writing to it, at least if touching different + items in the array. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1426628123, offset: 60, sign: 43 } }' + target: + id: e3d2a0734ff581cf81b5229a6a0b8956fc566925 + parent_ids: + - f9ff718dd908a83219f0f8bf4a7bc656bfdc7ab6 + message: | + Synchronized adding of chunks in DynamicNumberArray + + this to support concurrent writing to it, at least if touching different + items in the array. 
+ + (cherry picked from commit 69ec08cc8ca2188348a06e17297705351d31b58a) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1428572867, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 1d93b20e29c22fb0ee49348a1d43a9333dafe438 + parent_ids: + - b43442fb63f0bb85d9fe2f10594634468e1129ef + message: | + Ability to specify max processors for each step + + which suits interaction with DynamicTaskExecutor better and also useful + for "calculate dense node stage", where batches are split up by radix (%10) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1427362548, offset: 60, sign: 43 } }' + target: + id: c33520cebdd3ec6547aa55225b358701281706a5 + parent_ids: + - 6763221c7d8c928fd60d6d4348c317abc4fd507e + message: | + Ability to specify max processors for each step + + which suits interaction with DynamicTaskExecutor better and also useful + for "calculate dense node stage", where batches are split up by radix (%10) + + (cherry picked from commit 1d93b20e29c22fb0ee49348a1d43a9333dafe438) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1428573200, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 4773dced901c38d289f51f4a0c376dbd5b6a3b92 + parent_ids: + - 0716dfc5561f1b8152273ee2a8c85a83d498dacd + message: | + Properly closes resources created by some tests + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1421760947, offset: 60, sign: 43 } }' + target: + id: d65e56be3541319ce70de1a36cb8cc45b49fc0f7 + parent_ids: + - 34e104f4aeb0311f863d9cb5cd3aad598e1d0ff5 + message: "Properly closes resources created by some tests\n\n(cherry picked from commit 
4773dced901c38d289f51f4a0c376dbd5b6a3b92)\n\nConflicts:\n\tcommunity/kernel/src/test/java/org/neo4j/kernel/impl/index/TestIndexProviderStore.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1421780689, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 059bb13af0d492a95c68a5027e746ae07153bbd2 + parent_ids: + - a6d3fa1a2d35d51d13a92c571f7187e30e924d08 + message: | + Performance optimizations for best-first selector + + Mostly revolving around finding single paths. This gives a good + improvement when calling PathFinder#findSinglePath for those graph + algorithms using the traversal framework and more specifically the + BestFirstSelector. Currently this will benefit Dijkstra and A*, although + the default implementation of A* is a custom non-traversal-framework + version that only supports single paths anyways so already has got a + similar performance behaviour. Look at TraversalAStar for a A* version + that uses the traversal framework. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1382954415, offset: 60, sign: 43 } }' + target: + id: 3812f79a3090ef277cd73eb5f4f9c37d945fef30 + parent_ids: + - d218994cff7ea740c72ec9e45df700e8de239a10 + message: | + Performance optimizations for best-first selector + + Mostly revolving around finding single paths. This gives a good + improvement when calling PathFinder#findSinglePath for those graph + algorithms using the traversal framework and more specifically the + BestFirstSelector. Currently this will benefit Dijkstra and A*, although + the default implementation of A* is a custom non-traversal-framework + version that only supports single paths anyways so already has got a + similar performance behaviour. Look at TraversalAStar for a A* version + that uses the traversal framework. 
+ + (cherry picked from commit 059bb13af0d492a95c68a5027e746ae07153bbd2) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1383657719, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 0b2e53301f1cf16f996894b48c6800a90963ce3d + parent_ids: + - bd965755fe82c91e5a3b4ab66435505e46ccf7fd + message: | + Proper naming of parameterized values + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1426326490, offset: 60, sign: 43 } }' + target: + id: d359f9e1bb98b98376eee9e29e912b6d8eb68940 + parent_ids: + - 712f808ccb5845ad314474c4ce7601896426285c + message: | + Proper naming of parameterized values + + (cherry picked from commit 0b2e53301f1cf16f996894b48c6800a90963ce3d) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1427291555, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: e9a88b91d0c59627a25f81d0b523ba44cea3890b + parent_ids: + - f7c09f6cc7d111b9b97d0ea51eac851ce5a1307a + message: | + Keeps explicit state whether a transaction is remotely initialize or not + + instead of relying on lock information (whether any lock was grabbed or + not). There was a problem where the "old" transaction state was checked + for locks and not the new one (the one in the Kernel API), or preferrably + both should have been checked. But that approach was error prone so an + explicit flag is better and safer. + + Also renamed TxHook --> RemoteTxHook because that name and its method + names were so misguiding that it slowed down reasoning around problems + where it was involved. + + The fix in this commit is for a regression from 1.9, but may also fix some + other corner cases with remote transaction management. 
+ author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1384873347, offset: 60, sign: 43 } }' + target: + id: e8b6ea169ad850f4fd84259413cec6addd678ef7 + parent_ids: + - ef4040fee9c8f47855cd8d808e6b79a1293f626e + message: "Keeps explicit state whether a transaction is remotely initialize or not\n\ninstead of relying on lock information (whether any lock was grabbed or\nnot). There was a problem where the \"old\" transaction state was checked\nfor locks and not the new one (the one in the Kernel API), or preferrably\nboth should have been checked. But that approach was error prone so an\nexplicit flag is better and safer.\n\nAlso renamed TxHook --> RemoteTxHook because that name and its method\nnames were so misguiding that it slowed down reasoning around problems\nwhere it was involved.\n\nThe fix in this commit is for a regression from 1.9, but may also fix some\nother corner cases with remote transaction management.\n\nAlso had to pull in some constructs from\ne5cdeec2b7706aefd1bb88cd62409a0591ca2f0f due to differences in data source\nenlisting timing between 1.9 and 2.0.\n\n(cherry picked from commit 
e9a88b91d0c59627a25f81d0b523ba44cea3890b)\n\nConflicts:\n\tcommunity/kernel/src/main/java/org/neo4j/helpers/Exceptions.java\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/DefaultTxHook.java\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/InternalAbstractGraphDatabase.java\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/core/NoTransactionState.java\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/core/TransactionState.java\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/core/WritableTransactionState.java\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/nioneo/store/NeoStore.java\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/transaction/TransactionStateFactory.java\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/transaction/TxManager.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/HighlyAvailableGraphDatabase.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1385978509, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: d62194d2c9f04003ae9c695c8d1c51cfb0f8951d + parent_ids: + - 1461f2a4d0f74db6cf998d1af47e0a10e81670a4 + message: | + Adds tests for state rollback in HAMSM + author: Chris Gioran + committer: Chris Gioran + time: 'Time { raw: git_time { time: 1386257546, offset: 60, sign: 43 } }' + target: + id: e45508b48c36cc14055b67511934820de8b01a2a + parent_ids: + - f463cd282c0af93a5893392e29418a819d0a6f0e + message: | + Adds tests for state rollback in HAMSM + + (cherry picked from commit d62194d2c9f04003ae9c695c8d1c51cfb0f8951d) + author: Chris Gioran + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387467853, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: e9d762bacf0d3d5c355069e25efdff09d2d68c29 + parent_ids: + - 0a5eb2aba58c9caf5061d88d20818157011a9add + message: | + Gets rid of even some more null 
handling for FileLock + + which is nowadays unnecessary. Also throws the expected IOException, + instead of an IllegalArgumentException if the lock couldn't be had from + the FileChannel. May bad, introduced in + 0a5eb2aba58c9caf5061d88d20818157011a9add. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1386082129, offset: 60, sign: 43 } }' + target: + id: a773a802681dcda5d059c17db520a52d66d140f0 + parent_ids: + - 0d4213e93567ee41db650eacda3a4cab666c4661 + message: "Gets rid of even some more null handling for FileLock\n\nwhich is nowadays unnecessary. Also throws the expected IOException,\ninstead of an IllegalArgumentException if the lock couldn't be had from\nthe FileChannel. May bad, introduced in\n0a5eb2aba58c9caf5061d88d20818157011a9add.\n\n(cherry picked from commit e9d762bacf0d3d5c355069e25efdff09d2d68c29)\n\nConflicts:\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/nioneo/store/FileLock.java\n" + author: Mattias Persson + committer: Chris Leishman + time: 'Time { raw: git_time { time: 1390314578, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 1461f2a4d0f74db6cf998d1af47e0a10e81670a4 + parent_ids: + - 83e53d5d83c3bd4b53645745a2da0dbc6b150a16 + message: | + Reverts HA state transitions if one or more state transition listeners fail + + This will make the state machine more resilient and a retried event will + be able to succeed. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1386239015, offset: 60, sign: 43 } }' + target: + id: 627e1dce4eca0285511b4111a0817faa1e8ca89f + parent_ids: + - 9c846b5c82f269a824ee97409f80ac2ff6a43267 + message: | + Reverts HA state transitions if one or more state transition listeners fail + + This will make the state machine more resilient and a retried event will + be able to succeed. 
+ + (cherry picked from commit 1461f2a4d0f74db6cf998d1af47e0a10e81670a4) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387467823, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 1a42148dd347b83f69f87e12f9524c0db9e44b9e + parent_ids: + - 90d130a08e1c68297cf1a9224805810d3cc423cb + message: | + Passes along phase1Timeout payload in all places + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387196222, offset: 60, sign: 43 } }' + target: + id: a2cccfb63704ff40d72c5316efba6fbdeb638571 + parent_ids: + - 4252a4057d96c21ac4946c2884c96261b9abc627 + message: | + Passes along phase1Timeout payload in all places + + (cherry picked from commit 1a42148dd347b83f69f87e12f9524c0db9e44b9e) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387488730, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 79c4c3f18940bbe5d7a358dc65857751d6345f0a + parent_ids: + - a56aae6a0166274d08059f6706a66a796dc0c4d2 + message: | + Fixes an issue of unexpected message payload in a timeout message + + Also added some more sanity checks. 
+ author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1386604707, offset: 60, sign: 43 } }' + target: + id: 44ad5d00bf08708f2042f77770d11a1743a0d372 + parent_ids: + - e45508b48c36cc14055b67511934820de8b01a2a + message: "Fixes an issue of unexpected message payload in a timeout message\n\nAlso added some more sanity checks.\n\n(cherry picked from commit 79c4c3f18940bbe5d7a358dc65857751d6345f0a)\n\nConflicts:\n\tenterprise/cluster/src/test/java/org/neo4j/cluster/protocol/atomicbroadcast/multipaxos/ProposerStateTest.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387467907, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 4ed6f7be6b6218473e1d6da5b78d5074b19e08d7 + parent_ids: + - 5ecf9c66676d513a3ae6b320cfc41f5bbfe9f360 + message: | + Enabled more tests for tx event handlers + + so that more is tested, also afterCommit(). Some issues was exposed which + were also fixed. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387208069, offset: 60, sign: 43 } }' + target: + id: 750b7d6e489b79fde136f3b742feee8270a1731e + parent_ids: + - c100d9cd72f118270a4e3f3e8085e6b2858a0966 + message: "Enabled more tests for tx event handlers\n\nso that more is tested, also afterCommit(). 
Some issues was exposed which\nwere also fixed.\n\n(cherry picked from commit 4ed6f7be6b6218473e1d6da5b78d5074b19e08d7)\n\nConflicts:\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/core/WritableTransactionState.java\n\tcommunity/kernel/src/test/java/org/neo4j/kernel/impl/event/VerifyingTransactionEventHandler.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387499354, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: c82701d2a0bddf39b77e795638bd98555fc46540 + parent_ids: + - 06cb2a8c2439ba31ff1fef674d0f6393c10c9a8f + message: | + Suspends the transaction in a finally block + + since we must uphold the contract where the transaction must have been + suspended no matter what happens before leaving that method. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1385040316, offset: 60, sign: 43 } }' + target: + id: 5419c20cc12751478a396b6d6c8a502708f9ea3d + parent_ids: + - 2c1968a93b35d3d6d6cead8b2ea9db552945e34b + message: "Suspends the transaction in a finally block\n\nsince we must uphold the contract where the transaction must have been\nsuspended no matter what happens before leaving that method.\n\n(cherry picked from commit c82701d2a0bddf39b77e795638bd98555fc46540)\n\nConflicts:\n\tcommunity/kernel/src/main/java/org/neo4j/kernel/impl/transaction/TransactionImpl.java\n\tenterprise/ha/src/main/java/org/neo4j/kernel/ha/com/master/MasterImpl.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1386008305, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 0b2e53301f1cf16f996894b48c6800a90963ce3d + parent_ids: + - bd965755fe82c91e5a3b4ab66435505e46ccf7fd + message: | + Proper naming of parameterized values + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { 
time: 1426326490, offset: 60, sign: 43 } }' + target: + id: c938e08a9319b22eb93300b254227c65498109f1 + parent_ids: + - 0233772526fbd7f7eefac22451a0e2d578a49faa + message: "Proper naming of parameterized values\n\n(cherry picked from commit 0b2e53301f1cf16f996894b48c6800a90963ce3d)\n\nConflicts:\n\tcommunity/neo4j/src/test/java/org/neo4j/index/IndexTxStateLookupTest.java\n" + author: Mattias Persson + committer: Ben Butler-Cole + time: 'Time { raw: git_time { time: 1433320377, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: d3da2c164fd1c8d22e3a5d499c4775dad85bdb28 + parent_ids: + - 4203296c55ad2a639baffe6329b55b1a811b5614 + message: | + added status endpoint to indicate ability to process transactions, additional to the existing master/ slave endpoints + author: Lasse Westh-Nielsen + committer: Lasse Westh-Nielsen + time: 'Time { raw: git_time { time: 1387353704, offset: 60, sign: 43 } }' + target: + id: ddb4d0585f4fc8936ac05db06741f0c2604b5790 + parent_ids: + - 2cf0b4661d99bb1c4860d93258d29207cfaa8e8f + message: | + Fixes problem with parser not being thread safe + + (cherry picked from commit d3da2c164fd1c8d22e3a5d499c4775dad85bdb28) + author: Andres Taylor + committer: Chris Leishman + time: 'Time { raw: git_time { time: 1390314578, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 564e177a3935632b28a819e1b9686bee92c7fe6a + parent_ids: + - 8669f23b27ce55d8d273d1d00c9bf45401cfdf6d + message: | + Moved API level usage of internal class GraphDatabaseAPI into internals. + author: Jacob Hansson + committer: Jacob Hansson + time: 'Time { raw: git_time { time: 1389282285, offset: 60, sign: 43 } }' + target: + id: 6e60687053418c8e839d4d406099ac5dec720b46 + parent_ids: + - dad16774f8eb20eded6f18982b7b16e1c1b52dd4 + message: | + Moved API level usage of internal class GraphDatabaseAPI into internals. 
+ + (cherry picked from commit 564e177a3935632b28a819e1b9686bee92c7fe6a) + author: Jacob Hansson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1390387828, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 3636952ae2c5d89bf4d3e9917ec3cacca774e103 + parent_ids: + - c90a96b89377390abd2dd0714e54995f156485b4 + message: | + Updated rolling upgrade 'quick verification' IT test so that it can be used again + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1390221808, offset: 60, sign: 43 } }' + target: + id: a84abd4d5d748590fb32be2f9be2d363677d5734 + parent_ids: + - 86d1333cd890cffb9d70d2aca4f5c1bae018bf07 + message: "Updated rolling upgrade 'quick verification' IT test so that it can be used again\n\n(cherry picked from commit 3636952ae2c5d89bf4d3e9917ec3cacca774e103)\n\nConflicts:\n\tenterprise/ha/src/test/java/org/neo4j/ha/upgrade/RollingUpgradeIT.java\n\nFixed some issues with the test itself\n\n(cherry picked from commit 6d5aed4d27520bdb0179356aedf29a8abc05cad3)\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1390226472, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: b453f98dba1e5bcb8e19d5f5f2efcc68eb09c75a + parent_ids: + - 932be349c9a54c808ea8f5f9ab7b8c4ce28765cf + message: | + Test and fix for failing to start transaction in MasterImpl. + author: Jacob Hansson + committer: Jacob Hansson + time: 'Time { raw: git_time { time: 1384466593, offset: 60, sign: 43 } }' + target: + id: f8eb0c8dba4b2d52cb76081ee8a335eb4443c4a0 + parent_ids: + - 94e6771466327c81ecdd258506d61e0ceb6f2db0 + message: | + Test and fix for failing to start transaction in MasterImpl. 
+ + (cherry picked from commit b453f98dba1e5bcb8e19d5f5f2efcc68eb09c75a) + author: Jacob Hansson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1389365668, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 4773dced901c38d289f51f4a0c376dbd5b6a3b92 + parent_ids: + - 0716dfc5561f1b8152273ee2a8c85a83d498dacd + message: | + Properly closes resources created by some tests + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1421760947, offset: 60, sign: 43 } }' + target: + id: 9977e5b96e48ab6101b121db40d024303c45d002 + parent_ids: + - d7bf9d329782fc809d892ccd726af7a046586d8c + message: "Properly closes resources created by some tests\n\n(cherry picked from commit 4773dced901c38d289f51f4a0c376dbd5b6a3b92)\n\nConflicts:\n\tcommunity/kernel/src/test/java/org/neo4j/kernel/impl/index/TestIndexProviderStore.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1421772047, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 72b24acc95b93e7928e76dd69a2d40628176843c + parent_ids: + - 652e25390a31834378af48289da200a1c75dd062 + message: | + Minor fix to clustering tutorial formating. + author: Ben Butler-Cole + committer: Alistair Jones + time: 'Time { raw: git_time { time: 1386242212, offset: 0, sign: 43 } }' + target: + id: f463cd282c0af93a5893392e29418a819d0a6f0e + parent_ids: + - 627e1dce4eca0285511b4111a0817faa1e8ca89f + message: | + Minor fix to clustering tutorial formating. 
+ + (cherry picked from commit 72b24acc95b93e7928e76dd69a2d40628176843c) + author: Ben Butler-Cole + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387467842, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 4304c9fc628648ee326c85a01a5e045b0e0d263a + parent_ids: + - 04d50090ceb0a836b861f9c90f2e24a699cbc2ac + message: | + Renames ExecutorServiceStep --> ProcessorsStep and proper step packages + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1426626560, offset: 60, sign: 43 } }' + target: + id: aafdd8bb8d5be4b928edd8d672d54097f3f8dda9 + parent_ids: + - eb5f1b8e484120eeb57fba79a57f65e7a9cbd8ea + message: | + Renames ExecutorServiceStep --> ProcessorsStep and proper step packages + + (cherry picked from commit 4304c9fc628648ee326c85a01a5e045b0e0d263a) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1428572323, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: b40b8b6ebdb86de126ee4c3a48eeb361f32a16e5 + parent_ids: + - 4f94cb5c606fe4e6fd7d2fe9c845f7dad11062d0 + message: | + Passes correct payload for phase1Timeout messages + + There was an issue where a phase1Timeout occured, but no payload passed + with it. The consumer of such a timeout used the payload and made it + available for later usage even, which then would unexpectedly fail. + + Now, wherever applicable, the correct payload is sent with such timeout + messages. 
+ + co-author: Rickard Öberg, @rickardoberg + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1386851943, offset: 60, sign: 43 } }' + target: + id: 317d80f8d562328f3dddb3fb6f79e22b8f038b6d + parent_ids: + - 44ad5d00bf08708f2042f77770d11a1743a0d372 + message: "Passes correct payload for phase1Timeout messages\n\nThere was an issue where a phase1Timeout occured, but no payload passed\nwith it. The consumer of such a timeout used the payload and made it\navailable for later usage even, which then would unexpectedly fail.\n\nNow, wherever applicable, the correct payload is sent with such timeout\nmessages.\n\nco-author: Rickard Öberg, @rickardoberg\n(cherry picked from commit b40b8b6ebdb86de126ee4c3a48eeb361f32a16e5)\n\nConflicts:\n\tenterprise/cluster/src/test/java/org/neo4j/cluster/protocol/atomicbroadcast/multipaxos/ProposerStateTest.java\n\tenterprise/cluster/src/test/java/org/neo4j/cluster/protocol/cluster/ClusterStateTest.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387468161, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: e50473ec0ff19786fae997c4f9b09bca368d5bae + parent_ids: + - ad00506f15a4d9fe392cfe336594368ea4657c46 + message: | + Refactorings and improvements on batch importer staging + + The staging framework for the ParallelBatchImporter has been + changed to reduce time spent in the framework itself as well + as reporing much more accurate statistics. + + - Simplified Stage to only have #add(Step), instead of input/add + - Simplified and streamlined waiting of conditions in individual steps + where conditions are first busy waited a little while, to then + back off to a sleep strategy. + - Improved accuracy of statistics of upstream/downstream idling, + possible since better placed for measuring was found. 
+ - DetailedExecutionMonitor will find and print which step is the + current most likely step to be the bottle neck in the stage. + - Correctly and efficiently follows the contract of a step, where + batches must be sent downstream in the order they arrived, which + implies total ordering as a whole between steps. Previously there + was too strict checks such that some parallelization was hindered. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1411678829, offset: 120, sign: 43 } }' + target: + id: 4876372f130f6372a852985e58406a8961d5b872 + parent_ids: + - b602940a447b7ee4ac6c3e206d1847800a198ca4 + message: "Refactorings and improvements on batch importer staging\n\nThe staging framework for the ParallelBatchImporter has been\nchanged to reduce time spent in the framework itself as well\nas reporing much more accurate statistics.\n\n- Simplified Stage to only have #add(Step), instead of input/add\n- Simplified and streamlined waiting of conditions in individual steps\n where conditions are first busy waited a little while, to then\n back off to a sleep strategy.\n- Improved accuracy of statistics of upstream/downstream idling,\n possible since better placed for measuring was found.\n- DetailedExecutionMonitor will find and print which step is the\n current most likely step to be the bottle neck in the stage.\n- Correctly and efficiently follows the contract of a step, where\n batches must be sent downstream in the order they arrived, which\n implies total ordering as a whole between steps. 
Previously there\n was too strict checks such that some parallelization was hindered.\n\n(cherry picked from commit e50473ec0ff19786fae997c4f9b09bca368d5bae)\n\nConflicts:\n\tcommunity/consistency-check/src/test/java/org/neo4j/unsafe/impl/batchimport/ParallelBatchImporterTest.java\n" + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1411992232, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5977faf23b0eaa7f4e437ca133246092db8f4d63 + parent_ids: + - 41346d45e45aca2b00adf5840d47a8798c447a86 + message: | + Sends booked instance payload for repropose after rejectPrepare + + this will fix an issue where there were a null payload when reproposing an + instance. + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387371338, offset: 60, sign: 43 } }' + target: + id: d353f6b23045c04e51a4c961c2addbaf43bd4b5a + parent_ids: + - 750b7d6e489b79fde136f3b742feee8270a1731e + message: | + Sends booked instance payload for repropose after rejectPrepare + + this will fix an issue where there were a null payload when reproposing an + instance. + + (cherry picked from commit 5977faf23b0eaa7f4e437ca133246092db8f4d63) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1387499355, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 829ee8a76acb968eefc177b3b44f4a7c1b1938a2 + parent_ids: + - ea33317a4c9cce0b4a7945a920806333e864827d + message: | + Renamed NodeRelationshipLink-->NodeRelationshipCache + + to have similar names as other e.g. NodeLabelsCache. Also removed + unnecessary NodeRelationshipLink interface as well. 
+ author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1427184574, offset: 60, sign: 43 } }' + target: + id: 6763221c7d8c928fd60d6d4348c317abc4fd507e + parent_ids: + - 1eb61315d43f24f43740c36dacd6643ae49a653a + message: | + Renamed NodeRelationshipLink-->NodeRelationshipCache + + to have similar names as other e.g. NodeLabelsCache. Also removed + unnecessary NodeRelationshipLink interface as well. + + (cherry picked from commit 829ee8a76acb968eefc177b3b44f4a7c1b1938a2) + author: Mattias Persson + committer: Mattias Persson + time: 'Time { raw: git_time { time: 1428573170, offset: 120, sign: 43 } }' + is_trivial: false diff --git a/dataset/mined-cherries-verification/PHP_webonyx_graphql-php.yaml b/dataset/mined-cherries-verification/PHP_webonyx_graphql-php.yaml new file mode 100644 index 00000000..748b7b54 --- /dev/null +++ b/dataset/mined-cherries-verification/PHP_webonyx_graphql-php.yaml @@ -0,0 +1,861 @@ +- repo_name: webonyx/graphql-php + total_number_of_commits: '1846' + total_number_of_branches: '13' + total_number_of_results: '36' + language: PHP + total_number_of_committers: '68' +- - search_method: MessageScan + cherry_and_target: + cherry: + id: ccc4746b516699cf8bfce3bfed5d13eaba946802 + parent_ids: + - 5bc702b6f24369402d91cdaf61fdf253438fbf49 + message: |+ + Add support for type config decorator in SchemaExtender (#871) + + author: Adrien Poupa + committer: GitHub + time: 'Time { raw: git_time { time: 1623773452, offset: 120, sign: 43 } }' + target: + id: f304483ba01def42b41855aa9a15b8d257d174ed + parent_ids: + - 578d0875f6c5140085b19bfb82167bed3fb23e1b + message: | + Add support for type config decorator in SchemaExtender (#871) + + (cherry picked from commit ccc4746b516699cf8bfce3bfed5d13eaba946802) + author: Adrien Poupa + committer: Benedikt Franke + time: 'Time { raw: git_time { time: 1623773598, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 
77448ba6239f7a8bd413c2dfc4ef6d2c3d040009 + parent_ids: + - 7d59811c4ff3134d1e4fd79652f8601e9bd37d36 + message: | + Fix incorrect array type of rootValue in PHPDocs + author: Petr Skoda + committer: Petr Skoda + time: 'Time { raw: git_time { time: 1549828404, offset: 780, sign: 43 } }' + target: + id: 7405ddc85206b647e9ca0ea35ec543fb78d0e75b + parent_ids: + - 0cbc1c9c0702668a759110d7aa12471819bbf456 + message: | + Fix incorrect array type of rootValue in PHPDocs + + (cherry picked from commit 77448ba6239f7a8bd413c2dfc4ef6d2c3d040009) + author: Petr Skoda + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552145250, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 31d89acfae5a27b772cd0ebec992d0e564796cf5 + parent_ids: + - f96bd2740df01dccc682eebe3e02215acea1b374 + message: | + Failing test for CoroutineExecutor removes associative array when using custom scalar producing JSON + author: Jan Bukva + committer: Jan Bukva + time: 'Time { raw: git_time { time: 1545922887, offset: 60, sign: 43 } }' + target: + id: 42b20e76517167c056848069a96ed2dcad27a826 + parent_ids: + - 08992de960e831329303982eb8cb5611ee024a35 + message: | + Failing test for CoroutineExecutor removes associative array when using custom scalar producing JSON + + (cherry picked from commit 31d89acfae5a27b772cd0ebec992d0e564796cf5) + author: Jan Bukva + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552145057, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 828a9fb002e975ddf5945d3f4af91e4e4e8f23e2 + parent_ids: + - f96bd2740df01dccc682eebe3e02215acea1b374 + message: | + Fix Deferred + author: Simon Podlipsky + committer: Simon Podlipsky + time: 'Time { raw: git_time { time: 1545947412, offset: 60, sign: 43 } }' + target: + id: ababa18157048547a6b6adf3c16e1d28df6ea7b1 + parent_ids: + - f52dfcfaef1ab566fb30ba63c6c5cc9e2e1564ca + message: | + 
Fix Deferred + + (cherry picked from commit 828a9fb002e975ddf5945d3f4af91e4e4e8f23e2) + author: Simon Podlipsky + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552145147, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 93ccd7351d2acceae23e4abffd1d4196d0d849d3 + parent_ids: + - ed1746e800637eebf8529530ab208fcafb7214ac + message: | + Array in variables in place of object shouldn't cause fatal error (fixes #467) + author: Vladimir Razuvaev + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1560947343, offset: 420, sign: 43 } }' + target: + id: 27340a18182999a423bc48c17d84fc9229ab24e0 + parent_ids: + - 9fcf29302f2c5a1b762d1bf91026a06304f46f43 + message: | + Array in variables in place of object shouldn't cause fatal error (fixes #467) + + (cherry picked from commit 93ccd7351d2acceae23e4abffd1d4196d0d849d3) + author: Vladimir Razuvaev + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1565163656, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: dcb3a7e88d49a4c876badda045b7b7ab272b9c13 + parent_ids: + - b5f0a8b65dfde9113e9411c2a8860d39ec4afd10 + message: |+ + Support non-JSON ServerRequestInterface (#1004) + + author: Dorian Savina + committer: GitHub + time: 'Time { raw: git_time { time: 1636119127, offset: 60, sign: 43 } }' + target: + id: adff4080c6b7fea71dc148d5a1da0257b79f2b2a + parent_ids: + - 31165d49be8673d9464e0bfc8b39369cbbf9c2d8 + message: | + Support non-JSON ServerRequestInterface (#1004) + + (cherry picked from commit dcb3a7e88d49a4c876badda045b7b7ab272b9c13) + author: Dorian Savina + committer: Benedikt Franke + time: 'Time { raw: git_time { time: 1637228638, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 2abac8cdd9cdbc892dd09d05eae15b1431eb3b7c + parent_ids: + - cf5e6fdbb2e146290890b0a7d552048811a2dee7 + 
message: |+ + Fix `extend()` to preserve `repeatable` (#931) + + author: Vašek Henzl + committer: GitHub + time: 'Time { raw: git_time { time: 1630948102, offset: 120, sign: 43 } }' + target: + id: 7a8ae9e0d304853a196302c9f8708ca5f066297f + parent_ids: + - 814e286a68dad8ea04f3b3cc84c24a79906bff41 + message: | + Fix `extend()` to preserve `repeatable` (#931) + + (cherry picked from commit 2abac8cdd9cdbc892dd09d05eae15b1431eb3b7c) + author: Vašek Henzl + committer: Benedikt Franke + time: 'Time { raw: git_time { time: 1643022678, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: dfefdf24cb482e0551147aeae5eda4fe1364105e + parent_ids: + - 20e98aefa4784b16164e758198062d06f4f7de98 + message: | + Test against PHP 7.4 + author: Simon Podlipsky + committer: Simon Podlipsky + time: 'Time { raw: git_time { time: 1549870625, offset: 60, sign: 43 } }' + target: + id: 255ecbd709315bb7aff857264f344b7469b31a68 + parent_ids: + - 8c66fa8d1ee381c19012e0e456bf4e738bb36027 + message: | + Test against PHP 7.4 + + (cherry picked from commit dfefdf24cb482e0551147aeae5eda4fe1364105e) + author: Simon Podlipsky + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552145274, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: adff917e83671bc2c1f849fe28aa38c247c6879f + parent_ids: + - 4754e67b4bb7a17de4f79f2b4f8444777bb082a8 + message: | + Make IntType constants public + author: Benedikt Franke + committer: Benedikt Franke + time: 'Time { raw: git_time { time: 1635429594, offset: 120, sign: 43 } }' + target: + id: cfb0b56d5dac561e934eb5a33ec7b53d54b35362 + parent_ids: + - 8df24df72f719cd34a2ccd0c6dde1c5e4695cc81 + message: | + Make IntType constants public + + (cherry picked from commit adff917e83671bc2c1f849fe28aa38c247c6879f) + author: Benedikt Franke + committer: Benedikt Franke + time: 'Time { raw: git_time { time: 1635429608, offset: 120, sign: 43 
} }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: f95d1e81eabfae0b83bba19f2b260b5d04640d4f + parent_ids: + - b2cea8b538b16f6df9fa047a4451cdf05345c9a2 + message: | + Add PHP 7.3 to Travis + author: Simon Podlipsky + committer: Simon Podlipsky + time: 'Time { raw: git_time { time: 1543486251, offset: 60, sign: 43 } }' + target: + id: b005803bf62055be04e982813fb8b4f57dd6b0ba + parent_ids: + - 21dc3fe664fe56f803ad2be4e1a284ad26910bb3 + message: | + Add PHP 7.3 to Travis + + (cherry picked from commit f95d1e81eabfae0b83bba19f2b260b5d04640d4f) + author: Simon Podlipsky + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552144526, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: bc637414e5158f5b2b8144be19f4baf8b4c7abc2 + parent_ids: + - 6544197ef898343b409a82c7f6008aa4a109d95b + message: | + Support PHP 8 + author: Simon Podlipsky + committer: Simon Podlipsky + time: 'Time { raw: git_time { time: 1549634891, offset: 60, sign: 43 } }' + target: + id: 0cbc1c9c0702668a759110d7aa12471819bbf456 + parent_ids: + - c628fa39a1b7b61e8b30c8b502d22a30769a74ed + message: | + Support PHP 8 + + (cherry picked from commit bc637414e5158f5b2b8144be19f4baf8b4c7abc2) + author: Simon Podlipsky + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552145237, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 376e9275054f9b63a27b8fa2215553f050898602 + parent_ids: + - b2cea8b538b16f6df9fa047a4451cdf05345c9a2 + message: | + Document BC and fix types in ResolveInfo + author: Simon Podlipsky + committer: Simon Podlipsky + time: 'Time { raw: git_time { time: 1543323955, offset: 60, sign: 43 } }' + target: + id: 16d42dead3ae990bdd03d01c17dca6db3a8d23cc + parent_ids: + - bf471838ae6a423ef5f2f170595070d5ed579b21 + message: | + Document BC and fix types in ResolveInfo + + (cherry picked from commit 
376e9275054f9b63a27b8fa2215553f050898602) + author: Simon Podlipsky + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552144475, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 33e3c9c338b3c77edcdc009d546015e8d74ae049 + parent_ids: + - b2cea8b538b16f6df9fa047a4451cdf05345c9a2 + message: Error handling improvements + author: Yury + committer: GitHub + time: 'Time { raw: git_time { time: 1543415721, offset: 180, sign: 43 } }' + target: + id: 21dc3fe664fe56f803ad2be4e1a284ad26910bb3 + parent_ids: + - 16d42dead3ae990bdd03d01c17dca6db3a8d23cc + message: | + Error handling improvements + + (cherry picked from commit 33e3c9c338b3c77edcdc009d546015e8d74ae049) + author: Yury + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552144511, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 9609d2ac84623291c76c4c72b61235799ad07f5e + parent_ids: + - d5fddfd504d438d92a32be28546a615cc49f3c6f + message: | + TASK: Code style + author: Torsten Blindert + committer: Torsten Blindert + time: 'Time { raw: git_time { time: 1544034187, offset: 60, sign: 43 } }' + target: + id: 1d8f526d91e7ed0929867e14f81b6e6a933e6958 + parent_ids: + - fda73f321221e54446238a4499aa477f392aff64 + message: | + TASK: Code style + + (cherry picked from commit 9609d2ac84623291c76c4c72b61235799ad07f5e) + author: Torsten Blindert + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552144734, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: d20a6a9d56fd02639e6cf0750b1b441f98e639f8 + parent_ids: + - 77448ba6239f7a8bd413c2dfc4ef6d2c3d040009 + message: | + Standardise whitespace + author: Petr Skoda + committer: Petr Skoda + time: 'Time { raw: git_time { time: 1549828416, offset: 780, sign: 43 } }' + target: + id: 8c66fa8d1ee381c19012e0e456bf4e738bb36027 + parent_ids: + - 
7405ddc85206b647e9ca0ea35ec543fb78d0e75b + message: | + Standardise whitespace + + (cherry picked from commit d20a6a9d56fd02639e6cf0750b1b441f98e639f8) + author: Petr Skoda + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552145260, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 7d59811c4ff3134d1e4fd79652f8601e9bd37d36 + parent_ids: + - 31bbc416a545c304212564e0cf46db6a9280be3e + message: | + Fix incorrect array type of contextValue in PHPDocs + author: Petr Skoda + committer: Petr Skoda + time: 'Time { raw: git_time { time: 1549484297, offset: 780, sign: 43 } }' + target: + id: c628fa39a1b7b61e8b30c8b502d22a30769a74ed + parent_ids: + - a0f214a9f98762a706f5126d91204aa90318f356 + message: | + Fix incorrect array type of contextValue in PHPDocs + + (cherry picked from commit 7d59811c4ff3134d1e4fd79652f8601e9bd37d36) + author: Petr Skoda + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552145223, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 244ec66ecc0677d25ad2d958baed7de50e551043 + parent_ids: + - f644c1a837890e4ca854d16371c0730bc32b9314 + message: | + Allow extensions to be provided in GET request. + author: chriszarate + committer: chriszarate + time: 'Time { raw: git_time { time: 1544587137, offset: -300, sign: 45 } }' + target: + id: 08992de960e831329303982eb8cb5611ee024a35 + parent_ids: + - 1d8f526d91e7ed0929867e14f81b6e6a933e6958 + message: | + Allow extensions to be provided in GET request. 
+ + (cherry picked from commit 244ec66ecc0677d25ad2d958baed7de50e551043) + author: chriszarate + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552144814, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 51ad17c3af192e34bc2e368a3c645a5a668f3e15 + parent_ids: + - 2bac02f3291fde9186eb192ee4520c6b0c1fcab9 + message: |+ + Fix compatiblity of methods with native return type in PHP 8.1 (#1011) + + author: Mykola Silin + committer: GitHub + time: 'Time { raw: git_time { time: 1636971785, offset: 60, sign: 43 } }' + target: + id: 527bdee2747dca166db9a51a952eba450bbd0d96 + parent_ids: + - d191b0c34365c025c6cd953b5a63aa62946c7060 + message: | + Fix compatibility of methods with native return type in PHP 8.1 (#1011) + + (cherry picked from commit 51ad17c3af192e34bc2e368a3c645a5a668f3e15) + author: Mykola Silin + committer: Benedikt Franke + time: 'Time { raw: git_time { time: 1636971938, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: b1ab1820b684ac659dbdfd04ae2b0f502dbc0696 + parent_ids: + - 1dc291b073cd0fe17da5e943664a59ca2acd49ec + message: |- + Update docs intro verbiage + + Just throwing my 2 cents: I don't think it's fair to say that "it's intended to be a replacement", given that multiple API paradigms can coexist in the same system and each of them have their trade-offs. + author: Stefano Torresi + committer: GitHub + time: 'Time { raw: git_time { time: 1549280311, offset: 60, sign: 43 } }' + target: + id: a0f214a9f98762a706f5126d91204aa90318f356 + parent_ids: + - 153f6f862e7f31ea11026eaff3907869319a9ae1 + message: | + Update docs intro verbiage + + Just throwing my 2 cents: I don't think it's fair to say that "it's intended to be a replacement", given that multiple API paradigms can coexist in the same system and each of them have their trade-offs. 
+ + (cherry picked from commit b1ab1820b684ac659dbdfd04ae2b0f502dbc0696) + author: Stefano Torresi + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552145204, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5fa2dffa1b3d0b42d535f18db1f88a821d7c7811 + parent_ids: + - acdbd501fdeb8d27cde7dec451ee0b3b0a9af6b2 + message: | + Code style fix + author: Vladimir Razuvaev + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1566642418, offset: 420, sign: 43 } }' + target: + id: bc44bcd7aed403d4f61f40494523618601968b54 + parent_ids: + - 3141f0cbf4e39763fbbf7435fb681d3f9fc7054a + message: | + Code style fix + + (cherry picked from commit 5fa2dffa1b3d0b42d535f18db1f88a821d7c7811) + author: Vladimir Razuvaev + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1566729040, offset: 420, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: d5fddfd504d438d92a32be28546a615cc49f3c6f + parent_ids: + - 62b003643779246403c0fd71d7e7486d94c50835 + message: | + TASK: Added test + author: Torsten Blindert + committer: Torsten Blindert + time: 'Time { raw: git_time { time: 1544033612, offset: 60, sign: 43 } }' + target: + id: fda73f321221e54446238a4499aa477f392aff64 + parent_ids: + - 22cee4974765e91218f786fe07eb0a520e355dee + message: | + TASK: Added test + + (cherry picked from commit d5fddfd504d438d92a32be28546a615cc49f3c6f) + author: Torsten Blindert + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552144725, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: fb52ab0d9ba12aeec0217f8e0714265edccf663c + parent_ids: + - 39bfaa40c113d5be8e8707e047d8b306b3c74d0f + message: | + Added ability to retrieve the query complexity once the query has been completed + author: Nicholas Clark + committer: Nicholas Clark + time: 'Time { raw: git_time { time: 
1565246721, offset: 600, sign: 43 } }' + target: + id: 97cbe4093074917fc487e95034e25ccca483fd52 + parent_ids: + - 4dfaf9a39c7bff8c95be5e5f7a5fa4994bdb8a4a + message: | + Added ability to retrieve the query complexity once the query has been completed + + (cherry picked from commit fb52ab0d9ba12aeec0217f8e0714265edccf663c) + author: Nicholas Clark + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1566547794, offset: 420, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: edb52685839c12db3b50e51df035ee6ab5c6a67f + parent_ids: + - 6544197ef898343b409a82c7f6008aa4a109d95b + message: | + Fix return annotation of resolveType() in InterfaceType + author: Jan Bukva + committer: Jan Bukva + time: 'Time { raw: git_time { time: 1548711980, offset: 60, sign: 43 } }' + target: + id: 153f6f862e7f31ea11026eaff3907869319a9ae1 + parent_ids: + - 610979555d590e0d5379cb92eeef7f5314226512 + message: | + Fix return annotation of resolveType() in InterfaceType + + (cherry picked from commit edb52685839c12db3b50e51df035ee6ab5c6a67f) + author: Jan Bukva + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552145189, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 62b003643779246403c0fd71d7e7486d94c50835 + parent_ids: + - e22b4003737c272a43ff6867e9d1cac5a260ae75 + message: | + BUGFIX: expect ->getType() to throw + author: Torsten Blindert + committer: Torsten Blindert + time: 'Time { raw: git_time { time: 1544032514, offset: 60, sign: 43 } }' + target: + id: 22cee4974765e91218f786fe07eb0a520e355dee + parent_ids: + - 59c128c54a22060b5899f91ed07dc841afb5d6da + message: | + BUGFIX: expect ->getType() to throw + + (cherry picked from commit 62b003643779246403c0fd71d7e7486d94c50835) + author: Torsten Blindert + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552144715, offset: 420, sign: 43 } }' + is_trivial: true + - 
search_method: MessageScan + cherry_and_target: + cherry: + id: 07c9ad67e29461f820a46daaa965e5ff21e99b88 + parent_ids: + - 9e787e4f916a765c67dac94743b52a619278bab8 + message: |+ + Allow \stdClass for input types + + author: Adam + committer: Adam + time: 'Time { raw: git_time { time: 1565698595, offset: 600, sign: 43 } }' + target: + id: 4dfaf9a39c7bff8c95be5e5f7a5fa4994bdb8a4a + parent_ids: + - 123af49e46d26b0cd2e7a71a387253aa01ea9a6b + message: | + Allow \stdClass for input types + + (cherry picked from commit 07c9ad67e29461f820a46daaa965e5ff21e99b88) + author: Adam + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1566547635, offset: 420, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: ead1b864bc82e4ede561ede0d3e9085d4fac3ca0 + parent_ids: + - b2cea8b538b16f6df9fa047a4451cdf05345c9a2 + message: | + Added all possible scalar types to Node constructor + author: Simon Podlipsky + committer: Simon Podlipsky + time: 'Time { raw: git_time { time: 1543317721, offset: 60, sign: 43 } }' + target: + id: bf471838ae6a423ef5f2f170595070d5ed579b21 + parent_ids: + - 012082d1d9bb767b2296ab3f9efb44fdae1ff796 + message: | + Added all possible scalar types to Node constructor + + (cherry picked from commit ead1b864bc82e4ede561ede0d3e9085d4fac3ca0) + author: Simon Podlipsky + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552144464, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: f644c1a837890e4ca854d16371c0730bc32b9314 + parent_ids: + - c33e41f2bf44b20b3d79b00d335c3a5ea5975116 + message: | + Add extensions to OperationParams instance. 
+ author: chriszarate + committer: chriszarate + time: 'Time { raw: git_time { time: 1543781409, offset: -300, sign: 45 } }' + target: + id: a116127436bc533d3d1cf33c567f4fdc06eb31c7 + parent_ids: + - 9ada6069193c7f2ffd63435d283b42590ac2333a + message: | + Add extensions to OperationParams instance. + + (cherry picked from commit f644c1a837890e4ca854d16371c0730bc32b9314) + author: chriszarate + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552144605, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 9a0dbff26b72cca1528ba4fac536fafc7ba3f624 + parent_ids: + - e22b4003737c272a43ff6867e9d1cac5a260ae75 + message: | + Add NullableType interface + author: Erik Gaal + committer: Erik Gaal + time: 'Time { raw: git_time { time: 1544006607, offset: 60, sign: 43 } }' + target: + id: 59c128c54a22060b5899f91ed07dc841afb5d6da + parent_ids: + - a116127436bc533d3d1cf33c567f4fdc06eb31c7 + message: | + Add NullableType interface + + (cherry picked from commit 9a0dbff26b72cca1528ba4fac536fafc7ba3f624) + author: Erik Gaal + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552144642, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 8b2b4d5443c538500375ae23c5a4b5ae5038c348 + parent_ids: + - cf6eddfd0f57657483e87c71dc07669e4c014f13 + message: |+ + Implement Utils::getOperationAST (#755) + + author: Alexander Varwijk + committer: GitHub + time: 'Time { raw: git_time { time: 1622542835, offset: 120, sign: 43 } }' + target: + id: ea24c733c93275ba95f4dceddd0a5c385ae77bc3 + parent_ids: + - 6b0ba9a4688b2ef74ea4d52b27477dd80fa67f31 + message: | + Implement Utils::getOperationAST (#755) + + + (cherry picked from commit 8b2b4d5443c538500375ae23c5a4b5ae5038c348) + author: Alexander Varwijk + committer: Benedikt Franke + time: 'Time { raw: git_time { time: 1622542955, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: 
MessageScan + cherry_and_target: + cherry: + id: 89a7a7e362535dffd020fcab1736912f68b3c7d8 + parent_ids: + - cca8fd658767313c63d151b8dec2193e1a9e3c19 + message: | + Don't call global fieldResolver on introspection fields (#481) + author: Vladimir Razuvaev + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1566642250, offset: 420, sign: 43 } }' + target: + id: 3141f0cbf4e39763fbbf7435fb681d3f9fc7054a + parent_ids: + - b7a03cd847ad930d884f354d4e4c7e538c289061 + message: | + Don't call global fieldResolver on introspection fields (#481) + + (cherry picked from commit 89a7a7e362535dffd020fcab1736912f68b3c7d8) + author: Vladimir Razuvaev + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1566729032, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 00490d289c0a858842e08cead45635016f147921 + parent_ids: + - 2295b96a493b9ce6528abd8e1a644e1164bb9fbe + message: | + Use key-value foreach + author: Jan Bukva + committer: Jan Bukva + time: 'Time { raw: git_time { time: 1545943996, offset: 60, sign: 43 } }' + target: + id: f52dfcfaef1ab566fb30ba63c6c5cc9e2e1564ca + parent_ids: + - 8b8ea0d4a387aca74489d8093d1ee5f2062e4508 + message: | + Use key-value foreach + + (cherry picked from commit 00490d289c0a858842e08cead45635016f147921) + author: Jan Bukva + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552145086, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: e01b6e0a93fa2dd8a2cdedd8a26ddbd19bad8bdb + parent_ids: + - 76c229b8ad22f4a7d83d755296b989504ea95b9c + message: "Scalar type ResolverInfo::getFieldSelection support (#529)\n\n* Scalar type ResolveInfo::getFieldSelection support\r\n\r\nCo-Authored-By: Šimon Podlipský \r\n" + author: Steve Lacey + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1565107996, offset: 420, sign: 43 } }' + target: + id: 
991187f3b8697571bfcec3ec4a8a74547f77049d + parent_ids: + - 27340a18182999a423bc48c17d84fc9229ab24e0 + message: | + Scalar type ResolverInfo::getFieldSelection support (#529) + + Co-Authored-By: Šimon Podlipský + + (cherry picked from commit e01b6e0a93fa2dd8a2cdedd8a26ddbd19bad8bdb) + author: Steve Lacey + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1565164114, offset: 420, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 2295b96a493b9ce6528abd8e1a644e1164bb9fbe + parent_ids: + - 31d89acfae5a27b772cd0ebec992d0e564796cf5 + message: | + Fix CoroutineExecutor::resultToArray for associative arrays + author: Jan Bukva + committer: Jan Bukva + time: 'Time { raw: git_time { time: 1545925120, offset: 60, sign: 43 } }' + target: + id: 8b8ea0d4a387aca74489d8093d1ee5f2062e4508 + parent_ids: + - 42b20e76517167c056848069a96ed2dcad27a826 + message: | + Fix CoroutineExecutor::resultToArray for associative arrays + + (cherry picked from commit 2295b96a493b9ce6528abd8e1a644e1164bb9fbe) + author: Jan Bukva + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552145079, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: e87460880c49393fa8bb823409a0e65233f6d873 + parent_ids: + - 6f6a39468c822267f47c3c15d382afe8eec7cbeb + message: | + QueryPlan can now be used on interfaces not only objects. + + It's often the case to use interfaces in queries: + + interface Pet { name: String! } + + Query { + pets: [Pet] + } + author: Aurélien David + committer: Aurélien David + time: 'Time { raw: git_time { time: 1560333582, offset: 120, sign: 43 } }' + target: + id: 9fcf29302f2c5a1b762d1bf91026a06304f46f43 + parent_ids: + - cdcf5b44737ee743358f5ed25b19a39ad7daf777 + message: | + QueryPlan can now be used on interfaces not only objects. + + It's often the case to use interfaces in queries: + + interface Pet { name: String! 
} + + Query { + pets: [Pet] + } + + (cherry picked from commit e87460880c49393fa8bb823409a0e65233f6d873) + author: Aurélien David + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1565163456, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: c33e41f2bf44b20b3d79b00d335c3a5ea5975116 + parent_ids: + - 63b4e3f0a4527c491f51e37bbda99169a47690f1 + message: | + Fix linting issue and typos. + author: chriszarate + committer: chriszarate + time: 'Time { raw: git_time { time: 1543711431, offset: -300, sign: 45 } }' + target: + id: 9ada6069193c7f2ffd63435d283b42590ac2333a + parent_ids: + - 1e778d259e0dcdf50be629f972fcdd089b8432ee + message: | + Fix linting issue and typos. + + (cherry picked from commit c33e41f2bf44b20b3d79b00d335c3a5ea5975116) + author: chriszarate + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552144590, offset: 420, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 63b4e3f0a4527c491f51e37bbda99169a47690f1 + parent_ids: + - b2cea8b538b16f6df9fa047a4451cdf05345c9a2 + message: | + Apollo server/client compatibility. Look for queryid in extensions. + author: chriszarate + committer: chriszarate + time: 'Time { raw: git_time { time: 1543709219, offset: -300, sign: 45 } }' + target: + id: 1e778d259e0dcdf50be629f972fcdd089b8432ee + parent_ids: + - b005803bf62055be04e982813fb8b4f57dd6b0ba + message: | + Apollo server/client compatibility. Look for queryid in extensions. 
+ + (cherry picked from commit 63b4e3f0a4527c491f51e37bbda99169a47690f1) + author: chriszarate + committer: Vladimir Razuvaev + time: 'Time { raw: git_time { time: 1552144555, offset: 420, sign: 43 } }' + is_trivial: true diff --git a/dataset/mined-cherries-verification/Python_RasaHQ_rasa.yaml b/dataset/mined-cherries-verification/Python_RasaHQ_rasa.yaml new file mode 100644 index 00000000..09078542 --- /dev/null +++ b/dataset/mined-cherries-verification/Python_RasaHQ_rasa.yaml @@ -0,0 +1,462 @@ +- total_number_of_committers: '461' + repo_name: RasaHQ/rasa + total_number_of_commits: '29523' + total_number_of_branches: '568' + total_number_of_results: '18' + language: Python +- - search_method: MessageScan + cherry_and_target: + cherry: + id: db0f81d6d009f17a715f56972da45537d25d35ee + parent_ids: + - 67eca350eb9e25967806711e368591e989be4b12 + message: "Run custom form validation on form activation (#12467)\n\n* run custom form validation on form activation\r\n\r\n* add changelog entry\r\n\r\n* update tests, rename var" + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1686151836, offset: 60, sign: 43 } }' + target: + id: 2f8a9a403de496814e19f0f557cdfd23263937b8 + parent_ids: + - 6bfc268d88651b3ee8c37a0a6ade921a25a73895 + message: | + Run custom form validation on form activation (#12467) + + * run custom form validation on form activation + + * add changelog entry + + * update tests, rename var + + (cherry picked from commit db0f81d6d009f17a715f56972da45537d25d35ee) + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: Anca Lita <27920906+ancalita@users.noreply.github.com> + time: 'Time { raw: git_time { time: 1686152831, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: e8957525b090c69f009275f7c666771a1f0fd165 + parent_ids: + - ad17246c7e479d64f5d3b7605afd81fd05fdc8b5 + message: "Fix retrieving a 
conversation tracker from HTTP API endpoint dependent on query params (#12139)\n\n* implement fix\r\n\r\n* update fix\r\n\r\n* add unit tests for tracker store update\r\n\r\n* add tests for processor and server updates\r\n\r\n* implement fix for story endpoint bug, add tests, implement retrieve_full_tracker for InMemoryTrackerStore\r\n\r\n* fix failing tests\r\n\r\n* add retrieve_full_tracker implementation to redis and dynamodb + tests for InMemoryTS\r\n\r\n* simplify tracker store new tests, add tests for mongodb\r\n\r\n* fix redis tracker saving mechanism, add tests\r\n\r\n* correct dynamodb update to retrieve method\r\n\r\n* address review feedback\r\n\r\n* add integration testing for postgres, redis, mongodb\r\n\r\n* add changelog entry" + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1680011720, offset: 60, sign: 43 } }' + target: + id: 0057bf4e3de8a9670623b21b271c9108da791bc9 + parent_ids: + - 427ebad41be54ba2d4ed5920fafa665c5c599ff9 + message: | + Fix retrieving a conversation tracker from HTTP API endpoint dependent on query params (#12139) + + * implement fix + + * update fix + + * add unit tests for tracker store update + + * add tests for processor and server updates + + * implement fix for story endpoint bug, add tests, implement retrieve_full_tracker for InMemoryTrackerStore + + * fix failing tests + + * add retrieve_full_tracker implementation to redis and dynamodb + tests for InMemoryTS + + * simplify tracker store new tests, add tests for mongodb + + * fix redis tracker saving mechanism, add tests + + * correct dynamodb update to retrieve method + + * address review feedback + + * add integration testing for postgres, redis, mongodb + + * add changelog entry + + (cherry picked from commit e8957525b090c69f009275f7c666771a1f0fd165) + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: Anca Lita <27920906+ancalita@users.noreply.github.com> + time: 'Time 
{ raw: git_time { time: 1680016445, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 59e1aa28c74703cdeb8d62393a9e7c5e4768d6aa + parent_ids: + - c46abfc7b77cd288b10037fbc1bb63fc7d129645 + message: | + make sure explicit code is equivalent to bool + author: Vova Vv + committer: Vova Vv + time: 'Time { raw: git_time { time: 1606135681, offset: 60, sign: 43 } }' + target: + id: b6c646d72fc6d194936c4d0c1851d77d4d40a50c + parent_ids: + - 3d4e89d7180d85d0745d1e9c56c03000952cc5be + message: | + make sure explicit code is equivalent to bool + + (cherry picked from commit 59e1aa28c74703cdeb8d62393a9e7c5e4768d6aa) + author: Vova Vv + committer: samsucik + time: 'Time { raw: git_time { time: 1606487621, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: b6268c3a2610f87e23746b8e8c2056ae8a8804c5 + parent_ids: + - 319811f5fec69fd754d0ae25b3ac8a2bfe856867 + message: "Fix RabbitMQ url parsing (#12325)\n\n* fix url parsing of path and query params, add integration test\r\n\r\n* add changelog entry, fix regression bug to allow credentials in URL\r\n\r\n* fix issue with pruning conflict when tests were run in parallel\r\n\r\n* apply review suggestion\r\n\r\n* make method private" + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1682602462, offset: 60, sign: 43 } }' + target: + id: 4f4df442b23411a7c692e8a291d22b616603ad76 + parent_ids: + - e836639c0a1bfeb6ebaf235d79d5ea2ca8420a0d + message: | + Fix RabbitMQ url parsing (#12325) + + * fix url parsing of path and query params, add integration test + + * add changelog entry, fix regression bug to allow credentials in URL + + * fix issue with pruning conflict when tests were run in parallel + + * apply review suggestion + + * make method private + + (cherry picked from commit b6268c3a2610f87e23746b8e8c2056ae8a8804c5) + author: Anca Lita 
<27920906+ancalita@users.noreply.github.com> + committer: Anca Lita <27920906+ancalita@users.noreply.github.com> + time: 'Time { raw: git_time { time: 1682665384, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 1bf16cd76d02592f3c3d2472feecc3bc7e780b7b + parent_ids: + - 02ad4f8717f5db79187f47d3d2847b9f50479f2a + message: "Fix bug in from_intent slot mapping without intent specified (#12096)\n\n* implement fix + some small logging and docstring improvement\r\n\r\n* add unit test and changelog entry" + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1678104018, offset: 0, sign: 43 } }' + target: + id: 628af32108134fe8f855145975693eaffa459c65 + parent_ids: + - 57ab5d61f22419e417fb72f477b173f632731aa3 + message: | + Fix bug in from_intent slot mapping without intent specified (#12096) + + * implement fix + some small logging and docstring improvement + + * add unit test and changelog entry + + (cherry picked from commit 1bf16cd76d02592f3c3d2472feecc3bc7e780b7b) + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: Anca Lita <27920906+ancalita@users.noreply.github.com> + time: 'Time { raw: git_time { time: 1678106903, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: b8b4fa647b438f275b6ed914795836413bf6a070 + parent_ids: + - cc86f8d75083b3412121901e49599c143ef4d931 + message: | + Write tests for #7235 (conversation start rules not working with initial slot values). + author: samsucik + committer: samsucik + time: 'Time { raw: git_time { time: 1605606701, offset: 0, sign: 43 } }' + target: + id: 8caef99b05156bbf25ad0bb0098da501d78ea79f + parent_ids: + - 8c58fc5a44c2f7a0ae2f59a63a9c09f3b526394b + message: | + Write tests for #7234 (conversation start rules not working with initial slot values). 
+ + (cherry picked from commit b8b4fa647b438f275b6ed914795836413bf6a070) + author: samsucik + committer: samsucik + time: 'Time { raw: git_time { time: 1606487288, offset: 0, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 566040ed6f9baea27ca09e6813f299c6108621ae + parent_ids: + - 1ef9aa3b59a00cb976d53401b41f6d760e0e804d + message: | + remove blank lines after docstrings + author: Vova Vv + committer: Vova Vv + time: 'Time { raw: git_time { time: 1606148732, offset: 60, sign: 43 } }' + target: + id: 46f74a79e9e2eba4ca7060d473521d2657095ccd + parent_ids: + - e84d09a387807dd24beb9481aa6f7f464489b77f + message: | + remove blank lines after docstrings + + (cherry picked from commit 566040ed6f9baea27ca09e6813f299c6108621ae) + author: Vova Vv + committer: samsucik + time: 'Time { raw: git_time { time: 1606487757, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: e8957525b090c69f009275f7c666771a1f0fd165 + parent_ids: + - ad17246c7e479d64f5d3b7605afd81fd05fdc8b5 + message: "Fix retrieving a conversation tracker from HTTP API endpoint dependent on query params (#12139)\n\n* implement fix\r\n\r\n* update fix\r\n\r\n* add unit tests for tracker store update\r\n\r\n* add tests for processor and server updates\r\n\r\n* implement fix for story endpoint bug, add tests, implement retrieve_full_tracker for InMemoryTrackerStore\r\n\r\n* fix failing tests\r\n\r\n* add retrieve_full_tracker implementation to redis and dynamodb + tests for InMemoryTS\r\n\r\n* simplify tracker store new tests, add tests for mongodb\r\n\r\n* fix redis tracker saving mechanism, add tests\r\n\r\n* correct dynamodb update to retrieve method\r\n\r\n* address review feedback\r\n\r\n* add integration testing for postgres, redis, mongodb\r\n\r\n* add changelog entry" + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 
1680011720, offset: 60, sign: 43 } }' + target: + id: 874dc2de1974352db5480adbd2f10432ae541a61 + parent_ids: + - 141ed9785dd0c1a9fa9172c4c8459c57e6c4e0b9 + message: "Fix retrieving a conversation tracker from HTTP API endpoint dependent on query params (3.3.x) (#12192)\n\n* Fix retrieving a conversation tracker from HTTP API endpoint dependent on query params (#12139)\r\n\r\n* implement fix\r\n\r\n* update fix\r\n\r\n* add unit tests for tracker store update\r\n\r\n* add tests for processor and server updates\r\n\r\n* implement fix for story endpoint bug, add tests, implement retrieve_full_tracker for InMemoryTrackerStore\r\n\r\n* fix failing tests\r\n\r\n* add retrieve_full_tracker implementation to redis and dynamodb + tests for InMemoryTS\r\n\r\n* simplify tracker store new tests, add tests for mongodb\r\n\r\n* fix redis tracker saving mechanism, add tests\r\n\r\n* correct dynamodb update to retrieve method\r\n\r\n* address review feedback\r\n\r\n* add integration testing for postgres, redis, mongodb\r\n\r\n* add changelog entry\r\n\r\n(cherry picked from commit e8957525b090c69f009275f7c666771a1f0fd165)\r\n\r\n* rename changelog entry ID, remove code added by mistake from 3.4.x" + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1680017473, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 0c27b802624cb1ab828a3c428e55b9b49c8ec559 + parent_ids: + - 89eb526110a3e5eb824ed0cf4477617d71c792eb + message: | + Fix the 'incomplete rule' error being thrown whenever initial_value is set for some slot + reformat code. 
+ author: samsucik + committer: samsucik + time: 'Time { raw: git_time { time: 1605708884, offset: 0, sign: 43 } }' + target: + id: 4340da2eae0fbd1df74b1100c9c41e7e6e96aee3 + parent_ids: + - 9e792ff2f068893364d9f314a1ea261d51ea13fd + message: | + Fix the 'incomplete rule' error being thrown whenever initial_value is set for some slot + reformat code. + + (cherry picked from commit 0c27b802624cb1ab828a3c428e55b9b49c8ec559) + author: samsucik + committer: samsucik + time: 'Time { raw: git_time { time: 1606487503, offset: 0, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 1bf16cd76d02592f3c3d2472feecc3bc7e780b7b + parent_ids: + - 02ad4f8717f5db79187f47d3d2847b9f50479f2a + message: "Fix bug in from_intent slot mapping without intent specified (#12096)\n\n* implement fix + some small logging and docstring improvement\r\n\r\n* add unit test and changelog entry" + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1678104018, offset: 0, sign: 43 } }' + target: + id: f78c19dda12db23579941dab62835a9b3e30ebca + parent_ids: + - ac2b11a0ea05c8a8aa98f516268d08c94adcf315 + message: | + Fix bug in from_intent slot mapping without intent specified (#12096) + + * implement fix + some small logging and docstring improvement + + * add unit test and changelog entry + + (cherry picked from commit 1bf16cd76d02592f3c3d2472feecc3bc7e780b7b) + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: Anca Lita <27920906+ancalita@users.noreply.github.com> + time: 'Time { raw: git_time { time: 1678106422, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 613c15be9df37b86b2d7a1b239966f84e0b33884 + parent_ids: + - 0c27b802624cb1ab828a3c428e55b9b49c8ec559 + message: | + Add a test to check that slots set before a rule starts don't trigger incomplete rule errors. 
+ author: samsucik + committer: samsucik + time: 'Time { raw: git_time { time: 1605714076, offset: 0, sign: 43 } }' + target: + id: 30eb959de0b701d3e7fac7f92b6a7d2440d0cda6 + parent_ids: + - 4340da2eae0fbd1df74b1100c9c41e7e6e96aee3 + message: | + Add a test to check that slots set before a rule starts don't trigger incomplete rule errors. + + (cherry picked from commit 613c15be9df37b86b2d7a1b239966f84e0b33884) + author: samsucik + committer: samsucik + time: 'Time { raw: git_time { time: 1606487530, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: db0f81d6d009f17a715f56972da45537d25d35ee + parent_ids: + - 67eca350eb9e25967806711e368591e989be4b12 + message: "Run custom form validation on form activation (#12467)\n\n* run custom form validation on form activation\r\n\r\n* add changelog entry\r\n\r\n* update tests, rename var" + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1686151836, offset: 60, sign: 43 } }' + target: + id: e583704815a284f7f95a180174970fe96358867b + parent_ids: + - 1ed293ac6cd7130359d3b86b8f37db88ed8b081a + message: | + Run custom form validation on form activation (#12467) + + * run custom form validation on form activation + + * add changelog entry + + * update tests, rename var + + (cherry picked from commit db0f81d6d009f17a715f56972da45537d25d35ee) + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: Anca Lita <27920906+ancalita@users.noreply.github.com> + time: 'Time { raw: git_time { time: 1686152103, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 0a3faa16480548467764052f4ae56615a4558775 + parent_ids: + - 64a997eabd7ede0535ca0291718159c9a265dca0 + message: | + emulate loop rejection only of it is active + author: Vova Vv + committer: Vova Vv + time: 'Time { raw: git_time { time: 1606318866, offset: 60, sign: 43 } }' + 
target: + id: 5f6ac466f5bfc342cb5931265cef6176e6277aa2 + parent_ids: + - 46f74a79e9e2eba4ca7060d473521d2657095ccd + message: | + emulate loop rejection only of it is active + + (cherry picked from commit 0a3faa16480548467764052f4ae56615a4558775) + author: Vova Vv + committer: samsucik + time: 'Time { raw: git_time { time: 1606487780, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: c46abfc7b77cd288b10037fbc1bb63fc7d129645 + parent_ids: + - 6f621bf93a331bc72c5a515478f00a8ebc3d7fec + message: | + Include review comments: make code & tests more readable + separate 2 tests. + author: samsucik + committer: samsucik + time: 'Time { raw: git_time { time: 1606131462, offset: 0, sign: 43 } }' + target: + id: 3d4e89d7180d85d0745d1e9c56c03000952cc5be + parent_ids: + - 30eb959de0b701d3e7fac7f92b6a7d2440d0cda6 + message: | + Include review comments: make code & tests more readable + separate 2 tests. + + (cherry picked from commit c46abfc7b77cd288b10037fbc1bb63fc7d129645) + author: samsucik + committer: samsucik + time: 'Time { raw: git_time { time: 1606487602, offset: 0, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 33b7560e378fc32216e7b4c52299de97805b06a0 + parent_ids: + - 67073749498e65821a04a48fabc88af75608091e + message: | + fix conversation starts with slots initial value + author: Vova Vv + committer: Vova Vv + time: 'Time { raw: git_time { time: 1605028357, offset: 60, sign: 43 } }' + target: + id: 8c58fc5a44c2f7a0ae2f59a63a9c09f3b526394b + parent_ids: + - 19ff9560302a2c99467266ae07fb2f72bc61b0f8 + message: | + fix conversation starts with slots initial value + + (cherry picked from commit 33b7560e378fc32216e7b4c52299de97805b06a0) + author: Vova Vv + committer: samsucik + time: 'Time { raw: git_time { time: 1606487206, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 
3a993be51354a66453131acee84d50597fddf642 + parent_ids: + - 59e1aa28c74703cdeb8d62393a9e7c5e4768d6aa + message: | + fix docstring + author: Vova Vv + committer: Vova Vv + time: 'Time { raw: git_time { time: 1606137585, offset: 60, sign: 43 } }' + target: + id: e84d09a387807dd24beb9481aa6f7f464489b77f + parent_ids: + - b6c646d72fc6d194936c4d0c1851d77d4d40a50c + message: | + fix docstring + + (cherry picked from commit 3a993be51354a66453131acee84d50597fddf642) + author: Vova Vv + committer: samsucik + time: 'Time { raw: git_time { time: 1606487739, offset: 0, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: b6268c3a2610f87e23746b8e8c2056ae8a8804c5 + parent_ids: + - 319811f5fec69fd754d0ae25b3ac8a2bfe856867 + message: "Fix RabbitMQ url parsing (#12325)\n\n* fix url parsing of path and query params, add integration test\r\n\r\n* add changelog entry, fix regression bug to allow credentials in URL\r\n\r\n* fix issue with pruning conflict when tests were run in parallel\r\n\r\n* apply review suggestion\r\n\r\n* make method private" + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1682602462, offset: 60, sign: 43 } }' + target: + id: 4ff8ae071172db78b0306b698d9885170f90c651 + parent_ids: + - 84ee8808c4f14f0a89cafbd26c9f14b6d8b79d73 + message: "Fix RabbitMQ url parsing (#12325) (#12337)\n\n* fix url parsing of path and query params, add integration test\r\n\r\n* add changelog entry, fix regression bug to allow credentials in URL\r\n\r\n* fix issue with pruning conflict when tests were run in parallel\r\n\r\n* apply review suggestion\r\n\r\n* make method private\r\n\r\n(cherry picked from commit b6268c3a2610f87e23746b8e8c2056ae8a8804c5)" + author: Anca Lita <27920906+ancalita@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1682678216, offset: 60, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + 
cherry_and_target: + cherry: + id: 89eb526110a3e5eb824ed0cf4477617d71c792eb + parent_ids: + - b8b4fa647b438f275b6ed914795836413bf6a070 + message: | + Include review comments. + author: samsucik + committer: samsucik + time: 'Time { raw: git_time { time: 1605621964, offset: 0, sign: 43 } }' + target: + id: 9e792ff2f068893364d9f314a1ea261d51ea13fd + parent_ids: + - 8caef99b05156bbf25ad0bb0098da501d78ea79f + message: | + Include review comments. + + (cherry picked from commit 89eb526110a3e5eb824ed0cf4477617d71c792eb) + author: samsucik + committer: samsucik + time: 'Time { raw: git_time { time: 1606487418, offset: 0, sign: 43 } }' + is_trivial: true diff --git a/dataset/mined-cherries-verification/Rust_neondatabase_neon.yaml b/dataset/mined-cherries-verification/Rust_neondatabase_neon.yaml new file mode 100644 index 00000000..4fb14631 --- /dev/null +++ b/dataset/mined-cherries-verification/Rust_neondatabase_neon.yaml @@ -0,0 +1,419 @@ +- total_number_of_branches: '932' + language: Rust + total_number_of_committers: '99' + repo_name: neondatabase/neon + total_number_of_results: '10' + total_number_of_commits: '11751' +- - search_method: MessageScan + cherry_and_target: + cherry: + id: 1b2663350cc36dfb1ef41131836c9273b7a0de22 + parent_ids: + - e8ae409bdc95ae41a8d4551063ab7c790078139f + message: | + basebackup import: pre-lock the layer map for the `flush()` calls + + The checkpointer loop isn't running anyway, so, there's no risk of + blocking it through the pre-lock. + author: Christian Schwarz + committer: Christian Schwarz + time: 'Time { raw: git_time { time: 1683903089, offset: 120, sign: 43 } }' + target: + id: a1680b185f1bcd3f9b893568f27af591d7454995 + parent_ids: + - a1ae23b827ad2de80fda216f732ee0e7aef18253 + message: | + basebackup import: pre-lock the layer map for the `flush()` calls + + The checkpointer loop isn't running anyway, so, there's no risk of + blocking it through the pre-lock. 
+ + (cherry picked from commit 1b2663350cc36dfb1ef41131836c9273b7a0de22) + author: Christian Schwarz + committer: Christian Schwarz + time: 'Time { raw: git_time { time: 1685120380, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 1b2663350cc36dfb1ef41131836c9273b7a0de22 + parent_ids: + - e8ae409bdc95ae41a8d4551063ab7c790078139f + message: | + basebackup import: pre-lock the layer map for the `flush()` calls + + The checkpointer loop isn't running anyway, so, there's no risk of + blocking it through the pre-lock. + author: Christian Schwarz + committer: Christian Schwarz + time: 'Time { raw: git_time { time: 1683903089, offset: 120, sign: 43 } }' + target: + id: 163ec3ccd184e28095d95c16601c06d822cb1331 + parent_ids: + - eba6c00de4ccb3b882909688f89496a03b764905 + message: | + basebackup import: pre-lock the layer map for the `flush()` calls + + The checkpointer loop isn't running anyway, so, there's no risk of + blocking it through the pre-lock. 
+ + (cherry picked from commit 1b2663350cc36dfb1ef41131836c9273b7a0de22) + author: Christian Schwarz + committer: Christian Schwarz + time: 'Time { raw: git_time { time: 1685122510, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 9256788273d5661ced0b2661a8751e2aa86fbb59 + parent_ids: + - 9e1449353da1a5def821a35fec800f39946e5925 + message: "limit imitate accesses concurrency, using same semaphore as compactions (#5578)\n\nBefore this PR, when we restarted pageserver, we'd see a rush of\r\n`$number_of_tenants` concurrent eviction tasks starting to do imitate\r\naccesses building up in the period of `[init_order allows activations,\r\n$random_access_delay + EvictionPolicyLayerAccessThreshold::period]`.\r\n\r\nWe simply cannot handle that degree of concurrent IO.\r\n\r\nWe already solved the problem for compactions by adding a semaphore.\r\nSo, this PR shares that semaphore for use by evictions.\r\n\r\nPart of https://github.com/neondatabase/neon/issues/5479\r\n\r\nWhich is again part of https://github.com/neondatabase/neon/issues/4743\r\n\r\nRisks / Changes In System Behavior\r\n==================================\r\n\r\n* we don't do evictions as timely as we currently do\r\n* we log a bunch of warnings about eviction taking too long\r\n* imitate accesses and compactions compete for the same concurrency\r\nlimit, so, they'll slow each other down through this shares semaphore\r\n\r\n\r\nChanges\r\n=======\r\n\r\n- Move the `CONCURRENT_COMPACTIONS` semaphore into `tasks.rs`\r\n- Rename it to `CONCURRENT_BACKGROUND_TASKS`\r\n- Use it also for the eviction imitate accesses:\r\n - Imitate acceses are both per-TIMELINE and per-TENANT\r\n - The per-TENANT is done through coalescing all the per-TIMELINE\r\n tasks via a tokio mutex `eviction_task_tenant_state`.\r\n - We acquire the CONCURRENT_BACKGROUND_TASKS permit early, at the\r\n beginning of the eviction iteration, much before the imitate\r\n acesses start 
(and they may not even start at all in the given\r\n iteration, as they happen only every $threshold).\r\n - Acquiring early is **sub-optimal** because when the per-timline\r\n tasks coalesce on the `eviction_task_tenant_state` mutex,\r\n they are already holding a CONCURRENT_BACKGROUND_TASKS permit.\r\n - It's also unfair because tenants with many timelines win\r\n the CONCURRENT_BACKGROUND_TASKS more often.\r\n - I don't think there's another way though, without refactoring\r\n more of the imitate accesses logic, e.g, making it all per-tenant.\r\n- Add metrics for queue depth behind the semaphore.\r\nI found these very useful to understand what work is queued in the\r\nsystem.\r\n\r\n - The metrics are tagged by the new `BackgroundLoopKind`.\r\n - On a green slate, I would have used `TaskKind`, but we already had\r\n pre-existing labels whose names didn't map exactly to task kind.\r\n Also the task kind is kind of a lower-level detail, so, I think\r\nit's fine to have a separate enum to identify background work kinds.\r\n\r\n\r\nFuture Work\r\n===========\r\n\r\nI guess I could move the eviction tasks from a ticker to \"sleep for\r\n$period\".\r\nThe benefit would be that the semaphore automatically \"smears\" the\r\neviction task scheduling over time, so, we only have the rush on restart\r\nbut a smeared-out rush afterward.\r\n\r\nThe downside is that this perverts the meaning of \"$period\", as we'd\r\nactually not run the eviction at a fixed period. It also means the the\r\n\"took to long\" warning & metric becomes meaningless.\r\n\r\nThen again, that is already the case for the compaction and gc tasks,\r\nwhich do sleep for `$period` instead of using a ticker." 
+ author: Christian Schwarz + committer: GitHub + time: 'Time { raw: git_time { time: 1697534988, offset: 120, sign: 43 } }' + target: + id: a6b2f4e54ef97333c35834788bf555fc10e3f4bb + parent_ids: + - 3666df6342b59efa235cba994aaaba19ec85e71b + message: | + limit imitate accesses concurrency, using same semaphore as compactions (#5578) + + Before this PR, when we restarted pageserver, we'd see a rush of + `$number_of_tenants` concurrent eviction tasks starting to do imitate + accesses building up in the period of `[init_order allows activations, + $random_access_delay + EvictionPolicyLayerAccessThreshold::period]`. + + We simply cannot handle that degree of concurrent IO. + + We already solved the problem for compactions by adding a semaphore. + So, this PR shares that semaphore for use by evictions. + + Part of https://github.com/neondatabase/neon/issues/5479 + + Which is again part of https://github.com/neondatabase/neon/issues/4743 + + Risks / Changes In System Behavior + ================================== + + * we don't do evictions as timely as we currently do + * we log a bunch of warnings about eviction taking too long + * imitate accesses and compactions compete for the same concurrency + limit, so, they'll slow each other down through this shares semaphore + + Changes + ======= + + - Move the `CONCURRENT_COMPACTIONS` semaphore into `tasks.rs` + - Rename it to `CONCURRENT_BACKGROUND_TASKS` + - Use it also for the eviction imitate accesses: + - Imitate acceses are both per-TIMELINE and per-TENANT + - The per-TENANT is done through coalescing all the per-TIMELINE + tasks via a tokio mutex `eviction_task_tenant_state`. + - We acquire the CONCURRENT_BACKGROUND_TASKS permit early, at the + beginning of the eviction iteration, much before the imitate + acesses start (and they may not even start at all in the given + iteration, as they happen only every $threshold). 
+ - Acquiring early is **sub-optimal** because when the per-timline + tasks coalesce on the `eviction_task_tenant_state` mutex, + they are already holding a CONCURRENT_BACKGROUND_TASKS permit. + - It's also unfair because tenants with many timelines win + the CONCURRENT_BACKGROUND_TASKS more often. + - I don't think there's another way though, without refactoring + more of the imitate accesses logic, e.g, making it all per-tenant. + - Add metrics for queue depth behind the semaphore. + I found these very useful to understand what work is queued in the + system. + + - The metrics are tagged by the new `BackgroundLoopKind`. + - On a green slate, I would have used `TaskKind`, but we already had + pre-existing labels whose names didn't map exactly to task kind. + Also the task kind is kind of a lower-level detail, so, I think + it's fine to have a separate enum to identify background work kinds. + + Future Work + =========== + + I guess I could move the eviction tasks from a ticker to "sleep for + $period". + The benefit would be that the semaphore automatically "smears" the + eviction task scheduling over time, so, we only have the rush on restart + but a smeared-out rush afterward. + + The downside is that this perverts the meaning of "$period", as we'd + actually not run the eviction at a fixed period. It also means the the + "took to long" warning & metric becomes meaningless. + + Then again, that is already the case for the compaction and gc tasks, + which do sleep for `$period` instead of using a ticker. 
+ + (cherry picked from commit 9256788273d5661ced0b2661a8751e2aa86fbb59) + author: Christian Schwarz + committer: Christian Schwarz + time: 'Time { raw: git_time { time: 1697537786, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 7de3799e66946556e5ea3d2ac77fe594a4964316 + parent_ids: + - a1680b185f1bcd3f9b893568f27af591d7454995 + message: | + (does not compile): make TimelineWriter `Send` by using tokio::sync Mutex internally + + fails with + + cs@devvm:[~/src/neon]: cargo check -p pageserver --features testing + Checking pageserver v0.1.0 (/home/cs/src/neon/pageserver) + error: future cannot be sent between threads safely + --> pageserver/src/tenant/timeline/walreceiver/connection_manager.rs:426:33 + | + 426 | let connection_handle = TaskHandle::spawn(move |events_sender, cancellation| { + | ^^^^^^^^^^^^^^^^^ future created by async block is not `Send` + | + = help: within `Instrumented<[async block@pageserver/src/tenant/timeline/walreceiver/connection_manager.rs:427:13: 439:14]>`, the trait `std::marker::Send` is not implemented for `std::sync::RwLockReadGuard<'_, LayerMap>` + note: future is not `Send` as this value is used across an await + --> pageserver/src/tenant/timeline.rs:872:46 + | + 850 | let layers = self.layers.read().unwrap(); + | ------ has type `std::sync::RwLockReadGuard<'_, LayerMap>` which is not `Send` + ... + 872 | self.freeze_inmem_layer(true).await; + | ^^^^^^ await occurs here, with `layers` maybe used later + ... + 881 | } + | - `layers` is later dropped here + note: required by a bound in `TaskHandle::::spawn` + --> pageserver/src/tenant/timeline/walreceiver.rs:196:52 + | + 192 | fn spawn( + | ----- required by a bound in this + ... 
+ 196 | Fut: Future> + Send, + | ^^^^ required by this bound in `TaskHandle::::spawn` + + error: could not compile `pageserver` due to previous error + author: Christian Schwarz + committer: Christian Schwarz + time: 'Time { raw: git_time { time: 1685120381, offset: 120, sign: 43 } }' + target: + id: 4d03689d9a7e9c74158e2f549731827755801869 + parent_ids: + - 47647433c34ae8d0b83f270d450d817cb79ce4a1 + message: | + (does not compile): make TimelineWriter `Send` by using tokio::sync Mutex internally + + fails with + + cs@devvm:[~/src/neon]: cargo check -p pageserver --features testing + Checking pageserver v0.1.0 (/home/cs/src/neon/pageserver) + error: future cannot be sent between threads safely + --> pageserver/src/tenant/timeline/walreceiver/connection_manager.rs:426:33 + | + 426 | let connection_handle = TaskHandle::spawn(move |events_sender, cancellation| { + | ^^^^^^^^^^^^^^^^^ future created by async block is not `Send` + | + = help: within `Instrumented<[async block@pageserver/src/tenant/timeline/walreceiver/connection_manager.rs:427:13: 439:14]>`, the trait `std::marker::Send` is not implemented for `std::sync::RwLockReadGuard<'_, LayerMap>` + note: future is not `Send` as this value is used across an await + --> pageserver/src/tenant/timeline.rs:872:46 + | + 850 | let layers = self.layers.read().unwrap(); + | ------ has type `std::sync::RwLockReadGuard<'_, LayerMap>` which is not `Send` + ... + 872 | self.freeze_inmem_layer(true).await; + | ^^^^^^ await occurs here, with `layers` maybe used later + ... + 881 | } + | - `layers` is later dropped here + note: required by a bound in `TaskHandle::::spawn` + --> pageserver/src/tenant/timeline/walreceiver.rs:196:52 + | + 192 | fn spawn( + | ----- required by a bound in this + ... 
+ 196 | Fut: Future> + Send, + | ^^^^ required by this bound in `TaskHandle::::spawn` + + error: could not compile `pageserver` due to previous error + (cherry picked from commit 7de3799e66946556e5ea3d2ac77fe594a4964316) + author: Christian Schwarz + committer: Christian Schwarz + time: 'Time { raw: git_time { time: 1685124193, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5559b169535b67850129173e694e5297a5a1a960 + parent_ids: + - 1aea65eb9da46030f2b9740f3694b821663e0a90 + message: "bump shlex (#6421)\n\n## Problem\r\n\r\nhttps://rustsec.org/advisories/RUSTSEC-2024-0006\r\n\r\n## Summary of changes\r\n\r\n`cargo update -p shlex`" + author: Conrad Ludgate + committer: GitHub + time: 'Time { raw: git_time { time: 1705914870, offset: 0, sign: 43 } }' + target: + id: 7234208b36fea736ee1204c5a69a34db8160825e + parent_ids: + - 1aea65eb9da46030f2b9740f3694b821663e0a90 + message: | + bump shlex (#6421) + + ## Problem + + https://rustsec.org/advisories/RUSTSEC-2024-0006 + + ## Summary of changes + + `cargo update -p shlex` + + (cherry picked from commit 5559b169535b67850129173e694e5297a5a1a960) + author: Conrad Ludgate + committer: John Spray + time: 'Time { raw: git_time { time: 1705916973, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 15c0df4de7ee71b43526d9850b47c9107efe303e + parent_ids: + - 3290fb09bfacc3fca21a9a8d6be8a0237f8671bf + message: "fixup(#6037): actually fix the issue, #6388 failed to do so (#6429)\n\nBefore this patch, the select! still retured immediately if `futs` was\r\nempty. Must have tested a stale build in my manual testing of #6388." 
+ author: Christian Schwarz + committer: GitHub + time: 'Time { raw: git_time { time: 1705933649, offset: 0, sign: 43 } }' + target: + id: f0b2d4b0535fae693af1a7a18b2bb03e7ce243fb + parent_ids: + - 299d9474c9e3abc67297d81ad301ca46aaf97535 + message: | + fixup(#6037): actually fix the issue, #6388 failed to do so (#6429) + + Before this patch, the select! still retured immediately if `futs` was + empty. Must have tested a stale build in my manual testing of #6388. + + (cherry picked from commit 15c0df4de7ee71b43526d9850b47c9107efe303e) + author: Christian Schwarz + committer: John Spray + time: 'Time { raw: git_time { time: 1705936992, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: a1ae23b827ad2de80fda216f732ee0e7aef18253 + parent_ids: + - 1ebe92bcf9b30495d9484eb357bc12ca6dfc981a + message: | + controversial but necessary: keep holding layer map lock inside compact_level0_phase1 + + Without this, the seocnd read().unwrap() becomes an await point, + which makes the future not-Send, but, we require it to be Send + because it runs inside task_mgr::spawn, which requires the Fut's to be Send + author: Christian Schwarz + committer: Christian Schwarz + time: 'Time { raw: git_time { time: 1685120370, offset: 120, sign: 43 } }' + target: + id: 24df184a4eb0c4f2c3a87bf5f2ec058ebfedf90e + parent_ids: + - 223aba4c095ed272822f969909e3043c45059e36 + message: | + controversial but necessary: keep holding layer map lock inside compact_level0_phase1 + + Without this, the seocnd read().unwrap() becomes an await point, + which makes the future not-Send, but, we require it to be Send + because it runs inside task_mgr::spawn, which requires the Fut's to be Send + + (cherry picked from commit a1ae23b827ad2de80fda216f732ee0e7aef18253) + author: Christian Schwarz + committer: Christian Schwarz + time: 'Time { raw: git_time { time: 1685124034, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + 
cherry_and_target: + cherry: + id: 1b2663350cc36dfb1ef41131836c9273b7a0de22 + parent_ids: + - e8ae409bdc95ae41a8d4551063ab7c790078139f + message: | + basebackup import: pre-lock the layer map for the `flush()` calls + + The checkpointer loop isn't running anyway, so, there's no risk of + blocking it through the pre-lock. + author: Christian Schwarz + committer: Christian Schwarz + time: 'Time { raw: git_time { time: 1683903089, offset: 120, sign: 43 } }' + target: + id: 47647433c34ae8d0b83f270d450d817cb79ce4a1 + parent_ids: + - 24df184a4eb0c4f2c3a87bf5f2ec058ebfedf90e + message: | + basebackup import: pre-lock the layer map for the `flush()` calls + + The checkpointer loop isn't running anyway, so, there's no risk of + blocking it through the pre-lock. + + (cherry picked from commit 1b2663350cc36dfb1ef41131836c9273b7a0de22) + (cherry picked from commit a1680b185f1bcd3f9b893568f27af591d7454995) + author: Christian Schwarz + committer: Christian Schwarz + time: 'Time { raw: git_time { time: 1685124040, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 93572a3e99f572f51529b3fbb3b11dafa88f7f5c + parent_ids: + - 15c0df4de7ee71b43526d9850b47c9107efe303e + message: "pageserver: mark tenant broken when cancelling attach (#6430)\n\n## Problem\r\n\r\nWhen a tenant is in Attaching state, and waiting for the\r\n`concurrent_tenant_warmup` semaphore, it also listens for the tenant\r\ncancellation token. When that token fires, Tenant::attach drops out.\r\nMeanwhile, Tenant::set_stopping waits forever for the tenant to exit\r\nAttaching state.\r\n\r\nFixes: https://github.com/neondatabase/neon/issues/6423\r\n\r\n## Summary of changes\r\n\r\n- In the absence of a valid state for the tenant, it is set to Broken in\r\nthis path. A more elegant solution will require more refactoring, beyond\r\nthis minimal fix." 
+ author: John Spray + committer: GitHub + time: 'Time { raw: git_time { time: 1705938632, offset: 0, sign: 43 } }' + target: + id: 90e689addae554d7f79899379d3b84ff414404da + parent_ids: + - f0b2d4b0535fae693af1a7a18b2bb03e7ce243fb + message: | + pageserver: mark tenant broken when cancelling attach (#6430) + + ## Problem + + When a tenant is in Attaching state, and waiting for the + `concurrent_tenant_warmup` semaphore, it also listens for the tenant + cancellation token. When that token fires, Tenant::attach drops out. + Meanwhile, Tenant::set_stopping waits forever for the tenant to exit + Attaching state. + + Fixes: https://github.com/neondatabase/neon/issues/6423 + + ## Summary of changes + + - In the absence of a valid state for the tenant, it is set to Broken in + this path. A more elegant solution will require more refactoring, beyond + this minimal fix. + + (cherry picked from commit 93572a3e99f572f51529b3fbb3b11dafa88f7f5c) + author: John Spray + committer: John Spray + time: 'Time { raw: git_time { time: 1705940457, offset: 0, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 2001c31a14e723c25afef3afe00ba0111eb708a9 + parent_ids: + - 7de3799e66946556e5ea3d2ac77fe594a4964316 + message: | + turn Timeline::layers into tokio::sync::RwLock + author: Christian Schwarz + committer: Christian Schwarz + time: 'Time { raw: git_time { time: 1685120381, offset: 120, sign: 43 } }' + target: + id: 218af17c2949dde4d35c8b29045ca543249887e5 + parent_ids: + - 4d03689d9a7e9c74158e2f549731827755801869 + message: | + turn Timeline::layers into tokio::sync::RwLock + + (cherry picked from commit 2001c31a14e723c25afef3afe00ba0111eb708a9) + author: Christian Schwarz + committer: Christian Schwarz + time: 'Time { raw: git_time { time: 1685124665, offset: 120, sign: 43 } }' + is_trivial: false diff --git a/dataset/mined-cherries-verification/TypeScript_Unleash_unleash.yaml 
b/dataset/mined-cherries-verification/TypeScript_Unleash_unleash.yaml new file mode 100644 index 00000000..eb049bce --- /dev/null +++ b/dataset/mined-cherries-verification/TypeScript_Unleash_unleash.yaml @@ -0,0 +1,416 @@ +- total_number_of_committers: '56' + language: TypeScript + total_number_of_commits: '13088' + total_number_of_branches: '405' + total_number_of_results: '14' + repo_name: Unleash/unleash +- - search_method: MessageScan + cherry_and_target: + cherry: + id: 8d0477225667cf6fa0250d3c34c901784dd68482 + parent_ids: + - 5c27e7501482799dd9d637df551bd3b05bd3e871 + message: |+ + fix: duplicate column name in search query (#6989) + + author: Mateusz Kwasniewski + committer: GitHub + time: 'Time { raw: git_time { time: 1715016383, offset: 120, sign: 43 } }' + target: + id: 75897f138a3d2be7fccf7ad967963e12695c11d0 + parent_ids: + - c0e3f5d9c7f16f6186d33a4aade1aa6774ce78b9 + message: | + fix: duplicate column name in search query (#6989) + + (cherry picked from commit 8d0477225667cf6fa0250d3c34c901784dd68482) + author: Mateusz Kwasniewski + committer: kwasniew + time: 'Time { raw: git_time { time: 1715018019, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 46076fcbc8633ffd55299f1b649536a18e9ff9e7 + parent_ids: + - f0a929044d4efde680190a3c1c01e94cca0bfe02 + message: |+ + Fix: Conditionally hide Change Requests tab (#2329) + + author: andreas-unleash <104830839+andreas-unleash@users.noreply.github.com> + committer: GitHub + time: 'Time { raw: git_time { time: 1667549857, offset: 120, sign: 43 } }' + target: + id: 47cc168020703864f2014b12fea0f7588049b59e + parent_ids: + - 1061991ebbdd1f029414879342714ec7f3956914 + message: | + Fix: Conditionally hide Change Requests tab (#2329) + + (cherry picked from commit 46076fcbc8633ffd55299f1b649536a18e9ff9e7) + author: andreas-unleash <104830839+andreas-unleash@users.noreply.github.com> + committer: andreas-unleash 
<104830839+andreas-unleash@users.noreply.github.com> + time: 'Time { raw: git_time { time: 1667551556, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 84707e2bf3aa2241a60c92aa4fd40f5b6b6e34c2 + parent_ids: + - 33ec7e189410b29444b13da5bdf04427a9c32280 + message: "chore: create new flag to hide insights ui (#6638)\n\nCreates a new flag to control the executive dashboard ui\r\n\r\nCloses #\r\n[1-2208](https://linear.app/unleash/issue/1-2208/create-separate-ui-flag-decoupled-from-the-backend-flag)\r\n\r\n---------\r\n\r\nSigned-off-by: andreas-unleash " + author: andreas-unleash + committer: GitHub + time: 'Time { raw: git_time { time: 1710948106, offset: 120, sign: 43 } }' + target: + id: 1f98b17bb718e3938e80deba4e256740f4d6a198 + parent_ids: + - d11fd4dbe152e9659eed40ac266288655d04befc + message: | + chore: create new flag to hide insights ui (#6638) + + Creates a new flag to control the executive dashboard ui + + Closes # + [1-2208](https://linear.app/unleash/issue/1-2208/create-separate-ui-flag-decoupled-from-the-backend-flag) + + --------- + + Signed-off-by: andreas-unleash + + (cherry picked from commit 84707e2bf3aa2241a60c92aa4fd40f5b6b6e34c2) + Signed-off-by: andreas-unleash + author: andreas-unleash + committer: andreas-unleash + time: 'Time { raw: git_time { time: 1711009251, offset: 120, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: 5225452bfda0f6d379a92a4177a5ad468367d7a1 + parent_ids: + - 3643016a0ebe1bf304c8c68b9328a63cdd61795c + message: |+ + fix: remove stale stats widget (#7353) + + author: Mateusz Kwasniewski + committer: GitHub + time: 'Time { raw: git_time { time: 1718102364, offset: 120, sign: 43 } }' + target: + id: 2e47e4c35e4689985bc7bddb6add568e1fb29749 + parent_ids: + - 8664776388e7574efb72335e7781441a433268ea + message: | + fix: remove stale stats widget (#7353) + + (cherry picked from commit 
5225452bfda0f6d379a92a4177a5ad468367d7a1) + author: Mateusz Kwasniewski + committer: kwasniew + time: 'Time { raw: git_time { time: 1718191844, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: b132cce986eed73e6cc5d0bc94beb9ec441b6219 + parent_ids: + - 97f22d496facde35296ae73760a242882f337a8b + message: "fix: project api token type to lowercase (#3717)\n\n\r\n\r\n## About the changes\r\n\r\n\r\n\r\nCloses #\r\n\r\n\r\n\r\n\r\n### Important files\r\n\r\n\r\n\r\n## Discussion points\r\n\r\n\r\n---------\r\n\r\nSigned-off-by: andreas-unleash " + author: andreas-unleash + committer: GitHub + time: 'Time { raw: git_time { time: 1683624141, offset: 0, sign: 43 } }' + target: + id: 17be02fee832c4d2e37b38691cc0c0a3bcd68cfd + parent_ids: + - e2ce1cca519e6aa3193afbac6cbe3205d52ce912 + message: | + fix: project api token type to lowercase (#3717) + + + + ## About the changes + + + + Closes # + + + + + ### Important files + + + Cher + ## Discussion points + + + --------- + + Signed-off-by: andreas-unleash + + (cherry picked from commit b132cce986eed73e6cc5d0bc94beb9ec441b6219) + Signed-off-by: andreas-unleash + author: andreas-unleash + committer: andreas-unleash + time: 'Time { raw: git_time { time: 1683720402, offset: 180, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: cedf19d2ecb1f51dbc861d63ce438f56c26dde7c + parent_ids: + - d69d826586d7a39d1582613d246df3871020949b + message: |+ + fix: lifecycle metrics on metrics insert (#7322) + + author: Mateusz Kwasniewski + committer: GitHub + time: 'Time { raw: git_time { time: 1717768515, offset: 120, sign: 43 } }' + target: + id: b4ae2ab2311b9ba09847724395a2a2a5443cfb1e + parent_ids: + - d7d9929e33f3acbcf8a9c19d82422f5b708017f2 + message: | + fix: lifecycle metrics on metrics insert (#7322) + + (cherry picked from commit cedf19d2ecb1f51dbc861d63ce438f56c26dde7c) + author: Mateusz Kwasniewski + committer: kwasniew + 
time: 'Time { raw: git_time { time: 1718008646, offset: 120, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: b07c032d56a428edecd5af5be21d6c981f16e800 + parent_ids: + - bd8b54b5bd0370a4de66b0308b27c61e8c21fd34 + message: "fix: update potentially-stale status dynamically (#4905)\n\nFixes 2 bugs:\r\n\r\n- project-health-service keeping the feature types as an instance\r\nvariable and only updating it once was preventing real calculation to\r\nhappen if the lifetime value changed for a feature toggle type\r\n- the ui was reading from a predefined map for the lifetime values so\r\nthey would never reflect the BE change\r\n\r\nCloses #\r\n[SR-66](https://linear.app/unleash/issue/SR-66/slack-question-around-potentially-stale-and-its-uses)\r\n\r\n\"Screenshot\r\n\"Screenshot\r\n\r\n---------\r\n\r\nSigned-off-by: andreas-unleash " + author: andreas-unleash + committer: GitHub + time: 'Time { raw: git_time { time: 1696412836, offset: 180, sign: 43 } }' + target: + id: a14f0e2e859c5a16a10dcb75f7bf656d74f41389 + parent_ids: + - e6114c6ce613180f5096764ea47808820e122c72 + message: | + fix: update potentially-stale status dynamically (#4905) + + Fixes 2 bugs: + + - project-health-service keeping the feature types as an instance + variable and only updating it once was preventing real calculation to + happen if the lifetime value changed for a feature toggle type + - the ui was reading from a predefined map for the lifetime values so + they would never reflect the BE change + + Closes # + [SR-66](https://linear.app/unleash/issue/SR-66/slack-question-around-potentially-stale-and-its-uses) + + Screenshot 2023-10-02 at 14 37 17 + Screenshot 2023-10-02 at 14 37 06 + + --------- + + Signed-off-by: andreas-unleash + + (cherry picked from commit b07c032d56a428edecd5af5be21d6c981f16e800) + Signed-off-by: andreas-unleash + author: andreas-unleash + committer: andreas-unleash + time: 'Time { raw: git_time { time: 1696413337, offset: 180, 
sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: ce7644e0af884f80887a179ccdc769d924c2d930 + parent_ids: + - 5782efa6c09c2a505004ccf24fe2c948d368923b + message: |+ + chore: log unerlying DB error in set user root role (#5324) + + author: Mateusz Kwasniewski + committer: GitHub + time: 'Time { raw: git_time { time: 1699874161, offset: 60, sign: 43 } }' + target: + id: 659b9c8b9cf04890961d83c3777d37114f869661 + parent_ids: + - 5b81314be1c8c06471e079d4a0d77cf90b65cd37 + message: | + chore: log unerlying DB error in set user root role (#5324) + + (cherry picked from commit ce7644e0af884f80887a179ccdc769d924c2d930) + author: Mateusz Kwasniewski + committer: kwasniew + time: 'Time { raw: git_time { time: 1699971416, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 76a834ca91ca821d7fa1a0fb1794754a9767c8a9 + parent_ids: + - 7843c93dc51a8724e58e7c8d5d09804ce21e8d62 + message: "fix: sort toggleNames before updating last seen (#4747)\n\nSeems like when 2 pods are trying to POST lastSeen metrics, the db gets\r\ninto a deadlock state.\r\n\r\nThis is an attempt to fix the deadlock by sorting the toggleNames before\r\nthe update.\r\n\r\nThe hypothesis is that sorted toggle names will reduce the chance of\r\nworking on the same row at the same exact time\r\n\r\nCloses #\r\n[1-1382](https://linear.app/unleash/issue/1-1382/order-data-before-updating-the-lastseen-to-reduce-change-of-deadlock)\r\n\r\nSigned-off-by: andreas-unleash " + author: andreas-unleash + committer: GitHub + time: 'Time { raw: git_time { time: 1694772158, offset: 180, sign: 43 } }' + target: + id: 790a7baf11b687d4e9e452307124dd1c38c21f07 + parent_ids: + - e0409469efed9521143bd086ffecb1bf1af3be71 + message: | + fix: sort toggleNames before updating last seen (#4747) + + Seems like when 2 pods are trying to POST lastSeen metrics, the db gets + into a deadlock state. 
+ + This is an attempt to fix the deadlock by sorting the toggleNames before + the update. + + The hypothesis is that sorted toggle names will reduce the chance of + working on the same row at the same exact time + + Closes # + [1-1382](https://linear.app/unleash/issue/1-1382/order-data-before-updating-the-lastseen-to-reduce-change-of-deadlock) + + Signed-off-by: andreas-unleash + (cherry picked from commit 76a834ca91ca821d7fa1a0fb1794754a9767c8a9) + author: andreas-unleash + committer: andreas-unleash + time: 'Time { raw: git_time { time: 1694772240, offset: 180, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 02da9b1d34690535f51ed86370972e522047741d + parent_ids: + - f3df3a31bf5372545410b498512a19fb2462b21e + message: |+ + fix: handle concurrent service account updates (#5349) + + author: Gastón Fournier + committer: GitHub + time: 'Time { raw: git_time { time: 1700132607, offset: 0, sign: 43 } }' + target: + id: 175357ea3f50e1729b5bd278c2a4aa0bd853bf45 + parent_ids: + - 4ca84560377824382f62488c62f36e6a69fb6df1 + message: | + fix: handle concurrent service account updates (#5349) + + (cherry picked from commit 02da9b1d34690535f51ed86370972e522047741d) + author: Gastón Fournier + committer: kwasniew + time: 'Time { raw: git_time { time: 1700133045, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 44d85c0dcd5ace6f2094e82494a2fcc2eef34a79 + parent_ids: + - 2b66ba41dce05bf565761111da42fc7b25851a61 + message: "fix: UI navigation (reverts #5506) (#5512)\n\nLatest version had a UI navigation bug where we wouldn't correctly\r\nnavigate to the tab and instead it would add infinitely to the\r\nbreadcrumbs / URL:\r\n\r\n\r\nhttps://github.com/Unleash/unleash/assets/14320932/509fa528-7f9e-4476-a945-f74393e99dd5\r\n\r\nThis fixes the UI navigation by reverting\r\nhttps://github.com/Unleash/unleash/pull/5506" + author: Nuno Góis + committer: GitHub + time: 'Time 
{ raw: git_time { time: 1701335596, offset: 0, sign: 43 } }' + target: + id: 1a56985a3681670e7046a489b06c1aeff395bd42 + parent_ids: + - 2b66ba41dce05bf565761111da42fc7b25851a61 + message: | + fix: UI navigation (reverts #5506) (#5512) + + Latest version had a UI navigation bug where we wouldn't correctly + navigate to the tab and instead it would add infinitely to the + breadcrumbs / URL: + + https://github.com/Unleash/unleash/assets/14320932/509fa528-7f9e-4476-a945-f74393e99dd5 + + This fixes the UI navigation by reverting + https://github.com/Unleash/unleash/pull/5506 + + (cherry picked from commit 44d85c0dcd5ace6f2094e82494a2fcc2eef34a79) + author: Nuno Góis + committer: kwasniew + time: 'Time { raw: git_time { time: 1701335867, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: b07c032d56a428edecd5af5be21d6c981f16e800 + parent_ids: + - bd8b54b5bd0370a4de66b0308b27c61e8c21fd34 + message: "fix: update potentially-stale status dynamically (#4905)\n\nFixes 2 bugs:\r\n\r\n- project-health-service keeping the feature types as an instance\r\nvariable and only updating it once was preventing real calculation to\r\nhappen if the lifetime value changed for a feature toggle type\r\n- the ui was reading from a predefined map for the lifetime values so\r\nthey would never reflect the BE change\r\n\r\nCloses #\r\n[SR-66](https://linear.app/unleash/issue/SR-66/slack-question-around-potentially-stale-and-its-uses)\r\n\r\n\"Screenshot\r\n\"Screenshot\r\n\r\n---------\r\n\r\nSigned-off-by: andreas-unleash " + author: andreas-unleash + committer: GitHub + time: 'Time { raw: git_time { time: 1696412836, offset: 180, sign: 43 } }' + target: + id: ba40ed1f136a77642c45eba4dc3f40610555e19e + parent_ids: + - 32305bb44ec5e7b020d64bff114d0bfa118b2597 + message: "fix: update potentially-stale status dynamically (#4905) (#4920)\n\nFixes 2 bugs:\r\n\r\n- project-health-service keeping the feature types as an instance\r\nvariable 
and only updating it once was preventing real calculation to\r\nhappen if the lifetime value changed for a feature toggle type\r\n- the ui was reading from a predefined map for the lifetime values so\r\nthey would never reflect the BE change\r\n\r\nCloses #\r\n\r\n[SR-66](https://linear.app/unleash/issue/SR-66/slack-question-around-potentially-stale-and-its-uses)\r\n\r\n\"Screenshot\r\n\"Screenshot\r\n\r\n---------\r\n\r\n\r\n\r\n(cherry picked from commit b07c032d56a428edecd5af5be21d6c981f16e800)\r\n\r\n\r\n\r\n## About the changes\r\n\r\n\r\n\r\nCloses #\r\n\r\n\r\n\r\n\r\n### Important files\r\n\r\n\r\n\r\n## Discussion points\r\n\r\n\r\n---------\r\n\r\nSigned-off-by: andreas-unleash " + author: andreas-unleash + committer: GitHub + time: 'Time { raw: git_time { time: 1696414723, offset: 180, sign: 43 } }' + is_trivial: false + - search_method: MessageScan + cherry_and_target: + cherry: + id: f8a9d7f3558e62962cd722dbda1539359f920003 + parent_ids: + - 2dd2d520e344e2ee765d20621a242add92dcf4e2 + message: "fix: take into account project segments permission in form (#5352)\n\nhttps://linear.app/unleash/issue/SR-184/ticket-1106-users-with-createedit-project-segment-dont-see-all-the\r\n\r\nhttps://github.com/Unleash/unleash/pull/5304 did not take into account\r\npermissions further into the Segment form.\r\n\r\nThis PR fixes the remaining permission checks to take into consideration\r\nthe project-level permission: `UPDATE_PROJECT_SEGMENT`." 
+ author: Nuno Góis + committer: GitHub + time: 'Time { raw: git_time { time: 1700135696, offset: 0, sign: 43 } }' + target: + id: 528ca2f90037634f3a7c42aa2faec3e60681077d + parent_ids: + - 510fa30dbf899cffc260f36066345eae74bf53b9 + message: | + fix: take into account project segments permission in form (#5352) + + https://linear.app/unleash/issue/SR-184/ticket-1106-users-with-createedit-project-segment-dont-see-all-the + + https://github.com/Unleash/unleash/pull/5304 did not take into account + permissions further into the Segment form. + + This PR fixes the remaining permission checks to take into consideration + the project-level permission: `UPDATE_PROJECT_SEGMENT`. + + (cherry picked from commit f8a9d7f3558e62962cd722dbda1539359f920003) + author: Nuno Góis + committer: kwasniew + time: 'Time { raw: git_time { time: 1700137411, offset: 60, sign: 43 } }' + is_trivial: true + - search_method: MessageScan + cherry_and_target: + cherry: + id: 84707e2bf3aa2241a60c92aa4fd40f5b6b6e34c2 + parent_ids: + - 33ec7e189410b29444b13da5bdf04427a9c32280 + message: "chore: create new flag to hide insights ui (#6638)\n\nCreates a new flag to control the executive dashboard ui\r\n\r\nCloses #\r\n[1-2208](https://linear.app/unleash/issue/1-2208/create-separate-ui-flag-decoupled-from-the-backend-flag)\r\n\r\n---------\r\n\r\nSigned-off-by: andreas-unleash " + author: andreas-unleash + committer: GitHub + time: 'Time { raw: git_time { time: 1710948106, offset: 120, sign: 43 } }' + target: + id: d24111bdd189652527f6b832aa747cc0995a25ae + parent_ids: + - d11fd4dbe152e9659eed40ac266288655d04befc + message: "chore: create new flag to hide insights ui (#6638) (#6653)\n\nCreates a new flag to control the executive dashboard ui\r\n\r\nCloses #\r\n\r\n[1-2208](https://linear.app/unleash/issue/1-2208/create-separate-ui-flag-decoupled-from-the-backend-flag)\r\n\r\n---------\r\n\r\n\r\n\r\n(cherry picked from commit 84707e2bf3aa2241a60c92aa4fd40f5b6b6e34c2)\r\n\r\n\r\n\r\n## About the 
changes\r\n\r\n\r\n\r\nCloses #\r\n\r\n\r\n\r\n\r\n### Important files\r\n\r\n\r\n\r\n## Discussion points\r\n\r\n\r\n---------\r\n\r\nSigned-off-by: andreas-unleash " + author: andreas-unleash + committer: GitHub + time: 'Time { raw: git_time { time: 1711011034, offset: 120, sign: 43 } }' + is_trivial: false diff --git a/evaluation-workdir/data/mined-cherries.z01 b/dataset/mined-cherries.z01 similarity index 100% rename from evaluation-workdir/data/mined-cherries.z01 rename to dataset/mined-cherries.z01 diff --git a/evaluation-workdir/data/mined-cherries.z02 b/dataset/mined-cherries.z02 similarity index 100% rename from evaluation-workdir/data/mined-cherries.z02 rename to dataset/mined-cherries.z02 diff --git a/evaluation-workdir/data/mined-cherries.z03 b/dataset/mined-cherries.z03 similarity index 100% rename from evaluation-workdir/data/mined-cherries.z03 rename to dataset/mined-cherries.z03 diff --git a/evaluation-workdir/data/mined-cherries.z04 b/dataset/mined-cherries.z04 similarity index 100% rename from evaluation-workdir/data/mined-cherries.z04 rename to dataset/mined-cherries.z04 diff --git a/evaluation-workdir/data/mined-cherries.z05 b/dataset/mined-cherries.z05 similarity index 100% rename from evaluation-workdir/data/mined-cherries.z05 rename to dataset/mined-cherries.z05 diff --git a/evaluation-workdir/data/mined-cherries.z06 b/dataset/mined-cherries.z06 similarity index 100% rename from evaluation-workdir/data/mined-cherries.z06 rename to dataset/mined-cherries.z06 diff --git a/evaluation-workdir/data/mined-cherries.z07 b/dataset/mined-cherries.z07 similarity index 100% rename from evaluation-workdir/data/mined-cherries.z07 rename to dataset/mined-cherries.z07 diff --git a/evaluation-workdir/data/mined-cherries.z08 b/dataset/mined-cherries.z08 similarity index 100% rename from evaluation-workdir/data/mined-cherries.z08 rename to dataset/mined-cherries.z08 diff --git a/evaluation-workdir/data/mined-cherries.z09 b/dataset/mined-cherries.z09 similarity 
index 100% rename from evaluation-workdir/data/mined-cherries.z09 rename to dataset/mined-cherries.z09 diff --git a/evaluation-workdir/data/mined-cherries.z10 b/dataset/mined-cherries.z10 similarity index 100% rename from evaluation-workdir/data/mined-cherries.z10 rename to dataset/mined-cherries.z10 diff --git a/evaluation-workdir/data/mined-cherries.z11 b/dataset/mined-cherries.z11 similarity index 100% rename from evaluation-workdir/data/mined-cherries.z11 rename to dataset/mined-cherries.z11 diff --git a/evaluation-workdir/data/mined-cherries.z12 b/dataset/mined-cherries.z12 similarity index 100% rename from evaluation-workdir/data/mined-cherries.z12 rename to dataset/mined-cherries.z12 diff --git a/evaluation-workdir/data/mined-cherries.zip b/dataset/mined-cherries.zip similarity index 100% rename from evaluation-workdir/data/mined-cherries.zip rename to dataset/mined-cherries.zip diff --git a/evaluation-workdir/data/repo-sample.zip b/dataset/repo-sample.zip similarity index 100% rename from evaluation-workdir/data/repo-sample.zip rename to dataset/repo-sample.zip diff --git a/docker/config-reproduction.properties b/docker/config-reproduction.properties index afbbfad9..8670f664 100644 --- a/docker/config-reproduction.properties +++ b/docker/config-reproduction.properties @@ -1,27 +1,50 @@ -# inclusive start and end -experiment.repeats.start=1 -experiment.repeats.end=1 -experiment.startid=0 -experiment.sample-file = /home/user/evaluation-workdir/last-sample.ser +### +### GENERAL ### +### +# Should all repositories be cloned ahead of the evaluation? Warning: this may require several hundred GigaBytes of free disk space. +preload-repositories=false +# Delete each repository after it has been processed? 
This helps with freeing disk space, but makes it more difficult to rerun the evaluation later +# because the repositories have to be cloned again +clean-repositories=false +# The number of EXPERIMENT_TIMEOUT_UNIT to wait for a patcher to finish patching (long) +experiment.timeout.length=5 +# The time unit for the timeout, e.g., SECONDS, MINUTES, ... +experiment.timeout.unit=MINUTES +# Number of threads for parallel execution. Note that the bottleneck might be the IO capacity of your disk. +experiment.thread-count=5 +# Minimum number of cherries in a repository for it to be considered for the evaluation. +# -1 means all repositories are included. +experiment.dataset.min-size=-1 +# Maximum number of commits in a repository for a dataset to be considered for the study. If a repository has +# more commits, it is simply ignored. Values of 0 or less are automatically converted to Integer.MAX_VALUE. +# -1 means all repositories are included. +experiment.dataset.max-size=-1 +# Which patchers should be enabled? 
+experiment.patcher.gnu-patch=true +experiment.patcher.git-apply=true +experiment.patcher.git-cp=true +experiment.patcher.mpatch=true + +### +### PATHS ### +# THESE SHOULD ONLY BE CHANGED IF YOU KNOW WHAT YOU ARE DOING +### # The path to the list of dataset -experiment.datasets=/home/user/evaluation-workdir/data/cherries +experiment.datasets=/home/user/dataset/mined-cherries # The path to the main working directory of the experiment -experiment.dir.main=/home/user/evaluation-workdir/main/cherries +experiment.dir.main=/home/user/evaluation-workdir/main/reproduction # The path to the results directory -experiment.dir.results=/home/user/evaluation-workdir/results/cherries +experiment.dir.results=/home/user/evaluation-workdir/results/reproduction # The path to the directory to which the SPL repos are cloned -experiment.dir.repos=/home/user/evaluation-workdir/REPOS/cherries -experiment.processed-file=/home/user/evaluation-workdir/results/processed.txt -# Whether debug files should be written -experiment.debug=false -experiment.cherry-type = Complex -# Minimum number of cherries in a repository for it to be considered for the evaluation. -experiment.dataset.min-size=0 -# Maximum number of commits in a repository for a dataset to be considered for the study. If a repository has -# more commits, it is simply ignored. Values of 0 or less are automatically converted to Integer.MAX_VALUE. -experiment.dataset.max-size=0 -# Number of threads for parallel execution. Note that the bottleneck might be the IO capacity of your disk. 
-experiment.thread-count=10 +experiment.dir.repos=/home/user/evaluation-workdir/REPOS +# The path to the file in which already processed evaluation runs are tracked +experiment.processed-file=/home/user/evaluation-workdir/results/processed-reproduction.txt + +### +### SAMPLING ### +# ALLOWS FOR RUNNING THE EVALUATION ON A SMALL SUBSET OF THE DATASET +### +experiment.sample-file = /home/user/evaluation-workdir/reproduction-sample.ser # Should the amount of data be reduced by sampling with a certain confidence? experiment.enable-sampling=false # (z) is the Z-score corresponding to your desired confidence level (for a 95% confidence level, (z = 1.96)), @@ -36,3 +59,21 @@ sampling.e=0.01 sampling.p=0.5 # A seed that is used during sampling to ensure repeatability of the experiments sampling.seed=3 +# The number of random experiment repetitions. In each run, random variants are generated and a random source variant +# is selected from which a diff is calculated. The higher the number of repeats the more random repeats for two specific +# SPL-commit pairs (inclusive start and end) +# This setting only makes sense when sampling is enabled +experiment.repeats.start=1 +experiment.repeats.end=1 + + +### +### MISC ### +# THESE SHOULD ONLY BE CHANGED IF YOU KNOW WHAT YOU ARE DOING +### +# From which run id to start the experiments. Can be used to skip experimental runs. +experiment.startid=0 +# Which type of cherry-picks should be considered (Trivial, Complex, Both)? +experiment.cherry-type = Complex +# Whether debug files should be written. If true, the evaluation will create DEBUG directories with various files in the working directories. +experiment.debug=false diff --git a/docker/config-verification.properties b/docker/config-verification.properties new file mode 100644 index 00000000..a32cccc3 --- /dev/null +++ b/docker/config-verification.properties @@ -0,0 +1,75 @@ +### +### GENERAL ### +### +# Should all repositories be cloned ahead of the evaluation? 
Warning: this may require several hundred GigaBytes of free disk space. +preload-repositories=false +# Delete each repository after it has been processed? This helps with freeing disk space, but makes it more difficult to rerun the evaluation later +# because the repositories have to be cloned again +clean-repositories=false +# The number of EXPERIMENT_TIMEOUT_UNIT to wait for a patcher to finish patching (long) +experiment.timeout.length=5 +# The time unit for the timeout, e.g., SECONDS, MINUTES, ... +experiment.timeout.unit=MINUTES +# Number of threads for parallel execution. Note that the bottleneck might be the IO capacity of your disk. +experiment.thread-count=5 +# Minimum number of cherries in a repository for it to be considered for the evaluation. +# -1 means all repositories are included. +experiment.dataset.min-size=-1 +# Maximum number of commits in a repository for a dataset to be considered for the study. If a repository has +# more commits, it is simply ignored. Values of 0 or less are automatically converted to Integer.MAX_VALUE. +# -1 means all repositories are included. +experiment.dataset.max-size=-1 +# Which patchers should be enabled? 
+experiment.patcher.gnu-patch=true +experiment.patcher.git-apply=true +experiment.patcher.git-cp=true +experiment.patcher.mpatch=true + +### +### PATHS ### +### +# The path to the list of dataset +experiment.datasets=/home/user/dataset/mined-cherries-verification +# The path to the main working directory of the experiment +experiment.dir.main=/home/user/evaluation-workdir/main/verification +# The path to the results directory +experiment.dir.results=/home/user/evaluation-workdir/results/verification +# The path to the directory to which the SPL repos are cloned +experiment.dir.repos=/home/user/evaluation-workdir/REPOS +experiment.processed-file=/home/user/evaluation-workdir/results/processed-verification.txt + +### +### SAMPLING ### +### +experiment.sample-file = /home/user/evaluation-workdir/verification-sample.ser +# Should the amount of data be reduced by sampling with a certain confidence? +experiment.enable-sampling=false +# (z) is the Z-score corresponding to your desired confidence level (for a 95% confidence level, (z = 1.96)), +# sampling.z=1.96 +# Score for 99% +sampling.z=2.58 +# (e) is the margin of error (in percent, such as 5% = 0.05). +# sampling.e=0.05 +# smaller error +sampling.e=0.01 +# (p) is the sample proportion (in percent, such as 50% = 0.5), +sampling.p=0.5 +# A seed that is used during sampling to ensure repeatability of the experiments +sampling.seed=3 +# The number of random experiment repetitions. In each run, random variants are generated and a random source variant +# is selected from which a diff is calculated. The higher the number of repeats the more random repeats for two specific +# SPL-commit pairs (inclusive start and end) +# This setting only makes sense when samping is enabled +experiment.repeats.start=1 +experiment.repeats.end=1 + + +### +### MISC ### +### +# From which run id to start the experiments. Can be used to skip experimental runs. 
+experiment.startid=0 +# Which type of cherry-picks should be considered (Trivial, Complex, Both)? +experiment.cherry-type = Complex +# Whether debug files should be written. If true, the evaluation will create DEBUG directories with various files in the working directories. +experiment.debug=false diff --git a/docker/run-simulation.sh b/docker/run-simulation.sh index 0eded85f..12fa6fc8 100755 --- a/docker/run-simulation.sh +++ b/docker/run-simulation.sh @@ -1,27 +1,59 @@ #! /bin/bash -cherries() { - echo "Running evaluation on cherry picks." - - if [ "$1" == 'replication' ]; then - java -jar -Dtinylog.configuration=/home/user/tinylog.properties cherries.jar config-reproduction.properties - java -jar result-analysis-cherries.jar config-reproduction.properties - elif [ "$1" == 'cleanup' ]; then - echo "Running cleanup of old result files." - rm -r /home/user/evaluation-workdir/results/ - rm -r /home/user/evaluation-workdir/main/ - mkdir /home/user/evaluation-workdir/results - mkdir /home/user/evaluation-workdir/main - else - echo "Invalid argument: $1" - fi +start() { + echo "User id: $(id -u)" + echo "Group id: $(id -g)" + + if [ "$1" == 'reproduction' ]; then + echo "Running full reproduction of evaluation on the entire patch dataset." + java -jar -Dtinylog.configuration=/home/user/tinylog.properties cherries.jar config-reproduction.properties + + analysis $1 + elif [ "$1" == 'verification' ]; then + echo "Verifying the evaluation setup on a tiny subset of the patch dataset." + java -jar -Dtinylog.configuration=/home/user/tinylog.properties cherries.jar config-verification.properties + + analysis $1 + elif [ "$1" == 'cleanup' ]; then + echo "Running cleanup of old result files." 
+ rm -r /home/user/evaluation-workdir/results/* + rm -r /home/user/evaluation-workdir/main/* + rm -r /home/user/evaluation-workdir/tables/* + elif [ "$1" == 'analysis' ]; then + if [ "$2" == '' ]; then + echo "missing second argument" + echo "./execute.sh analysis [reproduction|verification] # Run a quick verification of the setup" + else + analysis $2 + fi + else + echo "Invalid argument: $1" + fi } -if [ "$1" == '' ]; then - echo "./execute.sh cherries replication" - exit -fi +analysis() { + cd /home/user/analysis + poetry run python result_analysis/__main__.py --repo_sample /home/user/dataset/repo-sample.yaml --results_dir /home/user/evaluation-workdir/results/"$1" --metrics_file /home/user/metrics-"$1".tex -if [ "$1" == 'cherries' ]; then - cherries $2 + cd /home/user/ + latexmk -pdf -interaction=nonstopmode -synctex=1 -shell-escape metrics-$1.tex + cp metrics-$1.pdf evaluation-workdir || exit + + echo "++++++++++++++++++++++++++++++++++++" + echo " Analysis done " + echo "++++++++++++++++++++++++++++++++++++" + + echo "" + echo "The result table can be found under evaluation-workdir/metrics-$1.pdf" +} + +if [ "$1" == '' ]; then + echo "Argument required. The following options are available:" + echo "./execute.sh reproduction # Reproduce the evaluation" + echo "./execute.sh verification # Run a quick verification of the setup" + echo "./execute.sh analysis [reproduction|verification] # Run a quick verification of the setup" + echo "./execute.sh cleanup # Clean the evaluation files" + exit +else + start $1 $2 fi diff --git a/evaluation-workdir/results/rep-1.zip b/evaluation-workdir/results/rep-1.zip index c2691b2e..be347a47 100644 Binary files a/evaluation-workdir/results/rep-1.zip and b/evaluation-workdir/results/rep-1.zip differ diff --git a/execute.sh b/execute.sh index 7631c827..2ffc22c7 100755 --- a/execute.sh +++ b/execute.sh @@ -1,5 +1,5 @@ #! 
/bin/bash echo "Starting $1" -docker run --rm -v "$(pwd)/evaluation-workdir/":"/home/user/evaluation-workdir" pwm-eval "$@" +docker run --rm -v "$(pwd)/evaluation-workdir/":"/home/user/evaluation-workdir" mpatch-reproduction "$@" echo "Done." diff --git a/misc/verification-results.png b/misc/verification-results.png new file mode 100644 index 00000000..1e048c3a Binary files /dev/null and b/misc/verification-results.png differ diff --git a/mpatch/.github/workflows/rust.yml b/mpatch/.github/workflows/rust.yml new file mode 100644 index 00000000..101c9d95 --- /dev/null +++ b/mpatch/.github/workflows/rust.yml @@ -0,0 +1,28 @@ +name: nightly clippy + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: actions-rs/toolchain@v1 + with: + toolchain: nightly + components: clippy + override: true + - uses: actions/checkout@v3 + - name: Build + run: cargo build --verbose + - name: Run tests + run: cargo test --verbose diff --git a/mpatch/LICENSE_APACHE b/mpatch/LICENSE_APACHE new file mode 100644 index 00000000..3a4f372e --- /dev/null +++ b/mpatch/LICENSE_APACHE @@ -0,0 +1,190 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2023 Alexander Schultheiß + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. diff --git a/mpatch/LICENSE_MIT b/mpatch/LICENSE_MIT new file mode 100644 index 00000000..1be25f0f --- /dev/null +++ b/mpatch/LICENSE_MIT @@ -0,0 +1,8 @@ +Copyright 2023 Alexander Schultheiß + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/mpatch/README.md b/mpatch/README.md index 86d32c2e..b5b76037 100644 --- a/mpatch/README.md +++ b/mpatch/README.md @@ -1,13 +1,10 @@ # mpatch - A patching tool and library -## Requirements -To install `mpatch`, you require an up-to-date installation of the rust toolchain. You can find instructions on how to install Rust on the [official website](https://www.rust-lang.org/tools/install). - ## Usage ### As a CLI tool -You can install `mpatch` locally by calling `cargo install --path .` in the root of the `mpatch` directory. Afterwards, you can call `mpatch --help` to get usage instructions. 
+You can install `mpatch` locally by cloning the git repo, installing Rust, and calling `cargo install --path .` in the root of the repository. Afterwards, you can call `mpatch --help` to get usage instructions. ### As a library -You can also use `mpatch` as a library in your own Rust projects. You can read the documentation by calling `cargo doc --open` in the root directory of the repository. +You can use `mpatch` as a library in your own Rust projects. You can read the documentation by calling `cargo doc --open` in the root directory of the repository. diff --git a/mpatch/src/bin/mpatch.rs b/mpatch/src/bin/mpatch.rs index 515c84c3..3bfa70a2 100644 --- a/mpatch/src/bin/mpatch.rs +++ b/mpatch/src/bin/mpatch.rs @@ -17,7 +17,7 @@ fn main() -> Result<(), Box> { ); if let Err(error) = mpatch::apply_all(patch_paths, cli.strip, cli.dryrun, matcher, filter) { - eprintln!("{}", error); + eprintln!("{error}"); return Err(Box::new(error)); } diff --git a/mpatch/src/diffs.rs b/mpatch/src/diffs.rs index c097f2f1..94d0251c 100644 --- a/mpatch/src/diffs.rs +++ b/mpatch/src/diffs.rs @@ -162,7 +162,7 @@ impl FileDiff { /// # Returns /// Returns a ChangedLines iterator that iterates all HunkLine instances containing changes. /// - pub fn changes(&self) -> ChangedLines { + pub fn changes<'a>(&'a self) -> ChangedLines<'a> { let changes: Vec<&HunkLine> = self .hunks() .iter() diff --git a/mpatch/src/io.rs b/mpatch/src/io.rs index d5caf695..a92cc15f 100644 --- a/mpatch/src/io.rs +++ b/mpatch/src/io.rs @@ -21,7 +21,7 @@ pub fn write_rejects( rejects: &[Change], file_writer: &mut BufWriter, ) -> Result<(), Error> { - file_writer.write_fmt(format_args!("{}\n", diff_header))?; + file_writer.write_fmt(format_args!("{diff_header}\n"))?; for reject in rejects { file_writer.write_fmt(format_args!("{}: {}", reject.change_id(), reject))? } diff --git a/mpatch/src/lib.rs b/mpatch/src/lib.rs index af7b3fc4..80da77fb 100644 --- a/mpatch/src/lib.rs +++ b/mpatch/src/lib.rs @@ -38,10 +38,7 @@ //! 
} //! ``` -// TODO: Feature traces and target configuration are part of the input! // TODO: Handle git diffs as well; they have differences e.g., /dev/null, permission change -// TODO: Handle certain edge cases in which code is added at then end of the file (the existing -// last line should not be pushed down) /// Module for types that implement reading and parsing diff files. pub mod diffs; diff --git a/mpatch/src/patch.rs b/mpatch/src/patch.rs index 850af955..935b8065 100644 --- a/mpatch/src/patch.rs +++ b/mpatch/src/patch.rs @@ -59,8 +59,6 @@ use self::filtering::Filter; /// Lastly, this function requires a matcher that is used to calculate the matching between source /// and target variant. See `mpatch::matching` for more information. /// -// TODO: It would be great to track differences during file removal as rejects -// TODO: Improve interface of this function (e.g., make it smaller or at least more versatile) pub fn apply_all( patch_paths: PatchPaths, strip: usize, diff --git a/mpatch/src/patch/application.rs b/mpatch/src/patch/application.rs index 52b3283a..c8f10384 100644 --- a/mpatch/src/patch/application.rs +++ b/mpatch/src/patch/application.rs @@ -73,13 +73,16 @@ fn apply_file_modification(patch: AlignedPatch, dryrun: bool) -> Result c.line_number <= target_line_number, - // Removes are anchored to actual line being removed (i.e. the line being currently - // processed which has line number 'target_line_number' - LineChangeType::Remove => c.line_number == target_line_number, - }) { + while changes.peek().map_or_else( + || false, + |c| match c.change_type { + // Adds are anchored to the context line above (i.e., lower than target_line_number) + LineChangeType::Add => c.line_number <= target_line_number, + // Removes are anchored to actual line being removed (i.e. 
the line being currently + // processed which has line number 'target_line_number' + LineChangeType::Remove => c.line_number == target_line_number, + }, + ) { let change = changes.next().expect("there should be a change to extract"); match change.change_type { LineChangeType::Add => { diff --git a/mpatch/src/patch/matching.rs b/mpatch/src/patch/matching.rs index bc8166e9..a5de2866 100644 --- a/mpatch/src/patch/matching.rs +++ b/mpatch/src/patch/matching.rs @@ -1,7 +1,20 @@ +use std::ops::Deref; +use std::ops::DerefMut; + use similar::{Change, TextDiff}; use crate::io::FileArtifact; +enum SearchState { + Searching, + OutOfBounds(usize), + Found(usize), +} + +use SearchState::Found; +use SearchState::OutOfBounds; +use SearchState::Searching; + /// A trait for defining a common interface for matchers that match lines between two files. /// /// Matchers are used by mpatch to determine the alignment for a patch. This means that mpatch @@ -241,38 +254,99 @@ impl Matching { /// Returns None if there is no matched line at or above the given line number. Returns /// Some(usize) with the target line number if a match has been found. pub(crate) fn target_index_fuzzy(&self, line_number: usize) -> (MatchId, MatchOffset) { - let mut line_number = line_number; - // Search for the closest context line above the change; i.e., key and value must both be // Some(...) // We have to insert the change after the found target line, if we had to skip at least one // line - let mut insert_after = false; let mut match_offset = MatchOffset(0); - while line_number > 0 && self.target_index(line_number).flatten().is_none() { - line_number -= 1; - match_offset.0 += 1; - insert_after = true; - } - if line_number == 0 { - // Line numbers start at '1', so there is no valid target index for '0' - (None, match_offset) - } else { - let target_line = self.target_index(line_number); - if insert_after { - // The result must be Some(...) 
in all cases - (target_line.unwrap().map(|v| v + 1), match_offset) - } else { - (target_line.unwrap(), match_offset) + let source_len = self.source.len(); + + // Helper closure for checking on a potential match based on a source index + // If a match is found, the search loop can be stopped, so "true" is returned + let find_match = |source_id, insert_after| { + self.target_index(source_id) + .flatten() + .map_or(Searching, |l| { + if insert_after { + // If the insertion should happen after the found match, we increase the line + // number by one + Found(l + 1) + } else { + Found(l) + } + }) + }; + + // Helper closure that checks lines above the given line number for potential matches. + let try_above_match = |offset| { + // In bounds? + if offset >= line_number { + // If there not, we insert the start of the file as match + return OutOfBounds(1); } - } + // For lines above, insertions should happend after the matched line + find_match(line_number - offset, offset > 0) + }; + + // Helper closure that checks lines below the given line number for potential matches. + let try_below_match = |offset| { + // In bounds? + if line_number + offset > source_len { + // If not, we insert the end of the file as match + return OutOfBounds(source_len + 1); + } + // For lines below, insertions should happend before the matched line + find_match(line_number + offset, false) + }; + + // Increase the match offset until a match is found, either above or below + let matched_line = loop { + let above = try_above_match(*match_offset); + if let Found(l) = above { + break Some(l); + } + let below = try_below_match(*match_offset); + if let Found(l) = below { + break Some(l); + } + + // Is the search out of bounds on both ends? 
+ if let (OutOfBounds(a), OutOfBounds(b)) = (above, below) { + // Return the start or end, depending on which is closer to the initial line number + if usize::abs_diff(a, line_number) <= usize::abs_diff(b, line_number) { + break Some(a); + } else { + break Some(b); + } + } + + // Increase the offset to continue the search + *match_offset += 1; + }; + + (matched_line, match_offset) } } // The match offset of a fuzzy match search. +#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] pub struct MatchOffset(pub usize); +impl Deref for MatchOffset { + type Target = usize; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for MatchOffset { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + /// A simple matcher using the `similar` crate which offers implementations of the LCS algorithm. pub struct LCSMatcher; diff --git a/mpatch/tests/diffs/anchor_below.diff b/mpatch/tests/diffs/anchor_below.diff new file mode 100644 index 00000000..e454b0d9 --- /dev/null +++ b/mpatch/tests/diffs/anchor_below.diff @@ -0,0 +1,12 @@ +diff --color -NaurZ version-0/anchor_below.c version-1/anchor_below.c +--- version-0/anchor_below.c 2025-08-07 13:46:09.367836988 +0200 ++++ version-1/anchor_below.c 2025-08-07 13:46:40.655711631 +0200 +@@ -4,6 +4,8 @@ + int main() { + int number; + unsigned long long result; ++ number = 3; ++ result = 0; + // Check if the user has entered a negative integer + if (number < 0) { + printf("Factorial of a negative number doesn't exist.\n"); diff --git a/mpatch/tests/diffs/prepending.diff b/mpatch/tests/diffs/prepending.diff new file mode 100644 index 00000000..138b4f3e --- /dev/null +++ b/mpatch/tests/diffs/prepending.diff @@ -0,0 +1,10 @@ +diff --color -NaurZ version-0/prepending.c version-1/prepending.c +--- version-0/prepending.c 2025-08-07 13:36:05.599856189 +0200 ++++ version-1/prepending.c 2025-08-07 13:36:02.562534718 +0200 +@@ -1,3 +1,6 @@ ++#include ++// Function prototype declaration 
++unsigned long long factorial(int n); + int main() { + int number; + unsigned long long result; diff --git a/mpatch/tests/edge_cases.rs b/mpatch/tests/edge_cases.rs index 75bb9ce5..a3a137ae 100644 --- a/mpatch/tests/edge_cases.rs +++ b/mpatch/tests/edge_cases.rs @@ -152,7 +152,7 @@ fn compare_actual_and_expected(path_actual: &str, path_expected: &str) -> Result .zip(actual.into_lines().into_iter()) .enumerate() { - assert_eq!(expected, actual, "lines {} differ", i) + assert_eq!(expected, actual, "lines {i} differ") } } else { assert!(actual.is_err()); @@ -186,5 +186,5 @@ fn crlf() { #[test] fn mixed() { const DIFF_FILE: &str = "tests/weird_edge_cases/diffs/mixed.diff"; - let diff = VersionDiff::read(DIFF_FILE).unwrap(); + VersionDiff::read(DIFF_FILE).unwrap(); } diff --git a/mpatch/tests/expected_patches/anchor_below.diff b/mpatch/tests/expected_patches/anchor_below.diff new file mode 100644 index 00000000..eef080d9 --- /dev/null +++ b/mpatch/tests/expected_patches/anchor_below.diff @@ -0,0 +1,6 @@ +diff --color -NaurZ version-0/anchor_below.c version-1/anchor_below.c +--- version-0/anchor_below.c 2025-08-07 13:46:09.367836988 +0200 ++++ version-1/anchor_below.c 2025-08-07 13:46:40.655711631 +0200 +@@ -8,6 +8,8 @@ ++ number = 3; ++ result = 0; diff --git a/mpatch/tests/expected_patches/prepending.diff b/mpatch/tests/expected_patches/prepending.diff new file mode 100644 index 00000000..bcf5547f --- /dev/null +++ b/mpatch/tests/expected_patches/prepending.diff @@ -0,0 +1,7 @@ +diff --color -NaurZ version-0/prepending.c version-1/prepending.c +--- version-0/prepending.c 2025-08-07 13:36:05.599856189 +0200 ++++ version-1/prepending.c 2025-08-07 13:36:02.562534718 +0200 +@@ -1,3 +1,6 @@ ++#include ++// Function prototype declaration ++unsigned long long factorial(int n); diff --git a/mpatch/tests/patch.rs b/mpatch/tests/patch.rs index 2f6ca2a8..18e468bc 100644 --- a/mpatch/tests/patch.rs +++ b/mpatch/tests/patch.rs @@ -1,18 +1,6 @@ pub mod test_utils; -use mpatch::{ - 
patch::{alignment::align_patch_to_target, AlignedPatch}, - FileArtifact, LCSMatcher, Matcher, -}; -use test_utils::{get_aligned_patch, read_patch, run_alignment_test, run_application_test}; - -// TODO: Test multi-alignment -// TODO: Test file creation -// TODO: Test file removal -// TODO: Test file renaming -// TODO: Test file permission change -// TODO: Test patch application to entire directory -// TODO: Test missing target files +use test_utils::{get_aligned_patch, run_alignment_test, run_application_test}; const INVARIANT_SOURCE: &str = "tests/samples/source_variant/version-0/invariant.c"; const INVARIANT_TARGET: &str = "tests/samples/target_variant/version-0/invariant.c"; @@ -51,6 +39,18 @@ const APPENDING_DIFF: &str = "tests/diffs/appending.diff"; const EXPECTED_APPENDING_PATCH: &str = "tests/expected_patches/appending.diff"; const EXPECTED_APPENDING_RESULT: &str = "tests/samples/target_variant/version-1/appending.c"; +const PREPENDING_SOURCE: &str = "tests/samples/source_variant/version-0/prepending.c"; +const PREPENDING_TARGET: &str = "tests/samples/target_variant/version-0/prepending.c"; +const PREPENDING_DIFF: &str = "tests/diffs/prepending.diff"; +const EXPECTED_PREPENDING_PATCH: &str = "tests/expected_patches/prepending.diff"; +const EXPECTED_PREPENDING_RESULT: &str = "tests/samples/target_variant/version-1/prepending.c"; + +const ANCHOR_BELOW_SOURCE: &str = "tests/samples/source_variant/version-0/anchor_below.c"; +const ANCHOR_BELOW_TARGET: &str = "tests/samples/target_variant/version-0/anchor_below.c"; +const ANCHOR_BELOW_DIFF: &str = "tests/diffs/anchor_below.diff"; +const EXPECTED_ANCHOR_BELOW_PATCH: &str = "tests/expected_patches/anchor_below.diff"; +const EXPECTED_ANCHOR_BELOW_RESULT: &str = "tests/samples/target_variant/version-1/anchor_below.c"; + #[test] fn invariant_alignment() { run_alignment_test( @@ -143,3 +143,36 @@ fn apply_appending() { let aligned_patch = get_aligned_patch(APPENDING_SOURCE, APPENDING_TARGET, APPENDING_DIFF); 
run_application_test(aligned_patch, EXPECTED_APPENDING_RESULT, 0); } + +#[test] +fn prepending_alignment() { + run_alignment_test( + PREPENDING_SOURCE, + PREPENDING_TARGET, + PREPENDING_DIFF, + EXPECTED_PREPENDING_PATCH, + ); +} + +#[test] +fn apply_prepending() { + let aligned_patch = get_aligned_patch(PREPENDING_SOURCE, PREPENDING_TARGET, PREPENDING_DIFF); + run_application_test(aligned_patch, EXPECTED_PREPENDING_RESULT, 0); +} + +#[test] +fn anchor_below_alignment() { + run_alignment_test( + ANCHOR_BELOW_SOURCE, + ANCHOR_BELOW_TARGET, + ANCHOR_BELOW_DIFF, + EXPECTED_ANCHOR_BELOW_PATCH, + ); +} + +#[test] +fn apply_anchor_below() { + let aligned_patch = + get_aligned_patch(ANCHOR_BELOW_SOURCE, ANCHOR_BELOW_TARGET, ANCHOR_BELOW_DIFF); + run_application_test(aligned_patch, EXPECTED_ANCHOR_BELOW_RESULT, 0); +} diff --git a/mpatch/tests/samples/source_variant/patch.diff b/mpatch/tests/samples/source_variant/patch.diff index edf200ce..4f325215 100644 --- a/mpatch/tests/samples/source_variant/patch.diff +++ b/mpatch/tests/samples/source_variant/patch.diff @@ -1,4 +1,4 @@ -diff -Naur version-0/additive.c version-1/additive.c +diff --color -NaurZ version-0/additive.c version-1/additive.c --- version-0/additive.c 2024-02-02 12:03:12.953265907 +0100 +++ version-1/additive.c 2024-02-02 12:03:12.953265907 +0100 @@ -3,7 +3,7 @@ @@ -22,7 +22,33 @@ diff -Naur version-0/additive.c version-1/additive.c } return 0; } -diff -Naur version-0/invariant.c version-1/invariant.c +diff --color -NaurZ version-0/anchor_below.c version-1/anchor_below.c +--- version-0/anchor_below.c 2025-08-07 13:46:09.367836988 +0200 ++++ version-1/anchor_below.c 2025-08-07 13:46:40.655711631 +0200 +@@ -4,6 +4,8 @@ + int main() { + int number; + unsigned long long result; ++ number = 3; ++ result = 0; + // Check if the user has entered a negative integer + if (number < 0) { + printf("Factorial of a negative number doesn't exist.\n"); +diff --color -NaurZ version-0/appending.c version-1/appending.c +--- 
version-0/appending.c 2024-05-17 19:51:59.905792244 +0200 ++++ version-1/appending.c 2024-05-17 19:51:59.905792244 +0200 +@@ -19,3 +19,10 @@ + return 0; + } + // Function to calculate the factorial of a number ++unsigned long long factorial(int n) { ++ if (n == 0) { ++ return 1; // Base case: factorial of 0 is 1 ++ } else { ++ return n * factorial(n - 1); // Recursive case ++ } ++} +diff --color -NaurZ version-0/invariant.c version-1/invariant.c --- version-0/invariant.c 2024-02-01 14:14:20.690115847 +0100 +++ version-1/invariant.c 2024-02-01 14:14:20.690115847 +0100 @@ -3,7 +3,7 @@ @@ -46,7 +72,7 @@ diff -Naur version-0/invariant.c version-1/invariant.c } return 0; } -diff -Naur version-0/main.c version-1/main.c +diff --color -NaurZ version-0/main.c version-1/main.c --- version-0/main.c 2024-02-01 14:14:20.690115847 +0100 +++ version-1/main.c 2024-02-01 14:14:20.690115847 +0100 @@ -3,7 +3,7 @@ @@ -70,8 +96,8 @@ diff -Naur version-0/main.c version-1/main.c } return 0; } -diff -Naur version-0/mixed.c version-1/mixed.c ---- version-0/mixed.c 2024-02-02 12:03:12.953265907 +0100 +diff --color -NaurZ version-0/mixed.c version-1/mixed.c +--- version-0/mixed.c 2024-02-17 16:23:43.899856705 +0100 +++ version-1/mixed.c 2024-02-02 12:03:12.953265907 +0100 @@ -3,7 +3,7 @@ unsigned long long factorial(int n); @@ -94,7 +120,17 @@ diff -Naur version-0/mixed.c version-1/mixed.c } return 0; } -diff -Naur version-0/remove_non_existant.c version-1/remove_non_existant.c +diff --color -NaurZ version-0/prepending.c version-1/prepending.c +--- version-0/prepending.c 2025-08-07 13:36:05.599856189 +0200 ++++ version-1/prepending.c 2025-08-07 13:36:02.562534718 +0200 +@@ -1,3 +1,6 @@ ++#include ++// Function prototype declaration ++unsigned long long factorial(int n); + int main() { + int number; + unsigned long long result; +diff --color -NaurZ version-0/remove_non_existant.c version-1/remove_non_existant.c --- version-0/remove_non_existant.c 2024-02-02 15:12:32.535612751 +0100 +++ 
version-1/remove_non_existant.c 2024-02-02 15:12:57.222196547 +0100 @@ -1,8 +1,6 @@ @@ -106,8 +142,8 @@ diff -Naur version-0/remove_non_existant.c version-1/remove_non_existant.c printf("Enter a positive integer: "); scanf("%d", &number); // Check if the user has entered a negative integer -diff -Naur version-0/substractive.c version-1/substractive.c ---- version-0/substractive.c 2024-02-02 12:03:12.953265907 +0100 +diff --color -NaurZ version-0/substractive.c version-1/substractive.c +--- version-0/substractive.c 2024-02-17 16:30:39.766832111 +0100 +++ version-1/substractive.c 2024-02-02 12:03:12.953265907 +0100 @@ -3,7 +3,7 @@ unsigned long long factorial(int n); @@ -130,17 +166,3 @@ diff -Naur version-0/substractive.c version-1/substractive.c } return 0; } -diff -Naur version-0/appending.c version-1/appending.c ---- version-0/appending.c 2024-05-17 11:00:45.783231097 +0200 -+++ version-1/appending.c 2024-05-17 11:00:47.609897748 +0200 -@@ -19,3 +19,10 @@ - return 0; - } - // Function to calculate the factorial of a number -+unsigned long long factorial(int n) { -+ if (n == 0) { -+ return 1; // Base case: factorial of 0 is 1 -+ } else { -+ return n * factorial(n - 1); // Recursive case -+ } -+} diff --git a/mpatch/tests/samples/source_variant/version-0/anchor_below.c b/mpatch/tests/samples/source_variant/version-0/anchor_below.c new file mode 100644 index 00000000..1ceef6a5 --- /dev/null +++ b/mpatch/tests/samples/source_variant/version-0/anchor_below.c @@ -0,0 +1,25 @@ +#include +// Function prototype declaration +unsigned long long factorial(int n); +int main() { + int number; + unsigned long long result; + // Check if the user has entered a negative integer + if (number < 0) { + printf("Factorial of a negative number doesn't exist.\n"); + } else { + // Calculate factorial + result = factorial(number); + // Display the result + printf("Factorial of %d is %llu\n", number, result); + } + return 0; +} +// Function to calculate the factorial of a number +unsigned 
long long factorial(int n) { + if (n == 0) { + return 1; // Base case: factorial of 0 is 1 + } else { + return n * factorial(n - 1); // Recursive case + } +} diff --git a/mpatch/tests/samples/source_variant/version-0/prepending.c b/mpatch/tests/samples/source_variant/version-0/prepending.c new file mode 100644 index 00000000..66e4ea86 --- /dev/null +++ b/mpatch/tests/samples/source_variant/version-0/prepending.c @@ -0,0 +1,25 @@ +int main() { + int number; + unsigned long long result; + // Ask the user for input + printf("Enter a positive integer: "); + scanf("%d", &number); + // Check if the user has entered a negative integer + if (number < 0) { + printf("Factorial of a negative number doesn't exist.\n"); + } else { + // Calculate factorial + result = factorial(number); + // Display the result + printf("Factorial of %d is %llu\n", number, result); + } + return 0; +} +// Function to calculate the factorial of a number +unsigned long long factorial(int n) { + if (n == 0) { + return 1; // Base case: factorial of 0 is 1 + } else { + return n * factorial(n - 1); // Recursive case + } +} diff --git a/mpatch/tests/samples/source_variant/version-1/anchor_below.c b/mpatch/tests/samples/source_variant/version-1/anchor_below.c new file mode 100644 index 00000000..4e8c4fee --- /dev/null +++ b/mpatch/tests/samples/source_variant/version-1/anchor_below.c @@ -0,0 +1,27 @@ +#include +// Function prototype declaration +unsigned long long factorial(int n); +int main() { + int number; + unsigned long long result; + number = 3; + result = 0; + // Check if the user has entered a negative integer + if (number < 0) { + printf("Factorial of a negative number doesn't exist.\n"); + } else { + // Calculate factorial + result = factorial(number); + // Display the result + printf("Factorial of %d is %llu\n", number, result); + } + return 0; +} +// Function to calculate the factorial of a number +unsigned long long factorial(int n) { + if (n == 0) { + return 1; // Base case: factorial of 0 is 
1 + } else { + return n * factorial(n - 1); // Recursive case + } +} diff --git a/mpatch/tests/samples/source_variant/version-1/prepending.c b/mpatch/tests/samples/source_variant/version-1/prepending.c new file mode 100644 index 00000000..c1a8eaf2 --- /dev/null +++ b/mpatch/tests/samples/source_variant/version-1/prepending.c @@ -0,0 +1,28 @@ +#include +// Function prototype declaration +unsigned long long factorial(int n); +int main() { + int number; + unsigned long long result; + // Ask the user for input + printf("Enter a positive integer: "); + scanf("%d", &number); + // Check if the user has entered a negative integer + if (number < 0) { + printf("Factorial of a negative number doesn't exist.\n"); + } else { + // Calculate factorial + result = factorial(number); + // Display the result + printf("Factorial of %d is %llu\n", number, result); + } + return 0; +} +// Function to calculate the factorial of a number +unsigned long long factorial(int n) { + if (n == 0) { + return 1; // Base case: factorial of 0 is 1 + } else { + return n * factorial(n - 1); // Recursive case + } +} diff --git a/mpatch/tests/samples/target_variant/version-0/anchor_below.c b/mpatch/tests/samples/target_variant/version-0/anchor_below.c new file mode 100644 index 00000000..6c7e4a35 --- /dev/null +++ b/mpatch/tests/samples/target_variant/version-0/anchor_below.c @@ -0,0 +1,26 @@ +#include +int main() { + int number; + unsigned long long result; + // Ask the user for input + printf("Enter a positive integer: "); + scanf("%d", &number); + // Check if the user has entered a negative integer + if (number < 0) { + printf("Factorial of a negative number doesn't exist.\n"); + } else { + // Calculate factorial + result = factorial(number); + // Display the result + printf("Factorial of %d is %llu\n", number, result); + } + return 0; +} +// Function to calculate the factorial of a number +unsigned long long factorial(int n) { + if (n == 0) { + return 1; // Base case: factorial of 0 is 1 + } else { + 
return n * factorial(n - 1); // Recursive case + } +} diff --git a/mpatch/tests/samples/target_variant/version-0/prepending.c b/mpatch/tests/samples/target_variant/version-0/prepending.c new file mode 100644 index 00000000..66e4ea86 --- /dev/null +++ b/mpatch/tests/samples/target_variant/version-0/prepending.c @@ -0,0 +1,25 @@ +int main() { + int number; + unsigned long long result; + // Ask the user for input + printf("Enter a positive integer: "); + scanf("%d", &number); + // Check if the user has entered a negative integer + if (number < 0) { + printf("Factorial of a negative number doesn't exist.\n"); + } else { + // Calculate factorial + result = factorial(number); + // Display the result + printf("Factorial of %d is %llu\n", number, result); + } + return 0; +} +// Function to calculate the factorial of a number +unsigned long long factorial(int n) { + if (n == 0) { + return 1; // Base case: factorial of 0 is 1 + } else { + return n * factorial(n - 1); // Recursive case + } +} diff --git a/mpatch/tests/samples/target_variant/version-1/anchor_below.c b/mpatch/tests/samples/target_variant/version-1/anchor_below.c new file mode 100644 index 00000000..f35b6748 --- /dev/null +++ b/mpatch/tests/samples/target_variant/version-1/anchor_below.c @@ -0,0 +1,28 @@ +#include +int main() { + int number; + unsigned long long result; + // Ask the user for input + printf("Enter a positive integer: "); + scanf("%d", &number); + number = 3; + result = 0; + // Check if the user has entered a negative integer + if (number < 0) { + printf("Factorial of a negative number doesn't exist.\n"); + } else { + // Calculate factorial + result = factorial(number); + // Display the result + printf("Factorial of %d is %llu\n", number, result); + } + return 0; +} +// Function to calculate the factorial of a number +unsigned long long factorial(int n) { + if (n == 0) { + return 1; // Base case: factorial of 0 is 1 + } else { + return n * factorial(n - 1); // Recursive case + } +} diff --git 
a/mpatch/tests/samples/target_variant/version-1/prepending.c b/mpatch/tests/samples/target_variant/version-1/prepending.c new file mode 100644 index 00000000..c1a8eaf2 --- /dev/null +++ b/mpatch/tests/samples/target_variant/version-1/prepending.c @@ -0,0 +1,28 @@ +#include +// Function prototype declaration +unsigned long long factorial(int n); +int main() { + int number; + unsigned long long result; + // Ask the user for input + printf("Enter a positive integer: "); + scanf("%d", &number); + // Check if the user has entered a negative integer + if (number < 0) { + printf("Factorial of a negative number doesn't exist.\n"); + } else { + // Calculate factorial + result = factorial(number); + // Display the result + printf("Factorial of %d is %llu\n", number, result); + } + return 0; +} +// Function to calculate the factorial of a number +unsigned long long factorial(int n) { + if (n == 0) { + return 1; // Base case: factorial of 0 is 1 + } else { + return n * factorial(n - 1); // Recursive case + } +} diff --git a/results/results-without-outliers.png b/results/results-without-outliers.png deleted file mode 100644 index 067e7343..00000000 Binary files a/results/results-without-outliers.png and /dev/null differ diff --git a/src/main/java/org/variantsync/evaluation/util/diff/DiffParser.java b/src/main/java/org/variantsync/evaluation/util/diff/DiffParser.java index e233104d..56333305 100644 --- a/src/main/java/org/variantsync/evaluation/util/diff/DiffParser.java +++ b/src/main/java/org/variantsync/evaluation/util/diff/DiffParser.java @@ -81,30 +81,17 @@ public static OriginalDiff toOriginalDiff(final List lines) { fileDiffContent = new ArrayList<>(); } } - } else if (line.contains(fileDiffStart)) { - if (indexNext < lines.size()) { - final String nextLine = lines.get(indexNext); - if (nextLine.startsWith(fileDiffFollow)) { - final String additionalContent = line.substring(0, line.indexOf(fileDiffStart)); - // Create a FileDiff from the collected lines - if 
(fileDiffContent != null) { - fileDiffContent.add(additionalContent); - fileDiffs.add(parseFileDiff(fileDiffContent)); - } - // Reset the lines that should go into the next FileDiff - fileDiffContent = new ArrayList<>(); - fileDiffContent.add(line.substring(line.indexOf(fileDiffStart))); - continue; - } - } } if (fileDiffContent == null) { throw new IllegalArgumentException("The provided lines do not contain one of the expected fileDiffStart values"); } fileDiffContent.add(line); } + // Parse the content of the last file diff - fileDiffs.add(parseFileDiff(fileDiffContent)); + if (fileDiffContent != null) { + fileDiffs.add(parseFileDiff(fileDiffContent)); + } return new OriginalDiff(fileDiffs); } diff --git a/src/main/java/org/variantsync/evaluation/util/diff/components/FileDiff.java b/src/main/java/org/variantsync/evaluation/util/diff/components/FileDiff.java index 77f5b978..ee9d9968 100644 --- a/src/main/java/org/variantsync/evaluation/util/diff/components/FileDiff.java +++ b/src/main/java/org/variantsync/evaluation/util/diff/components/FileDiff.java @@ -1,5 +1,6 @@ package org.variantsync.evaluation.util.diff.components; +import org.jetbrains.annotations.NotNull; import org.variantsync.evaluation.util.diff.lines.Line; import org.variantsync.evaluation.patching.Change; @@ -29,7 +30,7 @@ public int changeCount() { } @Override - public String toString() { + public @NotNull String toString() { StringBuilder sb = new StringBuilder(); for (String line : toLines()) { sb.append(line); diff --git a/src/main/java/org/variantsync/evaluation/util/diff/lines/ChangedLine.java b/src/main/java/org/variantsync/evaluation/util/diff/lines/ChangedLine.java index 92e8f3b7..00a649cb 100644 --- a/src/main/java/org/variantsync/evaluation/util/diff/lines/ChangedLine.java +++ b/src/main/java/org/variantsync/evaluation/util/diff/lines/ChangedLine.java @@ -1,5 +1,7 @@ package org.variantsync.evaluation.util.diff.lines; +import org.jetbrains.annotations.NotNull; + import java.nio.file.Path; 
import java.util.Objects; @@ -12,7 +14,7 @@ public record ChangedLine(Path file, Line line) { @Override - public String toString() { + public @NotNull String toString() { return file + "\n" + line; } diff --git a/src/main/java/org/variantsync/evaluation/util/shell/GitConfigCommand.java b/src/main/java/org/variantsync/evaluation/util/shell/GitConfigCommand.java new file mode 100644 index 00000000..a1c1b6b2 --- /dev/null +++ b/src/main/java/org/variantsync/evaluation/util/shell/GitConfigCommand.java @@ -0,0 +1,34 @@ +package org.variantsync.evaluation.util.shell; + +import java.util.ArrayList; + +public class GitConfigCommand extends ShellCommand { + private static final String COMMAND = "git"; + private static final String SUB_COMMAND = "config"; + private final ArrayList args = new ArrayList<>(); + + private GitConfigCommand() { + } + + @Override + public String[] parts() { + final String[] parts = new String[args.size() + 2]; + + parts[0] = COMMAND; + parts[1] = SUB_COMMAND; + int index = 0; + for (; index < args.size(); index++) { + parts[index + 2] = args.get(index); + } + return parts; + } + + public static GitConfigCommand DisableGPGSignLocally() { + final GitConfigCommand command = new GitConfigCommand(); + command.args.add("--local"); + command.args.add("commit.gpgsign"); + command.args.add("false"); + return command; + } + +} diff --git a/src/main/java/org/variantsync/evaluation/util/shell/MPatchCommand.java b/src/main/java/org/variantsync/evaluation/util/shell/MPatchCommand.java index 42af8a85..7a18cd70 100644 --- a/src/main/java/org/variantsync/evaluation/util/shell/MPatchCommand.java +++ b/src/main/java/org/variantsync/evaluation/util/shell/MPatchCommand.java @@ -1,16 +1,22 @@ package org.variantsync.evaluation.util.shell; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; -import java.util.Arrays; /** * Represents a shell 'patch' command that can be executed using a ShellExecutor */ public class MPatchCommand extends 
ShellCommand { - private static final String COMMAND = "mpatch"; + private final String command; private final ArrayList args = new ArrayList<>(); + private MPatchCommand() { + String home = System.getProperty("user.home"); + Path binary = Paths.get(home, ".cargo", "bin", "mpatch"); + this.command = binary.toString(); + } + /** * A MPatchCommand configured as recommended in the documentation of 'patch' * @@ -74,20 +80,11 @@ public MPatchCommand maxMatchDistance(final int maxMatchDistance) { public String[] parts() { final String[] parts = new String[args.size() + 1]; - parts[0] = COMMAND; + parts[0] = command; int index = 0; for (; index < args.size(); index++) { parts[index + 1] = args.get(index); } - //for (var part : parts) { - // System.out.print(part + " "); - // } - // System.out.println(); return parts; } - - @Override - public String toString() { - return "mpatch: " + Arrays.toString(parts()); - } } \ No newline at end of file diff --git a/src/main/java/org/variantsync/evaluation/util/shell/ShellCommand.java b/src/main/java/org/variantsync/evaluation/util/shell/ShellCommand.java index b2b43675..a276259f 100644 --- a/src/main/java/org/variantsync/evaluation/util/shell/ShellCommand.java +++ b/src/main/java/org/variantsync/evaluation/util/shell/ShellCommand.java @@ -3,7 +3,6 @@ import org.variantsync.evaluation.error.ShellException; import org.variantsync.functjonal.Result; -import java.util.Arrays; import java.util.List; /** @@ -29,6 +28,11 @@ public Result, ShellException> interpretResult(final int resultCode @Override public String toString() { - return Arrays.toString(this.parts()); + StringBuilder sb = new StringBuilder(); + for (String part : parts()) { + sb.append(part); + sb.append(" "); + } + return sb.toString(); } -} \ No newline at end of file +} diff --git a/src/main/java/org/variantsync/evaluation/util/shell/ShellExecutor.java b/src/main/java/org/variantsync/evaluation/util/shell/ShellExecutor.java index 95af4897..7ef845e9 100644 --- 
a/src/main/java/org/variantsync/evaluation/util/shell/ShellExecutor.java +++ b/src/main/java/org/variantsync/evaluation/util/shell/ShellExecutor.java @@ -13,10 +13,9 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; import java.util.function.Consumer; /** @@ -26,8 +25,8 @@ public class ShellExecutor { private final Consumer outputReader; private final Consumer errorReader; private final Path workDir; - private final ExecutorService outputCollection; - private final ExecutorService errorCollection; + private final long timeout; + private final TimeUnit timeoutUnit; /** * Initialize a new ShellExecutor @@ -40,7 +39,8 @@ public ShellExecutor(final Consumer outputReader, final Consumer } /** - * Initialize a new ShellExecutor that executes all commands in the given working directory + * Initialize a new ShellExecutor that executes all commands in the given + * working directory * * @param outputReader Consumer for shell's normal output * @param errorReader Consumer for shell's error output @@ -50,8 +50,34 @@ public ShellExecutor(final Consumer outputReader, final Consumer this.workDir = workDir; this.outputReader = outputReader; this.errorReader = errorReader; - outputCollection = Executors.newSingleThreadExecutor(); - errorCollection = Executors.newSingleThreadExecutor(); + this.timeout = 0; + this.timeoutUnit = null; + } + + /** + * Initialize a new ShellExecutor + * + * @param outputReader Consumer for shell's normal output + * @param errorReader Consumer for shell's error output + */ + public ShellExecutor(final Consumer outputReader, final Consumer errorReader, long timeout, TimeUnit timeoutUnit) { + this(outputReader, errorReader, null, timeout, timeoutUnit); + } + + /** + * Initialize a new ShellExecutor that executes all 
commands in the given + * working directory + * + * @param outputReader Consumer for shell's normal output + * @param errorReader Consumer for shell's error output + * @param workDir The working directory + */ + public ShellExecutor(final Consumer outputReader, final Consumer errorReader, final Path workDir, long timeout, TimeUnit timeoutUnit) { + this.workDir = workDir; + this.outputReader = outputReader; + this.errorReader = errorReader; + this.timeout = timeout; + this.timeoutUnit = timeoutUnit; } /** @@ -72,7 +98,7 @@ public Result, ShellException> execute(final ShellCommand command) */ public Result, ShellException> execute(final ShellCommand command, final Path executionDir) { if (System.getProperty("os.name").toLowerCase().startsWith("windows")) { - throw new SetupError("The synchronization study can only be executed under Linux!"); + throw new SetupError("The evaluation can only be executed under Linux!"); } final ProcessBuilder builder = new ProcessBuilder(); @@ -82,55 +108,56 @@ public Result, ShellException> execute(final ShellCommand command, Logger.debug("Executing '" + command + "' in directory " + builder.directory()); builder.command(command.parts()); - final Process process; - final Future outputFuture; - final Future errorFuture; + Process process; final List output = new ArrayList<>(); final Consumer shareOutput = s -> { output.add(s); outputReader.accept(s); }; + final int exitCode; try { process = builder.start(); - outputFuture = outputCollection.submit(collectOutput(process.getInputStream(), shareOutput)); - errorFuture = errorCollection.submit(collectOutput(process.getErrorStream(), errorReader)); } catch (final IOException e) { - Logger.error("Was not able to execute " + command, e); + Logger.warn("Was not able to execute " + command, e); e.printStackTrace(); return Result.Failure(new ShellException(e)); } - - final int exitCode; - try { + try(ExecutorService executor = Executors.newFixedThreadPool(2)) { + executor.submit(() -> 
collectOutput(process.inputReader(), shareOutput)); + executor.submit(() -> collectOutput(process.errorReader(), errorReader)); + if (timeout > 0 && timeoutUnit != null) { + boolean completed = process.waitFor(timeout, timeoutUnit); + if (!completed) { + Logger.debug("Command timed out after 60 seconds:"); + Logger.debug(command.toString()); + process.destroy(); + executor.shutdown(); + } + } exitCode = process.waitFor(); - outputFuture.get(); - errorFuture.get(); - } catch (final InterruptedException | ExecutionException e) { - Logger.error("Interrupted while waiting for process to end.", e); + } catch (final InterruptedException e) { + Logger.warn("Interrupted while waiting for process to end.", e); return Result.Failure(new ShellException(e)); + } finally { + if (process.isAlive()) { + // Make sure the process is killed in case of an error + process.destroy(); + } } + + Logger.debug("Command '" + command + "' returned with exit code " + exitCode); return command.interpretResult(exitCode, output); } -private Runnable collectOutput(final InputStream inputStream, final Consumer consumer) { - return () -> { - try (inputStream; final BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, Charsets.UTF_8))) { - StringBuilder output = new StringBuilder(); - int character; - while ((character = reader.read()) != -1) { - output.append((char) character); - } - if (output.isEmpty()) { - return; - } - String[] lines = output.toString().split("\n"); - for (String line : lines) { +private void collectOutput(final BufferedReader reader, final Consumer consumer) { + try (reader) { + String line; + while ((line = reader.readLine()) != null) { consumer.accept(line); } } catch (final IOException e) { - Logger.error("Exception thrown while reading stream of Shell command.", e); + Logger.debug("Could not read output stream of Shell command. 
Command probably reached the configured timeout of %s %s.".formatted(timeout, timeoutUnit), e); } - }; -} + } } diff --git a/src/main/kotlin/org/variantsync/evaluation/PatcherEvaluationMain.kt b/src/main/kotlin/org/variantsync/evaluation/PatcherEvaluationMain.kt index 7c896b1f..5c764ba4 100644 --- a/src/main/kotlin/org/variantsync/evaluation/PatcherEvaluationMain.kt +++ b/src/main/kotlin/org/variantsync/evaluation/PatcherEvaluationMain.kt @@ -1,173 +1,178 @@ package org.variantsync.evaluation -import org.apache.commons.io.FileUtils -import org.eclipse.jgit.api.Git -import org.tinylog.kotlin.Logger -import org.variantsync.evaluation.execution.* -import org.variantsync.evaluation.util.shell.CpCommand -import org.variantsync.evaluation.util.shell.RmCommand -import org.variantsync.evaluation.util.shell.ShellExecutor -import org.yaml.snakeyaml.LoaderOptions -import org.yaml.snakeyaml.Yaml import java.io.File import java.io.IOException import java.io.UncheckedIOException -import java.math.RoundingMode import java.nio.ByteBuffer -import java.nio.file.FileVisitResult import java.nio.file.Files import java.nio.file.Path -import java.nio.file.SimpleFileVisitor -import java.nio.file.attribute.BasicFileAttributes import java.security.SecureRandom -import java.text.DecimalFormat import java.util.* -import java.util.concurrent.BlockingQueue +import java.util.concurrent.Callable import java.util.concurrent.Executors -import java.util.concurrent.LinkedBlockingQueue +import java.util.concurrent.Future import java.util.concurrent.TimeUnit -import java.util.stream.Collectors import kotlin.collections.ArrayList import kotlin.collections.HashMap import kotlin.collections.HashSet -import kotlin.math.max -import kotlin.math.min import kotlin.system.exitProcess +import org.apache.commons.io.FileUtils +import org.tinylog.kotlin.Logger +import org.variantsync.evaluation.execution.* +import org.variantsync.evaluation.util.shell.RmCommand +import 
org.variantsync.evaluation.util.shell.ShellExecutor class CherryPickStudy( - val config: EvalConfig, - dataset: CherryDataset, - repetition: Int, - idProvider: IDProvider, - completedRuns: Set, -) { - // The study tasks that are to be executed in parallel - private val evalTasks: MutableList - private val numThreads: Int - private val availableOperations: BlockingQueue + val config: EvalConfig, + val dataset: CherryDataset, + val repetition: Int, + val idProvider: IDProvider, + val completedRuns: Set, +) : Callable { - /** - * Initialize the study from the given configuration - */ - init { - if (!Files.exists(config.EXPERIMENT_DIR_RESULTS())) { - Files.createDirectories(config.EXPERIMENT_DIR_RESULTS()) - } - val repoPath: Path = cloneGitHubRepo(config, dataset.repositoryId) - val t = min(config.EXPERIMENT_THREAD_COUNT(), dataset.cherryPicks.size / 100) - this.numThreads = max(1, t) - this.availableOperations = LinkedBlockingQueue(numThreads) - - Logger.info("Preparing working directories for $numThreads threads.") - for (i in 1..numThreads) { - // Add one operations instance for each thread; each instance defines its own working directory - val operations = EvalOperations(config.EXPERIMENT_DIR_MAIN(), repoPath) - // Clean old variant files - cleanVariantDirectories(operations) - // Copy the source and target variant to the respective variant directories - prepareVariantDirectories(operations, repoPath) - availableOperations.add(operations) - } - - this.evalTasks = ArrayList() + fun initializeEvalTasks(evalSetup: EvalOperations): MutableList { + val evalTasks: MutableList = ArrayList() for (cherryPick in dataset.cherryPicks) { val runID = idProvider.next() - val run = EvaluationRun(repetition, dataset.datasetName, cherryPick.cherryCommit, cherryPick.expectedResultCommit) + val run = + EvaluationRun( + repetition, + dataset.datasetName, + cherryPick.cherryCommit, + cherryPick.expectedResultCommit + ) if (completedRuns.contains(run)) { Logger.info("Skipped cherry pick 
of run $runID (already processed)") continue } evalTasks.add( - CherryPickEvalTask( - repetition, - config, - dataset.datasetName, - cherryPick, - availableOperations, - runID, - run, - ) + CherryPickEvalTask( + repetition, + config, + dataset.datasetName, + cherryPick, + evalSetup, + runID, + run, + ) ) } + return evalTasks } - /** - * Execute the study. - */ - fun run() { - val threadPool = Executors.newFixedThreadPool(numThreads) - Logger.info("Scheduling ${evalTasks.size} tasks...") + /** Execute the study. */ + override fun call(): CherryDataset { + var evalSetup: EvalOperations? = null + try { + if (!Files.exists(config.EXPERIMENT_DIR_RESULTS())) { + Files.createDirectories(config.EXPERIMENT_DIR_RESULTS()) + } + + val repoPath: Path = cloneGitHubRepo(config, dataset.repositoryId) + evalSetup = EvalOperations(config, repoPath) - val futures = evalTasks.stream() - .map { runnable: CherryPickEvalTask -> FutureAndEvalRun(threadPool.submit(runnable), runnable.evalRun) } - .collect(Collectors.toList()) + // Clean old variant files + cleanVariantDirectories(evalSetup) + // Copy the source and target variant to the respective variant directories + prepareVariantDirectories(evalSetup, repoPath) - Logger.info("Scheduled all tasks.") + val evalTasks = initializeEvalTasks(evalSetup) - val hadTimeout = waitForShutdown(threadPool, futures, config) + Logger.info( + "Beginning execution of ${evalTasks.size} evaluation tasks for " + + this.dataset.datasetName + ) - if (hadTimeout) { - Logger.info("Timeout detected. 
Marking task of ${evalTasks.first().evalRun.datasetName} as completed.") - for (evalTask in evalTasks) { - markEvalRun(evalTask.evalRun, config.EXPERIMENT_PROCESSED_FILE()) + var processed = 0uL + for (task in evalTasks) { + processed++ + if (processed % 25uL == 0uL) { + Logger.info( + String.format( + "Running task %s of %s.", + processed.toString(), + evalTasks.size.toString(), + ) + ) + } + executeTask(config, task) } - } - Logger.info("Running clean up.") - // Delete all workdirs - for (operations in this.availableOperations) { - try { - FileUtils.deleteDirectory(operations.workDir.toFile()) - } catch (e: Exception) { - Logger.debug(e) - if (Files.exists(operations.workDir)) { - Logger.debug("Trying to remove directory with 'rm -rf'") - if (ShellExecutor(Logger::warn, Logger::warn, operations.workDir) - .execute(RmCommand(operations.workDir).recursive().force()).isSuccess) { - Logger.debug("Success!") - } - } + Logger.info(String.format("Finished %s tasks.", evalTasks.size.toString())) + } catch (e: Exception) { + Logger.debug { "Was not able to evaluate patchers on " + dataset.datasetName } + Logger.debug(e) + } finally { + if (evalSetup != null) { + clean(evalSetup) } } - Logger.info("Cleaned all working directories.") + return dataset } } -private fun cloneGitHubRepo(config: EvalConfig, repoId: String): Path { - val repoUri = "https://github.com/$repoId.git" - val cloneDir = config.EXPERIMENT_DIR_REPOS().resolve(repoId.replace("/", "_")) +fun executeTask(config: EvalConfig, task: CherryPickEvalTask) { + try { + val taskOutcome = task.execute() + val runID = taskOutcome.runID - if (Files.exists(cloneDir)) { - return cloneDir + if (taskOutcome.result.isPresent) { + for (result in taskOutcome.result.get()) { + saveResult(result, runID) + } + } + } catch (e: Throwable) { + Logger.error("Failed to finish task!") + Logger.error(e) + e.printStackTrace() + } finally { + markEvalRun(task.evalRun, config.EXPERIMENT_PROCESSED_FILE()) + } +} + +fun clean(evalSetup: 
EvalOperations) { + Logger.debug("Running clean up.") + try { + FileUtils.deleteDirectory(evalSetup.workDir.toFile()) + } catch (e: Exception) { + Logger.debug(e) + if (Files.exists(evalSetup.workDir)) { + Logger.debug("Trying to remove directory with 'rm -rf'") + if (ShellExecutor(Logger::warn, Logger::warn, evalSetup.workDir) + .execute(RmCommand(evalSetup.workDir).recursive().force()) + .isSuccess + ) { + Logger.debug("Success!") + } + } } - - Logger.info("cloning $repoUri into $cloneDir") - Git.cloneRepository().setURI(repoUri).setDirectory(cloneDir.toFile()).call().close() - Logger.info("done") - return cloneDir } fun main(args: Array) { if (args.isEmpty()) { System.err.println( - "The first argument should provide the path to the configuration file that is to be used" + "The first argument should provide the path to the configuration file that is to be used" ) } val config = EvalConfig(File(args[0])) Logger.info("Starting experiment initialization.") - val datasetsPerLanguage: Map> = try { - loadPRDatasets(config) - } catch (e: IOException) { - Logger.error( - "Was not able to load the yaml datasets from '" - + config.EXPERIMENT_DATASETS() + "'" - ) - throw UncheckedIOException(e) - } + val datasetsPerLanguage: Map> = + try { + loadPRDatasets(config) + } catch (e: IOException) { + Logger.error( + "Was not able to load the yaml datasets from '" + + config.EXPERIMENT_DATASETS() + + "'" + ) + throw UncheckedIOException(e) + } - val seed: ByteArray = ByteBuffer.allocate(java.lang.Long.BYTES).putLong(config.EXPERIMENT_REPEATS_START() - + config.SEED()).array() + val seed: ByteArray = + ByteBuffer.allocate(java.lang.Long.BYTES) + .putLong(config.EXPERIMENT_REPEATS_START() + config.SEED()) + .array() val idProvider = IDProvider(config.EXPERIMENT_START_ID()) var id = 0uL val rand = SecureRandom(seed) @@ -175,7 +180,7 @@ fun main(args: Array) { cloneDatasets(allSamples, config) - val n = 5 + val n = config.EXPERIMENT_THREAD_COUNT() Logger.info("Processing $n repos in 
parallel") val threadPool = Executors.newFixedThreadPool(n) @@ -188,366 +193,51 @@ fun main(args: Array) { val completedRuns = completedRunsAll.getOrDefault(repetition, HashMap()) var completed = 0 Logger.info("Already considered ${completedRuns.size} repos.") - completedRuns.forEach { s -> completed += s.value.size} - Logger.info("Processed a total of $completed evaluation runs.\n") + completedRuns.forEach { s -> completed += s.value.size } + Logger.info("Already processed a total of $completed evaluation runs.\n") Thread.sleep(5000) - - Logger.info("Considering a total of $numCherryPicks cherry-picks for repetition $repetition") + Logger.info( + "Considering a total of $numCherryPicks cherry-picks for repetition $repetition" + ) + val futures: MutableList> = ArrayList() for (dataset in allSamples[repetitionIndex]) { - while (idProvider.next() < id) {} id += dataset.cherryPicks.size.toUInt() - - if (completedRuns.contains(dataset.datasetName) && completedRuns[dataset.datasetName]!!.size == dataset.cherryPicks.size) { + if (completedRuns.contains(dataset.datasetName) && + completedRuns[dataset.datasetName]!!.size == dataset.cherryPicks.size + ) { // Skip this dataset, it was already processed - Logger.info("Skipping evaluation of cherry picks from ${dataset.datasetName} (rep.: $repetition)") - printProgress(completed, numCherryPicks, repetition, 0uL) - continue - } - threadPool.submit { - val i = id - Logger.info("Preparing evaluation of cherry picks from ${dataset.datasetName}") - val study = CherryPickStudy(config, dataset, repetition, idProvider, completedRuns.getOrDefault(dataset.datasetName, HashSet())) - try { - study.run() - } catch (e: Exception) { - e.printStackTrace() - Logger.error(e) - } - completed += dataset.cherryPicks.size - printProgress(completed, numCherryPicks, repetition, i) - } - } - threadPool.awaitTermination(10, TimeUnit.DAYS) - } - threadPool.shutdown() - - exitProcess(0) -} - -private fun printProgress(completed: Int, numCherryPicks: Int, 
repetition: Int, i: ULong) { - val completionPercentage = 100 * (completed.toDouble() / numCherryPicks.toDouble()) - val df = DecimalFormat("#.##") - df.roundingMode = RoundingMode.DOWN - Logger.info( - "(Rep.: $repetition, ID: $i) Finished $completed of $numCherryPicks cherry picks (${ - df.format( - completionPercentage - ) - }%)\n" - ) -} - -private fun cloneDatasets( - allSamples: ArrayList>, - config: EvalConfig -) { - Logger.info("Looking for datasets that still should be cloned.") - val datasetsToClone = HashSet() - for (s in allSamples) { - for (dataset in allSamples[0]) { - datasetsToClone.add(dataset) - } - } - - Logger.info("There are ${datasetsToClone.size} to check.") - val threadPool = Executors.newFixedThreadPool(config.EXPERIMENT_THREAD_COUNT()) - for (dataset in datasetsToClone) { - threadPool.submit { - try { - cloneGitHubRepo(config, dataset.repositoryId) - } catch (e: Exception) { - Thread.sleep(60_000) - cloneGitHubRepo(config, dataset.repositoryId) - } - } - } - threadPool.shutdown() - if (!threadPool.awaitTermination(1, TimeUnit.DAYS)) { - Logger.error("Thread pool timeout.") - } - Logger.info("Cloned all datasets\n") -} - -private fun createOrLoadSamples( - config: EvalConfig, - datasetsPerLanguage: Map>, - rand: SecureRandom -): ArrayList> { - if (Files.exists(config.EXPERIMENT_SAMPLE_FILE())) { - Logger.info("Found existing sample file...loading it\n") - return loadSample(config.EXPERIMENT_SAMPLE_FILE()) - } - - val allSamples = ArrayList>() - for (i in config.EXPERIMENT_REPEATS_START()..config.EXPERIMENT_REPEATS_END()) { - allSamples.add(ArrayList()) - } - val langs = ArrayList(datasetsPerLanguage.keys) - langs.sort() - for (language in langs) { - val datasets = datasetsPerLanguage[language]!! 
- val sample: List> = if (config.EXPERIMENT_ENABLE_SAMPLING()) { - Logger.info("Sampling for next language $language with ${datasets.size} usable repositories") - sampleCherries(config, datasets, rand) - } else { - Logger.info("Loading dataset for $language with ${datasets.size} usable repositories") - val temp = ArrayList>() - for (i in config.EXPERIMENT_REPEATS_START()..config.EXPERIMENT_REPEATS_END()) { - temp.add(datasets) - } - temp - } - for (sampleList in sample.withIndex()) { - allSamples[sampleList.index].addAll(sampleList.value) - } - } - // Shuffle the datasets to consider repos in random order - for (repetition in config.EXPERIMENT_REPEATS_START()..config.EXPERIMENT_REPEATS_END()) { - val repetitionIndex = repetition - config.EXPERIMENT_REPEATS_START() - allSamples[repetitionIndex].shuffle(rand) - } - Logger.info("Done.\n") - saveSample(config.EXPERIMENT_SAMPLE_FILE(), allSamples) - return allSamples -} - -fun sampleCherries(config: EvalConfig, datasets: List, rand: SecureRandom): List> { - val allCherryPicks = HashMap() - // Collect all cherry picks and associate them with the dataset from which they came - for (dataset in datasets) { - for (cherryPick in dataset.cherryPicks) { - val datasetCopy = CherryDataset(dataset.datasetName, dataset.repositoryId, dataset.language, ArrayList()) - allCherryPicks[cherryPick] = datasetCopy - } - } - - val sampleSize = determineSampleSize(config, allCherryPicks.keys.size) - Logger.info("Considering ${config.EXPERIMENT_REPEATS_COUNT()} representative samples of $sampleSize cherry picks " + - "for ${allCherryPicks.keys.size} cherry picks in total.") - - val sample: MutableList> = ArrayList() - val cherries: List = ArrayList(allCherryPicks.keys) - for (repetition in config.EXPERIMENT_REPEATS_START()..config.EXPERIMENT_REPEATS_END()) { - val cherrySubset = cherries.shuffled(rand).subList(0, sampleSize) - val remainingDatasets = HashMap>() - for (cherry in cherrySubset) { - val cherryPickList = 
remainingDatasets.getOrPut(allCherryPicks[cherry]!!){ ArrayList()} - cherryPickList.add(cherry) - } - - val datasetSubset: MutableList = ArrayList() - for (dataset in remainingDatasets.keys) { - dataset.cherryPicks = remainingDatasets[dataset]!! - datasetSubset.add(dataset) - } - - val sampledCherries = countCherryPicks(datasetSubset) - Logger.info("Created sample of $sampledCherries cherry picks for repetition $repetition.") - if (sampledCherries != sampleSize) { - Logger.error("Mismatch of expected to actual sample size") - } - - sample.add(datasetSubset) - } - return sample -} - -fun countCherryPicks(datasets: List): Int { - var totalNumberOfCherryPicks = 0 - for (dataset in datasets) { - totalNumberOfCherryPicks += dataset.cherryPicks.size - } - return totalNumberOfCherryPicks -} - -class YamlFileVisitor : SimpleFileVisitor() { - val yamlFiles = mutableListOf() - - override fun visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult { - if (file.toString().endsWith(".yaml")) { - yamlFiles.add(file) - } - return FileVisitResult.CONTINUE - } - - override fun visitFileFailed(file: Path?, exc: IOException?): FileVisitResult { - return FileVisitResult.CONTINUE - } -} - -fun loadPRDatasets(config: EvalConfig): Map> { - val datasetsPerLanguage = HashMap>() - for (yamlFile in getYamlFiles(config.EXPERIMENT_DATASETS())) { - val dataset = loadDataset(yamlFile, config.EXPERIMENT_CHERRY_TYPE()) - if (dataset.isPresent) { - val datasetSize = dataset.get().cherryPicks.size - if (datasetSize < config.EXPERIMENT_DATASET_MIN_SIZE() || datasetSize > config.EXPERIMENT_DATASET_MAX_SIZE()) { Logger.info( - ("Skipping %s with %s cherry picks because its size is outside the range (%d, %d) set in " + - "the configuration.").format( - dataset.get().datasetName, - datasetSize, - config.EXPERIMENT_DATASET_MIN_SIZE(), - config.EXPERIMENT_DATASET_MAX_SIZE(), - ) + "Skipping evaluation of cherry picks from ${dataset.datasetName} (rep.: $repetition): Already processed." 
) + printProgress(completed, numCherryPicks, repetition) continue } - if (datasetSize == 0) { - continue + val study = + CherryPickStudy( + config, + dataset, + repetition, + idProvider, + completedRuns.getOrDefault(dataset.datasetName, HashSet()) + ) + val future: Future = threadPool.submit(study) + futures.add(future) + } + threadPool.shutdown() + for (future in futures) { + val dataset = future.get() + completed += dataset.cherryPicks.size + if (config.CLEAN_REPOSITORIES()) { + val cloneDir = + config.EXPERIMENT_DIR_REPOS() + .resolve(dataset.repositoryId.replace("/", "_")) + cloneDir.toFile().deleteRecursively() } - val list = datasetsPerLanguage.getOrPut(dataset.get().language) { ArrayList() } - list.add(dataset.get()) - } - } - return datasetsPerLanguage -} - -fun getYamlFiles(directoryPath: Path): List { - val yamlFileVisitor = YamlFileVisitor() - - Files.walkFileTree(directoryPath, yamlFileVisitor) - - return yamlFileVisitor.yamlFiles -} - -enum class CherryType { - Trivial, - Complex, - Both, -} - -fun loadDataset(pathToYaml: Path, cherryType: CherryType): Optional { - val parseException = IllegalArgumentException("the yaml file under $pathToYaml cannot be parsed into a pr dataset") - - val loaderOptions = LoaderOptions() - loaderOptions.codePointLimit = Integer.MAX_VALUE - val yaml = Yaml(loaderOptions) - val entries = yaml.loadAll(Files.readString(pathToYaml)).iterator().next() - - if (entries !is List<*>) { - throw parseException - } - - val repoId = entries[0] - if (repoId !is HashMap<*, *>) { - throw parseException - } - val repoName = repoId["repo_name"] - if (repoName !is String) { - throw parseException - } - - val language = repoId["language"] - if (language !is String) { - throw parseException - } - - val cherryPicks = ArrayList() - val prEntries = entries[1] - if (prEntries !is List<*>) { - throw parseException - } - - var id = 0 - for (cp in prEntries) { - if (cp !is HashMap<*, *>) { - throw parseException - } - val isTrivial = cp["is_trivial"] 
as? Boolean ?: true - - if (cherryType == CherryType.Trivial && !isTrivial) { - continue - } else if (cherryType == CherryType.Complex && isTrivial) { - continue - } - - val cherryAndTarget = cp["cherry_and_target"] - if (cherryAndTarget !is HashMap<*, *>) { - throw parseException - } - - val cherry = cherryAndTarget["cherry"] - val target = cherryAndTarget["target"] - - if (cherry !is HashMap<*, *> || target !is HashMap<*, *>) { - throw parseException - } - - val cherryParents = cherry["parent_ids"] - val targetParents = target["parent_ids"] - if (cherryParents !is List<*> || targetParents !is List<*>) { - throw parseException + printProgress(completed, numCherryPicks, repetition) } - if (cherryParents.size != 1 || targetParents.size != 1) { - // We filter all cherry-pick scenarios with merges - continue - } - - - val cherryId = cherry["id"] - val cherryParentId = cherryParents[0] - // The target of a cherry-pick is what we consider the expected result - // The parent of this target is our actual target to which we want to propagate the changes - val targetId = targetParents[0] - val expectedResultId = target["id"] - - if (cherryParentId !is String || expectedResultId !is String || cherryId !is String || targetId !is String) { - return Optional.empty() - } - - cherryPicks.add(CherryPick(id, cherryId, cherryParentId, targetId, expectedResultId, isTrivial)) - id++ - } - - return Optional.of(CherryDataset(pathToYaml.fileName.toString(), repoName, language, cherryPicks)) -} - -private fun prepareVariantDirectories(operations: EvalOperations, gitHubRepoPath: Path) { - Logger.debug("Creating new source and target variant copies.") - operations.shell.execute(CpCommand(gitHubRepoPath, operations.sourceVariantV0).recursive()) - .expect("Was not able to copy source variant V0.") - operations.shell.execute(CpCommand(gitHubRepoPath, operations.sourceVariantV1).recursive()) - .expect("Was not able to copy source variant V1.") - 
operations.shell.execute(CpCommand(gitHubRepoPath, operations.targetVariantV0).recursive()) - .expect("Was not able to copy target variant V0.") - operations.shell.execute(CpCommand(gitHubRepoPath, operations.targetVariantV1).recursive()) - .expect("Was not able to copy target variant V1.") -} - -private fun cleanVariantDirectories(operations: EvalOperations) { - Logger.debug("Cleaning old variant files.") - if (Files.exists(operations.sourceVariantV0)) { - operations.shell.execute(RmCommand(operations.sourceVariantV0).recursive().force()) - .expect("Was not able to remove source variant V0.") - } - if (Files.exists(operations.sourceVariantV1)) { - operations.shell.execute(RmCommand(operations.sourceVariantV1).recursive().force()) - .expect("Was not able to remove source variant V1.") - } - if (Files.exists(operations.targetVariantV0)) { - operations.shell.execute(RmCommand(operations.targetVariantV0).recursive().force()) - .expect("Was not able to remove target variant V0.") - } - if (Files.exists(operations.targetVariantV1)) { - operations.shell.execute(RmCommand(operations.sourceVariantV1).recursive().force()) - .expect("Was not able to remove target variant V1.") - } -} - -private fun loadCompletedRuns(config: EvalConfig): HashMap>> { - if (!Files.exists(config.EXPERIMENT_DIR_RESULTS())) { - return HashMap() + threadPool.awaitTermination(10, TimeUnit.DAYS) } - val completedRuns = loadProcessedRuns(config) - val map = HashMap>>() - for (run in completedRuns) { - val datasetName = run.datasetName - val innerMap = map.getOrPut(run.repetition) { HashMap() } - val set = innerMap.getOrPut(datasetName) { HashSet() } - set.add(run) - } - return map + exitProcess(0) } diff --git a/src/main/kotlin/org/variantsync/evaluation/analysis/AnalysisMode.kt b/src/main/kotlin/org/variantsync/evaluation/analysis/AnalysisMode.kt index b4624aa4..58509025 100644 --- a/src/main/kotlin/org/variantsync/evaluation/analysis/AnalysisMode.kt +++ 
b/src/main/kotlin/org/variantsync/evaluation/analysis/AnalysisMode.kt @@ -4,4 +4,4 @@ enum class AnalysisMode { Trivial, NonTrivial, All -} \ No newline at end of file +} diff --git a/src/main/kotlin/org/variantsync/evaluation/analysis/CountingMap.kt b/src/main/kotlin/org/variantsync/evaluation/analysis/CountingMap.kt index 9653cf4d..8f531b7e 100644 --- a/src/main/kotlin/org/variantsync/evaluation/analysis/CountingMap.kt +++ b/src/main/kotlin/org/variantsync/evaluation/analysis/CountingMap.kt @@ -53,4 +53,4 @@ class CountingMap() { fun elementCount(): UInt { return elementCount } -} \ No newline at end of file +} diff --git a/src/main/kotlin/org/variantsync/evaluation/analysis/EvaluationResult.kt b/src/main/kotlin/org/variantsync/evaluation/analysis/EvaluationResult.kt index b7b2c954..63f4a372 100644 --- a/src/main/kotlin/org/variantsync/evaluation/analysis/EvaluationResult.kt +++ b/src/main/kotlin/org/variantsync/evaluation/analysis/EvaluationResult.kt @@ -1,112 +1,46 @@ package org.variantsync.evaluation.analysis class EvaluationResult( - val applied: Applied, val invalid: Invalid, val wrongLocation: WrongLocation, val missing: Missing, - val filteredCorrectly: FilteredCorrectly, val filteredIncorrectly: FilteredIncorrectly, - val mitigatedInvalid: MitigatedInvalid, val mitigatedMissing: MitigatedMissing, - val editDistance: EditDistance, + val applied: Applied, + val invalid: Invalid, + val wrongLocation: WrongLocation, + val missing: Missing, + val filteredCorrectly: FilteredCorrectly, + val filteredIncorrectly: FilteredIncorrectly, + val mitigatedInvalid: MitigatedInvalid, + val mitigatedMissing: MitigatedMissing, + val editDistance: EditDistance, ) { fun resultCount(): Long { - return applied.v + invalid.v + wrongLocation.v + missing.v + filteredCorrectly.v + filteredIncorrectly.v + mitigatedInvalid.v + mitigatedMissing.v - } - - fun incorrectCount(): Long { - return invalid.v + wrongLocation.v + missing.v + filteredIncorrectly.v - } - -} - -class 
AccumulatedResult( - var applied: Applied, var invalid: Invalid, - var wrongLocation: WrongLocation, var missing: Missing, - var filteredCorrectly: FilteredCorrectly, var filteredIncorrectly: FilteredIncorrectly, - var mitigatedInvalid: MitigatedInvalid, var mitigatedMissing: MitigatedMissing, - var editDistance: EditDistance, - private var fullyCorrectCommits: FullyCorrectCommits, - private var numResultsTotal: NumResultsTotal, -) { - - constructor() : this( - Applied(0), - Invalid(0), - WrongLocation(0), - Missing(0), - FilteredCorrectly(0), - FilteredIncorrectly(0), - MitigatedInvalid(0), - MitigatedMissing(0), - EditDistance(0u), - FullyCorrectCommits(0u), - NumResultsTotal(0u), - ) - - fun resultCount(): Long { - return applied.v + invalid.v + wrongLocation.v + missing.v + filteredCorrectly.v + filteredIncorrectly.v + mitigatedInvalid.v + mitigatedMissing.v - } - - fun correctCount(): Long { - return applied.v + filteredCorrectly.v + mitigatedInvalid.v + mitigatedMissing.v - } - - fun incorrectCount(): Long { - return invalid.v + wrongLocation.v + missing.v + filteredIncorrectly.v - } - - fun averageEditDistance(): Double { - return this.editDistance.v.toDouble() / this.numResultsTotal.v.toDouble() - } - - fun fullyCorrectPercentage(): Double { - return 100.0 * (this.fullyCorrectCommits.v.toDouble() / this.numResultsTotal.v.toDouble()) - } - - fun add(other: EvaluationResult) { - this.applied = Applied(this.applied.v + other.applied.v) - this.invalid = Invalid(this.invalid.v + other.invalid.v) - this.wrongLocation = WrongLocation(this.wrongLocation.v + other.wrongLocation.v) - this.missing = Missing(this.missing.v + other.missing.v) - this.filteredCorrectly = FilteredCorrectly(this.filteredCorrectly.v + other.filteredCorrectly.v) - this.filteredIncorrectly = FilteredIncorrectly(this.filteredIncorrectly.v + other.filteredIncorrectly.v) - this.mitigatedInvalid = MitigatedInvalid(this.mitigatedInvalid.v + other.mitigatedInvalid.v) - this.mitigatedMissing = 
MitigatedMissing(this.mitigatedMissing.v + other.mitigatedMissing.v) - this.editDistance = EditDistance(this.editDistance.v + other.editDistance.v) - this.numResultsTotal = NumResultsTotal(this.numResultsTotal.v + 1u) - - if (other.incorrectCount() == 0L) { - this.fullyCorrectCommits = FullyCorrectCommits(this.fullyCorrectCommits.v + 1u) - } + return applied.v + + invalid.v + + wrongLocation.v + + missing.v + + filteredCorrectly.v + + filteredIncorrectly.v + + mitigatedInvalid.v + + mitigatedMissing.v } } -@JvmInline -value class Applied(val v: Long) +@JvmInline value class Applied(val v: Long) -@JvmInline -value class Invalid(val v: Long) +@JvmInline value class Invalid(val v: Long) -@JvmInline -value class Missing(val v: Long) +@JvmInline value class Missing(val v: Long) -@JvmInline -value class FilteredCorrectly(val v: Long) +@JvmInline value class FilteredCorrectly(val v: Long) -@JvmInline -value class FilteredIncorrectly(val v: Long) +@JvmInline value class FilteredIncorrectly(val v: Long) -@JvmInline -value class WrongLocation(val v: Long) +@JvmInline value class WrongLocation(val v: Long) -@JvmInline -value class MitigatedInvalid(val v: Long) +@JvmInline value class MitigatedInvalid(val v: Long) -@JvmInline -value class MitigatedMissing(val v: Long) +@JvmInline value class MitigatedMissing(val v: Long) -@JvmInline -value class EditDistance(val v: UInt) +@JvmInline value class EditDistance(val v: UInt) -@JvmInline -value class FullyCorrectCommits(val v: UInt) +@JvmInline value class FullyCorrectCommits(val v: UInt) -@JvmInline -value class NumResultsTotal(val v: UInt) \ No newline at end of file +@JvmInline value class NumResultsTotal(val v: UInt) diff --git a/src/main/kotlin/org/variantsync/evaluation/analysis/EvaluationScenario.kt b/src/main/kotlin/org/variantsync/evaluation/analysis/EvaluationScenario.kt index d644552c..8063c06b 100644 --- a/src/main/kotlin/org/variantsync/evaluation/analysis/EvaluationScenario.kt +++ 
b/src/main/kotlin/org/variantsync/evaluation/analysis/EvaluationScenario.kt @@ -1,18 +1,18 @@ package org.variantsync.evaluation.analysis import org.variantsync.diffdetective.util.Assert -import org.variantsync.evaluation.util.diff.lines.ChangedLine import org.variantsync.evaluation.patching.Change +import org.variantsync.evaluation.util.diff.lines.ChangedLine class EvaluationScenario( - private val required: CountingMap, - private val undesired: CountingMap, - private val unPatchable: CountingMap + private val required: CountingMap, + private val undesired: CountingMap, + private val unPatchable: CountingMap ) { fun evaluate( - patch: CountingMap, - rejects: CountingMap, - observedDifference: CountingMap + patch: CountingMap, + rejects: CountingMap, + observedDifference: CountingMap ): EvaluationResult { var correct = 0L var invalid = 0L @@ -58,7 +58,7 @@ class EvaluationScenario( wrongLocation++ } else { // It is just missing - missing++; + missing++ } continue } @@ -79,7 +79,6 @@ class EvaluationScenario( // Did the undesired change fail to apply? 
if (rejects.contains(undesired.asRejectedChange())) { // If it did, it has effectively been filtered - // TODO: Count under a different name filteredCorrectly++ Assert.assertTrue(rejects.removeOne(undesired.asRejectedChange())) continue @@ -93,21 +92,20 @@ class EvaluationScenario( continue } - // The change has been applied, but did not cause an observable undesired effect + // The change has been applied but did not cause an observable undesired effect mitigatedInvalid++ } - return EvaluationResult( - Applied(correct), - Invalid(invalid), - WrongLocation(wrongLocation), - Missing(missing), - FilteredCorrectly(filteredCorrectly), - FilteredIncorrectly(filteredIncorrectly), - MitigatedInvalid(mitigatedInvalid), - MitigatedMissing(mitigatedMissing), - EditDistance(editDistance), + Applied(correct), + Invalid(invalid), + WrongLocation(wrongLocation), + Missing(missing), + FilteredCorrectly(filteredCorrectly), + FilteredIncorrectly(filteredIncorrectly), + MitigatedInvalid(mitigatedInvalid), + MitigatedMissing(mitigatedMissing), + EditDistance(editDistance), ) } -} \ No newline at end of file +} diff --git a/src/main/kotlin/org/variantsync/evaluation/analysis/ExperimentResult.kt b/src/main/kotlin/org/variantsync/evaluation/analysis/ExperimentResult.kt deleted file mode 100644 index 0dda4219..00000000 --- a/src/main/kotlin/org/variantsync/evaluation/analysis/ExperimentResult.kt +++ /dev/null @@ -1,8 +0,0 @@ -package org.variantsync.evaluation.analysis - -import java.nio.file.Path - -data class ExperimentResult( - val resultObj: Any, - val pathToResultFile: Path, -) diff --git a/src/main/kotlin/org/variantsync/evaluation/analysis/PatchOutcome.kt b/src/main/kotlin/org/variantsync/evaluation/analysis/PatchOutcome.kt index 97e4c9f9..080f839d 100644 --- a/src/main/kotlin/org/variantsync/evaluation/analysis/PatchOutcome.kt +++ b/src/main/kotlin/org/variantsync/evaluation/analysis/PatchOutcome.kt @@ -2,40 +2,51 @@ package org.variantsync.evaluation.analysis import 
java.time.Duration -/** - * Represents the outcome of a single experimental run in the study. - */ +/** Represents the outcome of a single experimental run in the study. */ class PatchOutcome /** - * @param dataset The considered subject - * @param runID The id of this run - * @param cherry The id of the parent commit - * @param pick The id of the child commit - * @param normalActualVsExpected Number of differences between the patched target variant and the expected result (without filtering) - * @param lineNormal Number of unfiltered line-level patches - * @param lineSuccessNormal Number of successful line-level patches - */( - val dataset: String, - val runID: ULong, - val cherry: String, - val pick: String, - val normalActualVsExpected: Long, - val lineNormal: Long, - val lineSuccessNormal: Long, - val normalResult: EvaluationResult, - val patchDuration: Duration, - val patchIsTrivial: Boolean, + * @param dataset The considered subject + * @param runID The id of this run + * @param cherry The id of the parent commit + * @param pick The id of the child commit + * @param normalActualVsExpected Number of differences between the patched target variant and the + * expected result (without filtering) + * @param lineNormal Number of unfiltered line-level patches + * @param lineSuccessNormal Number of successful line-level patches + */ +( + val dataset: String, + val runID: ULong, + val cherry: String, + val pick: String, + val normalActualVsExpected: Long, + val lineNormal: Long, + val lineSuccessNormal: Long, + val normalResult: EvaluationResult, + val patchDuration: Duration, + val patchIsTrivial: Boolean, ) { override fun toString(): String { return "PatchOutcome{" + - "dataset='" + dataset + '\'' + - ", runID=" + runID + - ", cherry='" + cherry + '\'' + - ", pick='" + pick + '\'' + - ", normalActualVsExpected=" + normalActualVsExpected + - ", lineNormal=" + lineNormal + - ", lineSuccessNormal=" + lineSuccessNormal + - ", normalResult=" + normalResult + + 
"dataset='" + + dataset + + '\'' + + ", runID=" + + runID + + ", cherry='" + + cherry + + '\'' + + ", pick='" + + pick + + '\'' + + ", normalActualVsExpected=" + + normalActualVsExpected + + ", lineNormal=" + + lineNormal + + ", lineSuccessNormal=" + + lineSuccessNormal + + ", normalResult=" + + normalResult + '}' } -} \ No newline at end of file +} diff --git a/src/main/kotlin/org/variantsync/evaluation/analysis/ResultAnalysis.kt b/src/main/kotlin/org/variantsync/evaluation/analysis/ResultAnalysis.kt index 670414f3..3fc7f71d 100644 --- a/src/main/kotlin/org/variantsync/evaluation/analysis/ResultAnalysis.kt +++ b/src/main/kotlin/org/variantsync/evaluation/analysis/ResultAnalysis.kt @@ -1,27 +1,29 @@ package org.variantsync.evaluation.analysis +import java.time.Duration import org.tinylog.kotlin.Logger import org.variantsync.diffdetective.util.Assert -import org.variantsync.evaluation.util.diff.components.OriginalDiff -import org.variantsync.evaluation.util.diff.lines.ChangedLine import org.variantsync.evaluation.execution.CherryPick import org.variantsync.evaluation.execution.Operations import org.variantsync.evaluation.patching.Change import org.variantsync.evaluation.patching.Rejects -import java.time.Duration +import org.variantsync.evaluation.util.diff.components.OriginalDiff +import org.variantsync.evaluation.util.diff.lines.ChangedLine object ResultAnalysis { private const val STRIP = 1 fun processCherriesOutcome( - workdir: Operations, - cherryPick: CherryPick, - dataset: String, runID: ULong, - normalPatch: OriginalDiff, - resultDiffNormal: OriginalDiff, - rejectsNormal: Rejects, evolutionChanges: OriginalDiff, - patchDuration: Duration, - patchIsTrivial: Boolean, + workdir: Operations, + cherryPick: CherryPick, + dataset: String, + runID: ULong, + normalPatch: OriginalDiff, + resultDiffNormal: OriginalDiff, + rejectsNormal: Rejects, + evolutionChanges: OriginalDiff, + patchDuration: Duration, + patchIsTrivial: Boolean, ): PatchOutcome { 
Logger.debug("Processing outcome of $runID for patch process in " + workdir.workDir()) // number of tried line-level patches @@ -29,36 +31,45 @@ object ResultAnalysis { // number of failed patches // Determine the number of failed line-level patches - val lineNormalFailed: MutableList = rejectsNormal.intoChangedLines().toMutableList() + val lineNormalFailed: MutableList = + rejectsNormal.intoChangedLines().toMutableList() Logger.debug( - "${lineNormalFailed.size} of ${lineNormal.size} normal line-sized patches failed" + "${lineNormalFailed.size} of ${lineNormal.size} normal line-sized patches failed" ) val scenario = initCherryScenario(normalPatch, evolutionChanges) - val normalResult: EvaluationResult = scenario.evaluate( - CountingMap(normalPatch.intoChanges(STRIP)), - CountingMap(rejectsNormal.intoChanges()), - CountingMap(OriginalDiff.determineChangedLines(resultDiffNormal, STRIP)) - ) + val normalResult: EvaluationResult = + scenario.evaluate( + CountingMap(normalPatch.intoChanges(STRIP)), + CountingMap(rejectsNormal.intoChanges()), + CountingMap(OriginalDiff.determineChangedLines(resultDiffNormal, STRIP)) + ) Assert.assertEquals(normalResult.resultCount(), lineNormal.size.toLong()) return PatchOutcome( - dataset, runID, cherryPick.cherryCommit, cherryPick.expectedResultCommit, OriginalDiff.determineChangedLines(resultDiffNormal, STRIP).size.toLong(), - lineNormal.size.toLong(), lineNormal.size.toLong() - lineNormalFailed.size.toLong(), - normalResult, - patchDuration, - patchIsTrivial, + dataset, + runID, + cherryPick.cherryCommit, + cherryPick.expectedResultCommit, + OriginalDiff.determineChangedLines(resultDiffNormal, STRIP).size.toLong(), + lineNormal.size.toLong(), + lineNormal.size.toLong() - lineNormalFailed.size.toLong(), + normalResult, + patchDuration, + patchIsTrivial, ) } private fun initCherryScenario( - patch: OriginalDiff, - targetEvolutionDiff: OriginalDiff + patch: OriginalDiff, + targetEvolutionDiff: OriginalDiff ): EvaluationScenario { 
Logger.debug("Calculating result table with TP, FP, TN, and FN.") val changesToClassify = CountingMap(patch.intoChanges(STRIP)) val changesInEvolution = - CountingMap(OriginalDiff.determineChangedLines(targetEvolutionDiff, STRIP)) + CountingMap( + OriginalDiff.determineChangedLines(targetEvolutionDiff, STRIP) + ) // Changes in the target variant's evolution that cannot be // synchronized, because they are not part of the source variant and therefore not of the @@ -67,7 +78,8 @@ object ResultAnalysis { // Expected changes, i.e., changes in the target variant's // evolution that can be synchronized run { - val tempChanges: CountingMap = CountingMap(OriginalDiff.determineChangedLines(patch, STRIP)) + val tempChanges: CountingMap = + CountingMap(OriginalDiff.determineChangedLines(patch, STRIP)) for (evolutionChange in changesInEvolution) { if (!tempChanges.contains(evolutionChange)) { unpatchableChanges.addOne(evolutionChange) @@ -95,27 +107,16 @@ object ResultAnalysis { } val derivedElementCount = requiredChanges.elementCount() + undesiredChanges.elementCount() Assert.assertEquals(changesToClassify.elementCount(), derivedElementCount) - return EvaluationScenario( - requiredChanges, - undesiredChanges, - unpatchableChanges - ) + return EvaluationScenario(requiredChanges, undesiredChanges, unpatchableChanges) } - data class AccumulatedOutcome( - val normalResult: AccumulatedResult, - val commitPatches: Long, - val commitSuccessNormal: Long, - val lineNormal: Long, - val lineSuccessNormal: Long, - ) - fun percentage(x: Long, y: Long): String { - val percentage: Double = if (y == 0L) { - 0.0 - } else { - 100 * (x.toDouble() / y.toDouble()) - } + val percentage: Double = + if (y == 0L) { + 0.0 + } else { + 100 * (x.toDouble() / y.toDouble()) + } return String.format("%3.1f%s", percentage, "%") } -} \ No newline at end of file +} diff --git a/src/main/kotlin/org/variantsync/evaluation/analysis/TaskOutcome.kt 
b/src/main/kotlin/org/variantsync/evaluation/analysis/TaskOutcome.kt index 15ac95b8..f47397f4 100644 --- a/src/main/kotlin/org/variantsync/evaluation/analysis/TaskOutcome.kt +++ b/src/main/kotlin/org/variantsync/evaluation/analysis/TaskOutcome.kt @@ -1,6 +1,10 @@ package org.variantsync.evaluation.analysis -import org.variantsync.evaluation.execution.EvaluationRun import java.util.* +import org.variantsync.evaluation.execution.EvaluationRun -data class TaskOutcome(val runID: ULong, val result: Optional>, val evalRun: EvaluationRun) +data class TaskOutcome( + val runID: ULong, + val result: Optional>, + val evalRun: EvaluationRun +) diff --git a/src/main/kotlin/org/variantsync/evaluation/analysis/TaskResult.kt b/src/main/kotlin/org/variantsync/evaluation/analysis/TaskResult.kt new file mode 100644 index 00000000..22dac37a --- /dev/null +++ b/src/main/kotlin/org/variantsync/evaluation/analysis/TaskResult.kt @@ -0,0 +1,8 @@ +package org.variantsync.evaluation.analysis + +import java.nio.file.Path + +data class TaskResult( + val resultObj: Any, + val pathToResultFile: Path, +) diff --git a/src/main/kotlin/org/variantsync/evaluation/error/Panic.kt b/src/main/kotlin/org/variantsync/evaluation/error/Panic.kt index 918c05ab..78afdf93 100644 --- a/src/main/kotlin/org/variantsync/evaluation/error/Panic.kt +++ b/src/main/kotlin/org/variantsync/evaluation/error/Panic.kt @@ -1,6 +1,7 @@ package org.variantsync.evaluation.error /** - * Custom Error class to represent critical errors due to requirements of the study not being fulfilled. + * Custom Error class to represent critical errors due to requirements of the study not being + * fulfilled. 
*/ class Panic(message: String) : Error(message) diff --git a/src/main/kotlin/org/variantsync/evaluation/error/SetupError.kt b/src/main/kotlin/org/variantsync/evaluation/error/SetupError.kt index 37c2621d..519d30c7 100644 --- a/src/main/kotlin/org/variantsync/evaluation/error/SetupError.kt +++ b/src/main/kotlin/org/variantsync/evaluation/error/SetupError.kt @@ -1,6 +1,4 @@ package org.variantsync.evaluation.error -/** - * Custom Error class to represent critical errors in the study's setup. - */ -class SetupError(s: String) : Error(s) \ No newline at end of file +/** Custom Error class to represent critical errors in the study's setup. */ +class SetupError(s: String) : Error(s) diff --git a/src/main/kotlin/org/variantsync/evaluation/error/ShellException.kt b/src/main/kotlin/org/variantsync/evaluation/error/ShellException.kt index 40679e57..52cec816 100644 --- a/src/main/kotlin/org/variantsync/evaluation/error/ShellException.kt +++ b/src/main/kotlin/org/variantsync/evaluation/error/ShellException.kt @@ -2,12 +2,9 @@ package org.variantsync.evaluation.error import java.util.function.Consumer -/** - * Custom Exception for representing errors caused by shell commands being executed. - */ +/** Custom Exception for representing errors caused by shell commands being executed. 
*/ class ShellException : Exception { - @JvmField - val output: List + @JvmField val output: List constructor(e: Exception) : super(e) { output = ArrayList() @@ -24,4 +21,4 @@ class ShellException : Exception { return sb.toString() } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/org/variantsync/evaluation/error/VariantGenerationException.kt b/src/main/kotlin/org/variantsync/evaluation/error/VariantGenerationException.kt deleted file mode 100644 index 3864c964..00000000 --- a/src/main/kotlin/org/variantsync/evaluation/error/VariantGenerationException.kt +++ /dev/null @@ -1,3 +0,0 @@ -package org.variantsync.evaluation.error - -class VariantGenerationException(e: Exception) : Exception(e) \ No newline at end of file diff --git a/src/main/kotlin/org/variantsync/evaluation/execution/CalcDatasetSize.kt b/src/main/kotlin/org/variantsync/evaluation/execution/CalcDatasetSize.kt index 148704dd..bbf3ab82 100644 --- a/src/main/kotlin/org/variantsync/evaluation/execution/CalcDatasetSize.kt +++ b/src/main/kotlin/org/variantsync/evaluation/execution/CalcDatasetSize.kt @@ -1,27 +1,28 @@ package org.variantsync.evaluation.execution -import org.tinylog.kotlin.Logger -import org.variantsync.evaluation.loadPRDatasets import java.io.File import java.io.IOException import java.io.UncheckedIOException +import org.tinylog.kotlin.Logger fun main(args: Array) { if (args.isEmpty()) { System.err.println( - "The first argument should provide the path to the configuration file that is to be used" + "The first argument should provide the path to the configuration file that is to be used" ) } val config = EvalConfig(File(args[0])) - val datasets: Map> = try { - loadPRDatasets(config) - } catch (e: IOException) { - Logger.error( - "Was not able to load the yaml datasets from '" - + config.EXPERIMENT_DATASETS() + "'" - ) - throw UncheckedIOException(e) - } + val datasets: Map> = + try { + loadPRDatasets(config) + } catch (e: IOException) { + Logger.error( + "Was not able to load 
the yaml datasets from '" + + config.EXPERIMENT_DATASETS() + + "'" + ) + throw UncheckedIOException(e) + } var totalNumberOfCherryPicks = 0 for (datasetList in datasets.values) { @@ -31,4 +32,5 @@ fun main(args: Array) { } Logger.info("There are $totalNumberOfCherryPicks cherry picks to consider for all languages.") -} \ No newline at end of file +} + diff --git a/src/main/kotlin/org/variantsync/evaluation/execution/CherryDataset.kt b/src/main/kotlin/org/variantsync/evaluation/execution/CherryDataset.kt index a0cb7e29..978be9ff 100644 --- a/src/main/kotlin/org/variantsync/evaluation/execution/CherryDataset.kt +++ b/src/main/kotlin/org/variantsync/evaluation/execution/CherryDataset.kt @@ -3,11 +3,11 @@ package org.variantsync.evaluation.execution import java.io.Serializable class CherryDataset( - val datasetName: String, - val repositoryId: String, - val language: String, - var cherryPicks: MutableList -): Serializable { + val datasetName: String, + val repositoryId: String, + val language: String, + var cherryPicks: MutableList +) : Serializable { override fun equals(other: Any?): Boolean { if (this === other) return true if (javaClass != other?.javaClass) return false @@ -27,4 +27,5 @@ class CherryDataset( result = 31 * result + language.hashCode() return result } -} \ No newline at end of file +} + diff --git a/src/main/kotlin/org/variantsync/evaluation/execution/CherryPick.kt b/src/main/kotlin/org/variantsync/evaluation/execution/CherryPick.kt index 6eafa6c4..12ced5d9 100644 --- a/src/main/kotlin/org/variantsync/evaluation/execution/CherryPick.kt +++ b/src/main/kotlin/org/variantsync/evaluation/execution/CherryPick.kt @@ -2,9 +2,12 @@ package org.variantsync.evaluation.execution import java.io.Serializable - class CherryPick( - val id: Int, val cherryCommit: String, val cherryParentCommit: String, - val targetCommit: String, val expectedResultCommit: String, - val isTrivial: Boolean, -): Serializable \ No newline at end of file + val id: Int, + val cherryCommit: 
String, + val cherryParentCommit: String, + val targetCommit: String, + val expectedResultCommit: String, + val isTrivial: Boolean, +) : Serializable + diff --git a/src/main/kotlin/org/variantsync/evaluation/execution/CherryPickEvalTask.kt b/src/main/kotlin/org/variantsync/evaluation/execution/CherryPickEvalTask.kt index 7ea920d0..21c423d2 100644 --- a/src/main/kotlin/org/variantsync/evaluation/execution/CherryPickEvalTask.kt +++ b/src/main/kotlin/org/variantsync/evaluation/execution/CherryPickEvalTask.kt @@ -1,85 +1,55 @@ package org.variantsync.evaluation.execution +import java.io.IOException +import java.nio.file.Files +import java.nio.file.Path +import java.time.Duration +import java.time.Instant +import java.util.* +import kotlin.collections.ArrayList import org.prop4j.Node import org.tinylog.kotlin.Logger import org.variantsync.evaluation.analysis.ResultAnalysis -import org.variantsync.evaluation.analysis.ExperimentResult import org.variantsync.evaluation.analysis.TaskOutcome -import org.variantsync.evaluation.util.diff.DiffParser -import org.variantsync.evaluation.util.diff.components.OriginalDiff -import org.variantsync.evaluation.util.shell.CpCommand -import org.variantsync.evaluation.util.shell.DiffCommand -import org.variantsync.evaluation.util.shell.RmCommand +import org.variantsync.evaluation.analysis.TaskResult import org.variantsync.evaluation.error.Panic import org.variantsync.evaluation.patching.Patcher import org.variantsync.evaluation.patching.Rejects import org.variantsync.evaluation.patching.UTF8Exception +import org.variantsync.evaluation.util.diff.components.OriginalDiff +import org.variantsync.evaluation.util.shell.CpCommand +import org.variantsync.evaluation.util.shell.RmCommand import org.variantsync.vevos.simulation.feature.Variant import org.variantsync.vevos.simulation.feature.config.IConfiguration -import java.io.IOException -import java.nio.file.Files -import java.nio.file.Path -import java.time.Duration -import java.time.Instant 
-import java.util.* -import java.util.concurrent.BlockingQueue -import java.util.concurrent.Callable -import kotlin.collections.ArrayList - class CherryPickEvalTask( - private val repetition: Int, - private val config: EvalConfig, - private val datasetName: String, - private val cherryPick: CherryPick, - private val availableOperations: BlockingQueue, - private val runID: ULong, - val evalRun: EvaluationRun, -) : Callable { - - override fun call(): TaskOutcome { - val operations: EvalOperations - - synchronized(CherryPickEvalTask::class.java) { - // Retrieve the operations and the repo manager for this task - Logger.debug("Getting the next available operations (" + availableOperations.size + ")") - operations = availableOperations.take() - Logger.debug("There are now " + availableOperations.size + " operations available. Took $operations") - Logger.debug("Remaining after take: " + opsToString()) - } - - var experimentResult = Optional.empty>() + private val repetition: Int, + private val config: EvalConfig, + private val datasetName: String, + private val cherryPick: CherryPick, + private val evalSetup: EvalOperations, + private val runID: ULong, + val evalRun: EvaluationRun, +) { + + fun execute(): TaskOutcome { + var experimentResult = Optional.empty>() try { - experimentResult = Optional.of(callExecution(operations)) + experimentResult = Optional.of(callExecution()) } catch (e: Throwable) { Logger.error("Failed to finish task with runID $runID") Logger.error(e) e.printStackTrace() - } finally { - // Place the operations back in the queue to make them available to the next task - Logger.debug("Placing operation $operations back in queue (" + availableOperations.size + ")") - Logger.debug("Remaining before place back: " + opsToString()) - availableOperations.add(operations) - Logger.debug("There are now " + availableOperations.size + " operations available.") } - return TaskOutcome(runID, experimentResult, evalRun) } - private fun opsToString(): String { - val sb 
= StringBuilder() - for (op in availableOperations) { - sb.append(op) - sb.append(",") - } - return sb.toString() - } - - fun callExecution(operations: EvalOperations): List { + private fun callExecution(): List { try { - // repoManager.cleanRepoStates() - if (!operations.repoManager.prepareCherryPick(cherryPick)) { - Logger.info("Not all commits of the cherry pick could be found... skipping cherry pick ${cherryPick.id} of $datasetName") + if (!evalSetup.repoManager.prepareCherryPick(cherryPick)) { + Logger.info( + "Not all commits of the cherry pick could be found... skipping cherry pick ${cherryPick.id} of $datasetName" + ) return ArrayList() } } catch (e: Exception) { @@ -88,7 +58,7 @@ class CherryPickEvalTask( return ArrayList() } - if (config.EXPERIMENT_DEBUG() && operations.debugDir(cherryPick).toFile().mkdirs()) { + if (config.EXPERIMENT_DEBUG() && evalSetup.debugDir(cherryPick).toFile().mkdirs()) { Logger.debug("Created Debug directory.") } @@ -96,64 +66,61 @@ class CherryPickEvalTask( val source = Variant("source", AllTrueConfiguration()) val target = Variant("target", AllTrueConfiguration()) - if (Files.exists(operations.splitPatchFile)) { - Logger.debug("Cleaning old patch file " + operations.splitPatchFile) - operations.shell.execute(RmCommand(operations.splitPatchFile)) + if (Files.exists(evalSetup.splitPatchFile)) { + Logger.debug("Cleaning old patch file " + evalSetup.splitPatchFile) + evalSetup.shell.execute(RmCommand(evalSetup.splitPatchFile)) } // Apply diff to both versions of source variant Logger.debug("Diffing source...") - val originalPatch = getOriginalDiff( - operations, - operations.sourceVariantV0, - operations.sourceVariantV1 - ) + val originalPatch = + getOriginalDiff(evalSetup, evalSetup.sourceVariantV0, evalSetup.sourceVariantV1) if (originalPatch.isEmpty) { // There was no change to this variant, so we can skip it as source - Logger.info( - "Skipping cherry pick " + cherryPick.id + " because there are no changes to code. 
Diff of code files is empty." + Logger.debug( + "Skipping cherry pick " + + cherryPick.id + + " because there are no changes to text files. Diff of text files is empty." ) return ArrayList() } if (config.EXPERIMENT_DEBUG()) { - saveDiff( - originalPatch, - operations.debugDir(cherryPick).resolve("original.diff") - ) + saveDiff(originalPatch, evalSetup.debugDir(cherryPick).resolve("original.diff")) } - saveDiff(originalPatch, operations.patchFile) + saveDiff(originalPatch, evalSetup.patchFile) Logger.debug("Saved original diff.") - val results = ArrayList() + val results = ArrayList() try { Logger.debug("Starting patch application for cherry-pick " + cherryPick.id) var evolutionDiff = - getOriginalDiff(operations, operations.targetVariantV0, operations.targetVariantV1) + getOriginalDiff(evalSetup, evalSetup.targetVariantV0, evalSetup.targetVariantV1) - val patchIsTrivial = originalPatch.partiallyEquals(evolutionDiff, operations.STRIP) + val patchIsTrivial = originalPatch.partiallyEquals(evolutionDiff, evalSetup.STRIP) if (patchIsTrivial) { - // We only focus on variability, which is expressed by differences in the patch and evolution + // We only focus on variability, which is expressed by differences in the patch and + // evolution Logger.debug("Patch is trivial") } else { Logger.debug("Patch is not trivial") } - evolutionDiff = filterUnpatchedFiles(originalPatch, evolutionDiff, operations.STRIP) + evolutionDiff = filterUnpatchedFiles(originalPatch, evolutionDiff, evalSetup.STRIP) - for (patcher in operations.patchers) { + for (patcher in evalSetup.patchers) { /* Application of patches without knowledge about features */ Logger.debug("Applying patch from cherry-pick...") val start = Instant.now() var rejectsNormal: Rejects try { - rejectsNormal = patcher.applyPatch(operations, source, target, false) + rejectsNormal = patcher.applyPatch(evalSetup, source, target, false) } catch (e: UTF8Exception) { Logger.debug(e) - patcher.clean(operations) - 
operations.repoManager.resetTargetVariant() + patcher.clean(evalSetup) + evalSetup.repoManager.resetTargetVariant() return ArrayList() } catch (e: Exception) { Logger.debug(e) @@ -164,46 +131,58 @@ class CherryPickEvalTask( // Gather the patch result var actualVsExpectedNormal = - getActualVsExpected(operations, operations.targetVariantV1, target, cherryPick) - actualVsExpectedNormal = filterUnpatchedFiles(originalPatch, actualVsExpectedNormal, operations.STRIP) + getActualVsExpected( + evalSetup, + evalSetup.targetVariantV1, + target, + cherryPick + ) + actualVsExpectedNormal = + filterUnpatchedFiles(originalPatch, actualVsExpectedNormal, evalSetup.STRIP) if (config.EXPERIMENT_DEBUG()) { patchFilesDebug( - operations, - patcher, - originalPatch, - cherryPick, - source, - target, - rejectsNormal, - evolutionDiff + evalSetup, + patcher, + originalPatch, + cherryPick, + source, + target, + rejectsNormal, + evolutionDiff ) } /* Result Evaluation */ - val patchOutcome = ResultAnalysis.processCherriesOutcome( - operations, - cherryPick, - datasetName, - runID, - originalPatch, - actualVsExpectedNormal, - rejectsNormal, - evolutionDiff, - patchDuration, - patchIsTrivial, - ) - - val resultFile = config.EXPERIMENT_DIR_RESULTS().resolve("rep-${repetition}").resolve("${datasetName}_${patcher.name()}.results") - results.add(ExperimentResult(patchOutcome, resultFile)) + val patchOutcome = + ResultAnalysis.processCherriesOutcome( + evalSetup, + cherryPick, + datasetName, + runID, + originalPatch, + actualVsExpectedNormal, + rejectsNormal, + evolutionDiff, + patchDuration, + patchIsTrivial, + ) + + val resultFile = + config.EXPERIMENT_DIR_RESULTS() + .resolve("rep-${repetition}") + .resolve("${datasetName}_${patcher.name()}.results") + results.add(TaskResult(patchOutcome, resultFile)) Logger.debug( - "Finished patching for cherry " + cherryPick.cherryCommit + " and target " - + cherryPick.targetCommit + "Finished patching for cherry " + + cherryPick.cherryCommit + + " and 
target " + + cherryPick.targetCommit ) - patcher.clean(operations) - operations.repoManager.resetTargetVariant() + patcher.clean(evalSetup) + evalSetup.repoManager.resetTargetVariant() } } catch (e: Exception) { Logger.debug("Captured exception for cherry pick ${cherryPick.id}: ", e.message) @@ -211,24 +190,27 @@ class CherryPickEvalTask( return results } - /** * Get the difference between the target variant after patching and the target variant in the * next de.variantsync.studies.evolution step. Then, filter all differences that do not belong * to the source variant and could have therefore not been synchronized in any case. */ private fun getActualVsExpected( - operations: EvalOperations, - pathToExpectedResult: Path, - target: Variant, - currentPR: CherryPick + operations: EvalOperations, + pathToExpectedResult: Path, + target: Variant, + currentPR: CherryPick ): OriginalDiff { - val resultDiff = getOriginalDiff(operations, operations.patchDir(), pathToExpectedResult, true) + val resultDiff = + getOriginalDiff(operations, operations.patchDir(), pathToExpectedResult, true) if (config.EXPERIMENT_DEBUG() && !resultDiff.isEmpty) { try { saveDiff( - resultDiff, operations.debugDir(currentPR).resolve(target.name) - .resolve(target.name + "_actual_expected.diff") + resultDiff, + operations + .debugDir(currentPR) + .resolve(target.name) + .resolve(target.name + "_actual_expected.diff") ) } catch (e: IOException) { Logger.error("Was not able to save resultDiffOriginal:\n{}", e) @@ -243,7 +225,7 @@ class CherryPickEvalTask( try { Files.createDirectories(file.parent) Files.write(file, rejects.toLines()) - } catch (e: IOException) { + } catch (_: IOException) { panic("Was not able to save diff to file $file") } } @@ -254,75 +236,51 @@ class CherryPickEvalTask( try { Files.createDirectories(file.parent) Files.write(file, fineDiff.toLines()) - } catch (e: IOException) { + } catch (_: IOException) { panic("Was not able to save diff to file $file") } } - // Get the difference 
between two directories using UNIX diff - private fun getOriginalDiff( - operations: EvalOperations, - v0Path: Path, v1Path: Path - ): OriginalDiff { - return getOriginalDiff(operations, v0Path, v1Path, false) - } - - // Get the difference between two directories using UNIX diff - private fun getOriginalDiff( - operations: EvalOperations, - v0Path: Path, v1Path: Path, ignoreBlanks: Boolean - ): OriginalDiff { - val diffCommand: DiffCommand = DiffCommand.Recommended( - operations.workDir.relativize(v0Path), - operations.workDir.relativize(v1Path) - ).exclude(".*") - if (ignoreBlanks) { - diffCommand.ignoreBlankLines() - } - val output = operations.shell.execute(diffCommand, operations.workDir) - //.expect("Was not able to diff variants.") - return if (output.isSuccess) { - DiffParser.toOriginalDiff(output.success) - } else { - // Assume that the error lines still contain valid diffs, which is usually the case - DiffParser.toOriginalDiff(output.failure.output) - } - } - private fun patchFilesDebug( - operations: EvalOperations, - patcher: Patcher, - originalPatch: OriginalDiff, - currentPR: CherryPick, - source: Variant, - target: Variant, - rejectsNormal: Rejects, - evolutionDiff: OriginalDiff + operations: EvalOperations, + patcher: Patcher, + originalPatch: OriginalDiff, + currentPR: CherryPick, + source: Variant, + target: Variant, + rejectsNormal: Rejects, + evolutionDiff: OriginalDiff ) { - saveDiff( - originalPatch, - operations.debugDir(currentPR).resolve(source.name + ".diff") - ) + saveDiff(originalPatch, operations.debugDir(currentPR).resolve(source.name + ".diff")) saveRejects( - rejectsNormal, - operations.debugDir(currentPR).resolve(target.name) - .resolve(target.name + "_rejects_normal_${patcher.name()}.diff") + rejectsNormal, + operations + .debugDir(currentPR) + .resolve(target.name) + .resolve(target.name + "_rejects_normal_${patcher.name()}.diff") ) operations.debugDir(currentPR).resolve(target.name).toFile().mkdirs() saveDiff( - evolutionDiff, - 
operations.debugDir(currentPR).resolve(target.name) - .resolve(target.name + "_evolution.diff") - ) - operations.shell.execute( - CpCommand( - operations.patchDir(), - operations.debugDir(currentPR).resolve(target.name).resolve("patched_filtered") - ).recursive() + evolutionDiff, + operations + .debugDir(currentPR) + .resolve(target.name) + .resolve(target.name + "_evolution.diff") ) - .expect("Was not able to copy variant $target.name") + operations + .shell + .execute( + CpCommand( + operations.patchDir(), + operations + .debugDir(currentPR) + .resolve(target.name) + .resolve("patched_filtered") + ) + .recursive() + ) + .expect("Was not able to copy variant $target.name") } - } class AllTrueConfiguration : IConfiguration { @@ -344,4 +302,4 @@ fun panic(message: String, e: Exception) { fun panic(message: String) { Logger.error(message) throw Panic(message) -} \ No newline at end of file +} diff --git a/src/main/kotlin/org/variantsync/evaluation/execution/EvalConfig.kt b/src/main/kotlin/org/variantsync/evaluation/execution/EvalConfig.kt index a428afff..41cc278c 100644 --- a/src/main/kotlin/org/variantsync/evaluation/execution/EvalConfig.kt +++ b/src/main/kotlin/org/variantsync/evaluation/execution/EvalConfig.kt @@ -1,38 +1,33 @@ package org.variantsync.evaluation.execution +import java.io.File +import java.nio.file.Path +import java.util.concurrent.TimeUnit import org.apache.commons.configuration2.Configuration import org.apache.commons.configuration2.PropertiesConfiguration import org.apache.commons.configuration2.builder.FileBasedConfigurationBuilder import org.apache.commons.configuration2.builder.fluent.Parameters import org.apache.commons.configuration2.convert.DefaultListDelimiterHandler import org.apache.commons.configuration2.ex.ConfigurationException -import org.variantsync.evaluation.CherryType -import java.io.File -import java.nio.file.Path -/** - * Determines the configuration of our study. - */ +/** Determines the configuration of our study. 
*/ class EvalConfig(propertiesFile: File) { // Configuration object holding key-value properties. private var config: Configuration? = null - /** - * Load a configuration from the given properties file. - * - */ + /** Load a configuration from the given properties file. */ init { val params = Parameters() try { - val builder = FileBasedConfigurationBuilder( - PropertiesConfiguration::class.java - ) - .configure( - params.properties().setFile(propertiesFile) - .setListDelimiterHandler( - DefaultListDelimiterHandler(',') - ) - ) + val builder = + FileBasedConfigurationBuilder(PropertiesConfiguration::class.java) + .configure( + params.properties() + .setFile(propertiesFile) + .setListDelimiterHandler( + DefaultListDelimiterHandler(',') + ) + ) config = builder.configuration } catch (e: ConfigurationException) { System.err.println("Was not able to load properties file $propertiesFile") @@ -40,48 +35,26 @@ class EvalConfig(propertiesFile: File) { } } - /** - * @return The start of repetitions for each commit pair and source-target combination - */ + /** @return The start of repetitions for each commit pair and source-target combination */ fun EXPERIMENT_REPEATS_START(): Int { return config!!.getInt(EXPERIMENT_REPEATS_START, 1) } - /** - * @return The end of repetitions for each commit pair and source-target combination - */ + /** @return The end of repetitions for each commit pair and source-target combination */ fun EXPERIMENT_REPEATS_END(): Int { return config!!.getInt(EXPERIMENT_REPEATS_END) } - /** - * @return The number of repetitions for each commit pair and source-target combination - */ + /** @return The number of repetitions for each commit pair and source-target combination */ fun EXPERIMENT_REPEATS_COUNT(): Int { return EXPERIMENT_REPEATS_END() - EXPERIMENT_REPEATS_START() + 1 } - /** - * @return The number of variants that are to be generated - */ - fun EXPERIMENT_VARIANT_COUNT(): Int { - return config!!.getInt(EXPERIMENT_VARIANT_COUNT) - } - - /** - * 
@return The working directory - */ + /** @return The working directory */ fun EXPERIMENT_DIR_MAIN(): Path { return Path.of(config!!.getString(EXPERIMENT_DIR_MAIN)) } - /** - * @return The root directory of the ground truth - */ - fun EXPERIMENT_DIR_GROUND_TRUTH(): Path { - return Path.of(config!!.getString(EXPERIMENT_DIR_GROUND_TRUTH)) - } - /** * @return The directory to which the repositories specified in the dataset file are cloned to */ @@ -89,16 +62,12 @@ class EvalConfig(propertiesFile: File) { return Path.of(config!!.getString(EXPERIMENT_DIR_REPOS)) } - /** - * @return The file with the list of datasets in Markdown format - */ + /** @return The file with the list of datasets in Markdown format */ fun EXPERIMENT_DATASETS(): Path { return Path.of(config!!.getString(EXPERIMENT_DATASETS)) } - /** - * @return Whether additional debugging is enabled - */ + /** @return Whether additional debugging is enabled */ fun EXPERIMENT_DEBUG(): Boolean { return config!!.getBoolean(EXPERIMENT_DEBUG) } @@ -112,16 +81,12 @@ class EvalConfig(propertiesFile: File) { return config!!.getLong(EXPERIMENT_STARTID, 0).toULong() } - /** - * @return The path to the file remembering processed runs - */ + /** @return The path to the file remembering processed runs */ fun EXPERIMENT_PROCESSED_FILE(): Path { return Path.of(config!!.getString(EXPERIMENT_PROCESSED_FILE)) } - /** - * @return The path to the results directory - */ + /** @return The path to the results directory */ fun EXPERIMENT_DIR_RESULTS(): Path { return Path.of(config!!.getString(EXPERIMENT_DIR_RESULTS)) } @@ -129,8 +94,8 @@ class EvalConfig(propertiesFile: File) { /** * * @return Minimum number of cherries in a repository for a dataset to be considered for the - * study. If a repository has fewer cherries, it is simply ignored. Values of 0 or less - * are automatically converted to 0. + * study. If a repository has fewer cherries, it is simply ignored. Values of 0 or less are + * automatically converted to 0. 
*/ fun EXPERIMENT_DATASET_MIN_SIZE(): Int { var value = config!!.getInt(EXPERIMENT_DATASET_MIN_SIZE) @@ -143,8 +108,8 @@ class EvalConfig(propertiesFile: File) { /** * * @return Maximum number of cherries in a repository for a dataset to be considered for the - * study. If a repository has more cherries, it is simply ignored. Values of 0 or less - * are automatically converted to Integer.MAX_VALUE. + * study. If a repository has more cherries, it is simply ignored. Values of 0 or less are + * automatically converted to Integer.MAX_VALUE. */ fun EXPERIMENT_DATASET_MAX_SIZE(): Int { var value = config!!.getInt(EXPERIMENT_DATASET_MAX_SIZE) @@ -166,9 +131,7 @@ class EvalConfig(propertiesFile: File) { return count } - /** - * @return Whether sampling of commits that are processed is enabled - */ + /** @return Whether sampling of commits that are processed is enabled */ fun EXPERIMENT_ENABLE_SAMPLING(): Boolean { return config!!.getBoolean(EXPERIMENT_ENABLE_SAMPLING) } @@ -200,22 +163,63 @@ class EvalConfig(propertiesFile: File) { return config!!.getEnum(EXPERIMENT_CHERRY_TYPE, CherryType::class.java) } + fun EXPERIMENT_TIMEOUT_LENGTH(): Long { + return config!!.getLong(EXPERIMENT_TIMEOUT_LENGTH, 0) + } + + fun EXPERIMENT_TIMEOUT_UNIT(): TimeUnit { + return config!!.getEnum(EXPERIMENT_TIMEOUT_UNIT, TimeUnit::class.java, null) + } + + fun EXPERIMENT_PATCHER_GNU_PATCH(): Boolean { + return config!!.getBoolean(EXPERIMENT_PATCHER_GNU_PATCH) + } + + fun EXPERIMENT_PATCHER_GIT_APPLY(): Boolean { + return config!!.getBoolean(EXPERIMENT_PATCHER_GIT_APPLY) + } + + fun EXPERIMENT_PATCHER_GIT_CP(): Boolean { + return config!!.getBoolean(EXPERIMENT_PATCHER_GIT_CP) + } + + fun EXPERIMENT_PATCHER_MPATCH(): Boolean { + return config!!.getBoolean(EXPERIMENT_PATCHER_MPATCH) + } + + fun PRELOAD_REPOSITORIES(): Boolean { + return config!!.getBoolean(PRELOAD_REPOSITORIES) + } + + fun CLEAN_REPOSITORIES(): Boolean { + return config!!.getBoolean(CLEAN_REPOSITORIES) + } + companion object { + 
private const val PRELOAD_REPOSITORIES = "preload-repositories" + private const val CLEAN_REPOSITORIES = "clean-repositories" + + // Which patchers should be enabled? + private const val EXPERIMENT_PATCHER_GNU_PATCH = "experiment.patcher.gnu-patch" + private const val EXPERIMENT_PATCHER_GIT_APPLY = "experiment.patcher.git-apply" + private const val EXPERIMENT_PATCHER_GIT_CP = "experiment.patcher.git-cp" + private const val EXPERIMENT_PATCHER_MPATCH = "experiment.patcher.mpatch" + + // The number of EXPERIMENT_TIMEOUT_UNIT to wait for a patcher to finish patching (long) + private const val EXPERIMENT_TIMEOUT_LENGTH = "experiment.timeout.length" + + // The time unit for the timeout, e.g., SECONDS, MINUTES, ... + private const val EXPERIMENT_TIMEOUT_UNIT = "experiment.timeout.unit" + // The first id of repetitions for each commit and source target combination private const val EXPERIMENT_REPEATS_START = "experiment.repeats.start" // The last id of repetitions for each commit and source target combination private const val EXPERIMENT_REPEATS_END = "experiment.repeats.end" - // The number of generated variants - private const val EXPERIMENT_VARIANT_COUNT = "experiment.variant.count" - // The working directory private const val EXPERIMENT_DIR_MAIN = "experiment.dir.main" - // The directory containing the ground truth - private const val EXPERIMENT_DIR_GROUND_TRUTH = "experiment.dir.ground-truths" - // The directory to which the repositories are cloned to private const val EXPERIMENT_DIR_REPOS = "experiment.dir.repos" @@ -224,7 +228,8 @@ class EvalConfig(propertiesFile: File) { private const val EXPERIMENT_CHERRY_TYPE = "experiment.cherry-type" - // Enable saving of certain files (e.g., feature list, presence conditions, configurations) for + // Enable saving of certain files (e.g., feature list, presence conditions, configurations) + // for // additional debugging private const val EXPERIMENT_DEBUG = "experiment.debug" diff --git 
a/src/main/kotlin/org/variantsync/evaluation/execution/EvalOperations.kt b/src/main/kotlin/org/variantsync/evaluation/execution/EvalOperations.kt index 9b55a20a..ad60fd2c 100644 --- a/src/main/kotlin/org/variantsync/evaluation/execution/EvalOperations.kt +++ b/src/main/kotlin/org/variantsync/evaluation/execution/EvalOperations.kt @@ -1,18 +1,17 @@ package org.variantsync.evaluation.execution -import org.tinylog.kotlin.Logger -import org.variantsync.evaluation.util.shell.AppliedPatchTracker -import org.variantsync.evaluation.util.shell.ShellExecutor -import org.variantsync.evaluation.patching.Patcher import java.io.IOException import java.io.UncheckedIOException import java.nio.file.Files import java.nio.file.Path +import org.tinylog.kotlin.Logger +import org.variantsync.evaluation.patching.Patcher +import org.variantsync.evaluation.util.shell.AppliedPatchTracker +import org.variantsync.evaluation.util.shell.ShellExecutor -class EvalOperations(mainDir: Path, gitHubRepoPath: Path) : Operations() { +class EvalOperations(config: EvalConfig, gitHubRepoPath: Path) : Operations() { // Working directory - @JvmField - var workDir: Path + @JvmField var workDir: Path // Debug directory private val debugBaseDir: Path @@ -46,7 +45,9 @@ class EvalOperations(mainDir: Path, gitHubRepoPath: Path) : Operations() { val STRIP = 1 init { + val mainDir = config.EXPERIMENT_DIR_MAIN() try { + var mainDir = mainDir.toAbsolutePath() if (mainDir.toFile().mkdirs()) { Logger.info("Created main directory $mainDir") } @@ -64,15 +65,17 @@ class EvalOperations(mainDir: Path, gitHubRepoPath: Path) : Operations() { splitPatchFile = workDir.resolve("patch-split.diff") rejectsFile = workDir.resolve("rejects-normal.txt") appliedPatchTracker = AppliedPatchTracker() - shell = - ShellExecutor( - appliedPatchTracker, - appliedPatchTracker, - workDir - ) - - patchers = defaultPatchers(STRIP) - repoManager = VariantRepoManager(sourceVariantV0, sourceVariantV1, targetVariantV0, targetVariantV1, 
gitHubRepoPath) + shell = ShellExecutor(appliedPatchTracker, appliedPatchTracker, workDir) + + patchers = initializePatchers(config, STRIP) + repoManager = + VariantRepoManager( + sourceVariantV0, + sourceVariantV1, + targetVariantV0, + targetVariantV1, + gitHubRepoPath + ) } fun debugDir(directory: String): Path { @@ -130,4 +133,4 @@ class EvalOperations(mainDir: Path, gitHubRepoPath: Path) : Operations() { fun strip(): Int { return STRIP } -} \ No newline at end of file +} diff --git a/src/main/kotlin/org/variantsync/evaluation/execution/EvalUtils.kt b/src/main/kotlin/org/variantsync/evaluation/execution/EvalUtils.kt index e4ae191b..b5102c5e 100644 --- a/src/main/kotlin/org/variantsync/evaluation/execution/EvalUtils.kt +++ b/src/main/kotlin/org/variantsync/evaluation/execution/EvalUtils.kt @@ -1,14 +1,36 @@ package org.variantsync.evaluation.execution -import org.variantsync.evaluation.util.diff.components.FileDiff -import org.variantsync.evaluation.util.diff.components.OriginalDiff -import org.variantsync.evaluation.patching.* import java.io.IOException +import java.math.RoundingMode +import java.nio.file.FileVisitResult import java.nio.file.Files import java.nio.file.Path +import java.nio.file.SimpleFileVisitor +import java.nio.file.attribute.BasicFileAttributes +import java.security.SecureRandom +import java.text.DecimalFormat import java.util.* +import java.util.concurrent.Executors +import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicInteger +import org.eclipse.jgit.api.Git +import org.tinylog.kotlin.Logger +import org.variantsync.evaluation.patching.* +import org.variantsync.evaluation.util.diff.DiffParser +import org.variantsync.evaluation.util.diff.components.FileDiff +import org.variantsync.evaluation.util.diff.components.OriginalDiff +import org.variantsync.evaluation.util.shell.CpCommand +import org.variantsync.evaluation.util.shell.DiffCommand +import org.variantsync.evaluation.util.shell.GitConfigCommand +import 
org.variantsync.evaluation.util.shell.RmCommand +import org.yaml.snakeyaml.LoaderOptions +import org.yaml.snakeyaml.Yaml -fun filterUnpatchedFiles(originalPatch: OriginalDiff, diffToFilter: OriginalDiff, strip: Int): OriginalDiff { +fun filterUnpatchedFiles( + originalPatch: OriginalDiff, + diffToFilter: OriginalDiff, + strip: Int +): OriginalDiff { val oldFiles = HashSet() val newFiles = HashSet() for (fd in originalPatch.fileDiffs) { @@ -27,13 +49,25 @@ fun filterUnpatchedFiles(originalPatch: OriginalDiff, diffToFilter: OriginalDiff return OriginalDiff(filteredDiffs) } -fun defaultPatchers(strip: Int): List { +fun initializePatchers(config: EvalConfig, strip: Int): List { val patchers = ArrayList() - patchers.add(GNUPatch("unix_patch", strip)) - // patchers.add(MPatch("pwm_f1", strip, 1)) - patchers.add(MPatch("pwm_f2", strip, 2)) - // patchers.add(GitApply("git_apply", strip)) - patchers.add(GitCP("git_cherry", strip, MergeStrategy.Ours)) + if (config.EXPERIMENT_PATCHER_GNU_PATCH()) { + patchers.add(GNUPatch(config, "unix_patch", strip)) + Logger.debug("Initialized GNU patch.") + } + if (config.EXPERIMENT_PATCHER_GIT_APPLY()) { + patchers.add(GitApply(config, "git_apply", strip)) + Logger.debug("Initialized git apply.") + } + if (config.EXPERIMENT_PATCHER_GIT_CP()) { + patchers.add(GitCP(config, "git_cherry", strip, MergeStrategy.Ours)) + Logger.debug("Initialized git cherry-pick.") + } + if (config.EXPERIMENT_PATCHER_MPATCH()) { + patchers.add(MPatch(config, "mpatch", strip, 2)) + Logger.debug("Initialized mpatch.") + } + Thread.sleep(3000) return patchers } @@ -51,4 +85,446 @@ fun readContentSafely(filePath: Path): List { } } +fun cloneGitHubRepo(config: EvalConfig, repoId: String): Path { + val repoUri = "https://github.com/$repoId.git" + val cloneDir = config.EXPERIMENT_DIR_REPOS().resolve(repoId.replace("/", "_")) + + if (Files.exists(cloneDir)) { + return cloneDir + } + + Logger.info("cloning $repoUri into $cloneDir") + 
Git.cloneRepository().setURI(repoUri).setDirectory(cloneDir.toFile()).call().close() + return cloneDir +} + +fun printProgress(completed: Int, numCherryPicks: Int, repetition: Int) { + val completionPercentage = 100 * (completed.toDouble() / numCherryPicks.toDouble()) + val df = DecimalFormat("#.##") + df.roundingMode = RoundingMode.DOWN + Logger.info( + "(Rep.: $repetition) Finished $completed of $numCherryPicks cherry picks (${ + df.format( + completionPercentage + ) + }%)\n" + ) +} + +fun cloneDatasets(allSamples: ArrayList>, config: EvalConfig) { + if (!config.PRELOAD_REPOSITORIES()) { + return + } + Logger.info("Looking for repositories that still should be cloned.") + val datasetsToClone = HashSet() + for (s in allSamples) { + for (dataset in s) { + if (dataset.cherryPicks.isNotEmpty()) { + datasetsToClone.add(dataset) + } + } + } + + Logger.info("There are ${datasetsToClone.size} to check.") + val threadPool = Executors.newFixedThreadPool(config.EXPERIMENT_THREAD_COUNT()) + val i = AtomicInteger(0) + for (dataset in datasetsToClone) { + threadPool.submit { + try { + cloneGitHubRepo(config, dataset.repositoryId) + } catch (_: Exception) { + // Retry in case of a server error + Thread.sleep(60_000) + Logger.info( + "Failed to clone repository" + + dataset.repositoryId + + ". This may happen due to server rate limiting. Retrying..." 
+ ) + cloneGitHubRepo(config, dataset.repositoryId) + } finally { + synchronized(datasetsToClone) { + i.andIncrement + Logger.info { + "Cloned or found %,d of %,d repositories".format( + i.get(), + datasetsToClone.size + ) + } + } + } + } + } + threadPool.shutdown() + if (!threadPool.awaitTermination(1, TimeUnit.DAYS)) { + Logger.error("Thread pool timeout while cloning repositories.") + } + Logger.info("Cloned all repositories\n") +} + +fun createOrLoadSamples( + config: EvalConfig, + datasetsPerLanguage: Map>, + rand: SecureRandom +): ArrayList> { + if (Files.exists(config.EXPERIMENT_SAMPLE_FILE())) { + Logger.info("Found existing sample file...loading it\n") + return loadSample(config.EXPERIMENT_SAMPLE_FILE()) + } + + val allSamples = ArrayList>() + for (i in config.EXPERIMENT_REPEATS_START()..config.EXPERIMENT_REPEATS_END()) { + allSamples.add(ArrayList()) + } + val langs = ArrayList(datasetsPerLanguage.keys) + langs.sort() + for (language in langs) { + val datasets = datasetsPerLanguage[language]!! 
+ val sample: List> = + if (config.EXPERIMENT_ENABLE_SAMPLING()) { + Logger.info( + "Sampling for next language $language with ${datasets.size} usable repositories" + ) + sampleCherries(config, datasets, rand) + } else { + Logger.info( + "Loading dataset for $language with ${datasets.size} usable repositories" + ) + val temp = ArrayList>() + for (i in config.EXPERIMENT_REPEATS_START()..config.EXPERIMENT_REPEATS_END()) { + temp.add(datasets) + } + temp + } + for (sampleList in sample.withIndex()) { + allSamples[sampleList.index].addAll(sampleList.value) + } + } + // Shuffle the datasets to consider repos in random order + for (repetition in config.EXPERIMENT_REPEATS_START()..config.EXPERIMENT_REPEATS_END()) { + val repetitionIndex = repetition - config.EXPERIMENT_REPEATS_START() + allSamples[repetitionIndex].shuffle(rand) + } + Logger.info("Done.\n") + saveSample(config.EXPERIMENT_SAMPLE_FILE(), allSamples) + return allSamples +} + +fun sampleCherries( + config: EvalConfig, + datasets: List, + rand: SecureRandom +): List> { + val allCherryPicks = HashMap() + // Collect all cherry picks and associate them with the dataset from which they came + for (dataset in datasets) { + for (cherryPick in dataset.cherryPicks) { + val datasetCopy = + CherryDataset( + dataset.datasetName, + dataset.repositoryId, + dataset.language, + ArrayList() + ) + allCherryPicks[cherryPick] = datasetCopy + } + } + + val sampleSize = determineSampleSize(config, allCherryPicks.keys.size) + Logger.info( + "Considering ${config.EXPERIMENT_REPEATS_COUNT()} representative samples of $sampleSize cherry picks " + + "for ${allCherryPicks.keys.size} cherry picks in total." 
+ ) + val sample: MutableList> = ArrayList() + val cherries: List = ArrayList(allCherryPicks.keys) + for (repetition in config.EXPERIMENT_REPEATS_START()..config.EXPERIMENT_REPEATS_END()) { + val cherrySubset = cherries.shuffled(rand).subList(0, sampleSize) + val remainingDatasets = HashMap>() + for (cherry in cherrySubset) { + val cherryPickList = + remainingDatasets.getOrPut(allCherryPicks[cherry]!!) { ArrayList() } + cherryPickList.add(cherry) + } + + val datasetSubset: MutableList = ArrayList() + for (dataset in remainingDatasets.keys) { + dataset.cherryPicks = remainingDatasets[dataset]!! + datasetSubset.add(dataset) + } + + val sampledCherries = countCherryPicks(datasetSubset) + Logger.info("Created sample of $sampledCherries cherry picks for repetition $repetition.") + if (sampledCherries != sampleSize) { + Logger.error("Mismatch of expected to actual sample size") + } + + sample.add(datasetSubset) + } + return sample +} + +fun countCherryPicks(datasets: List): Int { + var totalNumberOfCherryPicks = 0 + for (dataset in datasets) { + totalNumberOfCherryPicks += dataset.cherryPicks.size + } + return totalNumberOfCherryPicks +} + +class YamlFileVisitor : SimpleFileVisitor() { + val yamlFiles = mutableListOf() + + override fun visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult { + if (file.toString().endsWith(".yaml")) { + yamlFiles.add(file) + } + return FileVisitResult.CONTINUE + } + + override fun visitFileFailed(file: Path, exc: IOException): FileVisitResult { + return FileVisitResult.CONTINUE + } +} + +fun loadPRDatasets(config: EvalConfig): Map> { + val datasetsPerLanguage = HashMap>() + for (yamlFile in getYamlFiles(config.EXPERIMENT_DATASETS())) { + val dataset = loadDataset(yamlFile, config.EXPERIMENT_CHERRY_TYPE()) + if (dataset.isPresent) { + val datasetSize = dataset.get().cherryPicks.size + if (datasetSize < config.EXPERIMENT_DATASET_MIN_SIZE() || + datasetSize > config.EXPERIMENT_DATASET_MAX_SIZE() + ) { + Logger.info( + ("Skipping 
%s with %s cherry picks because its size is outside the range (%d, %d) set in " + + "the configuration.") + .format( + dataset.get().datasetName, + datasetSize, + config.EXPERIMENT_DATASET_MIN_SIZE(), + config.EXPERIMENT_DATASET_MAX_SIZE(), + ) + ) + continue + } + if (datasetSize == 0) { + continue + } + val list = datasetsPerLanguage.getOrPut(dataset.get().language) { ArrayList() } + list.add(dataset.get()) + } + } + return datasetsPerLanguage +} + +fun getYamlFiles(directoryPath: Path): List { + val yamlFileVisitor = YamlFileVisitor() + + Files.walkFileTree(directoryPath, yamlFileVisitor) + + return yamlFileVisitor.yamlFiles +} + +enum class CherryType { + Trivial, + Complex, + Both, +} + +fun loadDataset(pathToYaml: Path, cherryType: CherryType): Optional { + val parseException = + IllegalArgumentException( + "the yaml file under $pathToYaml cannot be parsed into a pr dataset" + ) + + val loaderOptions = LoaderOptions() + loaderOptions.codePointLimit = Integer.MAX_VALUE + val yaml = Yaml(loaderOptions) + val entries = yaml.loadAll(Files.readString(pathToYaml)).iterator().next() + + if (entries !is List<*>) { + throw parseException + } + + val repoId = entries[0] + if (repoId !is HashMap<*, *>) { + throw parseException + } + val repoName = repoId["repo_name"] + if (repoName !is String) { + throw parseException + } + + val language = repoId["language"] + if (language !is String) { + throw parseException + } + + val cherryPicks = ArrayList() + val prEntries = entries[1] + if (prEntries !is List<*>) { + throw parseException + } + + var id = 0 + for (cp in prEntries) { + if (cp !is HashMap<*, *>) { + throw parseException + } + val isTrivial = cp["is_trivial"] as? 
Boolean ?: true + + if (cherryType == CherryType.Trivial && !isTrivial) { + continue + } else if (cherryType == CherryType.Complex && isTrivial) { + continue + } + + val cherryAndTarget = cp["cherry_and_target"] + if (cherryAndTarget !is HashMap<*, *>) { + throw parseException + } + + val cherry = cherryAndTarget["cherry"] + val target = cherryAndTarget["target"] + + if (cherry !is HashMap<*, *> || target !is HashMap<*, *>) { + throw parseException + } + + val cherryParents = cherry["parent_ids"] + val targetParents = target["parent_ids"] + if (cherryParents !is List<*> || targetParents !is List<*>) { + throw parseException + } + if (cherryParents.size != 1 || targetParents.size != 1) { + // We filter all cherry-pick scenarios with merges + continue + } + + val cherryId = cherry["id"] + val cherryParentId = cherryParents[0] + // The target of a cherry-pick is what we consider the expected result + // The parent of this target is our actual target to which we want to propagate the changes + val targetId = targetParents[0] + val expectedResultId = target["id"] + + if (cherryParentId !is String || + expectedResultId !is String || + cherryId !is String || + targetId !is String + ) { + return Optional.empty() + } + + cherryPicks.add( + CherryPick(id, cherryId, cherryParentId, targetId, expectedResultId, isTrivial) + ) + id++ + } + + return Optional.of( + CherryDataset(pathToYaml.fileName.toString(), repoName, language, cherryPicks) + ) +} + +fun prepareVariantDirectories(operations: EvalOperations, gitHubRepoPath: Path) { + Logger.debug("Creating new source and target variant copies.") + operations + .shell + .execute(CpCommand(gitHubRepoPath, operations.sourceVariantV0).recursive()) + .expect("Was not able to copy source variant V0.") + operations + .shell + .execute(CpCommand(gitHubRepoPath, operations.sourceVariantV1).recursive()) + .expect("Was not able to copy source variant V1.") + operations + .shell + .execute(CpCommand(gitHubRepoPath, 
operations.targetVariantV0).recursive()) + .expect("Was not able to copy target variant V0.") + operations + .shell + .execute(CpCommand(gitHubRepoPath, operations.targetVariantV1).recursive()) + .expect("Was not able to copy target variant V1.") + // Disable GPG signing locally, in case it is enabled for a user + operations + .shell + .execute(GitConfigCommand.DisableGPGSignLocally(), operations.targetVariantV0) + .expect("Was not able to configure git") +} + +fun cleanVariantDirectories(operations: EvalOperations) { + Logger.debug("Cleaning old variant files.") + if (Files.exists(operations.sourceVariantV0)) { + operations + .shell + .execute(RmCommand(operations.sourceVariantV0).recursive().force()) + .expect("Was not able to remove source variant V0.") + } + if (Files.exists(operations.sourceVariantV1)) { + operations + .shell + .execute(RmCommand(operations.sourceVariantV1).recursive().force()) + .expect("Was not able to remove source variant V1.") + } + if (Files.exists(operations.targetVariantV0)) { + operations + .shell + .execute(RmCommand(operations.targetVariantV0).recursive().force()) + .expect("Was not able to remove target variant V0.") + } + if (Files.exists(operations.targetVariantV1)) { + operations + .shell + .execute(RmCommand(operations.sourceVariantV1).recursive().force()) + .expect("Was not able to remove target variant V1.") + } +} + +fun loadCompletedRuns( + config: EvalConfig +): HashMap>> { + if (!Files.exists(config.EXPERIMENT_DIR_RESULTS())) { + return HashMap() + } + val completedRuns = loadProcessedRuns(config) + + val map = HashMap>>() + for (run in completedRuns) { + val datasetName = run.datasetName + val innerMap = map.getOrPut(run.repetition) { HashMap() } + val set = innerMap.getOrPut(datasetName) { HashSet() } + set.add(run) + } + return map +} + +// Get the difference between two directories using UNIX diff +fun getOriginalDiff(operations: EvalOperations, v0Path: Path, v1Path: Path): OriginalDiff { + return 
getOriginalDiff(operations, v0Path, v1Path, false) +} + +// Get the difference between two directories using UNIX diff +fun getOriginalDiff( + operations: EvalOperations, + v0Path: Path, + v1Path: Path, + ignoreBlanks: Boolean +): OriginalDiff { + val diffCommand: DiffCommand = + DiffCommand.Recommended( + operations.workDir.relativize(v0Path), + operations.workDir.relativize(v1Path) + ) + .exclude(".*") + if (ignoreBlanks) { + diffCommand.ignoreBlankLines() + } + val output = operations.shell.execute(diffCommand, operations.workDir) + // .expect("Was not able to diff variants.") + return if (output.isSuccess) { + DiffParser.toOriginalDiff(output.success) + } else { + // Assume that the error lines still contain valid diffs, which is usually the case + DiffParser.toOriginalDiff(output.failure.output) + } +} diff --git a/src/main/kotlin/org/variantsync/evaluation/execution/EvaluationRun.kt b/src/main/kotlin/org/variantsync/evaluation/execution/EvaluationRun.kt index 9b28d3be..44027840 100644 --- a/src/main/kotlin/org/variantsync/evaluation/execution/EvaluationRun.kt +++ b/src/main/kotlin/org/variantsync/evaluation/execution/EvaluationRun.kt @@ -1,6 +1,11 @@ package org.variantsync.evaluation.execution -class EvaluationRun(val repetition: Int, val datasetName: String, val cherry: String, val pick: String) { +class EvaluationRun( + val repetition: Int, + val datasetName: String, + val cherry: String, + val pick: String +) { override fun equals(other: Any?): Boolean { if (this === other) return true if (javaClass != other?.javaClass) return false @@ -22,4 +27,5 @@ class EvaluationRun(val repetition: Int, val datasetName: String, val cherry: St result = 31 * result + pick.hashCode() return result } -} \ No newline at end of file +} + diff --git a/src/main/kotlin/org/variantsync/evaluation/execution/IDProvider.kt b/src/main/kotlin/org/variantsync/evaluation/execution/IDProvider.kt index 5a17c1d0..a47791e1 100644 --- 
a/src/main/kotlin/org/variantsync/evaluation/execution/IDProvider.kt +++ b/src/main/kotlin/org/variantsync/evaluation/execution/IDProvider.kt @@ -11,4 +11,5 @@ class IDProvider(val start: ULong) { fun start(): ULong { return start } -} \ No newline at end of file +} + diff --git a/src/main/kotlin/org/variantsync/evaluation/execution/Operations.kt b/src/main/kotlin/org/variantsync/evaluation/execution/Operations.kt index e602b6c3..71759528 100644 --- a/src/main/kotlin/org/variantsync/evaluation/execution/Operations.kt +++ b/src/main/kotlin/org/variantsync/evaluation/execution/Operations.kt @@ -1,8 +1,8 @@ package org.variantsync.evaluation.execution +import java.nio.file.Path import org.variantsync.evaluation.util.shell.AppliedPatchTracker import org.variantsync.evaluation.util.shell.ShellExecutor -import java.nio.file.Path abstract class Operations { abstract fun rejectsFile(): Path @@ -16,4 +16,5 @@ abstract class Operations { abstract fun filteredPatchFile(): Path abstract fun patchFile(): Path abstract fun sourceV0Path(name: String): Path -} \ No newline at end of file +} + diff --git a/src/main/kotlin/org/variantsync/evaluation/execution/VariantRepoManager.kt b/src/main/kotlin/org/variantsync/evaluation/execution/VariantRepoManager.kt index 253493c4..176f0af9 100644 --- a/src/main/kotlin/org/variantsync/evaluation/execution/VariantRepoManager.kt +++ b/src/main/kotlin/org/variantsync/evaluation/execution/VariantRepoManager.kt @@ -1,13 +1,15 @@ package org.variantsync.evaluation.execution +import java.nio.file.Path import org.tinylog.kotlin.Logger import org.variantsync.evaluation.util.shell.* -import java.nio.file.Path class VariantRepoManager( - sourceVariantV0: Path, sourceVariantV1: Path, - private val targetVariantV0: Path, targetVariantV1: Path, - private val githubRepoPath: Path + sourceVariantV0: Path, + sourceVariantV1: Path, + private val targetVariantV0: Path, + targetVariantV1: Path, + private val githubRepoPath: Path ) { var lastCherry: CherryPick? 
= null @@ -21,8 +23,8 @@ class VariantRepoManager( Logger.debug("Checking out commits of next cherry pick") try { val command = GitCheckoutCommand.Recommended(cherryPick.cherryParentCommit) - if (this.shellSourceV0.execute(command).isFailure()) { - Logger.info("Was not able to find source variant V0 (source before changes)") + if (this.shellSourceV0.execute(command).isFailure) { + Logger.debug("Was not able to find source variant V0 (source before changes)") return false } } catch (e: Exception) { @@ -32,7 +34,7 @@ class VariantRepoManager( try { val command = GitCheckoutCommand.Recommended(cherryPick.cherryCommit) if (this.shellSourceV1.execute(command).isFailure) { - Logger.info("Was not able to find source variant V1 (source after changes)") + Logger.debug("Was not able to find source variant V1 (source after changes)") return false } } catch (e: Exception) { @@ -42,10 +44,12 @@ class VariantRepoManager( try { resetTargetVariant() val command = GitCheckoutCommand.Recommended(cherryPick.targetCommit) - if (this.shellTargetV0.execute(command).isFailure) { - Logger.info("Was not able to find target variant V0 (target before change propagation)") - return false - } + if (this.shellTargetV0.execute(command).isFailure) { + Logger.info( + "Was not able to find target variant V0 (target before change propagation)" + ) + return false + } } catch (e: Exception) { Logger.error(e.message) return false @@ -54,8 +58,10 @@ class VariantRepoManager( try { val command = GitCheckoutCommand.Recommended(cherryPick.expectedResultCommit) if (this.shellTargetV1.execute(command).isFailure) { - Logger.info("Was not able to find source variant V1 (expected result of change propagation)") - return false + Logger.info( + "Was not able to find source variant V1 (expected result of change propagation)" + ) + return false } } catch (e: Exception) { Logger.error(e.message) @@ -86,4 +92,5 @@ class VariantRepoManager( Logger.error("Was not able to clean target.", e) } } -} \ No newline at end of 
file +} + diff --git a/src/main/kotlin/org/variantsync/evaluation/execution/io.kt b/src/main/kotlin/org/variantsync/evaluation/execution/io.kt index 8ab911cf..5df4c861 100644 --- a/src/main/kotlin/org/variantsync/evaluation/execution/io.kt +++ b/src/main/kotlin/org/variantsync/evaluation/execution/io.kt @@ -2,38 +2,32 @@ package org.variantsync.evaluation.execution import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper -import org.variantsync.evaluation.analysis.PatchOutcome -import org.variantsync.evaluation.analysis.ExperimentResult import java.io.* import java.nio.file.Files import java.nio.file.Path import java.nio.file.StandardOpenOption import java.util.function.Consumer +import org.variantsync.evaluation.analysis.TaskResult fun saveResult( - result: ExperimentResult, - runID: ULong, + result: TaskResult, + runID: ULong, ) { try { writeAsJSON(result.resultObj, result.pathToResultFile, true) } catch (e: IOException) { - panic( - "Was not able to write filtered patch result file for run " - + runID, e - ) + panic("Was not able to write filtered patch result file for run " + runID, e) } } fun markEvalRun( - evalRun: EvaluationRun, - path: Path, + evalRun: EvaluationRun, + path: Path, ) { try { writeAsJSON(evalRun, path, true) } catch (e: IOException) { - panic( - "Was not able to mark eval run in file $path ", e - ) + panic("Was not able to mark eval run in file $path ", e) } } @@ -55,14 +49,18 @@ fun writeAsJSON(obj: Any, pathToFile: Path, append: Boolean) { try { Files.createDirectories(pathToFile.parent) Files.createFile(pathToFile) - } catch (e: java.nio.file.FileAlreadyExistsException) { + } catch (_: java.nio.file.FileAlreadyExistsException) { // Ignore if the file already exists } if (append) { Files.writeString(pathToFile, jsonBuilder.toString(), StandardOpenOption.APPEND) } else { - Files.writeString(pathToFile, jsonBuilder.toString(), StandardOpenOption.TRUNCATE_EXISTING) + 
Files.writeString( + pathToFile, + jsonBuilder.toString(), + StandardOpenOption.TRUNCATE_EXISTING + ) } } } @@ -75,37 +73,15 @@ fun saveSample(path: Path, sample: ArrayList>) { } fun loadSample(path: Path): ArrayList> { - ObjectInputStream(FileInputStream(path.toFile())).use { return it.readObject() as ArrayList> } -} - - -@Throws(IOException::class) -fun loadResultObjects(paths: HashMap>): HashMap> { - val outcomes = HashMap>() - for (rep in paths.keys) { - for (path in paths[rep]!!) { - Files.newBufferedReader(path).use { reader -> - val outcomeLines: MutableList = ArrayList() - var line = reader.readLine() - while (line != null) { - if (line.isEmpty()) { - val outcome = parseResult(outcomeLines) - outcomes.getOrPut(rep) { ArrayList() }.add(outcome) - outcomeLines.clear() - } else { - outcomeLines.add(line) - } - line = reader.readLine() - } - } - } + ObjectInputStream(FileInputStream(path.toFile())).use { it -> + val obj = it.readObject() + @Suppress("UNCHECKED_CAST") return obj as ArrayList> } - return outcomes } @Throws(IOException::class) fun loadProcessedRuns(config: EvalConfig): MutableList { - val runs = ArrayList(); + val runs = ArrayList() if (!Files.exists(config.EXPERIMENT_PROCESSED_FILE())) { return runs } @@ -132,39 +108,4 @@ private fun parseEvalRun(lines: List): EvaluationRun { val mapper = jacksonObjectMapper() mapper.registerModule(JavaTimeModule()) return mapper.readValue(sb.toString(), EvaluationRun::class.java) -} - -private fun parseResult(lines: List): PatchOutcome { - val sb = StringBuilder() - lines.forEach(Consumer { l: String? 
-> sb.append(l).append("\n") }) - val mapper = jacksonObjectMapper() - mapper.registerModule(JavaTimeModule()) - return mapper.readValue(sb.toString(), PatchOutcome::class.java) -} - -fun listResultFiles(resultsDir: Path) : HashMap> { - val runDirs = ArrayList() - Files.list(resultsDir).use { files -> - files.filter { f: Path -> - val fileName = f.getName(f.nameCount-1).toString() - fileName.startsWith("rep") - }.forEach { f: Path -> - runDirs.add(f) - } - } - val resultFiles = HashMap>() - - for (runDir in runDirs) { - Files.list(runDir).use { files -> - files.filter { f: Path -> - val fileName = f.fileName.toString() - fileName.endsWith(".results") - }.forEach { f: Path -> - val runId = f.getName(f.nameCount-1).toString().split("-")[1].toInt() - resultFiles.getOrPut(runId) { ArrayList() }.add(f) - } - } - } - - return resultFiles } \ No newline at end of file diff --git a/src/main/kotlin/org/variantsync/evaluation/execution/sampling.kt b/src/main/kotlin/org/variantsync/evaluation/execution/sampling.kt index 86840b2d..3a2dbb1e 100644 --- a/src/main/kotlin/org/variantsync/evaluation/execution/sampling.kt +++ b/src/main/kotlin/org/variantsync/evaluation/execution/sampling.kt @@ -3,7 +3,8 @@ package org.variantsync.evaluation.execution // where: // (n) is the sample size, // (N) is the population size, -// (z) is the Z-score corresponding to your desired confidence level (for a 95% confidence level, (z = 1.96)), +// (z) is the Z-score corresponding to your desired confidence level (for a 95% confidence level, +// (z = 1.96)), // (p) is the sample proportion (in percent, such as 50% = 0.5), // (e) is the margin of error (in percent, such as 5% = 0.05). 
fun determineSampleSize(config: EvalConfig, populationSize: Int): Int { @@ -16,5 +17,6 @@ fun determineSampleSize(config: EvalConfig, populationSize: Int): Int { // \frac{ z^2 p(1-p) } {e^2} + N - 1 } val lowerFrac = 1 + (((z * z) * p * (1 - p)) / ((e * e) * populationSize)) - return (upperFrac / lowerFrac).toInt() + 1; -} \ No newline at end of file + return (upperFrac / lowerFrac).toInt() + 1 +} + diff --git a/src/main/kotlin/org/variantsync/evaluation/execution/threading.kt b/src/main/kotlin/org/variantsync/evaluation/execution/threading.kt deleted file mode 100644 index 6edaebf6..00000000 --- a/src/main/kotlin/org/variantsync/evaluation/execution/threading.kt +++ /dev/null @@ -1,83 +0,0 @@ -package org.variantsync.evaluation.execution - -import org.tinylog.kotlin.Logger -import org.variantsync.evaluation.analysis.TaskOutcome -import java.util.concurrent.ExecutorService -import java.util.concurrent.Future -import java.util.concurrent.TimeUnit -import java.util.concurrent.TimeoutException - -class FutureAndEvalRun (val future: Future, val evaluationRun: EvaluationRun) - -fun waitForShutdown( - threadPool: ExecutorService, - futuresAndRuns: MutableList, - config: EvalConfig, -): Boolean { - val timeoutLength = 10L - val timeoutUnit = TimeUnit.MINUTES - val allowedTimeouts = 3 - var timouts = 0 - var processed = 0uL - var hasTimeout = false - threadPool.shutdown() - for (fAndE in futuresAndRuns) { - val future = fAndE.future - processed++ - val runID: ULong - val taskOutCome: TaskOutcome - try { - // TODO: Make timeout configurable - taskOutCome = future.get(timeoutLength, timeoutUnit) - runID = taskOutCome.runID - if (processed == 1uL || processed % 25uL == 0uL) { - Logger.info( - String.format( - "Running task %s of %s with ID %s.", - processed.toString(), - futuresAndRuns.size.toString(), - runID, - ) - ) - } - - if (taskOutCome.result.isPresent) { - for (result in taskOutCome.result.get()) { - saveResult(result, runID) - timouts=0 - } - } - 
markEvalRun(taskOutCome.evalRun, config.EXPERIMENT_PROCESSED_FILE()) - } catch (e: TimeoutException) { - Logger.warn("Timed out while running task. Skipping this task") - future.cancel(true) - markEvalRun(fAndE.evaluationRun, config.EXPERIMENT_PROCESSED_FILE()) - timouts++ - if (timouts > allowedTimeouts) { - // if there are too many timeouts for a repository in a row, we cancel the evaluation for this repository - Logger.warn("Stopping all tasks for subject repository - too many timeouts!") - hasTimeout = true - break - } - } catch (e: Throwable) { - Logger.error("Failed to finish task!") - Logger.error(e) - e.printStackTrace() - markEvalRun(fAndE.evaluationRun, config.EXPERIMENT_PROCESSED_FILE()) - } - } - Logger.info("Waiting for thread pool shutdown") - threadPool.shutdownNow() - if (!threadPool.awaitTermination(timeoutLength, timeoutUnit)) { - Logger.error("Thread pool timeout.") - hasTimeout = true - } - - Logger.info( - String.format( - "Finished %s tasks.", - futuresAndRuns.size.toString() - ) - ) - return hasTimeout -} \ No newline at end of file diff --git a/src/main/kotlin/org/variantsync/evaluation/patching/Change.kt b/src/main/kotlin/org/variantsync/evaluation/patching/Change.kt index 58b9ce69..f22ec309 100644 --- a/src/main/kotlin/org/variantsync/evaluation/patching/Change.kt +++ b/src/main/kotlin/org/variantsync/evaluation/patching/Change.kt @@ -1,12 +1,11 @@ package org.variantsync.evaluation.patching +import java.nio.file.Path import org.variantsync.evaluation.util.diff.components.Hunk import org.variantsync.evaluation.util.diff.lines.AddedLine import org.variantsync.evaluation.util.diff.lines.ChangedLine import org.variantsync.evaluation.util.diff.lines.Line import org.variantsync.evaluation.util.diff.lines.RemovedLine -import java.nio.file.Path - class Change(val lineChange: Line, val hunk: Hunk, val path: Path) { override fun equals(other: Any?): Boolean { @@ -34,11 +33,12 @@ class Change(val lineChange: Line, val hunk: Hunk, val path: Path) { 
fun inverse(): Change { val changedText: String = lineChange.line().substring(1) - val l = if (lineChange is AddedLine) { - RemovedLine("-$changedText") - } else { - AddedLine("+$changedText") - } + val l = + if (lineChange is AddedLine) { + RemovedLine("-$changedText") + } else { + AddedLine("+$changedText") + } return Change(l, hunk, path) } @@ -52,15 +52,16 @@ class Change(val lineChange: Line, val hunk: Hunk, val path: Path) { val sb = StringBuilder() sb.appendLine(this.path) sb.appendLine( - String.format( - "@@ -%d,%d +%d,%d @@", - this.hunk.rawLocation().startLineSource, - 1, - this.hunk.rawLocation().startLineTarget, - 1 - ) + String.format( + "@@ -%d,%d +%d,%d @@", + this.hunk.rawLocation().startLineSource, + 1, + this.hunk.rawLocation().startLineTarget, + 1 + ) ) sb.appendLine(this.lineChange) return super.toString() } -} \ No newline at end of file +} + diff --git a/src/main/kotlin/org/variantsync/evaluation/patching/GNUPatch.kt b/src/main/kotlin/org/variantsync/evaluation/patching/GNUPatch.kt index 71028828..bd66eb8c 100644 --- a/src/main/kotlin/org/variantsync/evaluation/patching/GNUPatch.kt +++ b/src/main/kotlin/org/variantsync/evaluation/patching/GNUPatch.kt @@ -1,37 +1,41 @@ package org.variantsync.evaluation.patching +import java.io.IOException +import java.nio.file.Files +import java.nio.file.Path +import java.util.function.Consumer import org.tinylog.kotlin.Logger +import org.variantsync.evaluation.error.ShellException +import org.variantsync.evaluation.execution.EvalConfig import org.variantsync.evaluation.execution.Operations +import org.variantsync.evaluation.execution.panic +import org.variantsync.evaluation.execution.readContentSafely import org.variantsync.evaluation.util.diff.DiffParser import org.variantsync.evaluation.util.diff.components.OriginalDiff import org.variantsync.evaluation.util.shell.PatchCommand -import org.variantsync.evaluation.execution.panic -import org.variantsync.evaluation.error.ShellException -import 
org.variantsync.evaluation.execution.readContentSafely +import org.variantsync.evaluation.util.shell.ShellExecutor import org.variantsync.vevos.simulation.feature.Variant -import java.io.IOException -import java.nio.file.Files -import java.nio.file.Path -import java.util.function.Consumer -class GNUPatch(private val name: String, private val strip: Int) : Patcher { +class GNUPatch(private val config: EvalConfig, private val name: String, private val strip: Int) : Patcher { override fun applyPatch( - operations: Operations, - sourceVariant: Variant, - targetVariant: Variant, - withFiler: Boolean, + operations: Operations, + sourceVariant: Variant, + targetVariant: Variant, + withFiler: Boolean, ): Rejects { - val rejectFile = if (withFiler) { - operations.rejectsFileFiltered() - } else { - operations.rejectsFile() - } + val rejectFile = + if (withFiler) { + operations.rejectsFileFiltered() + } else { + operations.rejectsFile() + } - val pathToPatchFile = if (withFiler) { - operations.filteredPatchFile() - } else { - operations.patchFile() - } + val pathToPatchFile = + if (withFiler) { + operations.filteredPatchFile() + } else { + operations.patchFile() + } val patch = DiffParser.toOriginalDiff(readContentSafely(pathToPatchFile)) @@ -41,12 +45,15 @@ class GNUPatch(private val name: String, private val strip: Int) : Patcher { } // apply patch to target variant - val patchCommand = PatchCommand.Recommended(pathToPatchFile).strip(strip) - .rejectFile(rejectFile).force().ignoreWhitespace() - val result = operations.shell().execute( - patchCommand, - operations.patchDir() - ) + val patchCommand = + PatchCommand.Recommended(pathToPatchFile) + .strip(strip) + .rejectFile(rejectFile) + .force() + .ignoreWhitespace() + + val customShell = ShellExecutor(Logger::debug, Logger::debug, operations.workDir(),config.EXPERIMENT_TIMEOUT_LENGTH(), config.EXPERIMENT_TIMEOUT_UNIT()) + val result = customShell.execute(patchCommand, operations.patchDir()) val rejects = Rejects(ArrayList()) 
if (result.isSuccess) { @@ -61,18 +68,25 @@ class GNUPatch(private val name: String, private val strip: Int) : Patcher { } private fun readRejectsFromOutput( - patchError: ShellException, - patch: OriginalDiff, + patchError: ShellException, + patch: OriginalDiff, ): Rejects { // Handle rejects val skippedFiles: MutableSet = HashSet() val lines = patchError.output - Logger.debug("Failed to apply part of patch. See debug log and rejects file for more information") + Logger.debug( + "Failed to apply part of patch. See debug log and rejects file for more information" + ) var oldFile: Path for (nextLine in lines) { Logger.debug(nextLine) if (nextLine.startsWith("|---")) { - oldFile = Path.of(nextLine.split("\\s+".toRegex()).dropLastWhile { it.isEmpty() }.toTypedArray()[1]) + oldFile = + Path.of( + nextLine.split("\\s+".toRegex()) + .dropLastWhile { it.isEmpty() } + .toTypedArray()[1] + ) oldFile = oldFile.subpath(strip, oldFile.nameCount) skippedFiles.add(oldFile) } @@ -106,7 +120,11 @@ class GNUPatch(private val name: String, private val strip: Int) : Patcher { } // Read a rejects file - private fun readRejectsFromFile(operations: Operations, rejectFile: Path, patch: OriginalDiff): Rejects { + private fun readRejectsFromFile( + operations: Operations, + rejectFile: Path, + patch: OriginalDiff + ): Rejects { var rejectsDiff: OriginalDiff? 
= null if (Files.exists(rejectFile)) { try { @@ -119,11 +137,15 @@ class GNUPatch(private val name: String, private val strip: Int) : Patcher { val result: OriginalDiff = rejectsDiff ?: OriginalDiff(ArrayList()) if (operations.appliedPatchTracker().hasAnyError()) { - Logger.error("patch that caused the error: {}", patch.fileDiffs()[operations.appliedPatchTracker().patchId]) + Logger.error( + "patch that caused the error: {}", + patch.fileDiffs()[operations.appliedPatchTracker().patchId] + ) } if (operations.appliedPatchTracker().hasCriticalError()) { // There was a critical error due to a bug in patch - // We have to read which file caused the error from our tracker, and then add all patches that came afterward + // We have to read which file caused the error from our tracker, and then add all + // patches that came afterward // to the rejects, because patching was aborted val file = operations.appliedPatchTracker().lastPatchTarget() var afterError = false @@ -145,4 +167,5 @@ class GNUPatch(private val name: String, private val strip: Int) : Patcher { return Rejects(result.intoChanges(0)) } -} \ No newline at end of file +} + diff --git a/src/main/kotlin/org/variantsync/evaluation/patching/GitApply.kt b/src/main/kotlin/org/variantsync/evaluation/patching/GitApply.kt index df8836f1..a80264b9 100644 --- a/src/main/kotlin/org/variantsync/evaluation/patching/GitApply.kt +++ b/src/main/kotlin/org/variantsync/evaluation/patching/GitApply.kt @@ -1,51 +1,48 @@ package org.variantsync.evaluation.patching +import java.io.File +import java.io.IOException +import java.nio.file.Files +import java.nio.file.Path +import java.util.function.Consumer import org.apache.commons.io.FileUtils import org.apache.commons.io.filefilter.IOFileFilter import org.apache.commons.io.filefilter.TrueFileFilter import org.tinylog.kotlin.Logger +import org.variantsync.evaluation.execution.EvalConfig import org.variantsync.evaluation.execution.Operations +import 
org.variantsync.evaluation.execution.panic +import org.variantsync.evaluation.execution.readContentSafely import org.variantsync.evaluation.util.diff.DiffParser import org.variantsync.evaluation.util.diff.components.Hunk import org.variantsync.evaluation.util.shell.GitApplyCommand import org.variantsync.evaluation.util.shell.ShellExecutor -import org.variantsync.evaluation.execution.panic -import org.variantsync.evaluation.execution.readContentSafely import org.variantsync.vevos.simulation.feature.Variant -import java.io.File -import java.io.IOException -import java.nio.file.Files -import java.nio.file.Path -import java.util.function.Consumer - -class GitApply(private val name: String, private val strip: Int) : Patcher { +class GitApply(private val config: EvalConfig, private val name: String, private val strip: Int) : Patcher { override fun applyPatch( - operations: Operations, - sourceVariant: Variant, - targetVariant: Variant, - withFiler: Boolean + operations: Operations, + sourceVariant: Variant, + targetVariant: Variant, + withFiler: Boolean ): Rejects { - val pathToPatchFile = if (withFiler) { - operations.filteredPatchFile() - } else { - operations.patchFile() - } + val pathToPatchFile = + if (withFiler) { + operations.filteredPatchFile() + } else { + operations.patchFile() + } if (!Files.exists(pathToPatchFile)) { // If there is nothing to patch, there is nothing to reject return Rejects(ArrayList()) } - val patchCommand = GitApplyCommand.Recommended(pathToPatchFile).strip(strip) - .reject() + val patchCommand = GitApplyCommand.Recommended(pathToPatchFile).strip(strip).reject() // apply patch to target variant - val customShell = ShellExecutor(Logger::debug, Logger::debug, operations.workDir()) - val result = customShell.execute( - patchCommand, - operations.patchDir() - ) + val customShell = ShellExecutor(Logger::debug, Logger::debug, operations.workDir(),config.EXPERIMENT_TIMEOUT_LENGTH(), config.EXPERIMENT_TIMEOUT_UNIT()) + val result = 
customShell.execute(patchCommand, operations.patchDir()) val rejects = Rejects(ArrayList()) if (result.isSuccess) { @@ -55,8 +52,6 @@ class GitApply(private val name: String, private val strip: Int) : Patcher { result.failure.output.forEach(Consumer { message: String? -> Logger.debug(message) }) } - rejects.rejects.addAll(readRejectsFromFile(operations, withFiler).rejects) - return rejects } @@ -64,87 +59,29 @@ class GitApply(private val name: String, private val strip: Int) : Patcher { return this.name } - // Read a rejects file - private fun readRejectsFromFile(operations: Operations, withFiler: Boolean): Rejects { - val rejectFiles = findRejects(operations) - val rejects = ArrayList() - for (rejectFile in rejectFiles) { - if (Files.exists(rejectFile.toPath())) { - rejects.addAll(parseRejects(operations, rejectFile.toPath())) - } - } - return Rejects(rejects) - } - private fun findRejects(operations: Operations): Collection { // Define a file filter to select .rej files - val rejectFilter: IOFileFilter = object : IOFileFilter { - override fun accept(file: File): Boolean { - return file.name.endsWith(".rej") - } - - override fun accept(dir: File?, name: String): Boolean { - return name.endsWith(".rej") - } - } + val rejectFilter: IOFileFilter = + object : IOFileFilter { + override fun accept(file: File): Boolean { + return file.name.endsWith(".rej") + } + + override fun accept(dir: File?, name: String): Boolean { + return name.endsWith(".rej") + } + } // Search for reject files recursively - val rejectFiles: Collection = FileUtils.listFiles( - operations.patchDir().toFile(), - rejectFilter, - TrueFileFilter.INSTANCE // This filter accepts all directories for recursive search - ) + val rejectFiles: Collection = + FileUtils.listFiles( + operations.patchDir().toFile(), + rejectFilter, + TrueFileFilter.INSTANCE // This filter accepts all directories for recursive + // search + ) return rejectFiles } - private fun parseRejects(operations: Operations, rejectFile: Path): 
List { - try { - val rejectContent = readContentSafely(rejectFile) - var rejectedFileDir = rejectFile.parent - var rejectedFileName = rejectFile.fileName.toString() - rejectedFileName = rejectedFileName.substring(0, rejectedFileName.length - 4) - var rejectedFile = rejectedFileDir.resolve(rejectedFileName) - rejectedFile = operations.patchDir().relativize(rejectedFile) - return parseRejects(rejectContent, rejectedFile) - } catch (e: IOException) { - panic("Was not able to read rejects file.", e) - } - return ArrayList() - } - - private fun parseRejects(lines: List, path: Path): List { - var index = 1 - val hunkStart = "@@ -" - var nextLine: String = lines[index] - - // Parse the hunks - val hunks = ArrayList() - var hunkLines: MutableList = java.util.ArrayList() - hunkLines.add(nextLine) - index += 1 - while (index < lines.size) { - nextLine = lines[index] - if (nextLine.startsWith(hunkStart)) { - hunks.add(DiffParser.parseHunk(hunkLines)) - hunkLines = java.util.ArrayList() - } - hunkLines.add(nextLine) - index++ - } - // Parse the content of the last hunk - hunks.add(DiffParser.parseHunk(hunkLines)) - - - // Filter the hunks of each patch to extract changed lines - val rejects = ArrayList() - for (hunk in hunks) { - for (changedLine in hunk.changedLines()) { - rejects.add(Change(changedLine, hunk, path)) - } - } - - return rejects - } - override fun clean(operations: Operations) { val rejectFiles = findRejects(operations) for (rejectFile in rejectFiles) { @@ -152,4 +89,4 @@ class GitApply(private val name: String, private val strip: Int) : Patcher { Files.delete(rejectFile.toPath()) } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/org/variantsync/evaluation/patching/GitCP.kt b/src/main/kotlin/org/variantsync/evaluation/patching/GitCP.kt index 1c6c46dc..0b164250 100644 --- a/src/main/kotlin/org/variantsync/evaluation/patching/GitCP.kt +++ b/src/main/kotlin/org/variantsync/evaluation/patching/GitCP.kt @@ -1,36 +1,44 @@ package 
org.variantsync.evaluation.patching +import java.nio.file.Files +import java.util.function.Consumer import org.tinylog.kotlin.Logger +import org.variantsync.evaluation.error.ShellException +import org.variantsync.evaluation.execution.EvalConfig +import org.variantsync.evaluation.execution.EvalOperations import org.variantsync.evaluation.execution.Operations +import org.variantsync.evaluation.execution.readContentSafely import org.variantsync.evaluation.util.diff.DiffParser import org.variantsync.evaluation.util.shell.GitCherryPickCommand import org.variantsync.evaluation.util.shell.ShellExecutor -import org.variantsync.evaluation.execution.EvalOperations -import org.variantsync.evaluation.error.ShellException -import org.variantsync.evaluation.execution.readContentSafely import org.variantsync.vevos.simulation.feature.Variant -import java.nio.file.Files -import java.util.function.Consumer -class GitCP(private val name: String, private val strip: Int, private val strategy: MergeStrategy) : Patcher { +class GitCP( + private val config: EvalConfig, + private val name: String, + private val strip: Int, + private val strategy: MergeStrategy +) : Patcher { private var lastResult: org.variantsync.functjonal.Result, ShellException>? 
= null private val conflictDetectionText = "CONFLICT (content): Merge conflict in " override fun applyPatch( - operations: Operations, - sourceVariant: Variant, - targetVariant: Variant, - withFiler: Boolean + operations: Operations, + sourceVariant: Variant, + targetVariant: Variant, + withFiler: Boolean ): Rejects { - val pathToPatchFile = if (withFiler) { - operations.filteredPatchFile() - } else { - operations.patchFile() - } + val pathToPatchFile = + if (withFiler) { + operations.filteredPatchFile() + } else { + operations.patchFile() + } val patch = DiffParser.toOriginalDiff(readContentSafely(pathToPatchFile)) if (operations !is EvalOperations) { - // If this is not an evaluation of cherry picks, we cannot apply git cherry pick as patcher + // If this is not an evaluation of cherry picks, we cannot apply git cherry pick as + // patcher return Rejects(patch.intoChanges(strip)) } val cherry = operations.repoManager.lastCherry!!.cherryCommit @@ -38,11 +46,15 @@ class GitCP(private val name: String, private val strip: Int, private val strate val patchCommand = GitCherryPickCommand.Recommended(cherry) // apply patch to target variant - val customShell = ShellExecutor(Logger::debug, Logger::debug, operations.workDir()) - val result = customShell.execute( - patchCommand, - operations.patchDir() - ) + val customShell = + ShellExecutor( + Logger::debug, + Logger::debug, + operations.workDir(), + config.EXPERIMENT_TIMEOUT_LENGTH(), + config.EXPERIMENT_TIMEOUT_UNIT() + ) + val result = customShell.execute(patchCommand, operations.patchDir()) val rejects = Rejects(ArrayList()) val conflictingFiles = ArrayList() if (result.isSuccess) { @@ -75,9 +87,11 @@ class GitCP(private val name: String, private val strip: Int, private val strate } } - private fun applyMergeStrategy(operations: Operations, - cherry: String, - conflictingFiles: List) { + private fun applyMergeStrategy( + operations: Operations, + cherry: String, + conflictingFiles: List + ) { val headMarker = "<<<<<<< 
HEAD" val divideMarker = "=======" val endMarker = ">>>>>>> " + cherry.substring(0, 8) @@ -108,7 +122,7 @@ class GitCP(private val name: String, private val strip: Int, private val strate if (line.startsWith(divideMarker)) { Logger.debug("Found Divide Marker") state = TentativeState.Cherry - } else if (this.strategy == MergeStrategy.Ours){ + } else if (this.strategy == MergeStrategy.Ours) { updatedLines.add(line) } } else { @@ -116,7 +130,7 @@ class GitCP(private val name: String, private val strip: Int, private val strate if (line.startsWith(endMarker)) { Logger.debug("Found End Marker") state = TentativeState.Outside - } else if (this.strategy == MergeStrategy.Theirs){ + } else if (this.strategy == MergeStrategy.Theirs) { updatedLines.add(line) } } @@ -136,4 +150,4 @@ enum class MergeStrategy { Default, Ours, Theirs, -} \ No newline at end of file +} diff --git a/src/main/kotlin/org/variantsync/evaluation/patching/MPatch.kt b/src/main/kotlin/org/variantsync/evaluation/patching/MPatch.kt index 0b3993d4..dfc3d903 100644 --- a/src/main/kotlin/org/variantsync/evaluation/patching/MPatch.kt +++ b/src/main/kotlin/org/variantsync/evaluation/patching/MPatch.kt @@ -1,33 +1,36 @@ package org.variantsync.evaluation.patching +import java.io.IOException +import java.nio.file.Files +import java.nio.file.Path +import java.util.function.Consumer import org.tinylog.kotlin.Logger +import org.variantsync.evaluation.execution.EvalConfig import org.variantsync.evaluation.execution.Operations +import org.variantsync.evaluation.execution.panic +import org.variantsync.evaluation.execution.readContentSafely import org.variantsync.evaluation.util.diff.DiffParser import org.variantsync.evaluation.util.diff.components.OriginalDiff import org.variantsync.evaluation.util.shell.MPatchCommand import org.variantsync.evaluation.util.shell.ShellExecutor -import org.variantsync.evaluation.execution.panic -import org.variantsync.evaluation.execution.readContentSafely import 
org.variantsync.vevos.simulation.feature.Variant -import java.io.IOException -import java.nio.file.Files -import java.nio.file.Path -import java.util.function.Consumer -class MPatch(private val name: String, private val strip: Int, private val maxMatchDistance: Int) : Patcher { +class MPatch(private val config: EvalConfig, val name: String, private val strip: Int, private val maxMatchDistance: Int) : + Patcher { override fun applyPatch( - operations: Operations, - sourceVariant: Variant, - targetVariant: Variant, - withFiler: Boolean + operations: Operations, + sourceVariant: Variant, + targetVariant: Variant, + withFiler: Boolean ): Rejects { - val pathToPatchFile = if (withFiler) { - operations.filteredPatchFile() - } else { - operations.patchFile() - } + val pathToPatchFile = + if (withFiler) { + operations.filteredPatchFile() + } else { + operations.patchFile() + } if (!Files.exists(pathToPatchFile)) { // If there is nothing to patch, there is nothing to reject @@ -36,27 +39,28 @@ class MPatch(private val name: String, private val strip: Int, private val maxMa val pathToSourceVariant = operations.sourceV0Path(sourceVariant.name) - val rejectFile = if (withFiler) { - operations.rejectsFileFiltered() - } else { - operations.rejectsFile() - } + val rejectFile = + if (withFiler) { + operations.rejectsFileFiltered() + } else { + operations.rejectsFile() + } - val patchCommand = MPatchCommand.Recommended(pathToSourceVariant, pathToPatchFile).strip(strip) - .rejectsFile(rejectFile).maxMatchDistance(this.maxMatchDistance) + val patchCommand = + MPatchCommand.Recommended(pathToSourceVariant, pathToPatchFile) + .strip(strip) + .rejectsFile(rejectFile) + .maxMatchDistance(this.maxMatchDistance) // apply patch to target variant - val customShell = ShellExecutor(Logger::debug, Logger::warn, operations.workDir()) - val result = customShell.execute( - patchCommand, - operations.patchDir() - ) + val customShell = ShellExecutor(Logger::debug, Logger::debug, 
operations.workDir(),config.EXPERIMENT_TIMEOUT_LENGTH(), config.EXPERIMENT_TIMEOUT_UNIT()) + val result = customShell.execute(patchCommand, operations.patchDir()) val rejects = Rejects(ArrayList()) if (result.isSuccess) { result.success.forEach(Consumer { message: String? -> Logger.debug(message) }) } else { - Logger.error("mpatch failed") + Logger.debug("mpatch failed to apply patch") throw UTF8Exception() } @@ -70,12 +74,17 @@ class MPatch(private val name: String, private val strip: Int, private val maxMa } // Read a rejects file - private fun readRejectsFromFile(operations: Operations, rejectFile: Path, withFiler: Boolean): Rejects { - val pathToPatchFile = if (withFiler) { - operations.filteredPatchFile() - } else { - operations.patchFile() - } + private fun readRejectsFromFile( + operations: Operations, + rejectFile: Path, + withFiler: Boolean + ): Rejects { + val pathToPatchFile = + if (withFiler) { + operations.filteredPatchFile() + } else { + operations.patchFile() + } val patch = DiffParser.toOriginalDiff(readContentSafely(pathToPatchFile)) if (Files.exists(rejectFile)) { try { @@ -100,7 +109,10 @@ class MPatch(private val name: String, private val strip: Int, private val maxMa var atHeader = true while (atHeader) { if (nextLine.startsWith("--- ")) { - oldFile = nextLine.split("\\s+".toRegex()).dropLastWhile { it.isEmpty() }.toTypedArray()[1] + oldFile = + nextLine.split("\\s+".toRegex()) + .dropLastWhile { it.isEmpty() } + .toTypedArray()[1] } else if (nextLine.startsWith("+++ ")) { atHeader = false } @@ -129,7 +141,6 @@ class MPatch(private val name: String, private val strip: Int, private val maxMa return rejects } - private fun parseRejects(patch: OriginalDiff, lines: List): Rejects { // The rejects are empty if (lines.isEmpty()) { @@ -140,7 +151,8 @@ class MPatch(private val name: String, private val strip: Int, private val maxMa var fileDiffStart = "" var fileDiffFollow = "" if (lines[0].startsWith("diff")) { - // Several files were processed, the 
diff of each file starts with the 'diff' command that was used + // Several files were processed, the diff of each file starts with the 'diff' command + // that was used fileDiffStart = "diff" fileDiffFollow = "--- " } else if (lines[0].startsWith("--- ")) { @@ -183,13 +195,14 @@ class MPatch(private val name: String, private val strip: Int, private val maxMa } } } - requireNotNull(fileDiffContent) { "The provided lines do not contain one of the expected fileDiffStart values" } + requireNotNull(fileDiffContent) { + "The provided lines do not contain one of the expected fileDiffStart values" + } fileDiffContent.add(line) } // Parse the content of the last file diff mPatchRejects.addAll(parseMPatchRejects(fileDiffContent)) - val rejects = Rejects(ArrayList()) for ((changeId, change) in patch.intoChanges(strip).withIndex()) { val id = RejectId(change.path, changeId) @@ -201,10 +214,8 @@ class MPatch(private val name: String, private val strip: Int, private val maxMa return rejects } - } - private class RejectId(val path: Path, val index: Int) { override fun equals(other: Any?): Boolean { if (this === other) return true @@ -225,5 +236,4 @@ private class RejectId(val path: Path, val index: Int) { } } -class UTF8Exception(): Exception("Input did not contain valid UTF-8") {} - +class UTF8Exception() : Exception("Input did not contain valid UTF-8") {} diff --git a/src/main/kotlin/org/variantsync/evaluation/patching/Patcher.kt b/src/main/kotlin/org/variantsync/evaluation/patching/Patcher.kt index 3b43b38e..955a2713 100644 --- a/src/main/kotlin/org/variantsync/evaluation/patching/Patcher.kt +++ b/src/main/kotlin/org/variantsync/evaluation/patching/Patcher.kt @@ -1,17 +1,17 @@ package org.variantsync.evaluation.patching +import java.nio.file.Files import org.tinylog.kotlin.Logger import org.variantsync.evaluation.execution.Operations import org.variantsync.vevos.simulation.feature.Variant -import java.nio.file.Files interface Patcher { fun applyPatch( - operations: 
Operations, - sourceVariant: Variant, - targetVariant: Variant, - withFiler: Boolean, + operations: Operations, + sourceVariant: Variant, + targetVariant: Variant, + withFiler: Boolean, ): Rejects fun name(): String @@ -26,4 +26,5 @@ interface Patcher { Files.delete(operations.rejectsFileFiltered()) } } -} \ No newline at end of file +} + diff --git a/src/main/kotlin/org/variantsync/evaluation/patching/Rejects.kt b/src/main/kotlin/org/variantsync/evaluation/patching/Rejects.kt index a8903602..cf9d7fc0 100644 --- a/src/main/kotlin/org/variantsync/evaluation/patching/Rejects.kt +++ b/src/main/kotlin/org/variantsync/evaluation/patching/Rejects.kt @@ -1,15 +1,13 @@ package org.variantsync.evaluation.patching -import org.variantsync.evaluation.util.diff.lines.ChangedLine import java.util.stream.Collectors +import org.variantsync.evaluation.util.diff.lines.ChangedLine data class Rejects(val rejects: MutableList) { fun toLines(): List { val lines: MutableList = ArrayList() rejects.stream().map { obj: Change -> obj.toString() }.forEach { c: String? -> - lines.add( - c!! - ) + lines.add(c!!) 
} return lines } @@ -19,6 +17,9 @@ data class Rejects(val rejects: MutableList) { } fun intoChangedLines(): List { - return rejects.stream().map { c: Change -> ChangedLine(c.path, c.lineChange) }.collect(Collectors.toList()) + return rejects.stream() + .map { c: Change -> ChangedLine(c.path, c.lineChange) } + .collect(Collectors.toList()) } -} \ No newline at end of file +} + diff --git a/src/main/python/pyproject.toml b/src/main/python/pyproject.toml index 5b320365..adfd0bd0 100644 --- a/src/main/python/pyproject.toml +++ b/src/main/python/pyproject.toml @@ -1,5 +1,5 @@ [tool.poetry] -name = "pwm_eval" +name = "mpatch_eval" version = "0.1.0" description = "Result analysis scripts for patching with matching" authors = ["anonymous"] @@ -8,13 +8,14 @@ packages = [ ] [tool.poetry.dependencies] -python = "^3.12" +python = ">3.0" pyyaml = "*" matplotlib = "*" setuptools = "*" scipy = "*" statsmodels = "*" tqdm = "*" +pandas = "*" [tool.poetry.scripts] run-eval = "result_analysis.__main__:main" @@ -22,6 +23,8 @@ power-simulation = "result_analysis.simulation:main" find-example = "result_analysis.__main__:example" find-outliers = "result_analysis.__main__:outliers" compare-patchers = "result_analysis.__main__:compare" +runtime-comparison = "result_analysis.__main__:runtime" +calc_patch_sizes = "result_analysis.__main__:sizes" [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/src/main/python/result_analysis/__main__.py b/src/main/python/result_analysis/__main__.py index 250c10c4..93588b5e 100644 --- a/src/main/python/result_analysis/__main__.py +++ b/src/main/python/result_analysis/__main__.py @@ -1,17 +1,17 @@ from result_analysis.tables import ( find_example, metrics_table_generation, + patch_sizes, venn_diagram, + direct_runtime_comparison, ) from result_analysis.analyze_results import find_outliers +from rq3_report import rq3_analysis -results_dir = "../../../evaluation-workdir/results/" -repo_sample = "../../../evaluation-workdir/data/repo-sample.yaml" -# 
metrics_file = "../../../evaluation-workdir/tables/metrics.tex" -metrics_file = "/home/alex/papers/self/patching-with-matching/paper/tables/metrics.tex" +import argparse -def main(): +def main(repo_sample, results_dir, metrics_file): metrics_table_generation( results_dir, repo_sample, @@ -21,17 +21,42 @@ def main(): ) -def example(): +def example(results_dir, repo_sample): find_example(results_dir + "rep-1/", repo_sample, False) -def outliers(): +def outliers(results_dir, repo_sample): find_outliers(results_dir + "rep-1/", repo_sample, False) -def compare(): +def compare(results_dir, repo_sample): venn_diagram(results_dir + "rep-1/", repo_sample, False) +def sizes(results_dir, repo_sample): + patch_sizes(results_dir + "rep-1/", repo_sample) + + +def runtime(results_dir, repo_sample): + direct_runtime_comparison(results_dir + "rep-1/", repo_sample, False) + + if __name__ == "__main__": - main() + parser = argparse.ArgumentParser( + description="Run evaluation scripts with specified paths." 
+ ) + parser.add_argument( + "--repo_sample", required=True, help="Path to the repo sample YAML file" + ) + parser.add_argument( + "--results_dir", required=True, help="Path to the results directory" + ) + parser.add_argument( + "--metrics_file", required=True, help="Path to the metrics output file" + ) + args = parser.parse_args() + main( + args.repo_sample, + args.results_dir, + args.metrics_file, + ) diff --git a/src/main/python/result_analysis/analyze_results.py b/src/main/python/result_analysis/analyze_results.py index 83b9b8df..73cf8e31 100644 --- a/src/main/python/result_analysis/analyze_results.py +++ b/src/main/python/result_analysis/analyze_results.py @@ -10,11 +10,11 @@ languages = [ ("Python", "Python"), - ("JavaScript", "\\multicolumn{1}{c}{JavaS.}"), + ("JavaScript", "\\multicolumn{1}{c}{JS}"), ("Go", "Go"), ("C++", "\\multicolumn{1}{c}{C++}"), ("Java", "Java"), - ("TypeScript", "\\multicolumn{1}{c}{TypeS.}"), + ("TypeScript", "\\multicolumn{1}{c}{TS}"), ("C", "C"), ("C#", "C#"), ("PHP", "PHP"), diff --git a/src/main/python/result_analysis/eval_setup.py b/src/main/python/result_analysis/eval_setup.py index e3a03bf8..46f34d7f 100644 --- a/src/main/python/result_analysis/eval_setup.py +++ b/src/main/python/result_analysis/eval_setup.py @@ -3,7 +3,7 @@ class Patcher(Enum): - MPatch = "pwm_f2" + MPatch = "mpatch" UnixPatch = "unix_patch" GitApply = "git_apply" GitCherry = "git_cherry" @@ -16,7 +16,7 @@ def __repr__(self): def nice_name(self): return { - Patcher.MPatch: "\\approach{}", + Patcher.MPatch: "\\mpatch{}", Patcher.UnixPatch: "\\patch{}", Patcher.GitApply: "\\gitapply{}", Patcher.GitCherry: "\\gitcherrypickshort{}", @@ -52,27 +52,19 @@ def __init__(self, json_object): def __str__(self): return ( - f"PatchResult(dataset={self.dataset}, runID={ - self.run_id}, cherry={self.cherry_id}, " - f"target={self.pick_id}, normalActualVsExpected={ - self.num_actual_vs_expected}, " - f"lineNormal={self.num_changes_total}, lineSuccessNormal={ - 
self.num_changes_applied}, " - f"normalResult={self.outcome_classification}, patchDuration={ - self.patch_duration}, " + f"PatchResult(dataset={self.dataset}, runID={self.run_id}, cherry={self.cherry_id}, " + f"target={self.pick_id}, normalActualVsExpected={self.num_actual_vs_expected}, " + f"lineNormal={self.num_changes_total}, lineSuccessNormal={self.num_changes_applied}, " + f"normalResult={self.outcome_classification}, patchDuration={self.patch_duration}, " f"patchIsTrivial={self.patch_is_trivial})" ) def __repr__(self): return ( - f"PatchResult(dataset={repr(self.dataset)}, runID={ - repr(self.run_id)}, cherry={repr(self.cherry_id)}, " - f"target={repr(self.pick_id)}, normalActualVsExpected={ - repr(self.num_actual_vs_expected)}, " - f"lineNormal={repr(self.num_changes_total)}, lineSuccessNormal={ - repr(self.num_changes_applied)}, " - f"normalResult={repr(self.outcome_classification)}, patchDuration={ - repr(self.patch_duration)}, " + f"PatchResult(dataset={repr(self.dataset)}, runID={repr(self.run_id)}, cherry={repr(self.cherry_id)}, " + f"target={repr(self.pick_id)}, normalActualVsExpected={repr(self.num_actual_vs_expected)}, " + f"lineNormal={repr(self.num_changes_total)}, lineSuccessNormal={repr(self.num_changes_applied)}, " + f"normalResult={repr(self.outcome_classification)}, patchDuration={repr(self.patch_duration)}, " f"patchIsTrivial={repr(self.patch_is_trivial)})" ) @@ -174,9 +166,11 @@ def __repr__(self): class Metric(Enum): - F1Score = "f1_score" Automation = "patch_automation" EditDistance = "avg_edit_distance" + F1Score = "f1_score" + Precision = "precision" + Recall = "recall" Runtime = "avg_runtime" def __str__(self): @@ -187,9 +181,11 @@ def __repr__(self): def nice_name(self): return { - Metric.F1Score: "F1 Score", Metric.Automation: "Autom. (\\%)", Metric.EditDistance: "Req. 
Fixes", + Metric.F1Score: "F1 Score", + Metric.Precision: "Precision", + Metric.Recall: "Recall", Metric.Runtime: "Time (s)", }[self] @@ -234,7 +230,7 @@ def __str__(self): f"Patcher: {self.patcher:<12} " f"Precision: {np.mean(self.precision):1.2f}, " f"Recall: {np.mean(self.recall):1.2f}, " - f"Patch Automation: {100*np.mean(self.patch_automation):2.2f}%, " + f"Patch Automation: {100 * np.mean(self.patch_automation):2.2f}%, " f"Avg Edit Distance: {np.mean(self.avg_edit_distance):2.2f}, " f"Avg Runtime: {np.mean(self.avg_runtime):1.2f}s" ) diff --git a/src/main/python/result_analysis/fill_up_metadata.py b/src/main/python/result_analysis/fill_up_metadata.py deleted file mode 100644 index da5fd349..00000000 --- a/src/main/python/result_analysis/fill_up_metadata.py +++ /dev/null @@ -1,19 +0,0 @@ -import yaml -import glob -import os - -repo_sample_file = "../../../../../evaluation-workdir/data/repo-sample.yaml" -yamls_folder = "../../../../../evaluation-workdir/data/cherries/" -GITHUB_API_URL = "https://api.github.com" -# with open("access_token", "r") as file: -# ACCESS_TOKEN = file.read() - -if __name__ == "__main__": - with open(repo_sample_file, "r", encoding="utf-8") as file: - repo_info = yaml.safe_load(file) - yml_files = [ - f for f in glob.glob(os.path.join(yamls_folder, "**", "*.yaml"), recursive=True) - ] - - pass - diff --git a/src/main/python/result_analysis/latex.py b/src/main/python/result_analysis/latex.py index f65c96a5..c8ad9882 100644 --- a/src/main/python/result_analysis/latex.py +++ b/src/main/python/result_analysis/latex.py @@ -1,6 +1,6 @@ import numpy as np -from result_analysis.eval_setup import Metric +from result_analysis.eval_setup import Metric, Patcher def generate_metrics_result_table( @@ -8,8 +8,27 @@ def generate_metrics_result_table( ): language_names = [lang[1] for lang in languages] languages = [lang[0] for lang in languages] + total_vals = 0 + mpatch_over_t = 0 with open(file, "w") as file: + file.write("\\documentclass{article}") + 
file.write("\\usepackage{booktabs}") + file.write("\\usepackage{multirow}") + file.write("\\usepackage{siunitx}") + file.write("\\usepackage[table]{xcolor}") + file.write("\\usepackage{geometry}") + file.write("\\usepackage{graphicx}") + file.write("\\geometry{margin=1in}") + file.write("\\newcommand{\\mpatch}{\\textit{mpatch}}") + file.write("\\newcommand{\\patch}{\\textit{GNU patch}}") + file.write("\\newcommand{\\gitapply}{\\textit{Git apply}}") + file.write("\\newcommand{\\gitcherrypickshort}{\\textit{Git cp}}") + file.write("\\begin{document}") + file.write("\\begin{table}") + file.write(" \\centering") + file.write(" \\resizebox{\\textwidth}{!}{") + fmt = "S[table-format=2.2]" * (4 + len(languages)) # Begin the tabular environment file.write("\\begin{tabular}{lc" + fmt + "}\n") @@ -34,7 +53,7 @@ def generate_metrics_result_table( # Write the multi-rows and their corresponding rows file.write("\\toprule\n") for metric in Metric: - if metric != Metric.F1Score: + if metric != Metric.Automation: file.write("\\midrule\n") file.write( "\\multirow{" @@ -46,14 +65,21 @@ def generate_metrics_result_table( best_average = determine_best_average(differences, patcher_names, metric) for patcher in patcher_names: line = " & " + patcher + if patcher not in results_per_patcher: + continue for language in languages: value = 0 best_type = "" - results = results_per_patcher[patcher][language].per_patch - value = np.nanmean(results.get(metric)) + if language in results_per_patcher[patcher]: + results = results_per_patcher[patcher][language].per_patch + value = np.nanmean(results.get(metric)) postfix = "" if metric == Metric.F1Score: best_type = "max" + elif metric == Metric.Precision: + best_type = "max" + elif metric == Metric.Recall: + best_type = "max" elif metric == Metric.Automation: value = 100 * value best_type = "max" @@ -100,11 +126,19 @@ def generate_metrics_result_table( # End the tabular environment file.write("\\bottomrule\n") file.write("\\end{tabular}") + 
file.write("}") + file.write("\\end{table}") + file.write("\\end{document}") + print(f"mpatch over t: {mpatch_over_t} / {total_vals}") def determine_best(results_per_patcher, patcher_names, metric, best_type, language): values = [] for patcher in patcher_names: + if patcher not in results_per_patcher: + continue + if language not in results_per_patcher[patcher]: + return 0 results = results_per_patcher[patcher][language].per_patch if metric == Metric.Automation: values.append(100 * np.nanmean(results.get(metric))) @@ -120,11 +154,17 @@ def determine_best(results_per_patcher, patcher_names, metric, best_type, langua def determine_best_average(differences, patcher_names, metric): values = [] for patcher in patcher_names: + if patcher not in differences: + continue results = differences[patcher][metric][0] values.append(results) if metric == Metric.F1Score: best_type = "max" + elif metric == Metric.Precision: + best_type = "max" + elif metric == Metric.Recall: + best_type = "max" elif metric == Metric.Automation: best_type = "max" elif metric == Metric.EditDistance: diff --git a/src/main/python/result_analysis/result_handling.py b/src/main/python/result_analysis/result_handling.py index 33925563..46156d52 100644 --- a/src/main/python/result_analysis/result_handling.py +++ b/src/main/python/result_analysis/result_handling.py @@ -162,6 +162,8 @@ def accumulate_data_per_patcher( language = language[0] for patcher in Patcher: # Patcher is an enum results = load_all_results(result_dir, patcher) + if len(results) == 0: + continue # Filter trivial results if only_non_trivial: results = non_trivial_results(results) diff --git a/src/main/python/result_analysis/rq3_report.py b/src/main/python/result_analysis/rq3_report.py index 73a8bdc0..0cbda7d3 100644 --- a/src/main/python/result_analysis/rq3_report.py +++ b/src/main/python/result_analysis/rq3_report.py @@ -15,8 +15,6 @@ pd.set_option("display.expand_frame_repr", False) # Avoid line breaks in long columns 
pd.set_option("display.max_columns", None) # Display all columns -yaml_folder = "../../evaluation-workdir/data/cherries/" -repo_sample_yaml = "../../evaluation-workdir/data/repo-sample.yaml" language_count = 10 sample_per_language = 500 @@ -51,27 +49,27 @@ table_names = { c_repo_name: "repository", - "rq_git_cherry": "{\mymakecell{required \\\\ fixes \\\\ \\gitcherrypickshort}}", - "rq_pwm_f2": "{\mymakecell{required \\\\ fixes \\\\ \\approach}}", - "ap_git_cherry": "{\mymakecell{fully \\\\ automatable \\\\ \\gitcherrypickshort{} \%}}", - "ap_pwm_f2": "{\mymakecell{fully \\\\ automatable \\\\ \\approach{} \%}}", - c_projects_with_cherries: "{\mymakecell{projects \\\\ with \\\\ cherry \\\\ picks}}", - c_language: "{\mymakecell[l]{main \\\\ repository \\\\ language}}", - c_sampled_projects_per_language: "{\mymakecell[l]{sampled \\\\ projects}}", - c_cherries: "{\mymakecell{cherry \\\\ picks}}", - c_cherry_ratio: "{\mymakecell{cherry \\\\ pick \%}}", - c_trivial_cherries: "{\mymakecell{complex \\\\ cherry \\\\ pick \%}}", - c_sampled_cherries: "{\mymakecell{sampled \\\\ cherry \\\\ picks}}", + "rq_git_cherry": "{\\mymakecell{required \\\\ fixes \\\\ \\gitcherrypickshort}}", + "rq_mpatch": "{\\mymakecell{required \\\\ fixes \\\\ \\mpatch}}", + "ap_git_cherry": "{\\mymakecell{fully \\\\ automatable \\\\ \\gitcherrypickshort{} \\%}}", + "ap_mpatch": "{\\mymakecell{fully \\\\ automatable \\\\ \\mpatch{} \\%}}", + c_projects_with_cherries: "{\\mymakecell{projects \\\\ with \\\\ cherry \\\\ picks}}", + c_language: "{\\mymakecell[l]{main \\\\ repository \\\\ language}}", + c_sampled_projects_per_language: "{\\mymakecell[l]{sampled \\\\ projects}}", + c_cherries: "{\\mymakecell{cherry \\\\ picks}}", + c_cherry_ratio: "{\\mymakecell{cherry \\\\ pick \\%}}", + c_trivial_cherries: "{\\mymakecell{complex \\\\ cherry \\\\ pick \\%}}", + c_sampled_cherries: "{\\mymakecell{sampled \\\\ cherry \\\\ picks}}", } c_total = "total" table_pos = "!tb" plot_labels = { - c_commits: 
"\#Commits", + c_commits: "\\#Commits", c_language: "Language", - c_cherries: "\#Cherrypicks", - c_cherry_ratio: "$\frac{\#Cherrypicks}{\#Commits}$", + c_cherries: "\\#Cherrypicks", + c_cherry_ratio: "$\\frac{\\#Cherrypicks}{\\#Commits}$", } @@ -153,7 +151,7 @@ def find_trivial_cherries(file_name): return sum([1 if l == " is_trivial: true\n" else 0 for l in lines]) -def read_yamls(files): +def read_yamls(repo_sample_yaml, files): # first define (headers for) dataFrames header = read_cherry(files[0]) header[c_trivial_cherries] = np.nan @@ -166,7 +164,7 @@ def read_yamls(files): header = read_cherry(f) header[c_trivial_cherries] = find_trivial_cherries(f) - pr_df = pr_df.append(header, ignore_index=True) + pr_df = pd.concat([pr_df, pd.DataFrame([header])], ignore_index=True) # add new columns of interest @@ -250,7 +248,7 @@ def df_to_latex(pr_df): 1 - pr_df[c_trivial_cherries].sum() / pr_df[c_cherries].sum() ) * 100 - df[c_language].replace("C#", "C\#", inplace=True) + df[c_language].replace("C#", "C\\#", inplace=True) df = df.rename(columns=table_names) to_latex( @@ -278,13 +276,13 @@ def report_projects(pr_df): repos_with_cherries = len(pr_df[pr_df[c_cherries] > 0]) print( - f"{lang}, number of repositories with cherries: {repos_with_cherries}, and without cherries: {num_languages*sample_per_language - repos_with_cherries}, ratio: {repos_with_cherries/(num_languages*sample_per_language)}." + f"{lang}, number of repositories with cherries: {repos_with_cherries}, and without cherries: {num_languages * sample_per_language - repos_with_cherries}, ratio: {repos_with_cherries / (num_languages * sample_per_language)}." 
) print( - f"{lang}, total number of commits, within all projects {'' if num_languages > 1 else 'of '+lang}: {sum(pr_df[c_commits])}" + f"{lang}, total number of commits, within all projects {'' if num_languages > 1 else 'of ' + lang}: {sum(pr_df[c_commits])}" ) print( - f"{lang}, total number of cherries, within all projects {'' if num_languages > 1 else 'of '+lang}: {sum(pr_df[c_cherries])}, mean cherry to commit ratio: {sum(pr_df[c_cherries])/sum(pr_df[c_commits]):.3g}" + f"{lang}, total number of cherries, within all projects {'' if num_languages > 1 else 'of ' + lang}: {sum(pr_df[c_cherries])}, mean cherry to commit ratio: {sum(pr_df[c_cherries]) / sum(pr_df[c_commits]):.3g}" ) column_report(lang, c_commits, c_commits, pr_df) column_report(lang, c_cherries, c_cherries, pr_df) @@ -387,11 +385,10 @@ def impact_file(file): return rq, ap / seen, seen -def impact_analysis(pr_df): - approaches = ["git_cherry", "pwm_f2"] - prefix = "C:\\work\\patching-with-matching-eval\\evaluation-workdir\\results\\cherries\\rep-1\\" +def impact_analysis(pr_df, path_to_results, path_to_output): + approaches = ["git_cherry", "mpatch"] - df = pr_df.sort_values(by=c_cherry_ratio)[-5:].append( + df = pr_df.sort_values(by=c_cherry_ratio)[-5:].concat( pr_df.sort_values(by=c_cherries)[-5:] ) @@ -410,7 +407,8 @@ def impact_analysis(pr_df): idf.loc[len(idf)] = [projects[i]] + [np.nan] * len(approaches) * 2 for approach in approaches: file = ( - prefix + f"{langs[i]}_{pnames0[i]}_{pnames1[i]}.yaml_{approach}.results" + path_to_results + + f"{langs[i]}_{pnames0[i]}_{pnames1[i]}.yaml_{approach}.results" ) rq, ap, seen = impact_file(file) idf.loc[len(idf) - 1, "rq_" + approach] = rq / seen @@ -441,34 +439,42 @@ def impact_analysis(pr_df): ) idf[c_cherries] = idf[c_cherries].astype(int, errors="ignore") idf[c_trivial_cherries] = idf[c_trivial_cherries].astype(int, errors="ignore") - idf = idf.rename(columns={c_language: "{\mymakecell{main \\\\ language}}"}) + idf = idf.rename(columns={c_language: 
"{\\mymakecell{main \\\\ language}}"}) idf = idf.rename( - columns={c_trivial_cherries: "{\mymakecell{complex \\\\ cherry \\\\ picks}}"} + columns={c_trivial_cherries: "{\\mymakecell{complex \\\\ cherry \\\\ picks}}"} ) idf = idf.rename(columns=table_names) tl = to_latex( idf, label="tab:impact", - caption="Potential impact for the projects with the most relative and most absolute cherry picks. We compare \\approach{} to \\gitcherrypick.", + caption="Potential impact for the projects with the most relative and most absolute cherry picks. We compare \\mpatch{} to \\gitcherrypick.", position=table_pos, column_format="llS[table-format=2.2, round-precision=2]S[table-format=5.0, round-precision=0]S[table-format=4.0, round-precision=0]S[table-format=2.1, round-precision=1]S[table-format=2.1, round-precision=1]S[table-format=2.1, round-precision=1]S[table-format=2.1, round-precision=1]", ) - tl = tl.replace("JetBrains", "\t\midrule\n" + "JetBrains") + tl = tl.replace("JetBrains", "\t\\midrule\n" + "JetBrains") + + with open(path_to_output, "w") as output_file: + output_file.write(tl) print(tl) return idf -if __name__ == "__main__": +def rq3_analysis( + path_to_repo_sample, path_to_mined_cherries, path_to_results, path_to_output +): yml_files = [ - f for f in glob.glob(os.path.join(yaml_folder, "**", "*.yaml"), recursive=True) + f + for f in glob.glob( + os.path.join(path_to_mined_cherries, "**", "*.yaml"), recursive=True + ) ] - pr_df, ch_df = read_yamls(yml_files) + pr_df, ch_df = read_yamls(path_to_repo_sample, yml_files) pr_df = report_projects(pr_df) - impact_analysis(pr_df) + impact_analysis(pr_df, path_to_results, path_to_output) correlate(pr_df) - plot_projects(pr_df) + # plot_projects(pr_df) for language in pr_df[c_language].unique(): report_projects(pr_df[pr_df[c_language] == language]) diff --git a/src/main/python/result_analysis/tables.py b/src/main/python/result_analysis/tables.py index 5d7860b1..947e6bee 100644 --- a/src/main/python/result_analysis/tables.py 
+++ b/src/main/python/result_analysis/tables.py @@ -1,11 +1,12 @@ import os import numpy as np -from result_analysis.eval_setup import Metric, Patcher +from result_analysis.eval_setup import Metric, PatchResult, Patcher from result_analysis.io import load_repositories from result_analysis.io import load_all_results from result_analysis.latex import generate_metrics_result_table from result_analysis.result_handling import ( + all_results_per_language, non_trivial_results, results_per_repo, ) @@ -16,11 +17,11 @@ languages = [ ("Python", "Python"), - ("JavaScript", "\\multicolumn{1}{c}{JavaS.}"), + ("JavaScript", "\\multicolumn{1}{c}{JS.}"), ("Go", "Go"), ("C++", "\\multicolumn{1}{c}{C++}"), ("Java", "Java"), - ("TypeScript", "\\multicolumn{1}{c}{TypeS.}"), + ("TypeScript", "\\multicolumn{1}{c}{TS}"), ("C", "C"), ("C#", "C#"), ("PHP", "PHP"), @@ -36,6 +37,29 @@ def list_all_dirs(path): ] +def patch_sizes(path_to_results, path_to_repo_list): + global languages + repos = load_repositories(path_to_repo_list) + + results_per_patcher = {} + patcher = Patcher.MPatch # Patcher is an enum + results = load_all_results(path_to_results, patcher) + results = non_trivial_results(results) + # Group results by repo + results = results_per_repo(results, repos) + results = all_results_per_language(results) + + from typing import List + + for language in languages: + res = results[language[0]] # type: List[PatchResult] + changes = [] + for r in res: + changes.append(r.num_changes_total) + m = np.mean(changes) + print(f"Mean number of changes for {language[0]}: {m}") + + def metrics_table_generation( path_to_results, path_to_repo_list, only_non_trivial, file_metrics, file_power ): @@ -53,12 +77,34 @@ def metrics_table_generation( only_non_trivial=only_non_trivial, ) + num_results_per_patcher = {} + for patcher in Patcher: + num_results_per_patcher[patcher] = 0 + for language in languages: language = language[0] print(language) for patcher in Patcher: # Patcher is an enum + if 
patcher.nice_name() not in results_per_patcher: + continue + if language not in results_per_patcher[patcher.nice_name()]: + continue patcher_data = results_per_patcher[patcher.nice_name()][language] print(patcher_data) + num = len(patcher_data.per_patch.f1_score) + num_results_per_patcher[patcher] += num + print( + "There are", + num, + "results for patcher", + patcher.nice_name(), + "in language", + language, + ) + + for patcher in Patcher: + print("There are", num_results_per_patcher[patcher], "results for", patcher) + patcher_names = [patcher.nice_name() for patcher in Patcher] # corrected_significance, corrected_alpha = significance(results_per_patcher) differences = relative_difference(Patcher.MPatch, results_per_patcher) @@ -80,6 +126,8 @@ def relative_difference(base_patcher: Patcher, results): effects = [] for other_patcher in Patcher: other_patcher = other_patcher.nice_name() + if other_patcher not in results: + continue for metric in Metric: base_values = [] other_values = [] @@ -128,6 +176,8 @@ def relative_difference(base_patcher: Patcher, results): i = 0 for patcher in Patcher: patcher = patcher.nice_name() + if patcher not in results: + continue for metric in Metric: if patcher == base: continue @@ -333,3 +383,64 @@ def venn_diagram(path_to_results, path_to_repo_list, only_non_trivial): print("patch worse: " + str(ru_worse / num_total)) print("apply worse: " + str(ra_worse / num_total)) print() + + +def direct_runtime_comparison(path_to_results, path_to_repo_list, only_non_trivial): + global languages + repos = load_repositories(path_to_repo_list) + + results_per_patcher = {} + for patcher in Patcher: # Patcher is an enum + results = load_all_results(path_to_results, patcher) + # Filter trivial results + if only_non_trivial: + results = non_trivial_results(results) + # Group results by repo + results_per_patcher[patcher] = results_per_repo(results, repos) + + results = results_per_patcher[Patcher.MPatch] + + mpatch_faster_than_cp = 0 + total = 0 + 
mpatch_faster_than_apply = 0 + + for repo in results.keys(): + repo_results_mpatch = results[repo] + repo_results_apply = results_per_patcher[Patcher.GitApply][repo] + repo_results_cherry = results_per_patcher[Patcher.GitCherry][repo] + + sorted(repo_results_mpatch, key=lambda x: x.pick_id) + sorted(repo_results_apply, key=lambda x: x.pick_id) + sorted(repo_results_cherry, key=lambda x: x.pick_id) + + repo_results_mpatch = {r.run_id: r for r in repo_results_mpatch} + repo_results_apply = {r.run_id: r for r in repo_results_apply} + repo_results_cherry = {r.run_id: r for r in repo_results_cherry} + + for i in repo_results_mpatch.keys(): + res_mpatch = repo_results_mpatch.get(i, None) + if res_mpatch is None: + continue + + res_cherry = repo_results_cherry.get(i, None) + res_apply = repo_results_apply.get(i, None) + + rm = res_mpatch.patch_duration + rc = res_cherry.patch_duration if res_cherry is not None else float("inf") + ra = res_apply.patch_duration if res_apply is not None else float("inf") + + if rm < ra: + mpatch_faster_than_apply += 1 + + if rm < rc: + mpatch_faster_than_cp += 1 + + total += 1 + + total_str = str(total) + print( + f"mpatch faster than apply: {mpatch_faster_than_apply} / {total_str} ({mpatch_faster_than_apply / total})" + ) + print( + f"mpatch faster than cp: {mpatch_faster_than_cp} / {total_str} ({mpatch_faster_than_cp / total})" + ) diff --git a/src/main/resources/config-debug.properties b/src/main/resources/config-debug.properties index 2641b7ed..6ee94e4d 100644 --- a/src/main/resources/config-debug.properties +++ b/src/main/resources/config-debug.properties @@ -1,30 +1,47 @@ -# The number of random experiment repetitions. In each run, random variants are generated and a random source variant -# is selected from which a diff is calculated. 
The higher the number of repeats the more random repeats for two specific -# SPL-commit pairs -# inclusive start and end -experiment.repeats.start=1 -experiment.repeats.end=1 -experiment.sample-file = /home/alex/data/cherry-picks/last-sample.ser -experiment.startid=0 -experiment.cherry-type = Complex -# The path to the list of dataset -experiment.datasets=./src/test/resources/troublesome-cherries -# The path to the main working directory of the experiment -experiment.dir.main=/home/alex/data/cherry-picks/study-files -# The path to the results directory -experiment.dir.results=/home/alex/data/cherry-picks/results -# The path to the directory to which the SPL repos are cloned -experiment.dir.repos=/home/alex/data/cherry-picks/REPOS -experiment.processed-file=/home/alex/data/cherry-picks/results/processed.txt -# Whether debug files should be written -experiment.debug=true +### +### GENERAL ### +### +# Should all repositories be cloned ahead of the evaluation? Warning: this may require several hundred GigaBytes of free disk space. +preload-repositories=false +# Delete each repository after it has been processed? This helps with freeing disk space, but makes it more difficult to rerun the evaluation later +# because the repositories have to be cloned again +clean-repositories=false +# The number of EXPERIMENT_TIMEOUT_UNIT to wait for a patcher to finish patching (long) +experiment.timeout.length=5 +# The time unit for the timeout, e.g., SECONDS, MINUTES, ... +experiment.timeout.unit=MINUTES +# Number of threads for parallel execution. Note that the bottleneck might be the IO capacity of your disk. +experiment.thread-count=1 # Minimum number of cherries in a repository for it to be considered for the evaluation. +# -1 means all repositories are included. experiment.dataset.min-size=-1 # Maximum number of commits in a repository for a dataset to be considered for the study. If a repository has # more commits, it is simply ignored. 
Values of 0 or less are automatically converted to Integer.MAX_VALUE. +# -1 means all repositories are included. experiment.dataset.max-size=-1 -# Number of threads for parallel execution. Note that the bottleneck might be the IO capacity of your disk. -experiment.thread-count=1 +# Which patchers should be enabled? +experiment.patcher.gnu-patch=true +experiment.patcher.git-apply=false +experiment.patcher.git-cp=true +experiment.patcher.mpatch=true + +### +### PATHS ### +### +# The path to the list of dataset +experiment.datasets=dataset/mined-cherries-verification +# The path to the main working directory of the experiment +experiment.dir.main=/home/alex/data/cherry-picks/study-files-verification +# The path to the results directory +experiment.dir.results=/home/alex/data/cherry-picks/results-verification +# The path to the directory to which the SPL repos are cloned +experiment.dir.repos=/home/alex/data/cherry-picks/REPOS +experiment.processed-file=/home/alex/data/cherry-picks/results-verification/processed.txt + +### +### SAMPLING ### +### +experiment.sample-file = evaluation-workdir/last-sample-verification.ser # Should the amount of data be reduced by sampling with a certain confidence? experiment.enable-sampling=false # (z) is the Z-score corresponding to your desired confidence level (for a 95% confidence level, (z = 1.96)), @@ -34,4 +51,21 @@ sampling.e=0.10 # (p) is the sample proportion (in percent, such as 50% = 0.5), sampling.p=0.5 # A seed that is used during sampling to ensure repeatability of the experiments -sampling.seed=3 \ No newline at end of file +sampling.seed=3 +# The number of random experiment repetitions. In each run, random variants are generated and a random source variant +# is selected from which a diff is calculated. 
The higher the number of repeats the more random repeats for two specific +# SPL-commit pairs (inclusive start and end) +# This setting only makes sense when samping is enabled +experiment.repeats.start=1 +experiment.repeats.end=1 + + +### +### MISC ### +### +# From which run id to start the experiments. Can be used to skip experimental runs. +experiment.startid=0 +# Which type of cherry-picks should be considered (Trivial, Complex, Both)? +experiment.cherry-type = Complex +# Whether debug files should be written. If true, the evaluation will create DEBUG directories with various files in the working directories. +experiment.debug=false diff --git a/src/test/java/DiffParserTest.java b/src/test/java/DiffParserTest.java index 821b484c..eda59516 100644 --- a/src/test/java/DiffParserTest.java +++ b/src/test/java/DiffParserTest.java @@ -76,7 +76,7 @@ public void loadOriginalDiff() throws IOException { } } } - + @Test public void parseDiffThatCausedException() throws IOException { Path diff = Path.of("src/test/resources/patch-breakdown/problem.txt"); @@ -111,13 +111,20 @@ public void parseRejectsFiltered() throws Exception { assert !originalDiff.isEmpty(); } + @Test + public void parseUnknownProblem() throws Exception { + Path diff = Path.of("src/test/resources/troublesome-diffs/sql-escapes.txt"); + List lines = Files.readAllLines(diff); + OriginalDiff originalDiff = DiffParser.toOriginalDiff(lines); + assert !originalDiff.isEmpty(); + } -@Test -public void parseCRLF() throws Exception { - Path diff = Path.of("src/test/resources/troublesome-diffs/crlf.txt"); - List lines = readContentSafely(diff); - OriginalDiff originalDiff = DiffParser.toOriginalDiff(lines); - assert !originalDiff.isEmpty(); - Assertions.assertEquals(21, originalDiff.toLines().size()); -} + @Test + public void parseCRLF() throws Exception { + Path diff = Path.of("src/test/resources/troublesome-diffs/crlf.txt"); + List lines = readContentSafely(diff); + OriginalDiff originalDiff = 
DiffParser.toOriginalDiff(lines); + assert !originalDiff.isEmpty(); + Assertions.assertEquals(21, originalDiff.toLines().size()); + } } diff --git a/src/test/resources/troublesome-diffs/sql-escapes.txt b/src/test/resources/troublesome-diffs/sql-escapes.txt new file mode 100644 index 00000000..ad5ee35d --- /dev/null +++ b/src/test/resources/troublesome-diffs/sql-escapes.txt @@ -0,0 +1,1141 @@ +diff --color -NurZ -B citus-V0/src/test/regress/expected/join.out citus-V1/src/test/regress/expected/join.out +--- citus-V0/src/test/regress/expected/join.out 2025-08-08 15:15:00.142241878 +0200 ++++ citus-V1/src/test/regress/expected/join.out 1970-01-01 01:00:00.000000000 +0100 +@@ -1,472 +0,0 @@ +--- +--- join with subquery pushdown support +--- +-SET citus.next_shard_id TO 9000000; +-CREATE SCHEMA join_schema; +-SET search_path TO join_schema, public; +-CREATE TABLE test_table_1(id int, val1 int); +-CREATE TABLE test_table_2(id bigint, val1 int); +-CREATE TABLE test_table_3(id int, val1 bigint); +-CREATE TABLE abcd(a int, b int, c int, d int); +-CREATE TABLE distributed_table(a int, b int); +-CREATE TABLE reference_table(a int, c int, b int); +-SELECT create_distributed_table('distributed_table', 'a'); +- create_distributed_table +---------------------------------------------------------------------- +- +-(1 row) +- +-SELECT create_reference_table('reference_table'); +- create_reference_table +---------------------------------------------------------------------- +- +-(1 row) +- +-SELECT create_distributed_table('test_table_1', 'id'); +- create_distributed_table +---------------------------------------------------------------------- +- +-(1 row) +- +-SELECT create_distributed_table('test_table_2', 'id'); +- create_distributed_table +---------------------------------------------------------------------- +- +-(1 row) +- +-SELECT create_distributed_table('test_table_3', 'id'); +- create_distributed_table +---------------------------------------------------------------------- 
+- +-(1 row) +- +-SELECT create_distributed_table('abcd', 'b'); +- create_distributed_table +---------------------------------------------------------------------- +- +-(1 row) +- +-INSERT INTO test_table_1 VALUES(1,1),(2,2),(3,3); +-INSERT INTO test_table_2 VALUES(2,2),(3,3),(4,4); +-INSERT INTO test_table_3 VALUES(1,1),(3,3),(4,5); +--- Simple full outer join +-SELECT id FROM test_table_1 FULL JOIN test_table_3 using(id) ORDER BY 1; +- id +---------------------------------------------------------------------- +- 1 +- 2 +- 3 +- 4 +-(4 rows) +- +--- Get all columns as the result of the full join +-SELECT * FROM test_table_1 FULL JOIN test_table_3 using(id) ORDER BY 1; +- id | val1 | val1 +---------------------------------------------------------------------- +- 1 | 1 | 1 +- 2 | 2 | +- 3 | 3 | 3 +- 4 | | 5 +-(4 rows) +- +--- Join subqueries using single column +-SELECT * FROM +- (SELECT test_table_1.id FROM test_table_1 FULL JOIN test_table_3 using(id)) as j1 +- FULL JOIN +- (SELECT test_table_1.id FROM test_table_1 FULL JOIN test_table_3 using(id)) as j2 +- USING(id) +- ORDER BY 1; +- id +---------------------------------------------------------------------- +- 1 +- 2 +- 3 +- +- +-(5 rows) +- +--- Join subqueries using multiple columns +-SELECT * FROM +- (SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_3 using(id)) as j1 +- FULL JOIN +- (SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_3 using(id)) as j2 +- USING(id, val1) +- ORDER BY 1; +- id | val1 +---------------------------------------------------------------------- +- 1 | 1 +- 2 | 2 +- 3 | 3 +- | +- | +-(5 rows) +- +--- Full join using multiple columns +-SELECT * FROM test_table_1 FULL JOIN test_table_3 USING(id, val1) ORDER BY 1; +- id | val1 +---------------------------------------------------------------------- +- 1 | 1 +- 2 | 2 +- 3 | 3 +- 4 | 5 +-(4 rows) +- +--- Full join with complicated target lists +-SELECT count(DISTINCT id), 
(avg(test_table_1.val1) + id * id)::integer as avg_value, id::numeric IS NOT NULL as not_null +-FROM test_table_1 FULL JOIN test_table_3 using(id) +-WHERE id::bigint < 55 +-GROUP BY id +-ORDER BY 2 +-ASC LIMIT 3; +- count | avg_value | not_null +---------------------------------------------------------------------- +- 1 | 2 | t +- 1 | 6 | t +- 1 | 12 | t +-(3 rows) +- +-SELECT max(val1) +-FROM test_table_1 FULL JOIN test_table_3 USING(id, val1) +-GROUP BY test_table_1.id +-ORDER BY 1; +- max +---------------------------------------------------------------------- +- 1 +- 2 +- 3 +- 5 +-(4 rows) +- +--- Test the left join as well +-SELECT max(val1) +-FROM test_table_1 LEFT JOIN test_table_3 USING(id, val1) +-GROUP BY test_table_1.id +-ORDER BY 1; +- max +---------------------------------------------------------------------- +- 1 +- 2 +- 3 +-(3 rows) +- +--- Full outer join with different distribution column types, should error out +-SELECT * FROM test_table_1 full join test_table_2 using(id); +-ERROR: cannot push down this subquery +-DETAIL: Shards of relations in subquery need to have 1-to-1 shard partitioning +--- Test when the non-distributed column has the value of NULL +-INSERT INTO test_table_1 VALUES(7, NULL); +-INSERT INTO test_table_2 VALUES(7, NULL); +-INSERT INTO test_table_3 VALUES(7, NULL); +--- Get all columns as the result of the full join +-SELECT * FROM test_table_1 FULL JOIN test_table_3 using(id) ORDER BY 1; +- id | val1 | val1 +---------------------------------------------------------------------- +- 1 | 1 | 1 +- 2 | 2 | +- 3 | 3 | 3 +- 4 | | 5 +- 7 | | +-(5 rows) +- +--- Get the same result (with multiple id) +-SELECT * FROM test_table_1 FULL JOIN test_table_3 ON (test_table_1.id = test_table_3.id) ORDER BY 1; +- id | val1 | id | val1 +---------------------------------------------------------------------- +- 1 | 1 | 1 | 1 +- 2 | 2 | | +- 3 | 3 | 3 | 3 +- 7 | | 7 | +- | | 4 | 5 +-(5 rows) +- +--- Full join using multiple columns +-SELECT * FROM 
test_table_1 FULL JOIN test_table_3 USING(id, val1) ORDER BY 1; +- id | val1 +---------------------------------------------------------------------- +- 1 | 1 +- 2 | 2 +- 3 | 3 +- 4 | 5 +- 7 | +- 7 | +-(6 rows) +- +--- In order to make the same test with different data types use text-varchar pair +--- instead of using int-bigint pair. +-DROP TABLE test_table_1; +-DROP TABLE test_table_2; +-DROP TABLE test_table_3; +-CREATE TABLE test_table_1(id int, val1 text); +-CREATE TABLE test_table_2(id int, val1 varchar(30)); +-SELECT create_distributed_table('test_table_1', 'id'); +- create_distributed_table +---------------------------------------------------------------------- +- +-(1 row) +- +-SELECT create_distributed_table('test_table_2', 'id'); +- create_distributed_table +---------------------------------------------------------------------- +- +-(1 row) +- +-INSERT INTO test_table_1 VALUES(1,'val_1'),(2,'val_2'),(3,'val_3'), (4, NULL); +-INSERT INTO test_table_2 VALUES(2,'val_2'),(3,'val_3'),(4,'val_4'), (5, NULL); +--- Simple full outer join +-SELECT id FROM test_table_1 FULL JOIN test_table_2 using(id) ORDER BY 1; +- id +---------------------------------------------------------------------- +- 1 +- 2 +- 3 +- 4 +- 5 +-(5 rows) +- +--- Get all columns as the result of the full join +-SELECT * FROM test_table_1 FULL JOIN test_table_2 using(id) ORDER BY 1; +- id | val1 | val1 +---------------------------------------------------------------------- +- 1 | val_1 | +- 2 | val_2 | val_2 +- 3 | val_3 | val_3 +- 4 | | val_4 +- 5 | | +-(5 rows) +- +--- Join subqueries using multiple columns +-SELECT * FROM +- (SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_2 using(id)) as j1 +- FULL JOIN +- (SELECT test_table_2.id, test_table_2.val1 FROM test_table_1 FULL JOIN test_table_2 using(id)) as j2 +- USING(id, val1) +- ORDER BY 1,2; +- id | val1 +---------------------------------------------------------------------- +- 1 | val_1 +- 2 | val_2 +- 3 | 
val_3 +- 4 | val_4 +- 4 | +- 5 | +- | +- | +-(8 rows) +- +--- Full join using multiple columns +-SELECT * FROM test_table_1 FULL JOIN test_table_2 USING(id, val1) ORDER BY 1,2; +- id | val1 +---------------------------------------------------------------------- +- 1 | val_1 +- 2 | val_2 +- 3 | val_3 +- 4 | val_4 +- 4 | +- 5 | +-(6 rows) +- +-SET citus.enable_repartition_joins to ON; +-SELECT distributed_table.* from distributed_table JOIN reference_table ON (true); +- a | b +---------------------------------------------------------------------- +-(0 rows) +- +-ALTER TABLE reference_table DROP COLUMN c; +--- #4129: make sure a join after drop column works +-SELECT distributed_table.* from distributed_table JOIN reference_table ON (true); +- a | b +---------------------------------------------------------------------- +-(0 rows) +- +-BEGIN; +-SELECT distributed_table.* from distributed_table JOIN reference_table ON (true); +- a | b +---------------------------------------------------------------------- +-(0 rows) +- +-END; +-INSERT INTO abcd VALUES (1,2,3,4); +-INSERT INTO abcd VALUES (2,3,4,5); +-INSERT INTO abcd VALUES (3,4,5,6); +-SELECT * FROM abcd first join abcd second on first.a = second.a ORDER BY 1,2,3,4; +- a | b | c | d | a | b | c | d +---------------------------------------------------------------------- +- 1 | 2 | 3 | 4 | 1 | 2 | 3 | 4 +- 2 | 3 | 4 | 5 | 2 | 3 | 4 | 5 +- 3 | 4 | 5 | 6 | 3 | 4 | 5 | 6 +-(3 rows) +- +-SELECT * FROM abcd first join abcd second on first.b = second.b ORDER BY 1,2,3,4; +- a | b | c | d | a | b | c | d +---------------------------------------------------------------------- +- 1 | 2 | 3 | 4 | 1 | 2 | 3 | 4 +- 2 | 3 | 4 | 5 | 2 | 3 | 4 | 5 +- 3 | 4 | 5 | 6 | 3 | 4 | 5 | 6 +-(3 rows) +- +-SELECT * FROM abcd first join abcd second on first.c = second.c ORDER BY 1,2,3,4; +- a | b | c | d | a | b | c | d +---------------------------------------------------------------------- +- 1 | 2 | 3 | 4 | 1 | 2 | 3 | 4 +- 2 | 3 | 4 | 5 | 2 | 3 
| 4 | 5 +- 3 | 4 | 5 | 6 | 3 | 4 | 5 | 6 +-(3 rows) +- +-BEGIN; +-SELECT * FROM abcd first join abcd second on first.a = second.a ORDER BY 1,2,3,4; +- a | b | c | d | a | b | c | d +---------------------------------------------------------------------- +- 1 | 2 | 3 | 4 | 1 | 2 | 3 | 4 +- 2 | 3 | 4 | 5 | 2 | 3 | 4 | 5 +- 3 | 4 | 5 | 6 | 3 | 4 | 5 | 6 +-(3 rows) +- +-SELECT * FROM abcd first join abcd second on first.b = second.b ORDER BY 1,2,3,4; +- a | b | c | d | a | b | c | d +---------------------------------------------------------------------- +- 1 | 2 | 3 | 4 | 1 | 2 | 3 | 4 +- 2 | 3 | 4 | 5 | 2 | 3 | 4 | 5 +- 3 | 4 | 5 | 6 | 3 | 4 | 5 | 6 +-(3 rows) +- +-SELECT * FROM abcd first join abcd second on first.c = second.c ORDER BY 1,2,3,4; +- a | b | c | d | a | b | c | d +---------------------------------------------------------------------- +- 1 | 2 | 3 | 4 | 1 | 2 | 3 | 4 +- 2 | 3 | 4 | 5 | 2 | 3 | 4 | 5 +- 3 | 4 | 5 | 6 | 3 | 4 | 5 | 6 +-(3 rows) +- +-END; +-ALTER TABLE abcd DROP COLUMN a; +-SELECT * FROM abcd first join abcd second on first.b = second.b ORDER BY 1,2,3,4; +- b | c | d | b | c | d +---------------------------------------------------------------------- +- 2 | 3 | 4 | 2 | 3 | 4 +- 3 | 4 | 5 | 3 | 4 | 5 +- 4 | 5 | 6 | 4 | 5 | 6 +-(3 rows) +- +-SELECT * FROM abcd first join abcd second on first.c = second.c ORDER BY 1,2,3,4; +- b | c | d | b | c | d +---------------------------------------------------------------------- +- 2 | 3 | 4 | 2 | 3 | 4 +- 3 | 4 | 5 | 3 | 4 | 5 +- 4 | 5 | 6 | 4 | 5 | 6 +-(3 rows) +- +-BEGIN; +-SELECT * FROM abcd first join abcd second on first.b = second.b ORDER BY 1,2,3,4; +- b | c | d | b | c | d +---------------------------------------------------------------------- +- 2 | 3 | 4 | 2 | 3 | 4 +- 3 | 4 | 5 | 3 | 4 | 5 +- 4 | 5 | 6 | 4 | 5 | 6 +-(3 rows) +- +-SELECT * FROM abcd first join abcd second on first.c = second.c ORDER BY 1,2,3,4; +- b | c | d | b | c | d 
+---------------------------------------------------------------------- +- 2 | 3 | 4 | 2 | 3 | 4 +- 3 | 4 | 5 | 3 | 4 | 5 +- 4 | 5 | 6 | 4 | 5 | 6 +-(3 rows) +- +-END; +-CREATE VIEW abcd_view AS SELECT * FROM abcd; +-SELECT * FROM abcd_view first join abcd_view second on first.b = second.b ORDER BY 1,2,3,4; +- b | c | d | b | c | d +---------------------------------------------------------------------- +- 2 | 3 | 4 | 2 | 3 | 4 +- 3 | 4 | 5 | 3 | 4 | 5 +- 4 | 5 | 6 | 4 | 5 | 6 +-(3 rows) +- +-SELECT * FROM abcd_view first join abcd_view second on first.c = second.c ORDER BY 1,2,3,4; +- b | c | d | b | c | d +---------------------------------------------------------------------- +- 2 | 3 | 4 | 2 | 3 | 4 +- 3 | 4 | 5 | 3 | 4 | 5 +- 4 | 5 | 6 | 4 | 5 | 6 +-(3 rows) +- +-BEGIN; +-SELECT * FROM abcd_view first join abcd_view second on first.b = second.b ORDER BY 1,2,3,4; +- b | c | d | b | c | d +---------------------------------------------------------------------- +- 2 | 3 | 4 | 2 | 3 | 4 +- 3 | 4 | 5 | 3 | 4 | 5 +- 4 | 5 | 6 | 4 | 5 | 6 +-(3 rows) +- +-SELECT * FROM abcd_view first join abcd_view second on first.c = second.c ORDER BY 1,2,3,4; +- b | c | d | b | c | d +---------------------------------------------------------------------- +- 2 | 3 | 4 | 2 | 3 | 4 +- 3 | 4 | 5 | 3 | 4 | 5 +- 4 | 5 | 6 | 4 | 5 | 6 +-(3 rows) +- +-END; +-SELECT * FROM abcd first full join abcd second on first.b = second.b ORDER BY 1,2,3,4; +- b | c | d | b | c | d +---------------------------------------------------------------------- +- 2 | 3 | 4 | 2 | 3 | 4 +- 3 | 4 | 5 | 3 | 4 | 5 +- 4 | 5 | 6 | 4 | 5 | 6 +-(3 rows) +- +-BEGIN; +-SELECT * FROM abcd first full join abcd second on first.b = second.b ORDER BY 1,2,3,4; +- b | c | d | b | c | d +---------------------------------------------------------------------- +- 2 | 3 | 4 | 2 | 3 | 4 +- 3 | 4 | 5 | 3 | 4 | 5 +- 4 | 5 | 6 | 4 | 5 | 6 +-(3 rows) +- +-END; +-SELECT * FROM abcd_view first join abcd second USING(b) ORDER BY 1,2,3,4; +- b | 
c | d | c | d +---------------------------------------------------------------------- +- 2 | 3 | 4 | 3 | 4 +- 3 | 4 | 5 | 4 | 5 +- 4 | 5 | 6 | 5 | 6 +-(3 rows) +- +-BEGIN; +-SELECT * FROM abcd first join abcd second USING(b) ORDER BY 1,2,3,4; +- b | c | d | c | d +---------------------------------------------------------------------- +- 2 | 3 | 4 | 3 | 4 +- 3 | 4 | 5 | 4 | 5 +- 4 | 5 | 6 | 5 | 6 +-(3 rows) +- +-END; +-SELECT * FROM abcd first join abcd second USING(b) join abcd third on first.b=third.b ORDER BY 1,2,3,4; +- b | c | d | c | d | b | c | d +---------------------------------------------------------------------- +- 2 | 3 | 4 | 3 | 4 | 2 | 3 | 4 +- 3 | 4 | 5 | 4 | 5 | 3 | 4 | 5 +- 4 | 5 | 6 | 5 | 6 | 4 | 5 | 6 +-(3 rows) +- +-BEGIN; +-SELECT * FROM abcd first join abcd second USING(b) join abcd third on first.b=third.b ORDER BY 1,2,3,4; +- b | c | d | c | d | b | c | d +---------------------------------------------------------------------- +- 2 | 3 | 4 | 3 | 4 | 2 | 3 | 4 +- 3 | 4 | 5 | 4 | 5 | 3 | 4 | 5 +- 4 | 5 | 6 | 5 | 6 | 4 | 5 | 6 +-(3 rows) +- +-END; +-DROP SCHEMA join_schema CASCADE; +-NOTICE: drop cascades to 6 other objects +-DETAIL: drop cascades to table abcd +-drop cascades to table distributed_table +-drop cascades to table reference_table +-drop cascades to table test_table_1 +-drop cascades to table test_table_2 +-drop cascades to view abcd_view +diff --color -NurZ -B citus-V0/src/test/regress/expected/join_pushdown.out citus-V1/src/test/regress/expected/join_pushdown.out +--- citus-V0/src/test/regress/expected/join_pushdown.out 1970-01-01 01:00:00.000000000 +0100 ++++ citus-V1/src/test/regress/expected/join_pushdown.out 2025-08-08 15:14:46.857005235 +0200 +@@ -0,0 +1,472 @@ ++-- ++-- join with subquery pushdown support ++-- ++SET citus.next_shard_id TO 9000000; ++CREATE SCHEMA join_schema; ++SET search_path TO join_schema, public; ++CREATE TABLE test_table_1(id int, val1 int); ++CREATE TABLE test_table_2(id bigint, val1 int); ++CREATE 
TABLE test_table_3(id int, val1 bigint); ++CREATE TABLE abcd(a int, b int, c int, d int); ++CREATE TABLE distributed_table(a int, b int); ++CREATE TABLE reference_table(a int, c int, b int); ++SELECT create_distributed_table('distributed_table', 'a'); ++ create_distributed_table ++--------------------------------------------------------------------- ++ ++(1 row) ++ ++SELECT create_reference_table('reference_table'); ++ create_reference_table ++--------------------------------------------------------------------- ++ ++(1 row) ++ ++SELECT create_distributed_table('test_table_1', 'id'); ++ create_distributed_table ++--------------------------------------------------------------------- ++ ++(1 row) ++ ++SELECT create_distributed_table('test_table_2', 'id'); ++ create_distributed_table ++--------------------------------------------------------------------- ++ ++(1 row) ++ ++SELECT create_distributed_table('test_table_3', 'id'); ++ create_distributed_table ++--------------------------------------------------------------------- ++ ++(1 row) ++ ++SELECT create_distributed_table('abcd', 'b'); ++ create_distributed_table ++--------------------------------------------------------------------- ++ ++(1 row) ++ ++INSERT INTO test_table_1 VALUES(1,1),(2,2),(3,3); ++INSERT INTO test_table_2 VALUES(2,2),(3,3),(4,4); ++INSERT INTO test_table_3 VALUES(1,1),(3,3),(4,5); ++-- Simple full outer join ++SELECT id FROM test_table_1 FULL JOIN test_table_3 using(id) ORDER BY 1; ++ id ++--------------------------------------------------------------------- ++ 1 ++ 2 ++ 3 ++ 4 ++(4 rows) ++ ++-- Get all columns as the result of the full join ++SELECT * FROM test_table_1 FULL JOIN test_table_3 using(id) ORDER BY 1; ++ id | val1 | val1 ++--------------------------------------------------------------------- ++ 1 | 1 | 1 ++ 2 | 2 | ++ 3 | 3 | 3 ++ 4 | | 5 ++(4 rows) ++ ++-- Join subqueries using single column ++SELECT * FROM ++ (SELECT test_table_1.id FROM test_table_1 FULL JOIN test_table_3 
using(id)) as j1 ++ FULL JOIN ++ (SELECT test_table_1.id FROM test_table_1 FULL JOIN test_table_3 using(id)) as j2 ++ USING(id) ++ ORDER BY 1; ++ id ++--------------------------------------------------------------------- ++ 1 ++ 2 ++ 3 ++ ++ ++(5 rows) ++ ++-- Join subqueries using multiple columns ++SELECT * FROM ++ (SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_3 using(id)) as j1 ++ FULL JOIN ++ (SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_3 using(id)) as j2 ++ USING(id, val1) ++ ORDER BY 1; ++ id | val1 ++--------------------------------------------------------------------- ++ 1 | 1 ++ 2 | 2 ++ 3 | 3 ++ | ++ | ++(5 rows) ++ ++-- Full join using multiple columns ++SELECT * FROM test_table_1 FULL JOIN test_table_3 USING(id, val1) ORDER BY 1; ++ id | val1 ++--------------------------------------------------------------------- ++ 1 | 1 ++ 2 | 2 ++ 3 | 3 ++ 4 | 5 ++(4 rows) ++ ++-- Full join with complicated target lists ++SELECT count(DISTINCT id), (avg(test_table_1.val1) + id * id)::integer as avg_value, id::numeric IS NOT NULL as not_null ++FROM test_table_1 FULL JOIN test_table_3 using(id) ++WHERE id::bigint < 55 ++GROUP BY id ++ORDER BY 2 ++ASC LIMIT 3; ++ count | avg_value | not_null ++--------------------------------------------------------------------- ++ 1 | 2 | t ++ 1 | 6 | t ++ 1 | 12 | t ++(3 rows) ++ ++SELECT max(val1) ++FROM test_table_1 FULL JOIN test_table_3 USING(id, val1) ++GROUP BY test_table_1.id ++ORDER BY 1; ++ max ++--------------------------------------------------------------------- ++ 1 ++ 2 ++ 3 ++ 5 ++(4 rows) ++ ++-- Test the left join as well ++SELECT max(val1) ++FROM test_table_1 LEFT JOIN test_table_3 USING(id, val1) ++GROUP BY test_table_1.id ++ORDER BY 1; ++ max ++--------------------------------------------------------------------- ++ 1 ++ 2 ++ 3 ++(3 rows) ++ ++-- Full outer join with different distribution column types, should error out ++SELECT * FROM 
test_table_1 full join test_table_2 using(id); ++ERROR: cannot push down this subquery ++DETAIL: Shards of relations in subquery need to have 1-to-1 shard partitioning ++-- Test when the non-distributed column has the value of NULL ++INSERT INTO test_table_1 VALUES(7, NULL); ++INSERT INTO test_table_2 VALUES(7, NULL); ++INSERT INTO test_table_3 VALUES(7, NULL); ++-- Get all columns as the result of the full join ++SELECT * FROM test_table_1 FULL JOIN test_table_3 using(id) ORDER BY 1; ++ id | val1 | val1 ++--------------------------------------------------------------------- ++ 1 | 1 | 1 ++ 2 | 2 | ++ 3 | 3 | 3 ++ 4 | | 5 ++ 7 | | ++(5 rows) ++ ++-- Get the same result (with multiple id) ++SELECT * FROM test_table_1 FULL JOIN test_table_3 ON (test_table_1.id = test_table_3.id) ORDER BY 1; ++ id | val1 | id | val1 ++--------------------------------------------------------------------- ++ 1 | 1 | 1 | 1 ++ 2 | 2 | | ++ 3 | 3 | 3 | 3 ++ 7 | | 7 | ++ | | 4 | 5 ++(5 rows) ++ ++-- Full join using multiple columns ++SELECT * FROM test_table_1 FULL JOIN test_table_3 USING(id, val1) ORDER BY 1; ++ id | val1 ++--------------------------------------------------------------------- ++ 1 | 1 ++ 2 | 2 ++ 3 | 3 ++ 4 | 5 ++ 7 | ++ 7 | ++(6 rows) ++ ++-- In order to make the same test with different data types use text-varchar pair ++-- instead of using int-bigint pair. 
++DROP TABLE test_table_1; ++DROP TABLE test_table_2; ++DROP TABLE test_table_3; ++CREATE TABLE test_table_1(id int, val1 text); ++CREATE TABLE test_table_2(id int, val1 varchar(30)); ++SELECT create_distributed_table('test_table_1', 'id'); ++ create_distributed_table ++--------------------------------------------------------------------- ++ ++(1 row) ++ ++SELECT create_distributed_table('test_table_2', 'id'); ++ create_distributed_table ++--------------------------------------------------------------------- ++ ++(1 row) ++ ++INSERT INTO test_table_1 VALUES(1,'val_1'),(2,'val_2'),(3,'val_3'), (4, NULL); ++INSERT INTO test_table_2 VALUES(2,'val_2'),(3,'val_3'),(4,'val_4'), (5, NULL); ++-- Simple full outer join ++SELECT id FROM test_table_1 FULL JOIN test_table_2 using(id) ORDER BY 1; ++ id ++--------------------------------------------------------------------- ++ 1 ++ 2 ++ 3 ++ 4 ++ 5 ++(5 rows) ++ ++-- Get all columns as the result of the full join ++SELECT * FROM test_table_1 FULL JOIN test_table_2 using(id) ORDER BY 1; ++ id | val1 | val1 ++--------------------------------------------------------------------- ++ 1 | val_1 | ++ 2 | val_2 | val_2 ++ 3 | val_3 | val_3 ++ 4 | | val_4 ++ 5 | | ++(5 rows) ++ ++-- Join subqueries using multiple columns ++SELECT * FROM ++ (SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_2 using(id)) as j1 ++ FULL JOIN ++ (SELECT test_table_2.id, test_table_2.val1 FROM test_table_1 FULL JOIN test_table_2 using(id)) as j2 ++ USING(id, val1) ++ ORDER BY 1,2; ++ id | val1 ++--------------------------------------------------------------------- ++ 1 | val_1 ++ 2 | val_2 ++ 3 | val_3 ++ 4 | val_4 ++ 4 | ++ 5 | ++ | ++ | ++(8 rows) ++ ++-- Full join using multiple columns ++SELECT * FROM test_table_1 FULL JOIN test_table_2 USING(id, val1) ORDER BY 1,2; ++ id | val1 ++--------------------------------------------------------------------- ++ 1 | val_1 ++ 2 | val_2 ++ 3 | val_3 ++ 4 | val_4 ++ 4 | ++ 5 | ++(6 rows) 
++ ++SET citus.enable_repartition_joins to ON; ++SELECT distributed_table.* from distributed_table JOIN reference_table ON (true); ++ a | b ++--------------------------------------------------------------------- ++(0 rows) ++ ++ALTER TABLE reference_table DROP COLUMN c; ++-- #4129: make sure a join after drop column works ++SELECT distributed_table.* from distributed_table JOIN reference_table ON (true); ++ a | b ++--------------------------------------------------------------------- ++(0 rows) ++ ++BEGIN; ++SELECT distributed_table.* from distributed_table JOIN reference_table ON (true); ++ a | b ++--------------------------------------------------------------------- ++(0 rows) ++ ++END; ++INSERT INTO abcd VALUES (1,2,3,4); ++INSERT INTO abcd VALUES (2,3,4,5); ++INSERT INTO abcd VALUES (3,4,5,6); ++SELECT * FROM abcd first join abcd second on first.a = second.a ORDER BY 1,2,3,4; ++ a | b | c | d | a | b | c | d ++--------------------------------------------------------------------- ++ 1 | 2 | 3 | 4 | 1 | 2 | 3 | 4 ++ 2 | 3 | 4 | 5 | 2 | 3 | 4 | 5 ++ 3 | 4 | 5 | 6 | 3 | 4 | 5 | 6 ++(3 rows) ++ ++SELECT * FROM abcd first join abcd second on first.b = second.b ORDER BY 1,2,3,4; ++ a | b | c | d | a | b | c | d ++--------------------------------------------------------------------- ++ 1 | 2 | 3 | 4 | 1 | 2 | 3 | 4 ++ 2 | 3 | 4 | 5 | 2 | 3 | 4 | 5 ++ 3 | 4 | 5 | 6 | 3 | 4 | 5 | 6 ++(3 rows) ++ ++SELECT * FROM abcd first join abcd second on first.c = second.c ORDER BY 1,2,3,4; ++ a | b | c | d | a | b | c | d ++--------------------------------------------------------------------- ++ 1 | 2 | 3 | 4 | 1 | 2 | 3 | 4 ++ 2 | 3 | 4 | 5 | 2 | 3 | 4 | 5 ++ 3 | 4 | 5 | 6 | 3 | 4 | 5 | 6 ++(3 rows) ++ ++BEGIN; ++SELECT * FROM abcd first join abcd second on first.a = second.a ORDER BY 1,2,3,4; ++ a | b | c | d | a | b | c | d ++--------------------------------------------------------------------- ++ 1 | 2 | 3 | 4 | 1 | 2 | 3 | 4 ++ 2 | 3 | 4 | 5 | 2 | 3 | 4 | 5 ++ 3 | 4 | 5 | 6 | 3 
| 4 | 5 | 6 ++(3 rows) ++ ++SELECT * FROM abcd first join abcd second on first.b = second.b ORDER BY 1,2,3,4; ++ a | b | c | d | a | b | c | d ++--------------------------------------------------------------------- ++ 1 | 2 | 3 | 4 | 1 | 2 | 3 | 4 ++ 2 | 3 | 4 | 5 | 2 | 3 | 4 | 5 ++ 3 | 4 | 5 | 6 | 3 | 4 | 5 | 6 ++(3 rows) ++ ++SELECT * FROM abcd first join abcd second on first.c = second.c ORDER BY 1,2,3,4; ++ a | b | c | d | a | b | c | d ++--------------------------------------------------------------------- ++ 1 | 2 | 3 | 4 | 1 | 2 | 3 | 4 ++ 2 | 3 | 4 | 5 | 2 | 3 | 4 | 5 ++ 3 | 4 | 5 | 6 | 3 | 4 | 5 | 6 ++(3 rows) ++ ++END; ++ALTER TABLE abcd DROP COLUMN a; ++SELECT * FROM abcd first join abcd second on first.b = second.b ORDER BY 1,2,3,4; ++ b | c | d | b | c | d ++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 2 | 3 | 4 ++ 3 | 4 | 5 | 3 | 4 | 5 ++ 4 | 5 | 6 | 4 | 5 | 6 ++(3 rows) ++ ++SELECT * FROM abcd first join abcd second on first.c = second.c ORDER BY 1,2,3,4; ++ b | c | d | b | c | d ++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 2 | 3 | 4 ++ 3 | 4 | 5 | 3 | 4 | 5 ++ 4 | 5 | 6 | 4 | 5 | 6 ++(3 rows) ++ ++BEGIN; ++SELECT * FROM abcd first join abcd second on first.b = second.b ORDER BY 1,2,3,4; ++ b | c | d | b | c | d ++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 2 | 3 | 4 ++ 3 | 4 | 5 | 3 | 4 | 5 ++ 4 | 5 | 6 | 4 | 5 | 6 ++(3 rows) ++ ++SELECT * FROM abcd first join abcd second on first.c = second.c ORDER BY 1,2,3,4; ++ b | c | d | b | c | d ++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 2 | 3 | 4 ++ 3 | 4 | 5 | 3 | 4 | 5 ++ 4 | 5 | 6 | 4 | 5 | 6 ++(3 rows) ++ ++END; ++CREATE VIEW abcd_view AS SELECT * FROM abcd; ++SELECT * FROM abcd_view first join abcd_view second on first.b = second.b ORDER BY 1,2,3,4; ++ b | c | d | b | c | d 
++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 2 | 3 | 4 ++ 3 | 4 | 5 | 3 | 4 | 5 ++ 4 | 5 | 6 | 4 | 5 | 6 ++(3 rows) ++ ++SELECT * FROM abcd_view first join abcd_view second on first.c = second.c ORDER BY 1,2,3,4; ++ b | c | d | b | c | d ++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 2 | 3 | 4 ++ 3 | 4 | 5 | 3 | 4 | 5 ++ 4 | 5 | 6 | 4 | 5 | 6 ++(3 rows) ++ ++BEGIN; ++SELECT * FROM abcd_view first join abcd_view second on first.b = second.b ORDER BY 1,2,3,4; ++ b | c | d | b | c | d ++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 2 | 3 | 4 ++ 3 | 4 | 5 | 3 | 4 | 5 ++ 4 | 5 | 6 | 4 | 5 | 6 ++(3 rows) ++ ++SELECT * FROM abcd_view first join abcd_view second on first.c = second.c ORDER BY 1,2,3,4; ++ b | c | d | b | c | d ++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 2 | 3 | 4 ++ 3 | 4 | 5 | 3 | 4 | 5 ++ 4 | 5 | 6 | 4 | 5 | 6 ++(3 rows) ++ ++END; ++SELECT * FROM abcd first full join abcd second on first.b = second.b ORDER BY 1,2,3,4; ++ b | c | d | b | c | d ++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 2 | 3 | 4 ++ 3 | 4 | 5 | 3 | 4 | 5 ++ 4 | 5 | 6 | 4 | 5 | 6 ++(3 rows) ++ ++BEGIN; ++SELECT * FROM abcd first full join abcd second on first.b = second.b ORDER BY 1,2,3,4; ++ b | c | d | b | c | d ++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 2 | 3 | 4 ++ 3 | 4 | 5 | 3 | 4 | 5 ++ 4 | 5 | 6 | 4 | 5 | 6 ++(3 rows) ++ ++END; ++SELECT * FROM abcd_view first join abcd second USING(b) ORDER BY 1,2,3,4; ++ b | c | d | c | d ++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 3 | 4 ++ 3 | 4 | 5 | 4 | 5 ++ 4 | 5 | 6 | 5 | 6 ++(3 rows) ++ ++BEGIN; ++SELECT * FROM abcd first join abcd second USING(b) ORDER BY 1,2,3,4; ++ b | c | d | c | d 
++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 3 | 4 ++ 3 | 4 | 5 | 4 | 5 ++ 4 | 5 | 6 | 5 | 6 ++(3 rows) ++ ++END; ++SELECT * FROM abcd first join abcd second USING(b) join abcd third on first.b=third.b ORDER BY 1,2,3,4; ++ b | c | d | c | d | b | c | d ++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 3 | 4 | 2 | 3 | 4 ++ 3 | 4 | 5 | 4 | 5 | 3 | 4 | 5 ++ 4 | 5 | 6 | 5 | 6 | 4 | 5 | 6 ++(3 rows) ++ ++BEGIN; ++SELECT * FROM abcd first join abcd second USING(b) join abcd third on first.b=third.b ORDER BY 1,2,3,4; ++ b | c | d | c | d | b | c | d ++--------------------------------------------------------------------- ++ 2 | 3 | 4 | 3 | 4 | 2 | 3 | 4 ++ 3 | 4 | 5 | 4 | 5 | 3 | 4 | 5 ++ 4 | 5 | 6 | 5 | 6 | 4 | 5 | 6 ++(3 rows) ++ ++END; ++DROP SCHEMA join_schema CASCADE; ++NOTICE: drop cascades to 6 other objects ++DETAIL: drop cascades to table abcd ++drop cascades to table distributed_table ++drop cascades to table reference_table ++drop cascades to table test_table_1 ++drop cascades to table test_table_2 ++drop cascades to view abcd_view +diff --color -NurZ -B citus-V0/src/test/regress/sql/join.sql citus-V1/src/test/regress/sql/join.sql +--- citus-V0/src/test/regress/sql/join.sql 2025-08-08 15:15:00.142241878 +0200 ++++ citus-V1/src/test/regress/sql/join.sql 1970-01-01 01:00:00.000000000 +0100 +@@ -1,185 +0,0 @@ +--- +--- join with subquery pushdown support +--- +- +-SET citus.next_shard_id TO 9000000; +- +-CREATE SCHEMA join_schema; +-SET search_path TO join_schema, public; +- +-CREATE TABLE test_table_1(id int, val1 int); +-CREATE TABLE test_table_2(id bigint, val1 int); +-CREATE TABLE test_table_3(id int, val1 bigint); +-CREATE TABLE abcd(a int, b int, c int, d int); +- +-CREATE TABLE distributed_table(a int, b int); +-CREATE TABLE reference_table(a int, c int, b int); +- +-SELECT create_distributed_table('distributed_table', 'a'); +-SELECT create_reference_table('reference_table'); 
+-SELECT create_distributed_table('test_table_1', 'id'); +-SELECT create_distributed_table('test_table_2', 'id'); +-SELECT create_distributed_table('test_table_3', 'id'); +-SELECT create_distributed_table('abcd', 'b'); +- +-INSERT INTO test_table_1 VALUES(1,1),(2,2),(3,3); +-INSERT INTO test_table_2 VALUES(2,2),(3,3),(4,4); +-INSERT INTO test_table_3 VALUES(1,1),(3,3),(4,5); +- +--- Simple full outer join +-SELECT id FROM test_table_1 FULL JOIN test_table_3 using(id) ORDER BY 1; +- +--- Get all columns as the result of the full join +-SELECT * FROM test_table_1 FULL JOIN test_table_3 using(id) ORDER BY 1; +- +--- Join subqueries using single column +-SELECT * FROM +- (SELECT test_table_1.id FROM test_table_1 FULL JOIN test_table_3 using(id)) as j1 +- FULL JOIN +- (SELECT test_table_1.id FROM test_table_1 FULL JOIN test_table_3 using(id)) as j2 +- USING(id) +- ORDER BY 1; +- +--- Join subqueries using multiple columns +-SELECT * FROM +- (SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_3 using(id)) as j1 +- FULL JOIN +- (SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_3 using(id)) as j2 +- USING(id, val1) +- ORDER BY 1; +- +--- Full join using multiple columns +-SELECT * FROM test_table_1 FULL JOIN test_table_3 USING(id, val1) ORDER BY 1; +- +--- Full join with complicated target lists +-SELECT count(DISTINCT id), (avg(test_table_1.val1) + id * id)::integer as avg_value, id::numeric IS NOT NULL as not_null +-FROM test_table_1 FULL JOIN test_table_3 using(id) +-WHERE id::bigint < 55 +-GROUP BY id +-ORDER BY 2 +-ASC LIMIT 3; +- +-SELECT max(val1) +-FROM test_table_1 FULL JOIN test_table_3 USING(id, val1) +-GROUP BY test_table_1.id +-ORDER BY 1; +- +--- Test the left join as well +-SELECT max(val1) +-FROM test_table_1 LEFT JOIN test_table_3 USING(id, val1) +-GROUP BY test_table_1.id +-ORDER BY 1; +- +--- Full outer join with different distribution column types, should error out +-SELECT * FROM 
test_table_1 full join test_table_2 using(id); +- +--- Test when the non-distributed column has the value of NULL +-INSERT INTO test_table_1 VALUES(7, NULL); +-INSERT INTO test_table_2 VALUES(7, NULL); +-INSERT INTO test_table_3 VALUES(7, NULL); +- +--- Get all columns as the result of the full join +-SELECT * FROM test_table_1 FULL JOIN test_table_3 using(id) ORDER BY 1; +- +--- Get the same result (with multiple id) +-SELECT * FROM test_table_1 FULL JOIN test_table_3 ON (test_table_1.id = test_table_3.id) ORDER BY 1; +- +--- Full join using multiple columns +-SELECT * FROM test_table_1 FULL JOIN test_table_3 USING(id, val1) ORDER BY 1; +- +--- In order to make the same test with different data types use text-varchar pair +--- instead of using int-bigint pair. +-DROP TABLE test_table_1; +-DROP TABLE test_table_2; +-DROP TABLE test_table_3; +- +-CREATE TABLE test_table_1(id int, val1 text); +-CREATE TABLE test_table_2(id int, val1 varchar(30)); +- +-SELECT create_distributed_table('test_table_1', 'id'); +-SELECT create_distributed_table('test_table_2', 'id'); +- +-INSERT INTO test_table_1 VALUES(1,'val_1'),(2,'val_2'),(3,'val_3'), (4, NULL); +-INSERT INTO test_table_2 VALUES(2,'val_2'),(3,'val_3'),(4,'val_4'), (5, NULL); +- +--- Simple full outer join +-SELECT id FROM test_table_1 FULL JOIN test_table_2 using(id) ORDER BY 1; +- +--- Get all columns as the result of the full join +-SELECT * FROM test_table_1 FULL JOIN test_table_2 using(id) ORDER BY 1; +- +--- Join subqueries using multiple columns +-SELECT * FROM +- (SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_2 using(id)) as j1 +- FULL JOIN +- (SELECT test_table_2.id, test_table_2.val1 FROM test_table_1 FULL JOIN test_table_2 using(id)) as j2 +- USING(id, val1) +- ORDER BY 1,2; +- +--- Full join using multiple columns +-SELECT * FROM test_table_1 FULL JOIN test_table_2 USING(id, val1) ORDER BY 1,2; +- +-SET citus.enable_repartition_joins to ON; +- +-SELECT distributed_table.* 
from distributed_table JOIN reference_table ON (true); +-ALTER TABLE reference_table DROP COLUMN c; +- +--- #4129: make sure a join after drop column works +-SELECT distributed_table.* from distributed_table JOIN reference_table ON (true); +-BEGIN; +-SELECT distributed_table.* from distributed_table JOIN reference_table ON (true); +-END; +- +-INSERT INTO abcd VALUES (1,2,3,4); +-INSERT INTO abcd VALUES (2,3,4,5); +-INSERT INTO abcd VALUES (3,4,5,6); +- +-SELECT * FROM abcd first join abcd second on first.a = second.a ORDER BY 1,2,3,4; +-SELECT * FROM abcd first join abcd second on first.b = second.b ORDER BY 1,2,3,4; +-SELECT * FROM abcd first join abcd second on first.c = second.c ORDER BY 1,2,3,4; +- +-BEGIN; +-SELECT * FROM abcd first join abcd second on first.a = second.a ORDER BY 1,2,3,4; +-SELECT * FROM abcd first join abcd second on first.b = second.b ORDER BY 1,2,3,4; +-SELECT * FROM abcd first join abcd second on first.c = second.c ORDER BY 1,2,3,4; +-END; +- +-ALTER TABLE abcd DROP COLUMN a; +- +-SELECT * FROM abcd first join abcd second on first.b = second.b ORDER BY 1,2,3,4; +-SELECT * FROM abcd first join abcd second on first.c = second.c ORDER BY 1,2,3,4; +- +-BEGIN; +-SELECT * FROM abcd first join abcd second on first.b = second.b ORDER BY 1,2,3,4; +-SELECT * FROM abcd first join abcd second on first.c = second.c ORDER BY 1,2,3,4; +-END; +- +-CREATE VIEW abcd_view AS SELECT * FROM abcd; +- +-SELECT * FROM abcd_view first join abcd_view second on first.b = second.b ORDER BY 1,2,3,4; +-SELECT * FROM abcd_view first join abcd_view second on first.c = second.c ORDER BY 1,2,3,4; +- +-BEGIN; +-SELECT * FROM abcd_view first join abcd_view second on first.b = second.b ORDER BY 1,2,3,4; +-SELECT * FROM abcd_view first join abcd_view second on first.c = second.c ORDER BY 1,2,3,4; +-END; +- +-SELECT * FROM abcd first full join abcd second on first.b = second.b ORDER BY 1,2,3,4; +-BEGIN; +-SELECT * FROM abcd first full join abcd second on first.b = second.b 
ORDER BY 1,2,3,4; +-END; +- +-SELECT * FROM abcd_view first join abcd second USING(b) ORDER BY 1,2,3,4; +-BEGIN; +-SELECT * FROM abcd first join abcd second USING(b) ORDER BY 1,2,3,4; +-END; +- +-SELECT * FROM abcd first join abcd second USING(b) join abcd third on first.b=third.b ORDER BY 1,2,3,4; +-BEGIN; +-SELECT * FROM abcd first join abcd second USING(b) join abcd third on first.b=third.b ORDER BY 1,2,3,4; +-END; +- +- +- +- +-DROP SCHEMA join_schema CASCADE; diff --git a/stop-execution.sh b/stop-execution.sh index b5f0a5a4..38b0422a 100755 --- a/stop-execution.sh +++ b/stop-execution.sh @@ -1,4 +1,4 @@ #! /bin/bash echo "Stopping Docker container. This will take a moment..." -docker stop "$(docker ps -a -q --filter "ancestor=pwm-eval")" +docker stop "$(docker ps -a -q --filter "ancestor=mpatch-reproduction")" echo "...done."