diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index b188c661483..6afd53550d3 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -80,48 +80,65 @@ jobs:
       ut-modules: ${{ steps.ut-modules.outputs.modules }}
       it-modules: ${{ steps.it-modules.outputs.modules }}
     steps:
-      - uses: actions/checkout@v3  # required for push event
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: '2000'
+      - name: checkout apache seatunnel dev branch
+        id: git_init
+        run: |
+          /usr/bin/git remote add apache https://github.com/apache/seatunnel
+          /usr/bin/git -c protocol.version=2 fetch --no-tags --prune --no-recurse-submodules --depth=2000 apache +refs/heads/dev*:refs/remotes/apache/dev* +refs/tags/dev*:refs/tags/dev*
+          /usr/bin/git checkout apache/dev
+          # Use the GITHUB_REF environment variable rather than interpolating
+          # github.ref into the script text, so a crafted ref name cannot
+          # inject shell commands.
+          /usr/bin/git checkout "$GITHUB_REF"
+          echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> "$GITHUB_OUTPUT"
       - uses: actions/setup-python@v4
         with:
           python-version: '3.11.0'
-      - name: Check for file changes
-        uses: dorny/paths-filter@v2
+      - name: Check for file changes by python
         id: filter
-        with:
-          token: ${{ github.token }}
-          list-files: json
-          # The following is a single composite pattern that allows next CI steps,
-          # the pattern is in form of [not (foo or bar)] to be safe.
-          # When new files come in, the CI will NOT ignore them unless listed,
-          # so remember to extend here if they do not serve functional purposes.
-          # NOTE: careful with using ** in expression, keep !**/{old, new things}.
-          filters: |
-            cv2:
-              - "seatunnel-connectors-v2/**"
-            cv2-e2e:
-              - "seatunnel-e2e/seatunnel-connector-v2-e2e/**"
-            api:
-              - "seatunnel-api/**"
-              - "seatunnel-common/**"
-              - "seatunnel-config/**"
-              - "seatunnel-connectors/**"
-              - "seatunnel-core/**"
-              - "seatunnel-e2e/seatunnel-e2e-common/**"
-              - "seatunnel-formats/**"
-              - "seatunnel-plugin-discovery/**"
-              - "seatunnel-transforms-v2/**"
-              - "seatunnel-translation/**"
-              - "seatunnel-e2e/seatunnel-transforms-v2-e2e/**"
-              - "seatunnel-connectors/**"
-              - "pom.xml"
-              - "**/workflows/**"
-              - "**/tools/**"
-            engine:
-              - "seatunnel-engine/**"
-            engine-e2e:
-              - "seatunnel-e2e/seatunnel-engine-e2e/**"
-            deleted-poms:
-              - deleted: "**/pom.xml"
+        env:
+          # Passed through the environment instead of being interpolated into
+          # the script text, so a crafted branch name cannot inject commands.
+          CURRENT_BRANCH: ${{ steps.git_init.outputs.branch }}
+        run: |
+          pip install GitPython
+
+          # Runs the checker for one filter and publishes two step outputs:
+          # "<name>" (true/false) and "<name>_files" (JSON list of paths).
+          #   $1 = output name, $2 = mode (ua: updated/added, d: deleted),
+          #   remaining arguments = path patterns.
+          check_updates() {
+            local name="$1" mode="$2" result
+            shift 2
+            result=$(python tools/update_modules_check/check_file_updates.py "$mode" "$GITHUB_WORKSPACE" apache/dev "origin/$CURRENT_BRANCH" "$@")
+            # The script prints the flag on the first line and the list on the second.
+            echo "${name}=${result%%$'\n'*}" >> "$GITHUB_OUTPUT"
+            echo "${name}_files=${result#*$'\n'}" >> "$GITHUB_OUTPUT"
+          }
+
+          check_updates cv2 ua "seatunnel-connectors-v2/**"
+          check_updates cv2-e2e ua "seatunnel-e2e/seatunnel-connector-v2-e2e/**"
+          check_updates engine ua "seatunnel-engine/**"
+          check_updates deleted-poms d "**/pom.xml"
+          check_updates engine-e2e ua "seatunnel-e2e/seatunnel-engine-e2e/**"
+          check_updates api ua \
+            "seatunnel-api/**" \
+            "seatunnel-common/**" \
+            "seatunnel-config/**" \
+            "seatunnel-connectors/**" \
+            "seatunnel-core/**" \
+            "seatunnel-e2e/seatunnel-e2e-common/**" \
+            "seatunnel-formats/**" \
+            "seatunnel-plugin-discovery/**" \
+            "seatunnel-transforms-v2/**" \
+            "seatunnel-translation/**" \
+            "seatunnel-e2e/seatunnel-transforms-v2-e2e/**" \
+            "pom.xml" \
+            "**/workflows/**" \
+            "tools/**"

       - name: Check Connector V2 Update
         id: cv2-modules
@@ -257,7 +274,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest', 'windows-latest' ]
-    timeout-minutes: 36
+    timeout-minutes: 60
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
diff --git a/.gitignore b/.gitignore
index 74311a0fa05..80660071d40 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,4 +49,5 @@ spark-warehouse
 *.flattened-pom.xml

 seatunnel-examples
-/lib/*
\ No newline at end of file
+/lib/*
+version.properties
diff --git
a/tools/update_modules_check/check_file_updates.py b/tools/update_modules_check/check_file_updates.py new file mode 100644 index 00000000000..5e40ddb6937 --- /dev/null +++ b/tools/update_modules_check/check_file_updates.py @@ -0,0 +1,99 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Report files that changed between the merge base of two refs and the second ref.

Usage:
    check_file_updates.py <type> <repo_path> <branch1> <branch2> <pattern>...

Two lines are written to stdout: ``true`` or ``false`` (whether anything
matched), followed by a JSON array with the matching paths.

Patterns are matched with :func:`fnmatch.fnmatch`, so ``*`` also matches the
``/`` path separator; they are not gitignore-style globs.
"""
import argparse
import fnmatch
import json
import sys

import git


def _matches_any(path, patterns):
    """Return True when ``path`` is not None and matches at least one pattern."""
    return path is not None and any(
        fnmatch.fnmatch(path, pattern) for pattern in patterns
    )


def _diff_entries(repo_path, base, head):
    """Return the diff entries going from revision ``base`` to revision ``head``.

    ``create_patch=True`` is kept from the original implementation.
    NOTE(review): the deletion check in this module relies on ``b_path`` being
    None for removed files, which appears to depend on patch-format parsing;
    confirm against GitPython before switching to the raw diff format.
    """
    repo = git.Repo(repo_path)
    return repo.commit(base).diff(repo.commit(head), create_patch=True)


def get_changed_files_between_branches(repo_path1, branch1, branch2, directorys):
    """List the paths touched between two revisions that match any pattern.

    :param repo_path1: path to the local git repository.
    :param branch1: revision used as the base of the comparison.
    :param branch2: revision compared against the base.
    :param directorys: fnmatch-style patterns; a path is reported when it
        matches at least one of them.
    :return: matching paths in diff order, each listed once. For an entry whose
        old and new paths differ, both are candidates (new path first).
    """
    changed_files = []
    seen = set()

    for file_diff in _diff_entries(repo_path1, branch1, branch2):
        # When a_path == b_path the second candidate is skipped via ``seen``.
        for path in (file_diff.b_path, file_diff.a_path):
            if path in seen or not _matches_any(path, directorys):
                continue
            # Report each path once, even when several patterns match it.
            seen.add(path)
            changed_files.append(path)

    return changed_files


def get_deleted_files_between_branches(repo_path, branch1, branch2, directorys):
    """List the paths deleted between two revisions that match any pattern.

    An entry counts as deleted when it has an old path but no new path.

    :param repo_path: path to the local git repository.
    :param branch1: revision used as the base of the comparison.
    :param branch2: revision compared against the base.
    :param directorys: fnmatch-style patterns; a path is reported when it
        matches at least one of them.
    :return: matching deleted paths in diff order, each listed once.
    """
    return [
        file_diff.a_path
        for file_diff in _diff_entries(repo_path, branch1, branch2)
        if file_diff.b_path is None and _matches_any(file_diff.a_path, directorys)
    ]


def main(argv=None):
    """Parse the command line, run the requested comparison and print the result.

    :param argv: argument list to parse; defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Compare changes in a specified directory between two branches in different repositories.")
    parser.add_argument("type", help="ua will return update and add files, d will return delete files")
    parser.add_argument("repo_path", help="Path to the first local Git repository")
    parser.add_argument("branch1", help="Name of the first branch to compare")
    parser.add_argument("branch2", help="Name of the second branch to compare")
    parser.add_argument("directorys", nargs="+", help="Directory to compare")

    args = parser.parse_args(argv)

    repo = git.Repo(args.repo_path)
    ref1 = repo.refs[args.branch1]
    ref2 = repo.refs[args.branch2]

    # Changes are measured from the point where branch2 forked off branch1,
    # so commits that only exist on branch1 are not reported.
    merge_bases = repo.merge_base(ref1, ref2)
    if not merge_bases:
        # Fail with an explicit message instead of an IndexError traceback.
        sys.exit(
            "No common ancestor found between %s and %s; "
            "the fetched history may be too shallow." % (args.branch1, args.branch2)
        )
    common_ancestor = merge_bases[0].hexsha

    # Any type other than 'ua' is treated as a deleted-files query.
    if args.type == 'ua':
        collect = get_changed_files_between_branches
    else:
        collect = get_deleted_files_between_branches
    files = collect(args.repo_path, common_ancestor, args.branch2, args.directorys)

    # First line: flag consumed by the workflow; second line: JSON file list.
    print('true' if files else 'false')
    print(json.dumps(files, indent=None))


if __name__ == "__main__":
    main()