|
| 1 | +""" |
| 2 | +Validate that CI and source and docs are in sync |
| 3 | +""" |
import re
import os
import pathlib
import unittest
import platform
import itertools
from typing import List, Optional, Union, Callable, Iterator, Tuple

from dffml.plugins import PACKAGE_DIRECTORY_TO_NAME
| 13 | + |
| 14 | + |
class IgnoreFile:
    """
    Checks if files should be ignored by reading ignore files such as .gitignore
    and .dockerignore and parsing their rules.

    Only a simplified subset of the ignore file syntax is implemented. Each
    non-comment line is turned into a regular expression by escaping ``.`` and
    replacing ``*`` with ``.*``. The expression is then matched, with
    :py:func:`re.match`, against the path of the file relative to the
    directory containing the ignore file. Lines starting with ``!`` un-ignore
    a file that some other rule ignored.

    Parameters
    ----------
    root
        Top most directory in which ignore files are looked for. Files passed
        to the instance must live somewhere underneath it.
    ignore_files
        Names of the ignore files to read from each directory between a file
        and ``root``. Defaults to ``[".gitignore"]``.

    Examples
    --------

    >>> import pathlib
    >>>
    >>> root = pathlib.Path(".")
    >>> _ = root.joinpath(".gitignore").write_text("subdir/**")
    >>> root.joinpath("subdir").mkdir()
    >>> _ = root.joinpath("subdir", ".gitignore").write_text("!sub2/**")
    >>>
    >>> ignorefile = IgnoreFile(root)
    >>> print(ignorefile("subdir/sub2/feedface"))
    False
    >>> print(ignorefile("subdir/other"))
    True
    """

    def __init__(
        self, root: pathlib.Path, ignore_files: Optional[List[str]] = None
    ):
        self.root = root
        # Build the default per instance so it is never shared between
        # instances
        self.ignore_files = (
            [".gitignore"] if ignore_files is None else ignore_files
        )
        # Maps a directory to the list of (do_not_ignore, regex) rules read
        # from the ignore files found in that directory
        self.compiled_regexes = {}
        # Paths of the ignore files which have already been read. Tracked per
        # file rather than per directory so that several ignore files sitting
        # in the same directory are all loaded.
        self._loaded_ignore_files = set()

    @staticmethod
    def path_to_lines(path: pathlib.Path) -> List[str]:
        """
        Read the file at ``path`` and return its non-empty lines.
        """
        return list(
            filter(bool, path.read_text().replace("\r\n", "\n").split("\n"))
        )

    @staticmethod
    def compile_regexes(
        contents: List[str],
    ) -> Iterator[Tuple[bool, re.Pattern]]:
        """
        Yield a ``(do_not_ignore, regex)`` tuple for each rule in ``contents``.

        ``do_not_ignore`` is True when the line started with ``!``. Lines
        starting with ``#`` are comments and yield nothing.
        """
        for line in contents:
            # Comments are not rules. Compiling them could raise re.error if
            # they happen to contain regex metacharacters.
            if line.startswith("#"):
                continue
            # Handle the case where we do not want to ignore files matching
            # this pattern
            do_not_ignore = line.startswith("!")
            if do_not_ignore:
                line = line[1:]
            # Substitute periods for literal periods
            line = line.replace(".", r"\.")
            # Substitute * for regex version of *, which is .*
            line = line.replace("*", r".*")
            yield do_not_ignore, re.compile(line)

    def __call__(self, filename: Union[str, pathlib.Path]) -> bool:
        """
        Return True if ``filename`` is ignored by the applicable ignore files.

        Relative paths (``filename`` as well as ``root``) are made absolute
        against the current working directory.

        Raises
        ------
        ValueError
            If ``filename`` is not located underneath ``root``.
        """
        # Compare absolute paths with absolute paths, a relative root would
        # never be found in the parents of an absolute file path
        filepath = pathlib.Path(filename).absolute()
        root = pathlib.Path(self.root).absolute()
        parents = list(filepath.parents)
        # Read any ignore files and compile their regexes from the file path
        # up to the root of the repo
        search_directories = parents[: parents.index(root) + 1]
        for ignore_filename in self.ignore_files:
            for directory in search_directories:
                ignore_path = directory / ignore_filename
                if (
                    ignore_path in self._loaded_ignore_files
                    or not ignore_path.is_file()
                ):
                    continue
                self._loaded_ignore_files.add(ignore_path)
                self.compiled_regexes.setdefault(directory, []).extend(
                    self.compile_regexes(self.path_to_lines(ignore_path))
                )
        # Rules only apply to files underneath the directory they were read
        # from. The keys were taken from unresolved absolute paths, so they
        # are compared as is (resolving them would lose rules whenever a
        # symlink is involved).
        applicable = [
            (filepath.relative_to(directory).as_posix(), rules)
            for directory, rules in self.compiled_regexes.items()
            if directory in parents
        ]

        def any_rule_matches(negated: bool) -> bool:
            return any(
                regex.match(relative_path)
                for relative_path, rules in applicable
                for do_not_ignore, regex in rules
                if do_not_ignore == negated
            )

        # A file is ignored when an ignore rule matches it, unless it also
        # matches a rule saying it should not be ignored
        return any_rule_matches(False) and not any_rule_matches(True)
| 115 | + |
| 116 | + |
class TestGitIgnore(unittest.TestCase):
    """
    Check IgnoreFile against the ignore files found in this repository.
    """

    def test_ignore(self):
        # Anchor every path to the resolved repo root (the parent of the
        # directory containing this file) so that the result does not depend
        # on the directory the tests are run from.
        root = pathlib.Path(__file__).resolve().parents[1]
        ignorefile = IgnoreFile(root=root)

        self.assertFalse(ignorefile(root / "setup.py"))
        self.assertFalse(ignorefile(root / "dffml/skel/common/setup.py"))
        self.assertTrue(ignorefile(root / "dffml/skel/model/setup.py"))
        self.assertTrue(
            ignorefile(
                root
                / "examples/shouldi/tests/downloads/cri-resource-manager-download/.gopath/pkg/mod/github.com/apache/thrift@v0.12.0/contrib/fb303/py/setup.py"
            )
        )
| 129 | + |
| 130 | + |
# Root of the repository: the parent of the directory containing this file.
# Resolved to an absolute path so that it can be used as the root of an
# IgnoreFile even when __file__ is a relative path.
REPO_ROOT = pathlib.Path(__file__).resolve().parents[1]
| 132 | + |
| 133 | + |
@unittest.skipUnless(platform.system() == "Linux", "Only runs on Linux")
class TestCI(unittest.TestCase):
    """
    Validate that the setup.py files in the repo, dffml/plugins.py, and the CI
    testing workflow all list the same set of plugins.
    """

    maxDiff = None
    # setup.py files which are not expected to have an entry in
    # PACKAGE_DIRECTORY_TO_NAME
    SKIP_SETUP_PY_FILES = [
        REPO_ROOT / "setup.py",
        REPO_ROOT / "dffml" / "skel" / "common" / "setup.py",
        REPO_ROOT / "examples" / "source" / "setup.py",
        REPO_ROOT
        / "examples"
        / "tutorials"
        / "sources"
        / "file"
        / "dffml-source-ini"
        / "setup.py",
    ]

    @staticmethod
    def _testing_workflow_lines() -> List[str]:
        """
        Return the lines of the CI testing workflow.

        The file is read as plain text to avoid requiring the yaml module, as
        that has C dependencies.
        """
        return (
            pathlib.Path(REPO_ROOT, ".github", "workflows", "testing.yml")
            .read_text()
            .split("\n")
        )

    def test_all_plugins_appear_in_dffml_plugins(self):
        """
        Make sure that any setup.py files associated with a plugin appear in
        dffml/plugins.py
        """
        ignorefile = IgnoreFile(REPO_ROOT)
        # A list of directory tuples, relative to the root of the repo, which
        # contain setup.py files. Directories whose setup.py files are ignored
        # by an ignore file or listed in SKIP_SETUP_PY_FILES will not be in
        # this list
        setup_py_directories = sorted(
            path.parent.relative_to(REPO_ROOT).parts
            for path in REPO_ROOT.rglob("setup.py")
            if not ignorefile(path) and path not in self.SKIP_SETUP_PY_FILES
        )
        self.assertListEqual(
            setup_py_directories, sorted(PACKAGE_DIRECTORY_TO_NAME.keys())
        )

    def test_all_plugins_being_tested(self):
        """
        Make sure that plugins are included in the test matrix and therefore
        being tested by the CI.
        """
        # We compare against PACKAGE_DIRECTORY_TO_NAME as the truth because the
        # test_all_plugins_appear_in_dffml_plugins() validates that every
        # directory that has a setup.py appears in PACKAGE_DIRECTORY_TO_NAME.
        # The "." entry is added on top of the plugin directories.
        should_be = sorted(
            ["/".join(directories) for directories in PACKAGE_DIRECTORY_TO_NAME]
            + ["."]
        )
        plugins_tested_by_ci = []
        # Once we see plugin: we start adding the subsequent list of plugins
        # to our list of plugins tested by CI.
        start_adding_plugins = 0
        # Go over each line in the YAML file
        for line in self._testing_workflow_lines():
            stripped = line.strip()
            if stripped == "plugin:":
                # Start adding when we see the list of plugins
                start_adding_plugins += 1
            elif not start_adding_plugins:
                # Nothing to collect until the list of plugins starts
                continue
            elif ":" in line:
                # If we've reached the next YAML object key we're done adding
                # to the list of plugins
                break
            elif stripped:
                # Add plugins to list of plugins being tested
                # Line is in the format of: "- plugin/path"
                # Blank lines within the list are skipped
                plugins_tested_by_ci.append(stripped.split()[-1])
        self.assertTrue(plugins_tested_by_ci, "No plugins found!")
        # NOTE Scanning stops at the first key following the list, so this
        # only catches a "plugin:" line repeated before that key
        self.assertEqual(
            start_adding_plugins, 1, "More than one list of plugins found!"
        )
        # Sort them
        plugins_tested_by_ci = sorted(plugins_tested_by_ci)
        # Compare to truth
        self.assertListEqual(should_be, plugins_tested_by_ci)

    def test_all_plugins_have_pypi_tokens(self):
        """
        Make sure every plugin is listed with a PyPi API token to enable
        automatic releases.
        """
        # We take the lines of the workflow, filter by lines mentioning PyPi
        # tokens, and make a list of tuples which contain the left hand side
        # of the line's '=', split on the '/' character.
        # We skip the line which sets the default TWINE_PASSWORD environment
        # variable, since that's for the main package (not any of the plugins).
        # Example:
        # model/vowpalWabbit=${{ secrets.PYPI_MODEL_VOWPALWABBIT }}
        # This line results in a list entry of: ('model', 'vowpalWabbit')
        plugins_with_pypi_tokens = sorted(
            tuple(line.strip().split("=")[0].split("/"))
            for line in self._testing_workflow_lines()
            if "secrets.PYPI_" in line and "TWINE_PASSWORD" not in line
        )
        # We compare this list to the list of packages dffml.plugins knows
        # about, to make sure that every package has a secret so it can be
        # auto-deployed to PyPi.
        self.assertListEqual(
            plugins_with_pypi_tokens, sorted(PACKAGE_DIRECTORY_TO_NAME.keys())
        )
0 commit comments