## Import

In [1]:
from crimson.filter_beta.filter import re_filter
from crimson.filter_beta.printer import print_json
import pytest
import re


## Default: Includes All Inputs


In [2]:
# Sample inputs for the default-behaviour demo.
texts = [
    "Hello world",
    "Python is great",
    "OpenAI GPT",
    "Machine learning",
    "Deep learning with Python",
    "Please, open the Python interpreter",
]

# Neither include nor exclude patterns are passed; per the recorded output
# every input comes back unchanged.
all_included = re_filter(texts)
print_json(all_included)

[
  "Hello world",
  "Python is great",
  "OpenAI GPT",
  "Machine learning",
  "Deep learning with Python",
  "Please, open the Python interpreter"
]


## Exclude Only, with Default Flags

In [3]:
# Sample inputs; only an exclude pattern is supplied in this demo.
texts = [
    "Hello world",
    "Python is great",
    "OpenAI GPT",
    "Machine learning",
    "Deep learning with Python",
    "Please, open the Python interpreter",
]

# No `flags` argument is given, so re_filter's defaults apply. In the recorded
# output both "OpenAI GPT" and the lowercase "Please, open ..." entry are
# dropped, so the default flags appear to make matching case-insensitive.
exclude = [r'Open']

# Named for what it holds: the inputs that survive the "Open" exclusion
# (previously stored under the misleading name `all_included`).
without_open = re_filter(texts, exclude=exclude)

print_json(without_open)

[
  "Hello world",
  "Python is great",
  "Machine learning",
  "Deep learning with Python"
]


## Ignore Case

In [4]:
# Sample inputs shared by both runs below.
texts = [
    "Hello world",
    "Python is great",
    "OpenAI GPT",
    "Machine learning",
    "Deep learning with Python",
    "Please, open the Python interpreter",
]

# Keep entries matching "Python" or "learning"; drop entries matching "Open".
include = [r'Python', r'learning']
exclude = [r'Open']

# Same patterns, two flag settings: explicit IGNORECASE versus no flags at all.
case_ignored = re_filter(texts, include, exclude, flags=[re.IGNORECASE])
case_strict = re_filter(texts, include, exclude, flags=[])

# Collect both outcomes under descriptive keys for side-by-side display.
result = dict(
    case_ignored=case_ignored,
    case_strict=case_strict,
)

print_json(result)
	

{
  "case_ignored": [
    "Python is great",
    "Machine learning",
    "Deep learning with Python"
  ],
  "case_strict": [
    "Python is great",
    "Machine learning",
    "Deep learning with Python",
    "Please, open the Python interpreter"
  ]
}


In [5]:
# test
# Pins the "Ignore Case" example: `result` from the preceding cell must equal
# the expected value below.
agreed = {
    "case_ignored": [
        "Python is great",
        "Machine learning",
        "Deep learning with Python",
    ],
    "case_strict": [
        "Python is great",
        "Machine learning",
        "Deep learning with Python",
        "Please, open the Python interpreter",
    ],
}

assert result == agreed

## Multiline

In [6]:
# Multi-line inputs covering "Python" at the start/end of the whole string
# versus the start/end of an inner line, plus case and literal ^/$ variants.
texts = [
    "  Python at start with indent\nMiddle line\nPython at end",
    "Python at start without indent\nMiddle line\nPython at end",
    "Not at start\nPython in middle\nNot at end",
    "python lowercase\nPYTHON UPPERCASE",
    "No match here\nNeither here",
    "^Python with caret\n$Python with dollar$",
    "Multiple\nPython\nlines\nwithout empty space, ends with Python\n",
    "Multiple\nPython\nlines\nwith empty space, ends with Python   \n",
]

# Find lines that start or end with "Python".
include = [r'^Python', r'Python$']
# Exclude the all-uppercase "PYTHON".
exclude = [r'PYTHON']

# Same patterns, with and without re.MULTILINE, to show how ^ and $ anchor.
multiline_on = re_filter(texts, include, exclude, flags=[re.MULTILINE])
multiline_off = re_filter(texts, include, exclude, flags=[])

result = dict(
    multiline_on=multiline_on,
    multiline_off=multiline_off,
)

print_json(result)

{
  "multiline_on": [
    "  Python at start with indent\nMiddle line\nPython at end",
    "Python at start without indent\nMiddle line\nPython at end",
    "Not at start\nPython in middle\nNot at end",
    "Multiple\nPython\nlines\nwithout empty space, ends with Python\n",
    "Multiple\nPython\nlines\nwith empty space, ends with Python   \n"
  ],
  "multiline_off": [
    "Python at start without indent\nMiddle line\nPython at end",
    "Multiple\nPython\nlines\nwithout empty space, ends with Python\n"
  ]
}


In [7]:
# test
# Pins the "Multiline" example: `result` from the preceding cell must equal
# the expected value below.
agreed = {
    "multiline_on": [
        "  Python at start with indent\nMiddle line\nPython at end",
        "Python at start without indent\nMiddle line\nPython at end",
        "Not at start\nPython in middle\nNot at end",
        "Multiple\nPython\nlines\nwithout empty space, ends with Python\n",
        "Multiple\nPython\nlines\nwith empty space, ends with Python   \n",
    ],
    "multiline_off": [
        "Python at start without indent\nMiddle line\nPython at end",
        "Multiple\nPython\nlines\nwithout empty space, ends with Python\n",
    ],
}

assert result == agreed

## Paths

In [8]:
# Example file paths to filter.
paths = [
    "/home/user/documents/file1.txt",
    "/home/user/images/image.jpg",
    "/home/user/projects/project_01/README.md",
    "/home/user/example.py",
    "/home/user/.hidden_file",
]

# Keep paths ending in ".py" or containing a "/projects/" segment.
include = [r".*\.py$", r"/projects/.*"]
# Drop paths whose entry directly under /home/user starts with a dot.
exclude = [r"^/home/user/\..*"]

filtered_paths = re_filter(paths, include, exclude)
print(filtered_paths)

['/home/user/projects/project_01/README.md', '/home/user/example.py']


In [9]:
# test
# Pins the "Paths" example: `filtered_paths` from the preceding cell must
# equal the expected list below.
agreed = [
    '/home/user/projects/project_01/README.md',
    '/home/user/example.py',
]

assert filtered_paths == agreed

## Email


In [10]:
# Candidate strings: some are well-formed addresses, some are not.
texts = [
    "user@example.com",
    "invalid.email@com",
    "another_user@sub.domain.com",
    "not_an_email",
    "user@example.co.uk",
]

# Whole-string match: local part, "@", domain, then a dot followed by a
# top-level label of at least two letters.
include = [r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"]
# Drop addresses that end in "@example.com".
exclude = [r"@example\.com$"]

filtered = re_filter(texts, include, exclude)

# Bare expression so the notebook displays the result.
filtered

['another_user@sub.domain.com', 'user@example.co.uk']

In [11]:
# test
# Pins the "Email" example: `filtered` from the preceding cell must equal
# the expected list below.
agreed = [
    'another_user@sub.domain.com',
    'user@example.co.uk',
]

assert filtered == agreed

## Error cell: invalid regular expression

# "[" opens a character class that is never closed, i.e. an invalid regular
# expression; the call is expected to raise.
# NOTE(review): Exception is broad - narrow it once the exact exception type
# raised by re_filter is confirmed.
with pytest.raises(Exception):
    re_filter(
        ["test"],
        ["["],  # invalid regular expression
        [],
    )