Skip to content

Commit

Permalink
Merge pull request #13 from jaspersiebring/regex_fix
Browse files Browse the repository at this point in the history
Regex fix (named and optional groups)
  • Loading branch information
jaspersiebring committed Dec 19, 2023
2 parents d1bf06f + 96e4347 commit 982d499
Show file tree
Hide file tree
Showing 8 changed files with 460 additions and 722 deletions.
25 changes: 14 additions & 11 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,32 @@ jobs:
id: setup-python
uses: actions/setup-python@v4
with:
python-version: 3.8
python-version: 3.9
cache: poetry

- name: Install dependencies
run: |
sudo apt update
sudo apt install -y \
libgstreamer-plugins-base1.0-0 \
dpkg-dev \
build-essential \
libjpeg-dev \
libtiff-dev \
libsdl1.2-dev \
libnotify-dev \
freeglut3-dev \
libsm-dev \
libgtk-3-dev \
libwebkit2gtk-4.0-dev \
libxtst-dev \
libgl1-mesa-dev \
libglu1-mesa-dev \
libgstreamer-plugins-base1.0-dev \
libgtk-3-dev \
libjpeg-dev \
libnotify-dev \
libpng-dev \
libsdl2-dev \
libsm-dev \
libunwind-dev \
libtiff-dev \
libwebkit2gtk-4.0-dev \
libxtst-dev \
libgtk2.0-dev
if: matrix.os == 'ubuntu-latest'
- name: Installing Poetry environment
Expand All @@ -58,17 +61,17 @@ jobs:
- name: Running mypy
id: mypy
run: poetry run mypy libretro_finder/ config/ tests/
- name: Running pylint
id: pylint
run: poetry run pylint libretro_finder/ config/ tests/ --fail-under=8
- name: Running ruff
id: ruff
run: poetry run ruff check libretro_finder/ config/ tests/
- name: Checking code coverage
id: coverage
run: poetry run pytest --cov=config --cov=libretro_finder --cov-fail-under=75

- name: Build source and .whl archives with Poetry
id: build
run: poetry build
if: steps.pytest.outcome == 'success' && steps.mypy.outcome == 'success' && steps.pylint.outcome == 'success' && steps.coverage.outcome == 'success' && github.event_name == 'push'
if: steps.pytest.outcome == 'success' && steps.mypy.outcome == 'success' && steps.ruff.outcome == 'success' && steps.coverage.outcome == 'success' && github.event_name == 'push'

- name: Authorize GitHub Actions to publish on PYPI
run: poetry config pypi-token.pypi ${{ secrets.PYPI_API_TOKEN }}
Expand Down
38 changes: 38 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
FROM python:3.9.18-bookworm

# wxPython dependencies
# - apt-get is used for both steps: `apt` does not offer a stable CLI for
#   scripted use and prints a warning when run non-interactively.
# - update, install and index cleanup share one RUN so the package index is
#   never stale at install time and is not stored in the image layer.
# - packages are listed once each, alphabetically, to keep diffs readable.
RUN apt-get update && apt-get install -y \
        build-essential \
        dpkg-dev \
        freeglut3-dev \
        libgl1-mesa-dev \
        libglu1-mesa-dev \
        libgstreamer-plugins-base1.0-0 \
        libgstreamer-plugins-base1.0-dev \
        libgtk-3-dev \
        libgtk2.0-dev \
        libjpeg-dev \
        libnotify-dev \
        libpng-dev \
        libsdl1.2-dev \
        libsdl2-dev \
        libsm-dev \
        libtiff-dev \
        libunwind-dev \
        libwebkit2gtk-4.0-dev \
        libxtst-dev \
    && rm -rf /var/lib/apt/lists/*

# Installing poetry into a dedicated venv at POETRY_HOME and adding it to PATH.
# Done before any project file is copied so this layer is reused across
# source changes; --no-cache-dir keeps pip's download cache out of the layer.
ENV POETRY_HOME=/opt/poetry
RUN python3 -m venv $POETRY_HOME && $POETRY_HOME/bin/pip install --no-cache-dir poetry
ENV PATH="${POETRY_HOME}/bin:${PATH}"

# Installing libretro_finder's deps from the dependency manifests only, so
# this layer is rebuilt when the manifests change rather than on every source
# edit. The trailing wildcard lets the build proceed if poetry.lock is absent.
WORKDIR /app
COPY pyproject.toml poetry.lock* ./
RUN poetry config installer.max-workers 10 && poetry install --no-root

# Copying libretro_finder and installing the project itself
# (its dependencies are already present from the previous layer)
COPY . /app
RUN poetry install

# Default start
CMD ["/bin/bash"]
30 changes: 11 additions & 19 deletions config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,33 +19,25 @@
print("Done.")

# Parsing Libretro's system.dat and formatting as pandas dataframe
index = 0 # pylint: disable=invalid-name

system_series = []
with open(FILE_PATH, "r", encoding="utf-8") as file:
for line in file:
line = line.strip()
if line.startswith("comment"):
current_system = line.split('"')[1]
elif line.startswith("rom"):
match = re.search(
r"name (\S+)(?: size (\S+))?(?: crc (\S+))?(?: md5 (\S+))?(?: sha1 (\S+))?",
line,
)
if match:
data = {
"system": current_system,
"name": match.group(1).replace('"', "").replace("'", ""),
"size": match.group(2) if match.group(2) else None,
"crc": match.group(3) if match.group(3) else None,
"md5": match.group(4) if match.group(4) else None,
"sha1": match.group(5) if match.group(5) else None,
}
system_series.append(pd.DataFrame(data, index=[index]))
index += 1
continue

regex_string = r'\brom.+name\s+(?P<name>"[^"]+"|\S+)(?:(?:(?:\s+size\s+(?P<size>\S+))|(?:\s+crc\s+(?P<crc>\S+))|(?:\s+md5\s+(?P<md5>\S+))|(?:\s+sha1\s+(?P<sha1>\S+)))(?=\s|$))*'
match = re.search(regex_string, line)

if match:
data = match.groupdict()
data["system"] = current_system
data["name"] = data["name"].replace('"', "")
system_series.append(data)

# join dfs and drop features without checksums
SYSTEMS = pd.concat(system_series)
SYSTEMS = pd.DataFrame(system_series)
SYSTEMS = SYSTEMS[~SYSTEMS["md5"].isnull()].reset_index(drop=True)

# path to retroarch/system (if found)
Expand Down
4 changes: 2 additions & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ name: bscraper
channels:
- conda-forge
dependencies:
- python=3.8
- poetry>=1.5
- python=3.9
- poetry

0 comments on commit 982d499

Please sign in to comment.