Skip to content

Commit

Permalink
Merge pull request #27 from dimitrismistriotis/update-library-to-1dot3dot1
Browse files Browse the repository at this point in the history

Update library to 1dot3dot1
  • Loading branch information
dimitrismistriotis committed Oct 13, 2023
2 parents d3b60cf + 8a5c942 commit 47e13b7
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 13 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## Version 1.3.1

* scikit-learn to 1.3.1
* pandas to 2.1.1
* numpy to 1.26.0
* scipy to 1.11.3

## Version 1.3.0

* scikit-learn to 1.3.0
Expand Down
Binary file modified profanity_check/data/model.joblib
Binary file not shown.
27 changes: 21 additions & 6 deletions profanity_check/data/train_model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Train Model from data"""
import hashlib
import subprocess
import sys
from pathlib import Path

import pandas as pd
Expand All @@ -18,8 +19,8 @@ def sha256sum(filename) -> str:
Helper method from StackOverflow: https://stackoverflow.com/a/44873382
"""
with open(filename, "rb", buffering=0) as f:
return hashlib.file_digest(f, "sha512").hexdigest()
with open(filename, "rb", buffering=0) as file_to_check:
return hashlib.file_digest(file_to_check, "sha512").hexdigest()


if __name__ == "__main__":
Expand All @@ -30,19 +31,33 @@ def sha256sum(filename) -> str:
data_file = Path(__file__).parent / DATA_FILE_NAME
if not data_file.exists():
print(f"Could not find {DATA_FILE_NAME}, will try to extract.")
subprocess.run(["./decompress_data"])
#
# Note: check=False as per pylint's recommendation, in detail
# > The ``check`` keyword is set to False by default. It means the process
# > launched by ``subprocess.run`` can exit with a non-zero exit code and
# > fail silently. It's better to set it explicitly to make clear what the
# > error-handling behavior is.
#
# A check raising an exception should be added at some point
#
decompression = subprocess.run(["./decompress_data"], check=False)

hash_sha512 = sha256sum(data_file)
# Check that decompression completed without errors
if decompression.returncode != 0:
print("Error in decompressing the data.")
sys.exit(1)

hash_sha256 = sha256sum(data_file)
hash_file = Path(__file__).parent / HASH_OF_DATA_FILE
stored_hash = hash_file.read_text().strip().split(" ")[0]
print(stored_hash)

print()
print(f"SHA512 hash of {DATA_FILE_NAME}: {hash_sha512}")
print(f"SHA256 hash of {DATA_FILE_NAME}: {hash_sha256}")
print(f"Stored hash to check against: {stored_hash}")
print()

assert hash_sha512 == stored_hash, (
assert hash_sha256 == stored_hash, (
f"Hash of {DATA_FILE_NAME} does not match stored hash. "
"Please download the data again."
)
Expand Down
Binary file modified profanity_check/data/vectorizer.joblib
Binary file not shown.
12 changes: 7 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
joblib==1.3.1
numpy==1.24.4
joblib==1.3.2
numpy==1.24.4; python_version <= '3.8'
numpy==1.26.0; python_version >= '3.9'
# Used for the training script:
pandas==2.0.3
scikit-learn==1.3.0
scipy==1.11.1; python_version >= '3.10'
pandas==2.0.3; python_version <= '3.8'
pandas==2.1.1; python_version >= '3.9'
scikit-learn==1.3.1
scipy==1.11.3; python_version >= '3.10'
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setuptools.setup(
name="alt-profanity-check",
version="1.3.0",
version="1.3.1",
author="Victor Zhou (original author), Menelaos Kotoglou, Dimitrios Mistriotis",
author_email="dimitrios@mistriotis.com",
description=(
Expand All @@ -17,7 +17,7 @@
long_description_content_type="text/markdown",
url="https://github.com/dimitrismistriotis/alt-profanity-check",
packages=setuptools.find_packages(),
install_requires=["scikit-learn==1.3.0", "joblib>=1.3.1"],
install_requires=["scikit-learn==1.3.1", "joblib>=1.3.1"],
python_requires=">=3.8",
package_data={"profanity_check": ["data/model.joblib", "data/vectorizer.joblib"]},
classifiers=[
Expand Down

0 comments on commit 47e13b7

Please sign in to comment.