Skip to content

Commit

Permalink
Loggerの導入
Browse files
pre-commitの設定をpyproject.tomlに移動
  • Loading branch information
johtani committed Jul 7, 2023
1 parent f88bfc6 commit fddd5f4
Show file tree
Hide file tree
Showing 4 changed files with 211 additions and 11 deletions.
4 changes: 1 addition & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,15 @@ repos:
rev: 23.3.0
hooks:
- id: black
args: ["--line-length", "120", "."]
- repo: https://github.com/pycqa/flake8
rev: 6.0.0
hooks:
- id: flake8
args: ["--max-line-length", "120"]
additional_dependencies: [flake8-pyproject]
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
args: ["--profile", "black", "--filter-files", "--multi-line", "3"]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.4.1
hooks:
Expand Down
182 changes: 181 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,26 @@ jupyterlab = "^4.0.2"
[tool.poetry.group.dev.dependencies]
ipywidgets = "^8.0.6"
pre-commit = "^3.3.3"
black = "^23.3.0"
flake8 = "^6.0.0"
isort = "^5.12.0"
mypy = "^1.4.1"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]

[tool.black]
line-length = 120

[tool.isort]
profile = "black"
filter_files = true
multi_line_output = 3

[tool.flake8]
max_line_length = 120

[tool.mypy]
19 changes: 12 additions & 7 deletions tools/extract-products.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,36 @@
import logging
import pathlib

import pandas as pd

# Module-wide logger, pinned to INFO so the script's progress messages pass its level check.
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.INFO)

# Root logging configuration: each record is rendered as a timestamp
# (month/day/year, 12-hour clock with AM/PM) followed by the message.
logging.basicConfig(
    datefmt="%m/%d/%Y %I:%M:%S %p",
    format="%(asctime)s %(message)s",
)


def main():
    """Export the ESCI products dataset as one JSON Lines file per locale.

    Reads ``shopping_queries_dataset_products.parquet`` from
    ``./esci-data/shopping_queries_dataset``, creates the output directory
    ``./esci-raw-jsonl/products`` (including parents) if it does not exist,
    and writes ``esci-data-products-<locale>.json`` for each of the locales
    ``jp``, ``us`` and ``es``. Each output file holds the rows whose
    ``product_locale`` column equals that locale, one JSON record per line.

    Progress is reported through the module-level ``LOGGER``.
    """
    LOGGER.info("Starting to create JSONL file from esci-data products...")
    esci_path = pathlib.Path("./esci-data/shopping_queries_dataset")
    LOGGER.info(" Reading parquet file...")
    df_products = pd.read_parquet(esci_path.joinpath("shopping_queries_dataset_products.parquet"))

    LOGGER.info(" Making output path...")
    output_path = pathlib.Path("./esci-raw-jsonl/products")
    output_path.mkdir(exist_ok=True, parents=True)

    # One pass per locale replaces three copy-pasted stanzas; the log text and
    # the output file names are the same as before.
    for locale in ("jp", "us", "es"):
        LOGGER.info(" Extracting products data that product_locale is '%s'...", locale)
        df_products[df_products["product_locale"] == locale].to_json(
            output_path.joinpath(f"esci-data-products-{locale}.json"), orient="records", lines=True
        )
    LOGGER.info("Finish extract-products")


if __name__ == "__main__":
Expand Down

0 comments on commit fddd5f4

Please sign in to comment.