Skip to content

Commit

Permalink
Merge pull request #29 from erlichsefi/DEV
Browse files Browse the repository at this point in the history
  • Loading branch information
erlichsefi committed Oct 17, 2023
2 parents 5ebb102 + 5c2ef1e commit e4cfe30
Show file tree
Hide file tree
Showing 13 changed files with 150 additions and 109 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/test-suite.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Unit & Integration Tests
env:
DISABLED_SCRAPPERS: NETIV_HASED

on:
push:
Expand Down Expand Up @@ -39,4 +41,4 @@ jobs:
- name: Build with Docker
run: docker build -t erlichsefi/israeli-supermarket-scarpers:test --target test .
- name: Test with pytest
run: docker run --rm --name test-run erlichsefi/israeli-supermarket-scarpers:test
run: docker run --rm --name test-run -e DISABLED_SCRAPPERS="${{ env.DISABLED_SCRAPPERS }}" erlichsefi/israeli-supermarket-scarpers:test
12 changes: 8 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
Israel Supermarket Scraper: Clients to download the data published by the supermarkets.
=======================================
This is a scraper for ALL the supermarket chains listed in the GOV.IL site.

שקיפות מחירים (השוואת מחירים) - https://www.gov.il/he/departments/legalInfo/cpfta_prices_regulations


Expand All @@ -15,8 +16,12 @@ This is a scraper for ALL the supermarket chains listed in the GOV.IL site.

----
Scheduled Automatic Testing:
The test-suite is scheduled to run every week, so you can see if the supermarket chains has chanced something in their interface and the package will not work probably, status: [![Scheduled Tests](https://github.com/erlichsefi/israeli-supermarket-scarpers/actions/workflows/test-suite.yml/badge.svg?event=schedule)](https://github.com/erlichsefi/israeli-supermarket-scarpers/actions/workflows/test-suite.yml)
The test suite is scheduled to run every three days, so you can see whether the supermarket chains have changed something in their interface that stops the package from working properly. However, the tests running here are also integration tests with the APIs, which may fail.

Status: [![Scheduled Tests](https://github.com/erlichsefi/israeli-supermarket-scarpers/actions/workflows/test-suite.yml/badge.svg?event=schedule)](https://github.com/erlichsefi/israeli-supermarket-scarpers/actions/workflows/test-suite.yml)

Notice:
- NETIV_HASED is disabled in testing because it started failing in October 2023.


--------
Expand All @@ -37,7 +42,7 @@ If you think you've found a bug:
What is il_supermarket_scarper?
-------------

There are alot of projects in github tring to scrape the supermarket data, must of them are not stable or wasn't updated of a while, it's about time there will be one codebase that those the work completely.
There are a lot of projects on GitHub trying to scrape the supermarket data, but most of them are not stable or haven't been updated for a while. It's about time there was one codebase that does the work completely.

You only need to run the following code to get all the data currently shared by the supermarkets.

Expand All @@ -46,7 +51,6 @@ from il_supermarket_scarper import MainScrapperRunner

scraper = MainScrapperRunner()
scraper.run()

```


Expand All @@ -66,7 +70,7 @@ repo directly:
python3 -m pip install -U git+https://github.com/erlichsefi/israeli-supermarket-scarpers.git
# or if you don't have 'git' installed
python3 -m pip install -U https://github.com/erlichsefi/israeli-supermarket-scarpers/master
#



Running Docker
Expand Down
4 changes: 3 additions & 1 deletion il_supermarket_scarper/engines/cerberus.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,9 @@ def persist_from_ftp(self, file_name):
downloaded = True

if ext == ".gz":
Logger.info(f"File size is {os.path.getsize(temporary_gz_file_path)} bytes.")
Logger.info(
f"File size is {os.path.getsize(temporary_gz_file_path)} bytes."
)
extract_xml_file_from_gz_file(temporary_gz_file_path)

Logger.info(f"Done persisting file {file_name}")
Expand Down
4 changes: 3 additions & 1 deletion il_supermarket_scarper/engines/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,9 @@ def _save_and_extract(self, file_link, file_save_path):
downloaded = True

if file_save_path_with_ext.endswith("gz"):
Logger.info(f"File size is {os.path.getsize(file_save_path_with_ext)} bytes.")
Logger.info(
f"File size is {os.path.getsize(file_save_path_with_ext)} bytes."
)
extract_xml_file_from_gz_file(file_save_path_with_ext)

os.remove(file_save_path_with_ext)
Expand Down
2 changes: 1 addition & 1 deletion il_supermarket_scarper/engines/multipage_web.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def collect_files_details_from_site(
files_types=files_types,
store_id=store_id,
only_latest=only_latest,
files_names_to_scrape=files_names_to_scrape
files_names_to_scrape=files_names_to_scrape,
)

return download_urls, file_names
Expand Down
2 changes: 1 addition & 1 deletion il_supermarket_scarper/scrappers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .machsani_ashuk import MahsaniAShuk
from .mega_market import MegaMarket
from .mega import Mega
from .nativ_hashed import NetivHasef
from .nativ_hashed import NetivHased
from .osherad import Osherad
from .polizer import Polizer
from .ramilevy import RamiLevy
Expand Down
6 changes: 3 additions & 3 deletions il_supermarket_scarper/scrappers/nativ_hashed.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
from il_supermarket_scarper.utils import _is_saturday_in_israel, _is_holiday_in_israel


class NetivHasef(WebBase):
"""scraper for nativ hasef"""
class NetivHased(WebBase):
"""scraper for nativ Hased"""

def __init__(self, folder_name=None):
super().__init__(
"Netiv Hasef",
"Netiv Hased",
chain_id="7290058160839",
url="http://141.226.222.202/",
folder_name=folder_name,
Expand Down
62 changes: 32 additions & 30 deletions il_supermarket_scarper/scrappers/tests/test_all.py
Original file line number Diff line number Diff line change
@@ -1,150 +1,152 @@
# pylint: disable=missing-class-docstring,wildcard-import
import pytest

from il_supermarket_scarper.scrappers import *
from il_supermarket_scarper.scrappers_factory import ScraperFactory
from .test_cases import make_test_case


@pytest.mark.run(order=1)
class BareketTestCase(make_test_case(Bareket, 2)):
class BareketTestCase(make_test_case(ScraperFactory.BAREKET, 2)):
pass


@pytest.mark.run(order=2)
class YaynotBitanTestCase(make_test_case(YaynotBitan, 6)):
class YaynotBitanTestCase(make_test_case(ScraperFactory.YAYNO_BITAN, 6)):
pass


@pytest.mark.run(order=3)
class CofixTestCase(make_test_case(Cofix, 299)):
class CofixTestCase(make_test_case(ScraperFactory.COFIX, 299)):
pass


@pytest.mark.run(order=4)
class DorAlonTestCase(make_test_case(DorAlon, 501)):
class DorAlonTestCase(make_test_case(ScraperFactory.DOR_ALON, 501)):
pass


@pytest.mark.run(order=5)
class GoodPharmTestCase(make_test_case(GoodPharm, 952)):
class GoodPharmTestCase(make_test_case(ScraperFactory.GOOD_PHARM, 952)):
pass


@pytest.mark.run(order=6)
class HaziHinamTestCase(make_test_case(HaziHinam, 2)):
class HaziHinamTestCase(make_test_case(ScraperFactory.HAZI_HINAM, 2)):
pass


@pytest.mark.run(order=7)
class KeshetTestCase(make_test_case(Keshet, 5)):
class KeshetTestCase(make_test_case(ScraperFactory.KESHET, 5)):
pass


@pytest.mark.run(order=8)
class KingStoreTestCase(make_test_case(KingStore, 334)):
class KingStoreTestCase(make_test_case(ScraperFactory.KING_STORE, 334)):
pass


@pytest.mark.run(order=9)
class Maayan2000TestCase(make_test_case(Maayan2000, 60)):
class Maayan2000TestCase(make_test_case(ScraperFactory.MAAYAN_2000, 60)):
pass


@pytest.mark.run(order=10)
class MahsaniAShukTestCase(make_test_case(MahsaniAShuk, 98)):
class MahsaniAShukTestCase(make_test_case(ScraperFactory.MAHSANI_ASHUK, 98)):
pass


@pytest.mark.run(order=11)
class MegaMarketTestCase(make_test_case(MegaMarket, 2150)):
class MegaMarketTestCase(make_test_case(ScraperFactory.MEGA_MARKET, 2150)):
pass


@pytest.mark.run(order=12)
class MegaTestCase(make_test_case(Mega, 37)):
class MegaTestCase(make_test_case(ScraperFactory.MEGA, 37)):
pass


@pytest.mark.run(order=13)
class NetivHasefTestCase(make_test_case(NetivHasef, 1)):
class NetivHasefTestCase(make_test_case(ScraperFactory.NETIV_HASED, 1)):
pass


@pytest.mark.run(order=14)
class OsheradTestCase(make_test_case(Osherad, 1)):
class OsheradTestCase(make_test_case(ScraperFactory.OSHER_AD, 1)):
pass


@pytest.mark.run(order=15)
class PolizerTestCase(make_test_case(Polizer, 1)):
class PolizerTestCase(make_test_case(ScraperFactory.POLIZER, 1)):
pass


@pytest.mark.run(order=16)
class RamiLevyTestCase(make_test_case(RamiLevy, 1)):
class RamiLevyTestCase(make_test_case(ScraperFactory.RAMI_LEVY, 1)):
pass


@pytest.mark.run(order=17)
class SalachDabachTestCase(make_test_case(SalachDabach, 4)):
class SalachDabachTestCase(make_test_case(ScraperFactory.SALACH_DABACH, 4)):
pass


@pytest.mark.run(order=18)
class ShefaBarcartAshemTestCase(make_test_case(ShefaBarcartAshem, 41)):
class ShefaBarcartAshemTestCase(make_test_case(ScraperFactory.SHEFA_BARCART_ASHEM, 41)):
pass


@pytest.mark.run(order=19)
class ShufersalTestCase(make_test_case(Shufersal, 176)):
class ShufersalTestCase(make_test_case(ScraperFactory.SHUFERSAL, 176)):
pass


@pytest.mark.run(order=20)
class ShukAhirTestCase(make_test_case(ShukAhir, 4)):
class ShukAhirTestCase(make_test_case(ScraperFactory.SHUK_AHIR, 4)):
pass


@pytest.mark.run(order=21)
class StopMarketTestCase(make_test_case(StopMarket, 5)):
class StopMarketTestCase(make_test_case(ScraperFactory.STOP_MARKET, 5)):
pass


@pytest.mark.run(order=22)
class SuperPharmTestCase(make_test_case(SuperPharm, 224)):
class SuperPharmTestCase(make_test_case(ScraperFactory.SUPER_PHARM, 224)):
pass


@pytest.mark.run(order=23)
class SuperYudaTestCase(make_test_case(SuperYuda, 40)):
class SuperYudaTestCase(make_test_case(ScraperFactory.SUPER_YUDA, 40)):
pass


@pytest.mark.run(order=24)
class FreshMarketAndSuperDoshTestCase(make_test_case(FreshMarketAndSuperDosh, 1)):
class FreshMarketAndSuperDoshTestCase(
make_test_case(ScraperFactory.FRESH_MARKET_AND_SUPER_DOSH, 1)
):
pass


@pytest.mark.run(order=25)
class TivTaamTestCase(make_test_case(TivTaam, 2)):
class TivTaamTestCase(make_test_case(ScraperFactory.TIV_TAAM, 2)):
pass


@pytest.mark.run(order=26)
class VictoryTestCase(make_test_case(Victory, 1)):
class VictoryTestCase(make_test_case(ScraperFactory.VICTORY, 1)):
pass


@pytest.mark.run(order=27)
class YellowTestCase(make_test_case(Yellow, 100)):
class YellowTestCase(make_test_case(ScraperFactory.YELLOW, 100)):
pass


@pytest.mark.run(order=28)
class YohananofTestCase(make_test_case(Yohananof, 1)):
class YohananofTestCase(make_test_case(ScraperFactory.YOHANANOF, 1)):
pass


@pytest.mark.run(order=29)
class ZolVeBegadolTestCase(make_test_case(ZolVeBegadol, 4)):
class ZolVeBegadolTestCase(make_test_case(ScraperFactory.ZOL_VEBEGADOL, 4)):
pass
Loading

0 comments on commit e4cfe30

Please sign in to comment.