def build_street_cache_into_temp():
    """Rebuild the temporary street infographics cache table from scratch.

    Empties ``InfographicsStreetDataCacheTemp`` and refills it, chunk by chunk,
    with the infographics data of every key yielded by
    ``get_street_infographic_keys()`` whose street has at least one
    ``AccidentMarker`` row (see ``street_has_accidents``). Keys of streets
    without accidents are skipped, so no infographics data is computed or
    stored for them.
    """
    start = datetime.now()
    db.session.query(InfographicsStreetDataCacheTemp).delete()
    db.session.commit()
    # Presumably the same (yishuv_symbol, street) pair shows up once per
    # years_ago value; remember each answer so the database is asked only once
    # per street during a rebuild.
    has_accidents = {}
    for n, chunk in enumerate(chunked_generator(get_street_infographic_keys(), 4960)):
        cache_chunk = []
        for d in chunk:
            street_key = (d["yishuv_symbol"], d["street1"])
            if street_key not in has_accidents:
                has_accidents[street_key] = street_has_accidents(*street_key)
            if not has_accidents[street_key]:
                continue
            cache_chunk.append(
                {
                    "yishuv_symbol": d["yishuv_symbol"],
                    "street": d["street1"],
                    "years_ago": d["years_ago"],
                    "data": anyway.infographics_utils.create_infographics_data_for_location(d),
                }
            )
        if cache_chunk:
            # Report the number of rows actually inserted, not the size of the
            # unfiltered chunk.
            logging.debug(f"Adding chunk num {n}, {len(cache_chunk)} entries.")
            # pylint: disable=no-member
            db.get_engine().execute(InfographicsStreetDataCacheTemp.__table__.insert(), cache_chunk)
    db.session.commit()
    logging.info(f"cache rebuild took:{str(datetime.now() - start)}")


def street_has_accidents(yishuv_symbol: int, street: int) -> bool:
    """Return True if at least one accident marker references the given street.

    A marker matches when its ``yishuv_symbol`` equals ``yishuv_symbol`` and
    either its ``street1`` or its ``street2`` equals ``street``.
    """
    matching_markers = (
        db.session.query(AccidentMarker)
        .filter(AccidentMarker.yishuv_symbol == yishuv_symbol)
        .filter(or_(AccidentMarker.street1 == street, AccidentMarker.street2 == street))
    )
    # EXISTS lets the database stop at the first matching row instead of
    # counting every one of them.
    return bool(db.session.query(matching_markers.exists()).scalar())
CBS_STREETS_RESOURCES_URL = "https://data.gov.il/api/3/action/package_show?id=321"
RESOURCE_NAME = "רשימת רחובות בישראל - מתעדכן"
BASE_GET_DATA_GOV = "https://data.gov.il/dataset/321"
RESOURCE_DOWNLOAD_TEMPLATE = (
    "https://data.gov.il/api/3/action/datastore_search?resource_id={id}&limit=100000"
)
STREETS_FIlE_YISHUV_NAME = "שם_ישוב"
STREETS_FIlE_YISHUV_SYMBOL = "סמל_ישוב"
STREETS_FIlE_STREET_NAME = "שם_רחוב"
STREETS_FIlE_STREET_SYMBOL = "סמל_רחוב"
CHUNK_SIZE = 1000


class UpdateStreetsFromCSB:
    """Download the street list published on data.gov.il and load it into ``Streets``."""

    def __init__(self):
        # One HTTP session is reused for the metadata request and the data request.
        self.s = requests.Session()

    def get_cbs_streets_download_url(self):
        """Return the ``datastore_search`` URL of the streets CSV resource.

        Fetches the package description at ``CBS_STREETS_RESOURCES_URL`` and
        picks the resource whose name is ``RESOURCE_NAME`` and whose format is
        ``CSV``.

        Raises:
            Exception: if the HTTP request fails, the payload is not marked
                successful, it carries no resources, or no resource matches.
        """
        response = self.s.get(CBS_STREETS_RESOURCES_URL)
        if not response.ok:
            raise Exception(
                f"Could not get streets url. reason:{response.reason}:{response.status_code}"
            )
        data = json.loads(response.text)
        resources = (data.get("result") or {}).get("resources")
        # Any one of these missing makes the payload unusable, so the checks are
        # OR-ed; the lookups are safe even when "result" is absent.
        if not data.get("success") or not resources:
            raise Exception(f"Could not get streets url. received bad data:{data.get('success')}")
        item = next(
            (
                resource
                for resource in resources
                if resource.get("name") == RESOURCE_NAME and resource.get("format") == "CSV"
            ),
            None,
        )
        if item is None:
            # Fail with a descriptive message rather than an IndexError.
            raise Exception(f"Could not get streets url. no CSV resource named {RESOURCE_NAME!r}")
        logging.info(f"Streets data last updated: {item.get('last_modified')}")
        return RESOURCE_DOWNLOAD_TEMPLATE.format(id=item["id"])

    def get_streets_data_chunks(self, url: str, chunk_size: int) -> Iterable[List[Dict[str, Any]]]:
        """Yield the street records found at ``url`` as lists of ``Streets`` mappings.

        Each yielded list holds ``chunk_size`` entries, except possibly the
        last one. Every entry has the keys ``yishuv_symbol``, ``street`` and
        ``street_hebrew``; the name is truncated to ``Streets.MAX_NAME_LEN``
        characters.

        Raises:
            Exception: if the HTTP request fails.
        """
        r = self.s.get(url)
        if not r.ok:
            raise Exception(f"Could not get streets url. reason:{r.reason}:{r.status_code}")
        result = json.loads(r.text)["result"]
        records = result["records"]
        logging.debug(f"read {len(records)} records from {url}.")
        # The download URL caps the result with a fixed limit; warn when the
        # server reports more rows than were returned, otherwise the table
        # would be silently incomplete.
        total = result.get("total")
        if isinstance(total, int) and total > len(records):
            logging.warning(f"read only {len(records)} of {total} street records from {url}.")
        chunk = []
        for item in records:
            chunk.append(
                {
                    "yishuv_symbol": item[STREETS_FIlE_YISHUV_SYMBOL],
                    "street": item[STREETS_FIlE_STREET_SYMBOL],
                    # Slicing already copes with names shorter than the limit.
                    "street_hebrew": item[STREETS_FIlE_STREET_NAME][: Streets.MAX_NAME_LEN],
                }
            )
            if len(chunk) == chunk_size:
                yield chunk
                chunk = []
        if chunk:
            logging.debug(f"last chunk: {len(chunk)}.")
            yield chunk

    def import_street_file_into_db(self, url: str, chunk_size: int):
        """Replace the content of the ``Streets`` table with the records at ``url``.

        The delete and all inserts are committed together. On any failure the
        session is rolled back, so the pending delete is discarded, and the
        error is re-raised.
        """
        num = 0
        try:
            db.session.query(Streets).delete()
            for chunk in self.get_streets_data_chunks(url=url, chunk_size=chunk_size):
                db.session.bulk_insert_mappings(Streets, chunk)
                num += len(chunk)
            db.session.commit()
        except Exception:
            db.session.rollback()
            raise
        logging.info(f"{num} records written to Streets table.")


def parse(chunk_size=CHUNK_SIZE):
    """Refresh the ``Streets`` table, inserting ``chunk_size`` records at a time."""
    instance = UpdateStreetsFromCSB()
    res = instance.get_cbs_streets_download_url()
    instance.import_street_file_into_db(res, chunk_size)


if __name__ == "__main__":
    parse()
@process.command()
@click.argument("filename", type=str, default="static/data/segments/road_segments.xlsx")
def road_segments(filename):
    """Update road_segments table from xlsx file"""
    # Function-scope import: the parser module is loaded only when this command runs.
    import anyway.parsers.road_segments as road_segments_parser

    return road_segments_parser.parse(filename)


@process.command()
@click.argument("filename", type=str, default="static/data/suburban_junctions/suburban_junctions.xlsx")
def suburban_junctions(filename):
    """Update suburban_junction table from xlsx file"""
    # Function-scope import: the parser module is loaded only when this command runs.
    import anyway.parsers.suburban_junctions as suburban_junctions_parser

    return suburban_junctions_parser.parse(filename)


@process.command()
@click.argument("chunk-size", type=int, default=1000)
def streets(chunk_size):
    """Update streets table from CBS site"""
    # Function-scope import: the parser module is loaded only when this command runs.
    import anyway.parsers.streets as streets_parser

    return streets_parser.parse(chunk_size)