Merge pull request #2668 from ziv17/2478-add-streets-table-from-datagov
2478 add streets table from datagov
ziv17 committed Jun 14, 2024
2 parents 30fc234 + 24ea9d1 commit dc7b6c2
Showing 4 changed files with 132 additions and 13 deletions.
40 changes: 27 additions & 13 deletions anyway/parsers/infographics_data_cache_updater.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from sqlalchemy import or_
 from datetime import datetime
 from anyway.models import (
     Base,
@@ -9,6 +10,7 @@
     InfographicsStreetDataCacheTemp,
     InfographicsStreetDataCache,
     Streets,
+    AccidentMarker,
 )
 from typing import Dict, Iterable
 from anyway.constants import CONST
@@ -190,23 +192,35 @@ def build_street_cache_into_temp():
     start = datetime.now()
     db.session.query(InfographicsStreetDataCacheTemp).delete()
     db.session.commit()
-    for chunk in chunked_generator(get_street_infographic_keys(), 4960):
-        db.get_engine().execute(
-            InfographicsStreetDataCacheTemp.__table__.insert(),  # pylint: disable=no-member
-            [
-                {
-                    "yishuv_symbol": d["yishuv_symbol"],
-                    "street": d["street1"],
-                    "years_ago": d["years_ago"],
-                    "data": anyway.infographics_utils.create_infographics_data_for_location(d),
-                }
-                for d in chunk
-            ],
-        )
+    for n, chunk in enumerate(chunked_generator(get_street_infographic_keys(), 4960)):
+        cache_chunk = [
+            {
+                "yishuv_symbol": d["yishuv_symbol"],
+                "street": d["street1"],
+                "years_ago": d["years_ago"],
+                "data": anyway.infographics_utils.create_infographics_data_for_location(d),
+            }
+            for d in chunk
+            if street_has_accidents(d["yishuv_symbol"], d["street1"])
+        ]
+        if cache_chunk:
+            logging.debug(f"Adding chunk num {n}, {len(chunk)} entries.")
+            # pylint: disable=no-member
+            db.get_engine().execute(InfographicsStreetDataCacheTemp.__table__.insert(), cache_chunk)
     db.session.commit()
     logging.info(f"cache rebuild took:{str(datetime.now() - start)}")
 
 
+def street_has_accidents(yishuv_symbol: int, street: int) -> bool:
+    return (
+        db.session.query(AccidentMarker)
+        .filter(AccidentMarker.yishuv_symbol == yishuv_symbol)
+        .filter(or_(AccidentMarker.street1 == street, AccidentMarker.street2 == street))
+        .count()
+        > 0
+    )
+
+
 def get_road_segments() -> Iterable[RoadSegments]:
     t = RoadSegments
     segment_iter = iter(db.session.query(t.segment_id).all())
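
Note: chunked_generator() and get_street_infographic_keys() are existing helpers in this module and are not part of the diff. Purely for context, a chunking helper of the shape the loop above assumes could look like the minimal sketch below; the actual implementation in the repository may differ.

from itertools import islice
from typing import Iterable, Iterator, List, TypeVar

T = TypeVar("T")


def chunked_generator(iterable: Iterable[T], chunk_size: int) -> Iterator[List[T]]:
    # Batch any iterable into lists of at most chunk_size items.
    it = iter(iterable)
    while True:
        chunk = list(islice(it, chunk_size))
        if not chunk:
            return
        yield chunk

On the design of street_has_accidents(): it issues one COUNT query per candidate street; wrapping the filtered query in an EXISTS (db.session.query(query.exists()).scalar()) is usually cheaper on large tables, but the count comparison used in the diff keeps the intent explicit.
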
90 changes: 90 additions & 0 deletions anyway/parsers/streets.py
@@ -0,0 +1,90 @@
import requests
import json
from typing import Iterable, Dict, Any, List
from anyway.models import Streets
from anyway.app_and_db import db
import logging

CBS_STREETS_RESOURCES_URL = "https://data.gov.il/api/3/action/package_show?id=321"
RESOURCE_NAME = "רשימת רחובות בישראל - מתעדכן"
BASE_GET_DATA_GOV = "https://data.gov.il/dataset/321"
RESOURCE_DOWNLOAD_TEMPLATE = (
    "https://data.gov.il/api/3/action/datastore_search?resource_id={id}&limit=100000"
)
STREETS_FIlE_YISHUV_NAME = "שם_ישוב"
STREETS_FIlE_YISHUV_SYMBOL = "סמל_ישוב"
STREETS_FIlE_STREET_NAME = "שם_רחוב"
STREETS_FIlE_STREET_SYMBOL = "סמל_רחוב"
CHUNK_SIZE = 1000


class UpdateStreetsFromCSB:
    def __init__(self):
        self.s = requests.Session()

    def get_cbs_streets_download_url(self):
        response = self.s.get(CBS_STREETS_RESOURCES_URL)
        if not response.ok:
            raise Exception(
                f"Could not get streets url. reason:{response.reason}:{response.status_code}"
            )
        data = json.loads(response.text)
        if (
            not data.get("success")
            and not data.get("result")
            and not data["result"].get("resources")
        ):
            raise Exception(f"Could not get streets url. received bad data:{data.get('success')}")
        it = filter(
            lambda x: x["name"] == RESOURCE_NAME and x["format"] == "CSV",
            data["result"].get("resources"),
        )
        item = list(it)[0]
        url = RESOURCE_DOWNLOAD_TEMPLATE.format(id=item["id"])
        logging.info(f"Streets data last updated: {item['last_modified']}")
        # url_part = item["url"][take_from + 1:]
        return url
        # return f"{BASE_GET_DATA_GOV}/{url_part}"

    def get_streets_data_chunks(self, url: str, chunk_size: int) -> Iterable[List[Dict[str, Any]]]:
        # r = requests.get(url, stream=True, allow_redirects=True)
        r = self.s.get(url)
        if not r.ok:
            raise Exception(f"Could not get streets url. reason:{r.reason}:{r.status_code}")
        data = json.loads(r.text)
        chunk = []
        logging.debug(f"read {len(data['result']['records'])} records from {url}.")
        for item in data["result"]["records"]:
            street_name = item[STREETS_FIlE_STREET_NAME]
            street_name_len = len(street_name)
            street_entry = {
                "yishuv_symbol": item[STREETS_FIlE_YISHUV_SYMBOL],
                "street": item[STREETS_FIlE_STREET_SYMBOL],
                "street_hebrew": street_name[: min(street_name_len, Streets.MAX_NAME_LEN)],
            }
            chunk.append(street_entry)
            if len(chunk) == chunk_size:
                yield chunk
                chunk = []
        if chunk:
            logging.debug(f"last chunk: {len(chunk)}.")
            yield chunk

    def import_street_file_into_db(self, url: str, chunk_size: int):
        num = 0
        db.session.query(Streets).delete()
        for chunk in self.get_streets_data_chunks(url=url, chunk_size=chunk_size):
            db.session.bulk_insert_mappings(Streets, chunk)
            num += len(chunk)
        db.session.commit()
        logging.info(f"{num} records written to Streets table.")


def parse(chunk_size=CHUNK_SIZE):
    instance = UpdateStreetsFromCSB()
    res = instance.get_cbs_streets_download_url()
    instance.import_street_file_into_db(res, chunk_size)


if __name__ == "__main__":
    parse()
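
A quick sanity check after running parse() might look like the sketch below. It assumes the Streets model exposes the yishuv_symbol, street and street_hebrew columns that the bulk insert above populates; the yishuv symbol in the filter is only a placeholder value.

from anyway.app_and_db import db
from anyway.models import Streets

total = db.session.query(Streets).count()
sample = (
    db.session.query(Streets)
    .filter(Streets.yishuv_symbol == 5000)  # placeholder yishuv symbol
    .limit(5)
    .all()
)
print(f"{total} streets imported; sample: {[s.street_hebrew for s in sample]}")
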
4 changes: 4 additions & 0 deletions anyway/widgets/no_location_widgets/vision_zero_bike_widget.py
@@ -36,3 +36,7 @@ def localize_items(request_params: RequestParams, items: Dict) -> Dict:
     @classmethod
     def update_result(cls, request_params: RequestParams, cached_items: Dict) -> Optional[Dict]:
         return cached_items if cls.is_included_according_to_request_params(request_params) else None
+
+    @staticmethod
+    def is_relevant(request_params: RequestParams) -> bool:
+        return request_params.news_flash_description is not None
11 changes: 11 additions & 0 deletions main.py
@@ -136,6 +136,7 @@ def rsa(filename):
 @process.command()
 @click.argument("filename", type=str, default="static/data/segments/road_segments.xlsx")
 def road_segments(filename):
+    """Update road_segments table from xlsx file"""
     from anyway.parsers.road_segments import parse
 
     return parse(filename)
@@ -144,11 +145,21 @@ def road_segments(filename):
 @process.command()
 @click.argument("filename", type=str, default="static/data/suburban_junctions/suburban_junctions.xlsx")
 def suburban_junctions(filename):
+    """Update suburban_junction table from xlsx file"""
     from anyway.parsers.suburban_junctions import parse
 
     return parse(filename)
 
 
+@process.command()
+@click.argument("chunk-size", type=int, default=1000)
+def streets(chunk_size):
+    """Update streets table from CBS site"""
+    from anyway.parsers.streets import parse
+
+    return parse(chunk_size)
+
+
 @process.command()
 @click.argument("filepath", type=str, default="static/data/schools/schools.csv")
 @click.option("--batch_size", type=int, default=5000)
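
The new streets command simply forwards the chunk size to the parser, so the same import can also be triggered directly from Python. A minimal sketch, assuming the anyway package and its database configuration are importable in the current environment:

from anyway.parsers.streets import parse

# Equivalent to the new CLI command with a non-default chunk size:
# it fetches the CBS street list from data.gov.il and rewrites the Streets table.
parse(chunk_size=500)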
