Skip to content

Commit

Permalink
Prioritize guessed categories, don't add non-informational names
Browse files (browse the repository at this point in the history)
  • Loading branch information
Zaczero committed Jul 4, 2023
1 parent b2727fd commit b9e4eae
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 35 deletions.
48 changes: 27 additions & 21 deletions ai_name_convert.py
Expand Up @@ -29,30 +29,36 @@ def ai_name_convert(pois: Sequence[UmPoi]) -> Sequence[UmPoi]:
answer = complete(
('You are provided with a list of restaurant names, which are sometimes descriptive. '
'These names originate from a Polish data source. '
'Your task is to extract an own name for each restaurant. '
'Your task is to extract an own name for each restaurant - if there isn\'t one, you output nothing. '
'The output will be in CSV format, just like the input.'),

('1,lokal gastronomiczny Sao Do Asia Food\n'
'2,"BoQ"\n'
'3,Restauracja Garden w hotelu Double Tree by Hilton\n'
'4,Klub Sosnowy\n'
'5,Spiewajaca Lipka\n'
'6,Bar Alchemy w hotelu\n'
'7,w klubie tenisowym Wilga\n'
'8,bar w Hotelu "Boss"\n'
'9,ruchomy punkt gastronomiczny (room service, mini bar, VIP Lounge)\n'
'10,na terenie tymczasowego targowiska "Kawiarnia Olkuska"'),
('1,mała gastronomia\n'
'2,lokal gastronomiczny Sao Do Asia Food\n'
'3,"BoQ"\n'
'4,Restauracja Garden w hotelu Double Tree by Hilton\n'
'5,Klub Sosnowy\n'
'6,Barek z Wygrodzonym Ogródkiem\n'
'7,Spiewajaca Lipka\n'
'8,Bar Alchemy w hotelu "Lisbon"\n'
'9,w klubie tenisowym Wilga\n'
'10,bar w Hotelu "Boss"\n'
'11,bufet dla publiczności w Kasynie\n'
'12,ruchomy punkt gastronomiczny (room service, mini bar, VIP Lounge)\n'
'13,restauracja Indyjska\n'),

('1,Sao Do Asia Food\n'
'2,BoQ\n'
'3,Garden\n'
'4,Klub Sosnowy\n'
'5,Śpiewająca Lipka\n'
'6,Alchemy\n'
'7,Wilga\n'
'8,Boss\n'
'9,Ruchomy punkt gastronomiczny\n'
'10,Kawiarnia Olkuska'),
('1,\n'
'2,Sao Do Asia Food\n'
'3,BoQ\n'
'4,Garden\n'
'5,Klub Sosnowy\n'
'6,\n'
'7,Śpiewająca Lipka\n'
'8,Alchemy\n'
'9,Wilga\n'
'10,Boss\n'
'11,\n'
'12,\n'
'13,\n'),

query)

Expand Down
7 changes: 5 additions & 2 deletions config.py
Expand Up @@ -77,6 +77,7 @@

UM_GUESS_CATEGORY = {
'bistro': 'restauracja',
'burger': 'burgerownia',
'cafe': 'kawiarnia',
'caffe': 'kawiarnia',
'ciastkarnia': 'cukiernia',
Expand All @@ -90,12 +91,14 @@
'piwny': 'piwiarnia',
'pizza': 'pizzeria',
'restaurant': 'restauracja',
'sushi': 'restauracja',
'ristorante': 'restauracja',
'tawerna': 'restauracja',
'winiarnia': 'winiarnia',
'zajazd': 'restauracja',
}

# Sanity check: fails with AssertionError if any element of
# set(UM_CATEGORY_TAGS) is also a key of UM_GUESS_CATEGORY.
# NOTE(review): presumably UM_CATEGORY_TAGS is a mapping keyed by category
# name, so this compares category names against guess keywords — confirm.
# NOTE(review): assert statements are skipped when Python runs with -O.
assert not set(UM_CATEGORY_TAGS).intersection(UM_GUESS_CATEGORY), \
'UM_CATEGORY_TAGS and UM_GUESS_CATEGORY must be disjoint'

DEFAULT_POI_TAGS = {
'source': 'mapa.um.warszawa.pl',
}
Expand Down
8 changes: 7 additions & 1 deletion main.py
Expand Up @@ -32,6 +32,9 @@ def main():
for um_poi, osm_match in zip(um_pois, osm_matches):
um_poi_name = beautify_name(um_poi.name)

if not um_poi_name:
continue

if osm_match is None:
missing_pois.append(um_poi)
print(f'[1/2] ❌ {um_poi_name!r} ({um_poi.category!r})')
Expand All @@ -48,6 +51,9 @@ def main():
for um_poi, osm_match in zip(um_pois, osm_matches):
um_poi_name = beautify_name(um_poi.name)

if not um_poi_name:
continue

if osm_match is None:
missing_pois.append(um_poi)
print(f'[2/2] ❌ {um_poi_name!r} ({um_poi.category!r})')
Expand All @@ -56,7 +62,7 @@ def main():
print(f'[2/2] ✅ {um_poi_name!r} ({um_poi.category!r}) ↔ {osm_match_name!r}')

missing_pois = missing_pois[:LIMIT_CHANGES_PER_CHANGESET]
print(f'🛟 Limiting to {LIMIT_CHANGES_PER_CHANGESET} POIs for safety')
print(f'🛟 Limiting to {LIMIT_CHANGES_PER_CHANGESET} POIs')
print(f'📍 Total POIs to be added: {len(missing_pois)}')

if not missing_pois:
Expand Down
1 change: 0 additions & 1 deletion osm_search.py
@@ -1,6 +1,5 @@
import re
from collections import defaultdict
from pprint import pprint
from typing import Sequence

from rapidfuzz import fuzz, process
Expand Down
6 changes: 2 additions & 4 deletions overpass.py
Expand Up @@ -8,10 +8,8 @@ def build_pois_query(timeout: int) -> str:
return (
f'[out:json][timeout:{timeout}];'
'relation(id:336075);' # https://www.openstreetmap.org/relation/336075
'map_to_area->.r;'
'nwr[amenity](area.r);'
'out tags center qt;'
'nwr[shop](area.r);'
'map_to_area;'
'nwr[!highway][name](area);'
'out tags center qt;'
)

Expand Down
19 changes: 13 additions & 6 deletions um_fetch.py
Expand Up @@ -6,7 +6,7 @@
from rapidfuzz import fuzz, process, utils
from tenacity import retry, stop_after_attempt, wait_exponential

from config import UM_GUESS_CATEGORY, UM_VALID_CATEGORIES
from config import UM_CATEGORY_TAGS, UM_GUESS_CATEGORY, UM_VALID_CATEGORIES
from um_poi import UmPoi
from utils import get_http_client, nice_hash

Expand Down Expand Up @@ -47,13 +47,16 @@ def _guess_category(name: str) -> str:
matches = process.extract(name, _GUESS_CATEGORY_CHOICES,
scorer=fuzz.partial_ratio,
processor=utils.default_process,
limit=1,
limit=3,
score_cutoff=85)

if not matches:
return ''

match = matches[0][0]
if len(matches) == 1:
match = matches[0][0]
else:
match = max(matches, key=lambda m: len(UM_CATEGORY_TAGS[UM_GUESS_CATEGORY.get(m[0], m[0])]))[0]

return UM_GUESS_CATEGORY.get(match, match)

Expand All @@ -67,11 +70,15 @@ def um_fetch_restaurants() -> Sequence[UmPoi]:

for p in foiarray:
lat, lng = _PROJ_TRANSFORMER.transform(p['y'], p['x'])
category, name, address = _parse_details(p['name'])
um_category, name, address = _parse_details(p['name'])

if not category:
category = _guess_category(name)
category = _guess_category(name)

if category:
print(f'🧩 Guessed category {category!r} for {name!r}')
if not category:
category = um_category
print(f'🏱 Using UM category {category!r} for {name!r}')

p_id = nice_hash((name, address))

Expand Down

0 comments on commit b9e4eae

Please sign in to comment.