In [1]:
import requests
import pandas as pd

OVERPASS_URL = "https://overpass-api.de/api/interpreter"
#OVERPASS_URL = "https://overpass.openstreetmap.ru/api/interpreter"

def get_bars_regensburg_df(center_lat=49.019533, center_lon=12.097487, radius_m=1200):
    query = f"""
    [out:json][timeout:60];
    (
      nwr["amenity"="bar"](around:{radius_m},{center_lat},{center_lon});
      nwr["amenity"="pub"](around:{radius_m},{center_lat},{center_lon});
    );
    out center tags;
    """
    r = requests.get(OVERPASS_URL, params={"data": query}, timeout=90)
    r.raise_for_status()
    data = r.json()

    rows = []
    for el in data.get("elements", []):
        tags = el.get("tags", {})
        lat = el.get("lat") or el.get("center", {}).get("lat")
        lon = el.get("lon") or el.get("center", {}).get("lon")
        rows.append({
            "name": tags.get("name"),
            "amenity": tags.get("amenity"),
            "lat": lat,
            "lon": lon,
            "opening_hours": tags.get("opening_hours"),
            #"website": tags.get("website") or tags.get("contact:website"),
            #"phone": tags.get("phone") or tags.get("contact:phone"),
            "street": tags.get("addr:street"),
            "housenumber": tags.get("addr:housenumber"),
            "postcode": tags.get("addr:postcode"),
            "city": tags.get("addr:city"),
            #"osm_type": el.get("type"),
            #"osm_id": el.get("id"),
        })

    df = pd.DataFrame(rows).dropna(subset=["lat","lon"])
    return df

# Beispiel:
df = get_bars_regensburg_df(radius_m=1200)
df.head()


HTTPError: 504 Server Error: Gateway Timeout for url: https://overpass-api.de/api/interpreter?data=%0A++++%5Bout%3Ajson%5D%5Btimeout%3A60%5D%3B%0A++++%28%0A++++++nwr%5B%22amenity%22%3D%22bar%22%5D%28around%3A1200%2C49.019533%2C12.097487%29%3B%0A++++++nwr%5B%22amenity%22%3D%22pub%22%5D%28around%3A1200%2C49.019533%2C12.097487%29%3B%0A++++%29%3B%0A++++out+center+tags%3B%0A++++

In [2]:
#save the data as a backup cause of timeout errors 
df.to_csv("regensburg_bars_backup.csv", index=False)

NameError: name 'df' is not defined

Luftlinie zwischen zwei Objekten (User oder Bars)

In [3]:
import math




def haversine_m(lat1, lon1, lat2, lon2):
    R = 6371000  # meters
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlambda = math.radians(lon2 - lon1)

    a = math.sin(dphi/2)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(dlambda/2)**2
    return 2 * R * math.asin(math.sqrt(a))




In [4]:
user_lat, user_lon = 49.019533, 12.097487
bar_lat, bar_lon = 49.0208, 12.0959

dist_m = haversine_m(user_lat, user_lon, bar_lat, bar_lon)
print(f"Distanz: {dist_m:.0f} m")


Distanz: 182 m


Aus Adresse machen wir long, lat für haversine_m

In [5]:
def geocode_address(address: str):
    url = "https://nominatim.openstreetmap.org/search"
    params = {
        "q": address,
        "format": "json",
        "limit": 1,
    }
    headers = {
        "User-Agent": "pubcrawl-uni-app/1.0 (contact: d.sowada99@gmail.com)"
    }
    r = requests.get(url, params=params, headers=headers, timeout=30)
    r.raise_for_status()
    data = r.json()
    if not data:
        return None
    return float(data[0]["lat"]), float(data[0]["lon"])

# Beispiel:
lat, lon = geocode_address("Neupfarrplatz 1, 93047 Regensburg")
print(lat)
print(lon)

49.0183411
12.0963412


Workflow generieren:
- ergebniss entfernung in m von user zu bar addr


In [6]:
#Adresse
bar = "Neupfarrplatz 1, 93047 Regensburg"
user = "Im Anger 89, 93098 Mintraching"

bar_lat, bar_long = geocode_address(bar)
user_lat, user_long = geocode_address(user)

print(haversine_m(bar_lat, bar_long,user_lat, user_long ))


13075.5693429144


Nächste schritt: 
Manipulieren der Daten und Gewichtungsalgorithmus der Präferenzen
- Opening hours
- Essen
- 

In [14]:
def add_distance(df, user_lat, user_lon):
    distances = []

    for _, row in df.iterrows():
        lat = row["lat"]
        lon = row["lon"]

        if pd.isna(lat) or pd.isna(lon):
            distances.append(None)
        else:
            distances.append(haversine_m(user_lat, user_lon, lat, lon))

    df = df.copy()
    df["distance_m"] = distances
    return df



In [None]:
#user input k -> output 2k+2 nearest bars for further computing
def sort_df(df, input):
    new_df=(
    df.dropna(subset=["lat", "lon", "distance_m"]) 
      .sort_values("distance_m", ascending=True)    
      .head(2 * input +2)                                  
      .reset_index(drop=True)
    )
    return new_df


In [32]:
import re
from datetime import datetime

def is_open_now_basic(opening_hours: str, now: datetime):
    # minimal support for strings like: "Mo-Sa 20:00-02:00"
    if not isinstance(opening_hours, str) or not opening_hours.strip():
        return None

    s = opening_hours.strip()
    m = re.search(r"(Mo|Tu|We|Th|Fr|Sa|Su)\s*-\s*(Mo|Tu|We|Th|Fr|Sa|Su)\s+(\d{1,2}:\d{2})\s*-\s*(\d{1,2}:\d{2})", s)
    if not m:
        return None

    a, b, start, end = m.group(1), m.group(2), m.group(3), m.group(4)
    days = ["Mo","Tu","We","Th","Fr","Sa","Su"]
    ia, ib = days.index(a), days.index(b)
    valid_days = days[ia:ib+1] if ia <= ib else days[ia:] + days[:ib+1]

    wd = days[now.weekday()]

    def to_min(hm):
        h, mm = hm.split(":")
        return int(h) * 60 + int(mm)

    now_min = now.hour * 60 + now.minute
    start_min = to_min(start)
    end_min = to_min(end)

    if wd not in valid_days:
        return False

    # handle crossing midnight (e.g. 20:00-02:00)
    if start_min <= end_min:
        return start_min <= now_min <= end_min
    return (now_min >= start_min) or (now_min <= end_min)


def add_opening_hours_features(df, now: datetime):
    df = df.copy()

    # take opening_hours_raw if available, else opening_hours
    col = "opening_hours_raw" if "opening_hours_raw" in df.columns else "opening_hours"

    open_now_list = []
    open_score_list = []

    for _, row in df.iterrows():
        v = is_open_now_basic(row.get(col), now)  # True/False/None
        open_now_list.append(v)

        if v is True:
            open_score_list.append(1.0)
        elif v is False:
            open_score_list.append(0.0)
        else:
            open_score_list.append(0.5)

    df["open_now"] = open_now_list
    df["open_score"] = open_score_list
    return df


Ranking after different factorsdistance, openening time

In [37]:
def rank_and_select(df) -> pd.DataFrame:
    c = df.copy()

    dmin = c["distance_m"].min()
    dmax = c["distance_m"].max()

    if dmax > dmin:
        c["distance_score"] = 1.0 - (c["distance_m"] - dmin) / (dmax - dmin)
    else:
        c["distance_score"] = 1.0

    c["final_score"] = 0.8 * c["open_score"] + 0.2 * c["distance_score"]

    ranked = (
        c.sort_values(["final_score", "distance_m"], ascending=[False, True])
         .reset_index(drop=True)
    )
    return ranked


In [25]:
from datetime import datetime

def get_bars_to_visit(csv_path, user_lat, user_lon, k):
    df = pd.read_csv(csv_path)

    # Features hinzufügen
    df = add_distance(df, user_lat, user_lon)
    df = sort_df(df,k)
    df = add_opening_hours_features(df, now=datetime.now())

    # 2k Kandidaten
    #candidates = select_candidates(df, k)

    # Ranking + finale Auswahl
    result = rank_and_select(df)
    return result


In [40]:
user_lat, user_lon = 49.01851116386774, 12.093437072491486 
csv_path = "regensburg_bars_backup.csv"
final_df = get_bars_to_visit(csv_path,user_lat,user_long,8)
final_df.head(20)

Unnamed: 0,name,amenity,lat,lon,opening_hours,street,housenumber,postcode,city,distance_m,open_now,open_score,distance_score,final_score
0,Clixx Billard & Bar,pub,49.009734,12.100087,"Mo-Th 18:00-02:00, Fr,Sa 18:00-03:00, Su 14:00...",Galgenbergstraße,2c,93053.0,Regensburg,10770.297162,True,1.0,0.25595,0.85119
1,Rotkäppchen,pub,49.015321,12.097836,Mo-Su 20:00-02:00,Fröhliche-Türken-Straße,14,93047.0,Regensburg,10895.296638,True,1.0,0.130701,0.82614
2,Filmbühne,pub,49.020844,12.097123,Mo-Su 20:30-02:00,Taubengäßchen,2,93047.0,Regensburg,10944.034816,True,1.0,0.081865,0.816373
3,Irish Harp,pub,49.020842,12.096939,"Su-Th 18:00-01:00, Fr,Sa 16:00-02:00",,,,,10957.419943,True,1.0,0.068453,0.813691
4,Barock Bar,bar,49.019993,12.096814,Mo-Su 20:00-24:00,,,,,10964.766285,True,1.0,0.061092,0.812218
5,Klappe,pub,49.025576,12.097004,Mo-Su 16:00-23:30,Stadtamhof,13,93059.0,Regensburg,10977.232312,True,1.0,0.048601,0.80972
6,Kinokneipe,pub,49.017978,12.109652,,,,,,10027.730935,,0.5,1.0,0.6
7,Mamonia Lounge,bar,49.01472,12.106143,,,,,,10292.425829,,0.5,0.734776,0.546955
8,Dudes4,bar,49.020147,12.100506,Mo-Su 19:00+,,,,,10695.894259,,0.5,0.330502,0.4661
9,Lok.in,bar,49.012293,12.099879,,,,,,10763.091181,,0.5,0.26317,0.452634
