In [1]:
from config import Config
from models import create_llm 
from langchain.prompts import SystemMessagePromptTemplate, ChatPromptTemplate,PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.chains import LLMChain
from pydantic import BaseModel, Field

# Instantiate the LLM through the project's `create_llm` factory (imported from
# `models`), using the model configured as `Config.MODEL`.
llm = create_llm(Config.MODEL) 

In [2]:
class OverpassQueryModel(BaseModel):
    # Structured answer expected from the LLM for one boundary request.
    # All four fields are required (`...` is passed as the default).
    # NOTE(review): documented with comments rather than a class docstring, since a
    # docstring may be surfaced in the generated schema — confirm before adding one.

    # Main area the request is about.
    area_name: str = Field(
        ...,
        description="The name of the area to be searched, e.g., 'France', 'Paris', etc.",
    )

    # Wider region the area belongs to.
    region_name: str = Field(
        ...,
        description="The name of the region, e.g., 'Asia', 'Europe' etc.",
    )

    # OSM administrative level used in the query.
    admin_level: int = Field(
        ...,
        description="The OSM administrative level to search for, e.g., 2 for countries, 4 for regions.",
    )

    # The generated Overpass QL text itself.
    overpass_query: str = Field(
        ...,
        description="A complete Overpass QL query for retrieving the requested administrative boundaries.",
    )

In [39]:
# Prompt that turns a natural-language request into an Overpass QL query.
# It is rendered by PromptTemplate with f-string style placeholders, so literal
# braces meant for the LLM are doubled (`{{area_name}}`), while `{format_instructions}`
# and `{query}` are substituted at run time.
#
# Fixes relative to the previous version:
#   * the "Return a JSON object" key list now includes `region_name`, which
#     OverpassQueryModel requires (the list used to contradict the format instructions);
#   * departments are mapped to admin_level 6 (level 4 is the region level in
#     France, which is the example this prompt itself uses).
prompt_template = """
You are a geospatial data engineer specializing in OpenStreetMap (OSM) and the Overpass API.
Your task is to generate a valid, well-formatted Overpass QL query that returns full polygon boundaries as relation elements based on the user's natural language query.

Rules:

1. If the user requests multiple countries inside a recognized group (e.g., "European Union", "GCC countries", "countries in Asia"):
   - Replace the group name with the exact list of its member countries using their common English names.
   - Generate a union query with multiple relation filters, one per country, wrapped inside parentheses, like this:

     [out:json][timeout:60];
     (
       relation["boundary"="administrative"]["admin_level"=2]["name"="Country1"];
       relation["boundary"="administrative"]["admin_level"=2]["name"="Country2"];
       ...
     );
     out geom qt;

2. For queries requesting administrative areas within a single area (e.g., "departments in France", "arrondissements in Paris"):
   - Use this pattern to define the area and query relations inside it:

     [out:json][timeout:25];
     area["name"="{{area_name}}"]->.searchArea;
     relation["boundary"="administrative"]["admin_level"={{admin_level}}](area.searchArea);
     out geom qt;

   - Use the appropriate `admin_level` based on the requested administrative division:
     - Countries: admin_level = 2
     - Regions / provinces: admin_level = 4 or 6 (based on country)
     - Departments: admin_level = 6
     - Arrondissements / districts: admin_level = 8 or 9
     - Default to admin_level = 8 if uncertain.

3. Always:
   - Use valid Overpass QL syntax.
   - Format the query with line breaks and indentation exactly as shown.
   - DO NOT put `out` statements inside parentheses.
   - Do NOT include `>;` if using `out geom qt` (this already returns full geometry).
   - Return a JSON object with these keys:
     - `area_name` (string) — the main queried area or group name.
     - `region_name` (string) — the wider region the area belongs to, e.g., "Asia" or "Europe".
     - `admin_level` (int) — used in the query.
     - `overpass_query` (string) — the full Overpass QL query, correctly formatted.
   - Return only the JSON object, no explanations or extra text.

{format_instructions}

User query: {query}
"""

# Parser that validates the LLM answer against OverpassQueryModel and also
# supplies the schema text injected as `format_instructions`.
parser = PydanticOutputParser(pydantic_object=OverpassQueryModel)

# Only `query` is supplied per call; the format instructions are fixed up front.
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# NOTE(review): LLMChain is deprecated in recent LangChain releases in favour of
# `prompt | llm`; kept here because the next cell reads the result's "text" key.
chain = LLMChain(llm=llm, prompt=prompt)

query = "show me boundry of uae"

result = chain.invoke({"query": query})

result

{'query': 'show me boundry of uae',
 'text': '```json\n{\n  "area_name": "United Arab Emirates",\n  "region_name": "GCC countries",\n  "admin_level": 2,\n  "overpass_query": "[out:json][timeout:60];\\n(relation[\\"boundary\\"=\\"administrative\\"][\\"admin_level\\"=2][\\"name\\"=\\"United Arab Emirates\\"]);\\nout geom qt;"\n}\n```'}

In [40]:
# Parse the raw completion (the chain result's "text" entry) into an
# OverpassQueryModel instance, then print the query so its line breaks render.
structured_output = parser.parse(result['text'])
print(structured_output.overpass_query)


[out:json][timeout:60];
(relation["boundary"="administrative"]["admin_level"=2]["name"="United Arab Emirates"]);
out geom qt;


In [5]:
# import requests

# OVERPASS_URL = "https://overpass-api.de/api/interpreter"

# # Example Overpass QL query to fetch admin_level=4 boundaries inside France
# query = """
# [out:json][timeout:25];
# area["name"="France"]->.searchArea;
# relation["boundary"="administrative"]["admin_level"=4](area.searchArea);
# >;
# out geom qt;"""

# def run_overpass_query(query: str):
#     response = requests.post(OVERPASS_URL, data={"data": query})
    
#     if response.status_code == 200:
#         return response.json()  # parsed JSON
#     else:
#         raise Exception(f"Overpass query failed with status {response.status_code}:\n{response.text}")
# try:
#     result = run_overpass_query(query)
#     print(f"Returned {len(result.get('elements', []))} elements.")
 
# except Exception as e:
#     print("Error:", e)    


In [6]:
# # import osm2geojson
# # import folium

# # shapes_with_props = osm2geojson.json2geojson(result)
 

# from geojson import Feature, FeatureCollection, Polygon

# features = []
# for el in result["elements"]:
#     if el["type"] == "relation" and "geometry" in el:
#         coords = [(pt["lon"], pt["lat"]) for pt in el["geometry"]]
#         # Simple polygon: assumes single ring (no holes)
#         polygon = Polygon([coords])
#         features.append(Feature(geometry=polygon, properties={"name": el.get("tags", {}).get("name", "unknown")}))

# geojson_data = FeatureCollection(features)
# geojson_data


In [7]:
# # Folium map centered around Paris
# map_center = [48.8566, 2.3522]
# m = folium.Map(location=map_center, zoom_start=12)
# # Add polygons to map
# folium.GeoJson(shapes_with_props).add_to(m)
# # Save or display map
# m

In [41]:
import geopandas as gpd
import folium

# Load the global boundary dataset (GeoJSON).
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
gdf = gpd.read_file("/Users/noamananwaar/Desktop/Projects/python/OSM_LLM/level1.json")

# Keep only polygonal geometries; anything else cannot be drawn as a boundary.
gdf = gdf[gdf.geometry.type.isin(["Polygon", "MultiPolygon"])]

# Countries to display (common English names).
# The `eu_` prefix and the "eu_countries_map.html" file name are historical: the
# selection is not restricted to EU members (it currently includes the UAE).
eu_countries = [
    "United Arab Emirates","France"
]

# Use the first candidate column present in the dataset as the country name.
country_col = next(
    (col for col in ["name", "country", "admin", "NAME", "CNTRY_NAME"] if col in gdf.columns),
    None,
)

if country_col is None:
    raise ValueError("No country name column found in GeoDataFrame!")

# Rows whose name is one of the requested countries.
gdf_eu = gdf[gdf[country_col].isin(eu_countries)]

# Report requested names that matched nothing instead of silently dropping them
# (a spelling mismatch with the dataset would otherwise yield an incomplete map).
missing_countries = sorted(set(eu_countries) - set(gdf_eu[country_col]))
if missing_countries:
    print(f"⚠️ No boundary found for: {', '.join(missing_countries)}")

# Initial view; replaced below by the extent of the selection when there is one.
m = folium.Map(location=[54, 15], zoom_start=4)

# Fit the view to the selected geometries so countries outside Europe are visible.
# total_bounds is (minx, miny, maxx, maxy); fit_bounds wants [[south, west], [north, east]].
# Assumes the data is in lon/lat degrees (the GeoJSON default) — TODO confirm.
if not gdf_eu.empty:
    min_lon, min_lat, max_lon, max_lat = (float(v) for v in gdf_eu.total_bounds)
    m.fit_bounds([[min_lat, min_lon], [max_lat, max_lon]])

# Add the selected countries to the map, with the name shown on hover.
folium.GeoJson(
    gdf_eu,
    name="European Union Countries",
    tooltip=folium.GeoJsonTooltip(fields=[country_col])
).add_to(m)

# Save the map as a standalone HTML file.
m.save("eu_countries_map.html")
print("✅ Map saved as eu_countries_map.html")


✅ Map saved as eu_countries_map.html


In [42]:
# Display the folium map currently bound to `m` as the cell output.
m

In [64]:
import geopandas as gpd
import folium

# Read the France shapefile from disk.
gdf = gpd.read_file(
    "/Users/noamananwaar/Desktop/Projects/python/OSM_LLM/geodata/countries/FRA/fra_0.shp"
)

# Keep the rows whose COUNTRY attribute is "France"
# (the file is already France-only, so this may be a no-op).
filtered = gdf.loc[gdf["COUNTRY"] == "France"]

# Base map centred on France.
m = folium.Map(zoom_start=6, location=[46.5, 2.5])

# Draw the selected polygons as a GeoJSON layer.
folium.GeoJson(filtered, name="France Departments").add_to(m)

# Write the map to a standalone HTML file and confirm.
m.save("france_departments_map.html")
print("Map saved as france_departments_map.html")


Map saved as france_departments_map.html


In [57]:
# Display the folium map currently bound to `m` as the cell output.
# NOTE(review): this cell's execution count (57) is lower than the preceding
# cell's (64), so any saved output may predate that cell — re-run in order.
m

In [71]:
import geopandas as gpd
import folium

# Load level 3 shapefile (arrondissements/districts)
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
gdf = gpd.read_file("/Users/noamananwaar/Desktop/Projects/python/OSM_LLM/geodata/countries/FRA/fra_7.shp")

# Inspect columns
# print(gdf.columns)
# print(gdf.head())
# Display the whole GeoDataFrame as the cell output (the recorded output shows
# GID_3 / NAME_3 columns with TYPE_3 "Arrondissement").
gdf
# # Filter to Paris by name (usually 'NAME_2' or 'NAME_3' fields)
# # First, check which field corresponds to city or department name
# # For Paris districts, often NAME_2 = "Paris" and NAME_3 = arrondissement name

# paris_districts = gdf[gdf["NAME_2"] == "Paris"]

# print(paris_districts[["NAME_2", "NAME_3"]])

# # Create map centered on Paris
# m = folium.Map(location=[48.8566, 2.3522], zoom_start=12)

# # Add Paris districts polygons
# folium.GeoJson(
#     paris_districts,
#     name="Paris Districts",
#     tooltip=folium.GeoJsonTooltip(fields=["NAME_3"], aliases=["District:"])
# ).add_to(m)

# m


Unnamed: 0,GID_3,GID_0,COUNTRY,GID_1,NAME_1,NL_NAME_1,GID_2,NAME_2,NL_NAME_2,NAME_3,VARNAME_3,NL_NAME_3,TYPE_3,ENGTYPE_3,CC_3,HASC_3,geometry
0,FRA.1.1.1_1,FRA,France,FRA.1_1,Auvergne-Rhône-Alpes,,FRA.1.1_1,Ain,,Belley,,,Arrondissement,Districts,,,"POLYGON ((5.64833 45.62741, 5.63841 45.62302, ..."
1,FRA.1.1.2_1,FRA,France,FRA.1_1,Auvergne-Rhône-Alpes,,FRA.1.1_1,Ain,,Bourg-en-Bresse,,,Arrondissement,Districts,,,"POLYGON ((4.90902 45.87411, 4.88383 45.87999, ..."
2,FRA.1.1.3_1,FRA,France,FRA.1_1,Auvergne-Rhône-Alpes,,FRA.1.1_1,Ain,,Gex,,,Arrondissement,Districts,,,"POLYGON ((5.88999 46.08715, 5.86626 46.08554, ..."
3,FRA.1.1.4_1,FRA,France,FRA.1_1,Auvergne-Rhône-Alpes,,FRA.1.1_1,Ain,,Nantua,,,Arrondissement,Districts,,,"POLYGON ((5.52754 46.01693, 5.51732 46.02378, ..."
4,FRA.1.2.1_1,FRA,France,FRA.1_1,Auvergne-Rhône-Alpes,,FRA.1.2_1,Allier,,Montluçon,,,Arrondissement,Districts,,,"POLYGON ((3.11242 46.12284, 3.11466 46.12183, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,FRA.13.5.2_1,FRA,France,FRA.13_1,Provence-Alpes-Côte d'Azur,,FRA.13.5_1,Var,,Draguignan,,,Arrondissement,Districts,,,"POLYGON ((6.49681 43.15097, 6.49681 43.1507, 6..."
346,FRA.13.5.3_1,FRA,France,FRA.13_1,Provence-Alpes-Côte d'Azur,,FRA.13.5_1,Var,,Toulon,,,Arrondissement,Districts,,,"MULTIPOLYGON (((6.39708 42.98903, 6.39708 42.9..."
347,FRA.13.6.1_1,FRA,France,FRA.13_1,Provence-Alpes-Côte d'Azur,,FRA.13.6_1,Vaucluse,,Apt,,,Arrondissement,Districts,,,"POLYGON ((5.48808 43.66765, 5.44529 43.6801, 5..."
348,FRA.13.6.2_1,FRA,France,FRA.13_1,Provence-Alpes-Côte d'Azur,,FRA.13.6_1,Vaucluse,,Avignon,,,Arrondissement,Districts,,,"MULTIPOLYGON (((4.81297 43.98801, 4.84094 43.9..."


In [72]:
import os
from collections import defaultdict

folder = "/Users/noamananwaar/Desktop/Projects/python/OSM_LLM/geodata/countries/ARE"


def plan_gadm_renames(filenames, old_prefix="gadm41_ARE_", new_prefix="are_", first_index=4):
    """Work out the new name for each GADM file, without touching the disk.

    Files are expected to look like ``<old_prefix><level>.<ext>`` (for example
    ``gadm41_ARE_3.shp``). The distinct levels are ordered numerically and
    numbered consecutively from ``first_index``, so the lowest level present
    becomes ``<new_prefix><first_index>.<ext>``.

    Args:
        filenames: Iterable of bare file names (no directory part).
        old_prefix: Only names starting with this prefix are considered.
        new_prefix: Prefix of the generated names.
        first_index: Number given to the lowest level found.

    Returns:
        A list of ``(old_name, new_name)`` tuples, grouped by ascending level and
        otherwise in input order. Names that do not match the pattern (wrong
        prefix, non-numeric level, or no extension) are left out.
    """
    grouped = defaultdict(list)
    for filename in filenames:
        if not filename.startswith(old_prefix):
            continue
        # The part after the last underscore is "<level>.<ext>", e.g. "3.shp".
        # Splitting at the FIRST dot keeps compound extensions such as "shp.xml".
        level, dot, ext = filename.rsplit("_", 1)[-1].partition(".")
        if not (dot and level.isdecimal()):
            continue
        # Key by int so that level 10 sorts after level 2, not before it.
        grouped[int(level)].append((filename, ext))

    plan = []
    for new_index, level in enumerate(sorted(grouped), start=first_index):
        for filename, ext in grouped[level]:
            plan.append((filename, f"{new_prefix}{new_index}.{ext}"))
    return plan


if not os.path.isdir(folder):
    # Nothing to do on a machine that does not have the data; say so explicitly.
    print(f"Folder not found, nothing renamed: {folder}")
else:
    for filename, new_filename in plan_gadm_renames(os.listdir(folder)):
        old_path = os.path.join(folder, filename)
        new_path = os.path.join(folder, new_filename)
        # os.rename can silently replace an existing file; never clobber data.
        if os.path.exists(new_path):
            print(f"Skipped: {filename} (target {new_filename} already exists)")
            continue
        os.rename(old_path, new_path)
        print(f"Renamed: {filename} → {new_filename}")


Renamed: gadm41_ARE_0.shx → are_4.shx
Renamed: gadm41_ARE_0.shp → are_4.shp
Renamed: gadm41_ARE_0.cpg → are_4.cpg
Renamed: gadm41_ARE_0.dbf → are_4.dbf
Renamed: gadm41_ARE_0.prj → are_4.prj
Renamed: gadm41_ARE_1.shx → are_5.shx
Renamed: gadm41_ARE_1.cpg → are_5.cpg
Renamed: gadm41_ARE_1.shp → are_5.shp
Renamed: gadm41_ARE_1.dbf → are_5.dbf
Renamed: gadm41_ARE_1.prj → are_5.prj
Renamed: gadm41_ARE_2.cpg → are_6.cpg
Renamed: gadm41_ARE_2.shp → are_6.shp
Renamed: gadm41_ARE_2.shx → are_6.shx
Renamed: gadm41_ARE_2.dbf → are_6.dbf
Renamed: gadm41_ARE_2.prj → are_6.prj
Renamed: gadm41_ARE_3.shp → are_7.shp
Renamed: gadm41_ARE_3.cpg → are_7.cpg
Renamed: gadm41_ARE_3.shx → are_7.shx
Renamed: gadm41_ARE_3.dbf → are_7.dbf
Renamed: gadm41_ARE_3.prj → are_7.prj
