Topics

* LLM calls
* RAG
  * Retrieval
  * Generation
* Query interpretation
* Conversation


Queries for buildings by attribute (name):

        "Tell me about the Zwinger Palace in Dresden",
        "Zwinger",
        "Semperoper",
        "Frauenkirche",
        "Where is the Frauenkirche located?",
        "Show me information on the Semperoper",
        "I want to find the Dresden Castle",
        "What can you tell me about the Military History Museum?",
        "Give me details on the Green Vault",
        "Find the Blue Wonder bridge",
        "Show me the Hygiene Museum",
        "Tell me about the Yenidze building",
        "Locate the Albertinum in Dresden"

Queries for buildings by type:

        "List all museums in Dresden",
        "Find the hospitals in Dresden",
        "Show me art galleries in Dresden",
        "Are there any public libraries in Dresden?",
        "What schools are there in Dresden?",
        "Search for historical buildings in Dresden",
        "Look up churches in Dresden",
        "Which theaters are in Dresden?",
        "What kind of tourist attractions are in Dresden?",
        "Find universities in Dresden"

Advanced queries:

        "Restaurants in Dresden Neustadt" # (poly/poly => intersection, point/poly => contains)
        "Historic buildings in a 2km radius around the Semperoper" # Requires a tool for buffered search
        "Restaurants on the main street"
        "Buildings with a size greater than 100 square meters"

# Part 1: Simple LLM Call

In [1]:
import getpass
import os

# Ask for the key interactively only when the environment variable is missing or
# empty; getpass reads it without echoing, so the key is not stored in the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

Enter your OpenAI API key: ··········


In [2]:
!pip install -qU langchain-openai

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m61.4/64.5 kB[0m [31m2.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.5/64.5 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

In [4]:
# The system message fixes the task (list the place names and count them); the
# human message carries the sentence to analyse.
# NOTE(review): the sentence starts at "is the capital city ..." and has no
# subject - confirm that leaving out the city name is intentional.
system_instruction = "You are a helpful assistant that return names of places in a given sentence, along with the number of place names."
sentence = "is the capital city of the German state of Saxony and its second most populous city after Leipzig."

messages = [("system", system_instruction), ("human", sentence)]
ai_msg = llm.invoke(messages)
ai_msg

AIMessage(content='The place names in the sentence are:\n\n1. German\n2. Saxony\n3. Leipzig\n\nTotal number of place names: 3', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 54, 'total_tokens': 83, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c', 'id': 'chatcmpl-BeXlym23N8y2oxe7YF84QjeukBBuM', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--1a494405-2f58-4252-aa68-d7aa8bb5393e-0', usage_metadata={'input_tokens': 54, 'output_tokens': 29, 'total_tokens': 83, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [5]:
print(ai_msg.content)

The place names in the sentence are:

1. German
2. Saxony
3. Leipzig

Total number of place names: 3


# Part 2: Retrieval

In [6]:
!pip -q install chromadb langchain-chroma

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.3/19.3 MB[0m [31m106.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m76.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.6/101.6 kB[0m [31m6.6 MB/s[0m eta [36m0:00:

In [7]:
import chromadb
chroma_client = chromadb.HttpClient(host='https://klimakonform-maps.geo.tu-dresden.de/chromadb')
chroma_client.heartbeat()

1749002669212071319

In [8]:
!pip install -qU langchain-huggingface

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m102.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m89.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m42.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [9]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings_hf = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [10]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Both stores use the same remote Chroma client and the same embedding model;
# only the collection name differs. create_collection_if_not_exists=False is
# passed so that (presumably) a missing collection is reported, not created.
_shared_store_options = {
    "client": chroma_client,
    "embedding_function": embeddings_hf,
    "create_collection_if_not_exists": False,
}

vector_store_building_with_names = Chroma(
    collection_name="buildings_with_names", **_shared_store_options
)

vector_store_buildings_grouped_by_type = Chroma(
    collection_name="buildings_grouped_by_type", **_shared_store_options
)

## Response

In [11]:
# https://python.langchain.com/docs/integrations/vectorstores/chroma/
response = vector_store_building_with_names.similarity_search("Show me the Hygiene Museum")
response

[Document(id="('relation', 5651)", metadata={'heritage': 'yes', 'internet_access:fee': 'no', 'wikipedia': 'de:Deutsches Hygiene-Museum', 'changing_table': 'yes', 'building:architecture': 'neoclassicism', 'geometry': '{"type": "Polygon", "coordinates": [[[13.7455854, 51.0450922], [13.7459654, 51.0449423], [13.7460484, 51.0449096], [13.7461418, 51.0448728], [13.7467326, 51.0446398], [13.7467446, 51.0446519], [13.7468328, 51.0447402], [13.7468451, 51.0447526], [13.7468679, 51.0447436], [13.7475539, 51.0444731], [13.7475832, 51.0444615], [13.7475561, 51.0444343], [13.7473531, 51.0442308], [13.7473475, 51.0442252], [13.7470317, 51.0443497], [13.7470192, 51.0443372], [13.7470022, 51.0443201], [13.7469718, 51.0443321], [13.7469586, 51.0443188], [13.746949, 51.0443226], [13.7469196, 51.0442932], [13.7468869, 51.0442605], [13.7467879, 51.0441612], [13.7467389, 51.044112], [13.7467484, 51.0441082], [13.7467325, 51.0440922], [13.746756, 51.044083], [13.7467312, 51.0440582], [13.7469297, 51.043979

In [12]:
import json

def create_geojson_feature(response, rank, score=None):
    """Turn the metadata of one retrieved document into a GeoJSON Feature.

    Args:
        response: Metadata mapping of a retrieved document. Its ``'geometry'``
            entry must be a JSON string holding ``'coordinates'`` and
            ``'properties'`` (including a ``'name'``) and normally a ``'type'``.
        rank: Position of the document in the result list; stored as the
            ``'rank'`` property.
        score: Optional similarity score. When given, it is stored as the
            ``'score'`` property and included in the printed summary.

    Returns:
        dict: A GeoJSON ``Feature`` whose properties are the parsed properties
        plus ``'rank'`` (and ``'score'`` when supplied).

    Raises:
        KeyError: If ``'geometry'``, ``'properties'``, ``'coordinates'`` or
            ``'name'`` is missing.
    """
    parsed = json.loads(response['geometry'])
    properties = parsed['properties']
    geojson = {
        "type": "Feature",
        "properties": properties,
        # Keep the stored geometry type (e.g. MultiPolygon) instead of assuming
        # every footprint is a Polygon; fall back to Polygon when it is absent.
        "geometry": {"type": parsed.get('type', "Polygon"), "coordinates": parsed['coordinates']}
    }
    properties['rank'] = rank
    if score is None:
        print(properties['name'], "- rank:", rank)
    else:
        properties['score'] = score
        print(properties['name'], "; rank:", rank, "; score:", score)
    return geojson

# Convert every hit into a GeoJSON feature; ranks are 1-based and follow the
# order in which the vector store returned the documents.
features = [
    create_geojson_feature(document.metadata, position)
    for position, document in enumerate(response, start=1)
]

# Wrap the features so the whole result can be handed to a GeoJSON consumer.
results = {"type": "FeatureCollection", "features": features}

Deutsches Hygiene-Museum - rank: 1
IMO Car Wash - rank: 2
Kieferorthopädie am Königswald - rank: 3
Total Wash - rank: 4


In [13]:
import folium

# Base map centred on [51.05, 13.73] (the Dresden area used throughout this notebook).
m = folium.Map(location=[51.05, 13.73], zoom_start=14)

# The popup of each footprint shows its name and its rank in the result list.
popup = folium.GeoJsonPopup(fields=["name", "rank"])
results_layer = folium.GeoJson(results, popup=popup)
results_layer.add_to(m)

m

## Response with score

In [14]:
response_with_score = vector_store_building_with_names.similarity_search_with_score("Semperoper")
response_with_score

[(Document(id="('way', 242305584)", metadata={'roof:material': 'copper', 'name:es': 'Ópera Semper', 'architect:wikidata': 'Q61342', 'image': 'File:Semperoper at night.jpg', 'building:levels': '2', 'name:be': 'Опера Земпера', 'addr:postcode': '01067', 'name:zh': '森柏歌劇院', 'architect': 'Gottfried Semper', 'roof:shape': 'flat', 'tourism': 'attraction', 'addr:city': 'Dresden', 'source:height': 'Schätzung per Photogrammetrie;http://commons.wikimedia.org/wiki/File:Semperoper_at_night.jpg', 'addr:housenumber': '2', 'heritage': '4', 'amenity': 'theatre', 'phone': '+49 351 49110', 'heritage:operator': 'lfd', 'building:architecture': 'neo-renaissance', 'addr:street': 'Theaterplatz', 'addr:country': 'DE', 'name': 'Semperoper', 'wikipedia': 'de:Semperoper', 'id': "('way', 242305584)", 'contact:youtube': 'https://www.youtube.com/user/semperoperdd', 'contact:facebook': 'https://www.facebook.com/semperoper', 'name:ru': 'Опера Земпера', 'wikidata': 'Q127097', 'building_description': 'yes', 'building:ma

In [15]:
import json

def create_geojson_feature(response, rank, score=None):
    """Turn the metadata of one retrieved document into a GeoJSON Feature.

    Args:
        response: Metadata mapping of a retrieved document. Its ``'geometry'``
            entry must be a JSON string holding ``'coordinates'`` and
            ``'properties'`` (including a ``'name'``) and normally a ``'type'``.
        rank: Position of the document in the result list; stored as the
            ``'rank'`` property.
        score: Similarity score returned with the document. When given, it is
            stored as the ``'score'`` property and included in the printed
            summary; when omitted, only name and rank are reported.

    Returns:
        dict: A GeoJSON ``Feature`` whose properties are the parsed properties
        plus ``'rank'`` (and ``'score'`` when supplied).

    Raises:
        KeyError: If ``'geometry'``, ``'properties'``, ``'coordinates'`` or
            ``'name'`` is missing.
    """
    parsed = json.loads(response['geometry'])
    properties = parsed['properties']
    geojson = {
        "type": "Feature",
        "properties": properties,
        # Keep the stored geometry type (e.g. MultiPolygon) instead of assuming
        # every footprint is a Polygon; fall back to Polygon when it is absent.
        "geometry": {"type": parsed.get('type', "Polygon"), "coordinates": parsed['coordinates']}
    }
    properties['rank'] = rank
    if score is None:
        print(properties['name'], "- rank:", rank)
    else:
        properties['score'] = score
        print(properties['name'], "; rank:", rank, "; score:", score)
    return geojson

# Each search hit is a (document, score) pair; ranks are 1-based and follow the
# order in which the vector store returned the hits.
features = [
    create_geojson_feature(document.metadata, position, score)
    for position, (document, score) in enumerate(response_with_score, start=1)
]

# Wrap the features so the whole result can be handed to a GeoJSON consumer.
results = {"type": "FeatureCollection", "features": features}

Semperoper ; rank: 1 ; score: 0.98220587
Sempa Systems GmbH ; rank: 2 ; score: 1.1242414
Semper Zwei ; rank: 3 ; score: 1.2290049
Semper 2 (Probebühne) ; rank: 4 ; score: 1.2389565


## Response using MMR

MMR (Maximal Marginal Relevance): selects results that are relevant to the query **and** diverse among themselves

In [16]:
#https://api.python.langchain.com/en/latest/vectorstores/langchain_core.vectorstores.VectorStore.html#langchain_core.vectorstores.VectorStore.max_marginal_relevance_search
response_with_mmr = vector_store_building_with_names.max_marginal_relevance_search("Semperoper")
response_with_mmr

[Document(id="('way', 242305584)", metadata={'geometry': '{"type": "Polygon", "coordinates": [[[13.7346417, 51.054768], [13.7346713, 51.0547806], [13.7346683, 51.0547836], [13.7350268, 51.0549319], [13.7350653, 51.0548945], [13.7350989, 51.0548641], [13.7351417, 51.0548808], [13.7351717, 51.0548939], [13.7351686, 51.054896], [13.7351887, 51.0549042], [13.7351864, 51.0549065], [13.7351981, 51.0549114], [13.7352007, 51.054909], [13.735206, 51.0549114], [13.7352029, 51.0549138], [13.7352143, 51.0549192], [13.7352179, 51.0549168], [13.7352419, 51.0549271], [13.7352402, 51.0549296], [13.7352524, 51.0549348], [13.7352546, 51.0549328], [13.7352594, 51.0549348], [13.7352574, 51.0549367], [13.7352693, 51.0549415], [13.7352713, 51.05494], [13.7352916, 51.0549476], [13.735352, 51.0548902], [13.7353478, 51.0548883], [13.7353654, 51.0548708], [13.7353699, 51.0548726], [13.7353775, 51.054866], [13.7353725, 51.0548636], [13.7353933, 51.0548441], [13.7353969, 51.0548457], [13.7354056, 51.0548377], [13

In [17]:
import json

def create_geojson_feature(response, rank, score=None):
    """Turn the metadata of one retrieved document into a GeoJSON Feature.

    Args:
        response: Metadata mapping of a retrieved document. Its ``'geometry'``
            entry must be a JSON string holding ``'coordinates'`` and
            ``'properties'`` (including a ``'name'``) and normally a ``'type'``.
        rank: Position of the document in the result list; stored as the
            ``'rank'`` property.
        score: Optional similarity score. When given, it is stored as the
            ``'score'`` property and included in the printed summary, so this
            definition also serves callers that pass a score.

    Returns:
        dict: A GeoJSON ``Feature`` whose properties are the parsed properties
        plus ``'rank'`` (and ``'score'`` when supplied).

    Raises:
        KeyError: If ``'geometry'``, ``'properties'``, ``'coordinates'`` or
            ``'name'`` is missing.
    """
    parsed = json.loads(response['geometry'])
    properties = parsed['properties']
    geojson = {
        "type": "Feature",
        "properties": properties,
        # Keep the stored geometry type (e.g. MultiPolygon) instead of assuming
        # every footprint is a Polygon; fall back to Polygon when it is absent.
        "geometry": {"type": parsed.get('type', "Polygon"), "coordinates": parsed['coordinates']}
    }
    properties['rank'] = rank
    if score is None:
        print(properties['name'], "- rank:", rank)
    else:
        properties['score'] = score
        print(properties['name'], "; rank:", rank, "; score:", score)
    return geojson

# Convert every MMR hit into a GeoJSON feature; ranks are 1-based and follow
# the order in which the vector store returned the documents.
features = [
    create_geojson_feature(document.metadata, position)
    for position, document in enumerate(response_with_mmr, start=1)
]

# Wrap the features so the whole result can be handed to a GeoJSON consumer.
results = {"type": "FeatureCollection", "features": features}

Semperoper - rank: 1
Sempa Systems GmbH - rank: 2
Semper-Schulen - rank: 3
Esso Snack & Shop - rank: 4


In [18]:
import folium

# Base map centred on [51.05, 13.73] (the Dresden area used throughout this notebook).
m = folium.Map(location=[51.05, 13.73], zoom_start=14)

# The popup of each footprint shows its name and its rank in the MMR result list.
popup = folium.GeoJsonPopup(fields=["name", "rank"])
results_layer = folium.GeoJson(results, popup=popup)
results_layer.add_to(m)

m

# Part 3: Generation

In [19]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})])

In [20]:
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x7ae3f7c22810>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x7ae3ec175cd0>, root_client=<openai.OpenAI object at 0x7ae3ec160c90>, root_async_client=<openai.AsyncOpenAI object at 0x7ae3ec175a50>, model_name='gpt-4o-mini', temperature=0.0, model_kwargs={}, openai_api_key=SecretStr('**********'))

In [21]:
chain = prompt | llm
chain

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x7ae3f7c22810>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x7ae3ec175cd0>, root_client=<openai.OpenAI object at 0x7ae3ec160c90>, root_async_client=<openai.AsyncOpenAI object at 0x7ae3ec175a50>, model_name='gpt-4o-mini', temperature=0.0, model_kwargs={}, openai_api_key=SecretStr('**********'))

In [22]:
query = "Show me the Hygiene Museum"

# Retrieve the context for this exact query instead of reusing the `response`
# variable left over from the retrieval section, so context and question cannot
# drift apart when `query` is edited.
docs = vector_store_building_with_names.similarity_search(query)

chain.invoke({"context": docs, "question": query})

AIMessage(content='The Deutsches Hygiene-Museum, also known as the German Hygiene Museum, is located in Dresden, Germany. Here are the details:\n\n- **Name**: Deutsches Hygiene-Museum\n- **Address**: Lingnerplatz 1, 01069 Dresden, DE\n- **Website**: [www.dhmd.de](https://www.dhmd.de)\n- **Email**: service@dhmd.de\n- **Phone**: +49 351 4846400\n- **Opening Hours**: Tuesday to Sunday, 10:00 AM - 6:00 PM\n- **Architect**: Wilhelm Kreis\n- **Building Architecture**: Neoclassicism\n- **Heritage**: Yes\n- **Wheelchair Accessible**: Yes\n- **Changing Tables**: Yes (located in both female and male toilets)\n- **Toilets**: Yes (wheelchair accessible)\n- **Fee**: Yes\n- **Image**: ![Deutsches Hygiene-Museum](https://commons.wikimedia.org/wiki/File%3ADeutsches_Hygienemuseum%2C_mit_Plastik_Ballwerfer_von_Richard_Daniel_Fabricius-9716.jpg)\n\nFor more information, you can visit their [Wikipedia page](https://de.wikipedia.org/wiki/Deutsches_Hygiene-Museum).', additional_kwargs={'refusal': None}, res

RAG formatted

In [23]:
chain.invoke({"context": docs, "question": query}).content

'The Deutsches Hygiene-Museum, also known as the German Hygiene Museum, is located in Dresden, Germany. Here are the details:\n\n- **Name**: Deutsches Hygiene-Museum\n- **Address**: Lingnerplatz 1, 01069 Dresden, DE\n- **Website**: [www.dhmd.de](https://www.dhmd.de)\n- **Email**: service@dhmd.de\n- **Phone**: +49 351 4846400\n- **Opening Hours**: Tuesday to Sunday, 10:00 AM - 6:00 PM\n- **Architect**: Wilhelm Kreis\n- **Building Architecture**: Neoclassicism\n- **Heritage**: Yes\n- **Wheelchair Accessible**: Yes\n- **Changing Tables**: Yes (located in both female and male toilets)\n- **Toilets**: Yes (wheelchair accessible)\n- **Fee**: Yes\n- **Image**: ![Deutsches Hygiene-Museum](https://commons.wikimedia.org/wiki/File%3ADeutsches_Hygienemuseum%2C_mit_Plastik_Ballwerfer_von_Richard_Daniel_Fabricius-9716.jpg)\n\nFor more information, you can visit their [Wikipedia page](https://de.wikipedia.org/wiki/Deutsches_Hygiene-Museum).'

Without RAG - as a baseline

In [24]:
llm.invoke(query).content

'The Hygiene Museum, known as the "Deutsches Hygiene-Museum," is located in Dresden, Germany. It was founded in 1912 and is renowned for its exhibitions on health, hygiene, and the human body. The museum aims to educate the public about various aspects of health, wellness, and the science of hygiene through interactive displays, historical artifacts, and contemporary art.\n\nOne of the museum\'s most famous exhibits is the "Human Body" exhibition, which features a life-sized model of a human body that allows visitors to explore the anatomy and functions of different systems. The museum also hosts temporary exhibitions on a wide range of topics related to health, society, and culture.\n\nIn addition to its exhibitions, the Hygiene Museum offers educational programs, workshops, and events aimed at promoting health awareness and understanding.\n\nIf you\'re interested in visiting, it\'s a good idea to check the museum\'s official website for current exhibitions, opening hours, and any spe

# Part 4: Query interpretation/Geocoding

Geocoding through prompt template

In [25]:
import getpass
import os

# Prompt for the key only if the environment does not already define it.
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

from langchain_openai import ChatOpenAI
# PromptTemplate is imported from langchain_core.prompts; the root-level
# `from langchain import PromptTemplate` is a deprecated import path.
from langchain_core.prompts import PromptTemplate

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Prompt written as a scripted USER/ASSISTANT dialogue; {input_text} and
# {entity_type} are filled in when the chain is invoked.
LocationExtraction_template = """
You are a virtual assistant that extracts entities from the provided text.
USER: Text: {input_text}
ASSISTANT: I’ve read the text.
USER: What are all {entity_type} mentioned in the text?
ASSISTANT: Return the model's prediction formatted as geojson. For every {entity_type} found, mention also the coordinates in decimal degrees.
"""
# NOTE(review): looks like an optional extra instruction that is currently not
# part of the prompt: "The geojson should start with square brackets"

prompt_template = PromptTemplate(
    input_variables=["input_text", "entity_type"],
    template=LocationExtraction_template
)

# The filled-in prompt is piped straight into the chat model.
chain = prompt_template | llm

example_text = """
Dresden is the capital city of the German state of Saxony and its second most populous city after Leipzig. It is the 12th most populous city of Germany,
the fourth largest by area (after Berlin, Hamburg, and Cologne), and the third-most populous city in the area of former East Germany, after Berlin and Leipzig.
Dresden's urban area comprises the towns of Freital, Pirna, Radebeul, Meissen, Coswig, Radeberg, and Heidenau and has around 790,000 inhabitants.
The Dresden metropolitan area has approximately 1.34 million inhabitants.
Dresden is the second largest city on the River Elbe after Hamburg.
Most of the city's population lives in the Elbe Valley, but a large, albeit very sparsely populated, area of the city east of the Elbe lies in the West Lusatian Hill Country and Uplands (the westernmost part of the Sudetes) and thus in Lusatia.
Many boroughs west of the Elbe lie in the Ore Mountain Foreland, as well as in the valleys of the rivers rising there and flowing through Dresden,
the longest of which are the Weißeritz and the Lockwitzbach. The name of the city as well as the names of most of its boroughs and rivers are of Sorbian origin.
"""

chain.invoke({"input_text": example_text,  "entity_type": "place name"}).content # Alternative: place, location, locations, place names

'```json\n{\n  "type": "FeatureCollection",\n  "features": [\n    {\n      "type": "Feature",\n      "properties": {\n        "name": "Dresden",\n        "description": "Capital city of the German state of Saxony."\n      },\n      "geometry": {\n        "type": "Point",\n        "coordinates": [13.7373, 51.0504]\n      }\n    },\n    {\n      "type": "Feature",\n      "properties": {\n        "name": "Saxony",\n        "description": "German state."\n      },\n      "geometry": {\n        "type": "Point",\n        "coordinates": [13.7333, 51.0500]\n      }\n    },\n    {\n      "type": "Feature",\n      "properties": {\n        "name": "Leipzig",\n        "description": "Second most populous city in Saxony."\n      },\n      "geometry": {\n        "type": "Point",\n        "coordinates": [12.3731, 51.3397]\n      }\n    },\n    {\n      "type": "Feature",\n      "properties": {\n        "name": "Berlin",\n        "description": "Largest city in Germany."\n      },\n      "geometry": {

In [26]:
import getpass
import os

# Prompt for the key only if the environment does not already define it.
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

from langchain_openai import ChatOpenAI
# PromptTemplate is imported from langchain_core.prompts; the root-level
# `from langchain import PromptTemplate` is a deprecated import path.
from langchain_core.prompts import PromptTemplate

# Same model as before, but wrapped with JSON-mode structured output; per the
# recorded output of this cell the chain then yields a parsed dict rather than
# a message object.
llm_w_so = ChatOpenAI(model="gpt-4o-mini", temperature=0).with_structured_output(method="json_mode")

# Prompt written as a scripted USER/ASSISTANT dialogue; {input_text} and
# {entity_type} are filled in when the chain is invoked.
LocationExtraction_template = """
You are a virtual assistant that extracts entities from the provided text.
USER: Text: {input_text}
ASSISTANT: I’ve read the text.
USER: What are all {entity_type} mentioned in the text?
ASSISTANT: Return the model's prediction formatted as geojson. For every {entity_type} found, mention also the coordinates in decimal degrees.
"""
# NOTE(review): looks like an optional extra instruction that is currently not
# part of the prompt: "The geojson should start with square brackets"

prompt_template = PromptTemplate(
    input_variables=["input_text", "entity_type"],
    template=LocationExtraction_template
)

# The filled-in prompt is piped into the structured-output model.
chain = prompt_template | llm_w_so

example_text = """
Dresden is the capital city of the German state of Saxony and its second most populous city after Leipzig. It is the 12th most populous city of Germany,
the fourth largest by area (after Berlin, Hamburg, and Cologne), and the third-most populous city in the area of former East Germany, after Berlin and Leipzig.
Dresden's urban area comprises the towns of Freital, Pirna, Radebeul, Meissen, Coswig, Radeberg, and Heidenau and has around 790,000 inhabitants.
The Dresden metropolitan area has approximately 1.34 million inhabitants.
Dresden is the second largest city on the River Elbe after Hamburg.
Most of the city's population lives in the Elbe Valley, but a large, albeit very sparsely populated, area of the city east of the Elbe lies in the West Lusatian Hill Country and Uplands (the westernmost part of the Sudetes) and thus in Lusatia.
Many boroughs west of the Elbe lie in the Ore Mountain Foreland, as well as in the valleys of the rivers rising there and flowing through Dresden,
the longest of which are the Weißeritz and the Lockwitzbach. The name of the city as well as the names of most of its boroughs and rivers are of Sorbian origin.
"""

chain.invoke({"input_text": example_text,  "entity_type": "place name"}) # Alternative: place, location, locations, place names

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'properties': {'name': 'Dresden',
    'description': 'Capital city of the German state of Saxony.'},
   'geometry': {'type': 'Point', 'coordinates': [13.7373, 51.0504]}},
  {'type': 'Feature',
   'properties': {'name': 'Saxony', 'description': 'German state.'},
   'geometry': {'type': 'Point', 'coordinates': [13.7333, 51.05]}},
  {'type': 'Feature',
   'properties': {'name': 'Leipzig',
    'description': 'Second most populous city in Saxony.'},
   'geometry': {'type': 'Point', 'coordinates': [12.3731, 51.3397]}},
  {'type': 'Feature',
   'properties': {'name': 'Berlin', 'description': 'Largest city in Germany.'},
   'geometry': {'type': 'Point', 'coordinates': [13.405, 52.52]}},
  {'type': 'Feature',
   'properties': {'name': 'Hamburg', 'description': 'City on the River Elbe.'},
   'geometry': {'type': 'Point', 'coordinates': [9.9937, 53.5511]}},
  {'type': 'Feature',
   'properties': {'name': 'Cologne', 'description': 

In [27]:
query = "Semperoper"

places = chain.invoke({"input_text": query,  "entity_type": "place name"}) # Alternative: place, location, locations, place names
places

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'properties': {'name': 'Semperoper'},
   'geometry': {'type': 'Point', 'coordinates': [13.7333, 51.0509]}}]}

In [28]:
import folium

# Base map centred on [51.05, 13.73] (the Dresden area used throughout this notebook).
m = folium.Map(location=[51.05, 13.73], zoom_start=14)

# The popup of each extracted place shows only its name.
popup = folium.GeoJsonPopup(fields=["name"])
places_layer = folium.GeoJson(places, popup=popup)
places_layer.add_to(m)

m

Next step: Compute the Q-Footprint from the P-Footprints

# Part 5: Conversation

In [29]:
#https://python.langchain.com/v0.2/docs/how_to/qa_chat_history_how_to/

from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferMemory

model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# The buffer memory keeps the previous turns and supplies them to the chain as
# `history` (visible in the recorded output of the second call).
chat_chain = ConversationChain(
    llm=model,
    memory=ConversationBufferMemory(),
)

# Calling the chain object directly (`chat_chain(...)`) is deprecated; `invoke`
# is the supported entry point and returns the same input/history/response dict.
chat_chain.invoke({"input": "Hi. My name is Roshan. I need some help"})

chat_chain.invoke({"input": "Can you tell me what my name is?"})

  memory = ConversationBufferMemory()
  chat_chain = ConversationChain(
  chat_chain("Hi. My name is Roshan. I need some help")


{'input': 'Can you tell me what my name is?',
 'history': "Human: Hi. My name is Roshan. I need some help\nAI: Hello, Roshan! I'm glad you reached out. What do you need help with today?",
 'response': "Your name is Roshan! It's nice to meet you. Is there anything specific you'd like to talk about or any questions you have?"}

In [30]:
!pip -q install chromadb langchain-chroma langchain_community

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m29.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [31]:
import chromadb
chroma_client = chromadb.HttpClient(host='https://klimakonform-maps.geo.tu-dresden.de/chromadb')
chroma_client.heartbeat()

1749002921090003129

In [32]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Re-open the two remote collections for the conversation part. `embeddings_hf`
# is not defined in this cell; it must already exist from the retrieval part.
vector_store_building_with_names = Chroma(
    client=chroma_client,
    collection_name="buildings_with_names",
    embedding_function=embeddings_hf,
    create_collection_if_not_exists=False

)

vector_store_buildings_grouped_by_type = Chroma(
    client=chroma_client,
    collection_name="buildings_grouped_by_type",
    embedding_function=embeddings_hf,
    create_collection_if_not_exists=False

)

# Expose the named-buildings store through the retriever interface, using the
# default search settings.
retriever = vector_store_building_with_names.as_retriever()

In [33]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [34]:
contextualize_q_prompt.invoke({
       "chat_history": [("human", "what's 5 + 2"), ("ai", "5 + 2 is 7")],
       "input": "now multiply that by 4"})

ChatPromptValue(messages=[SystemMessage(content='Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history. Do NOT answer the question, just reformulate it if needed and otherwise return it as is.', additional_kwargs={}, response_metadata={}), HumanMessage(content="what's 5 + 2", additional_kwargs={}, response_metadata={}), AIMessage(content='5 + 2 is 7', additional_kwargs={}, response_metadata={}), HumanMessage(content='now multiply that by 4', additional_kwargs={}, response_metadata={})])

In [35]:
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt)

In [36]:
# https://python.langchain.com/v0.2/docs/how_to/qa_chat_history_how_to/

from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

retriever = vector_store_building_with_names.as_retriever()

### Contextualize question ###
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt)

### Answer question ###
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)


### Statefully manage chat history ###
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Histories live in the module-level ``store`` dict, keyed by session id, so
    repeated calls with the same id return the same history object.
    """
    try:
        return store[session_id]
    except KeyError:
        # First request for this session: register an empty history.
        history = store[session_id] = ChatMessageHistory()
        return history


conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [37]:
# First turn of session "abc123"; only the "answer" entry of the result is shown.
# NOTE(review): this question looks carried over from the linked LangChain
# tutorial. The retriever searches the Dresden buildings collection, so the
# recorded answer is probably not grounded in retrieved context - consider a
# building-related question instead.
conversational_rag_chain.invoke(
    {"input": "What is Task Decomposition?"},
    config={
        "configurable": {"session_id": "abc123"}  # constructs a key "abc123" in `store`.
    },
)["answer"]

'Task decomposition is the process of breaking down a complex task into smaller, more manageable sub-tasks or components. This approach helps in organizing work, making it easier to understand, assign, and execute each part effectively. It is commonly used in project management, software development, and problem-solving to enhance efficiency and clarity.'

In [38]:
conversational_rag_chain.invoke(
    {"input": "What are common ways of doing it?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

'Common ways of task decomposition include creating a hierarchical structure, such as a work breakdown structure (WBS), where tasks are divided into levels of detail. Another method is using flowcharts or diagrams to visualize the sequence and relationships between tasks. Additionally, agile methodologies often employ user stories and sprints to break down tasks into smaller, iterative components.'

# Outlook

Not covered, but probably useful


*   https://python.langchain.com/docs/how_to/ensemble_retriever/
*   Listeneintrag

