# Experiment with a geodescriber based on an LLM

## Purpose

In this notebook we prototype a geodescriber based on a large language model (LLM).
## Setup
### Library import
**Setup software libraries**

In [24]:
import json
import os

import ee
import folium
import overpy
import plotly.offline as pyo
from IPython.display import Markdown, display
from shapely.geometry import Polygon, box

from streamlit_map.geocoder import Geocoder
from streamlit_map.geodescriber import GeoDescriber
from streamlit_map.visualize import foliumMapGEE, ipyleafletMapGEE, create_stacked_bar
from streamlit_map.processing import ZonalStatistics
from streamlit_map.data import GEEData

### Utils

In [25]:
def _get_bbox(geojson: dict) -> Polygon:
    """Return the axis-aligned bounding box of a GeoJSON polygon feature.

    Args:
        geojson: A GeoJSON ``Feature`` mapping whose ``geometry`` holds
            polygon ``coordinates``. Only the first ring is read.

    Returns:
        The rectangular polygon produced by ``shapely.geometry.box`` for the
        ring's ``(min_x, min_y, max_x, max_y)`` bounds.
    """
    # Build a polygon from the first coordinate ring of the feature
    ring_polygon = Polygon(geojson['geometry']['coordinates'][0])
    # `bounds` is a property yielding (min_x, min_y, max_x, max_y), which is
    # exactly the positional argument order `box` takes
    return box(*ring_polygon.bounds)

### Initial setup

**Initialize Earth Engine**

In [26]:
# Read the service-account key (a JSON document) from the environment once.
# Failing fast gives a clear message instead of the opaque TypeError that
# json.loads(None) raises when the variable is unset.
ee_private_key_json = os.getenv("EE_PRIVATE_KEY")
if not ee_private_key_json:
    raise RuntimeError("EE_PRIVATE_KEY environment variable is not set")

# The parsed key provides the service-account e-mail; the raw JSON string is
# passed on unchanged as the key data.
private_key = json.loads(ee_private_key_json)
ee_credentials = ee.ServiceAccountCredentials(
    email=private_key['client_email'],
    key_data=ee_private_key_json,
)

In [27]:
# Initialize the Earth Engine client with the service-account credentials built above
ee.Initialize(credentials=ee_credentials)

**Variables**

In [28]:
# Area threshold handed to ZonalStatistics for the selected region.
# NOTE(review): unit is presumably square degrees (the ~0.87 x 0.61 degree demo
# box is logged as size 0.53) — confirm in ZonalStatistics.
MAX_ALLOWED_AREA_SIZE = 25.0

In [29]:
# One GEEData wrapper per dataset, keyed by dataset name
datasets = {
    name: GEEData(name)
    for name in ('Global-Land-Cover', 'Koppen-Geiger-Climate')
}

# Shortcut to the land-cover dataset
gee_data = datasets['Global-Land-Cover']

## Run demo
### Display data on map

In [44]:
# Region of interest: a FeatureCollection with a single rectangular Polygon.
# The ring is closed (the first vertex is repeated as the last one).
geometry = {
    'type': 'FeatureCollection',
    'features': [
        {
            'type': 'Feature',
            'properties': {},
            'geometry': {
                'type': 'Polygon',
                'coordinates': [
                    [
                        [-16.97428464346484, 28.595180785691795],
                        [-16.97428464346484, 27.98588368113657],
                        [-16.10106160818097, 27.98588368113657],
                        [-16.10106160818097, 28.595180785691795],
                        [-16.97428464346484, 28.595180785691795],
                    ]
                ],
            },
        }
    ],
}

In [45]:
# Build the map widget from the region of interest
m = ipyleafletMapGEE(geometry=geometry)

# Add one Earth Engine layer per dataset, labelled with the dataset name
for layer_name, gee_dataset in datasets.items():
    m.add_gee_layer(
        name=layer_name,
        image=gee_dataset.ee_image(),
        sld_interval=gee_dataset.sld_interval(),
    )

# Last expression of the cell: render the map
m

ipyleafletMapGEE(center=[25.0, 55.0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_titl…

### Zonal statistics
**Compute zonal statistics on selected region**

In [46]:
# The region to analyse is the first feature held by the map
geojson = m.geometry['features'][0]

top = {}   # dataset name -> its 8 largest classes (label -> value)
figs = {}  # dataset name -> stacked-bar figure

for dataset, data in datasets.items():
    # Compute the per-class statistics of this dataset over the region
    # (the area threshold is handed to ZonalStatistics)
    zs = ZonalStatistics(data, MAX_ALLOWED_AREA_SIZE)
    stats = zs.check_area_and_compute(geojson=geojson)

    # Rank the classes by value, largest first, and keep the first 8
    ranked = sorted(stats.items(), key=lambda item: item[1], reverse=True)
    top[dataset] = dict(ranked[:8])

    # Re-key the class colours by class name, which is how `stats` is keyed
    colors = {
        data.class_names()[key]: item
        for key, item in data.class_colors().items()
    }

    # Build the figure from the full statistics, not only the top 8
    figs[dataset] = create_stacked_bar(values=stats, colors=colors)

2023-05-08 09:16:42.299 📏  area with size: 0.53 was selected, threshold is: 25.0
2023-05-08 09:16:43.080 [ZonalStatistics]: stats: {'b1': {'10': 1090, '100': 2817, '11': 198, '110': 110, '120': 683, '130': 5277, '150': 1928, '153': 424, '180': 147, '190': 1071, '20': 18, '200': 3189, '201': 594, '210': 45062.63921568623, '30': 576, '40': 2571, '60': 566, '70': 2629, '90': 5}}
2023-05-08 09:16:43.149 📏  area with size: 0.53 was selected, threshold is: 25.0


Stats:  {'Cropland, rainfed': 1.5807264096132745, 'Cropland, rainfed, herbaceous cover': 0.2871411276178242, 'Cropland, irrigated or post-flooding': 0.02610373887434765, 'Mosaic cropland (>50%) / natural vegetation (tree, shrub, herbaceous cover) (<50%)': 0.8353196439791248, 'Mosaic natural vegetation (tree, shrub, herbaceous cover) (>50%) / cropland (<50%)': 3.728484035885989, 'Tree cover, broadleaved, deciduous, closed to open (>15%)': 0.8208175668267095, 'Tree cover, needleleaved, evergreen, closed to open (>15%)': 3.8125960833699986, 'Tree cover, mixed leaf type (broadleaved and needleleaved)': 0.00725103857620768, 'Mosaic tree and shrub (>50%) / herbaceous cover (<50%)': 4.085235133835408, 'Mosaic herbaceous cover (>50%) / tree and shrub (<50%)': 0.159522848676569, 'Shrubland': 0.9904918695099691, 'Grassland': 7.652746113329586, 'Sparse vegetation (tree, shrub, herbaceous cover) (<15%)': 2.7960004749856817, 'Sparse herbaceous cover (<15%)': 0.6148880712624113, 'Shrub or herbaceous

2023-05-08 09:16:43.983 [ZonalStatistics]: stats: {'b1': {'12': 173.8, '13': 9, '4': 132.91764705882355, '5': 156.44705882352943, '6': 10.494117647058824}}


Stats:  {'Hot semi-arid climate': 27.538634036952182, 'Cold semi-arid climate': 32.41359138107542, 'Hot desert climate': 2.1742309754789644, 'Warm-summer Mediterranean climate': 36.00887242236631, 'Cold-summer Mediterranean climate': 1.8646711841271388}


**Display statistics on a figure**

In [13]:
# Switch plotly to offline notebook mode, then render every stored figure inline
pyo.init_notebook_mode()
for fig in figs.values():
    pyo.iplot(fig)

### Geocoder
Get the address of the region's centroid

In [47]:
# Bounding box of the selected feature
bbox = _get_bbox(geojson=geojson)

# Create the Geocoder object
geolocator = Geocoder(user_agent="my-app")

# Reverse geocode the centroid of the bounding box to get its region and country
center_point = bbox.centroid
region, country = geolocator.reverse_geocode(center_point)

print("Region: ", region)
print("Country: ", country)

Region:  Santa Cruz de Tenerife, Canary Islands
Country:  Spain


### Geodescribe the region with OpenAI API

In [48]:
# Generate a text description from the top land-cover and climate classes
# plus the reverse-geocoded region and country.
# NOTE(review): "text-davinci-003" appears to have been retired by OpenAI —
# confirm the model is still served and which models GeoDescriber supports.
geo_describer = GeoDescriber(model_name="text-davinci-003")
description = geo_describer.generate_description(
    land_cover_per=top['Global-Land-Cover'],
    climate_per=top['Koppen-Geiger-Climate'],
    region_name=region,
    country=country
)

In [49]:
# Render the generated description as Markdown under a heading.
# `Markdown` and `display` come from the notebook's import cell.
display(Markdown(f"""
## Description of the region: 

{description}
"""))


## Description of the region: 

The Santa Cruz de Tenerife region has a warm-summer Mediterranean climate, with hot and dry summers, mild winters and low precipitation overall. The landscape is dominated by water bodies such as the ocean, estuaries, and lagoons. The rest of the region includes grasslands, bare areas, as well as some cover from evergreen trees and a mix of natural and crop vegetation. The socioeconomics of this region are largely focused around the tourism industry, as the warm and pleasant climate makes it an attractive and popular destination. Additionally, fishing and agricultural activities take place in the region, making it a hub for food production.


### Get features of interest with OpenStreetMap's Overpass API.

In [36]:
# Inspect the bounding box as [min_x, min_y, max_x, max_y]
list(bbox.bounds)

[-16.97428464346484, 27.98588368113657, -16.10106160818097, 28.595180785691795]

In [39]:
# Overpass bounding-box filters take "south,west,north,east", so swap the
# shapely (min_x, min_y, max_x, max_y) order into (min_y, min_x, max_y, max_x)
min_x, min_y, max_x, max_y = bbox.bounds
bounds = [min_y, min_x, max_y, max_x]
s = ",".join(map(str, bounds))

In [41]:
# create Overpass API client
api = overpy.Overpass()

# Bounding-box filter string built in the previous cell, in the form
# "27.98,-16.97,28.59,-16.10". It gets its own name so that the shapely `bbox`
# geometry used by the earlier cells is not overwritten with a string, which
# would break re-running them.
overpass_bbox = s

# Query the cities, towns and nature reserves inside the bounding box
query = """
    [out:json];
    (
      node["place"="city"]({bbox});
      node["place"="town"]({bbox});
      relation["leisure"="nature_reserve"]({bbox});
    );
    out center;
""".format(bbox=overpass_bbox)

# execute query and get results
result = api.query(query)

# print nodes, ways, and relations
for node in result.nodes:
    print("Node: %s (%f, %f)" % (node.tags.get("name", "n/a"), node.lat, node.lon))

for way in result.ways:
    print("Way: %s (%f, %f)" % (way.tags.get("name", "n/a"), way.center_lat, way.center_lon))

for relation in result.relations:
    print("Relation: %s" % (relation.tags.get("name", "n/a")))

Node: Santa Cruz de Tenerife (28.469648, -16.254088)
Node: La Orotava (28.389883, -16.523569)
Node: Puerto de la Cruz (28.415902, -16.553296)
Node: Los Cristianos (28.052630, -16.717000)
Node: Guía de Isora (28.210137, -16.778727)
Node: Adeje (28.121942, -16.724189)
Node: Granadilla de Abona (28.122141, -16.576673)
Node: San Miguel de Abona (28.099205, -16.616791)
Node: Icod de los Vinos (28.368055, -16.717639)
Node: Tegueste (28.523261, -16.340764)
Node: Arona (28.099625, -16.680910)
Node: Buenavista del Norte (28.372118, -16.851256)
Node: Candelaria (28.354671, -16.371009)
Node: Santa Úrsula (28.425309, -16.491739)
Node: La Victoria de Acentejo (28.434786, -16.468175)
Node: Tacoronte (28.480532, -16.413760)
Node: Güímar (28.315019, -16.409950)
Node: Taco (28.446755, -16.299781)
Node: La Cuesta (28.468472, -16.289307)
Node: San Cristóbal de La Laguna (28.485771, -16.315942)
Node: La Matanza de Acentejo (28.450513, -16.452316)
Node: Realejo Alto (28.381474, -16.583850)
Relation: Zona E

In [42]:
# Inspect the Overpass result object
result

<overpy.Result at 0x7efde3add300>

In [43]:
# Inspect the tags of the first returned node
result.nodes[0].tags

{'capital': '4',
 'name': 'Santa Cruz de Tenerife',
 'name:ar': 'سانتا كروث دي تينيريفه',
 'name:es': 'Santa Cruz de Tenerife',
 'name:fr': 'Santa Cruz de Ténérife',
 'name:ja': 'サンタ・クルス・デ・テネリフェ',
 'name:ko': '산타크루스데테네리페',
 'name:lt': 'Tenerifės Santa Krusas',
 'name:ru': 'Санта-Крус-де-Тенерифе',
 'name:short': 'Santa Cruz',
 'name:zh': '圣克鲁斯-德特内里费',
 'place': 'city',
 'population': '147140',
 'population:date': '2022',
 'ref:ine': '38038002501',
 'source': 'Instituto Geográfico Nacional',
 'source:date': '2011-06',
 'source:name': 'Nomenclátor Geográfico de Municipios y Entidades de Población',
 'wikidata': 'Q14328',
 'wikimedia_commons': 'Category:Santa Cruz de Tenerife',
 'wikipedia': 'es:Santa Cruz de Tenerife'}