From c5a4e789464af73567f9049eb7426a1614df065b Mon Sep 17 00:00:00 2001 From: Robert Ziegltrum Date: Sun, 23 Mar 2025 16:36:36 +0100 Subject: [PATCH 1/5] add maps_display, maps_draw --- streamlit/view_groups.py | 19 +++ streamlit/views/maps_display.py | 131 ++++++++++++++++++++ streamlit/views/maps_draw.py | 204 ++++++++++++++++++++++++++++++++ 3 files changed, 354 insertions(+) create mode 100644 streamlit/views/maps_display.py create mode 100644 streamlit/views/maps_draw.py diff --git a/streamlit/view_groups.py b/streamlit/view_groups.py index 53c884b..6a02024 100644 --- a/streamlit/view_groups.py +++ b/streamlit/view_groups.py @@ -105,6 +105,25 @@ }, ], }, + + { + "title": "Geo Visualization", + "views": [ + { + "label": "Display Geo Input", + "help": "Display geo information on a map.", + "page": "views/maps_display.py", + "icon": ":material/globe:", + }, + { + "label": "User Geo Input", + "help": "Enable users to select own geo input.", + "page": "views/maps_draw.py", + "icon": ":material/pin_drop:", + }, + ], + }, + { "title": "Unity Catalog", "views": [ diff --git a/streamlit/views/maps_display.py b/streamlit/views/maps_display.py new file mode 100644 index 0000000..783dbe6 --- /dev/null +++ b/streamlit/views/maps_display.py @@ -0,0 +1,131 @@ + +import streamlit as st +from databricks import sql +from databricks.sdk.core import Config +import numpy as np +import pandas as pd + + +st.header("Geo Visualization", divider=True) +st.subheader("Read a table and display as a map") +st.write("This receipt loads a table from a delta table and displays the data on a map.") + +cfg = Config() + +@st.cache_resource +def get_connection(http_path): + return sql.connect( + server_hostname=cfg.host, + http_path=http_path, + credentials_provider=lambda: cfg.authenticate, + ) + + +def read_table(table_name, conn): + with conn.cursor() as cursor: + query = f"SELECT * FROM {table_name}" + cursor.execute(query) + return cursor.fetchall_arrow().to_pandas() + +cities = [ + {"name": 
"New York", "latitude": 40.7128, "longitude": -74.0060}, + {"name": "Los Angeles", "latitude": 34.0522, "longitude": -118.2437}, + {"name": "London", "latitude": 51.5074, "longitude": -0.1278}, + {"name": "Tokyo", "latitude": 35.6895, "longitude": 139.6917}, + {"name": "Sydney", "latitude": -33.8688, "longitude": 151.2093}, + {"name": "Paris", "latitude": 48.8566, "longitude": 2.3522}, + {"name": "Dubai", "latitude": 25.276987, "longitude": 55.296249}, + {"name": "Rio de Janeiro", "latitude": -22.9068, "longitude": -43.1729}, + {"name": "Moscow", "latitude": 55.7558, "longitude": 37.6173}, + {"name": "Cape Town", "latitude": -33.9249, "longitude": 18.4241} +] + +data = pd.DataFrame(cities) + + +tab_a, tab_b, tab_c, tab_d = st.tabs(["**Try it**","**Try it with a delta table**", "**Code snippet**", "**Requirements**"]) + +with tab_a: + col1, col2 = st.columns(2) + with col1: + if st.button("Try It with a random sample"): + st.map(data, latitude="Latitude", longitude="Longitude") + st.dataframe(data) + + +with tab_b: + col1, col2 = st.columns(2) + with col1: + http_path_input = st.text_input( + "Enter your Databricks HTTP Path:", placeholder="/sql/1.0/warehouses/xxxxxx" + ) + + table_name = st.text_input( + "Specify a Unity Catalog table name:", placeholder="catalog.schema.table" + ) + st.info("For displaying a sample, please use the table samples.accuweather.forecast_daily_calendar_metric") + + if http_path_input and table_name: + conn = get_connection(http_path_input) + df = read_table(table_name, conn) + + st.dataframe(df) + + if 'latitude' in df.columns and 'longitude' in df.columns: + df['latitude'] = pd.to_numeric(df['latitude'], errors='coerce') + df['longitude'] = pd.to_numeric(df['longitude'], errors='coerce') + df = df.dropna(subset=['latitude', 'longitude']) + + if not df.empty: + st.map(df, latitude="latitude", longitude="longitude") + else: + st.warning("no longitude, latitude found in the table") + + +table = [ + { + "type": "Get Tables", + "param": "Get 
long lat from the tables", + "description": "Get long lat from the tables.", + "code": """ + ```python + def read_table(table_name, conn): + with conn.cursor() as cursor: + query = f"SELECT * FROM {table_name} LIMIT 1000" + cursor.execute(query) + return cursor.fetchall_arrow().to_pandas() + ``` + """, + }, + { + "type": "Display Maps", + "param": "Display pandas df as maü", + "description": "Display the streamlit map", + "code": """ + ```python + conn = get_connection(http_path_input) + df = read_table(table_name, conn) + + st.dataframe(df) + + if 'latitude' in df.columns and 'longitude' in df.columns: + df['latitude'] = pd.to_numeric(df['latitude'], errors='coerce') + df['longitude'] = pd.to_numeric(df['longitude'], errors='coerce') + df = df.dropna(subset=['latitude', 'longitude']) + + if not df.empty: + st.map(df, latitude="latitude", longitude="longitude") + else: + st.warning("no longitude, latitude found in the table") + + + ``` + """, + }, +] + +with tab_c: + for i, row in enumerate(table): + with st.expander(f"**{row['type']} ({row['param']})**", expanded=(i == 0)): + st.markdown(f"**Description**: {row['description']}") + st.markdown(row["code"]) diff --git a/streamlit/views/maps_draw.py b/streamlit/views/maps_draw.py new file mode 100644 index 0000000..6626691 --- /dev/null +++ b/streamlit/views/maps_draw.py @@ -0,0 +1,204 @@ + +import streamlit as st +from streamlit_folium import st_folium +import folium +from folium.plugins import Draw + + + +st.header("Collect user geo input", divider=True) +st.subheader("Enable user to select geo input") +st.write("This receipt enables users to pick geo points or draw geofences to futher be used.") + +tab_a, tab_b, tab_c = st.tabs(["**Try it**", "**Code snippet**", "**Requirements**"]) + +with tab_a: + choice=st.selectbox("### Select an option",['Points', 'Geofences','Polyline', 'Rectangle','Circle']) + + col1, col2 = st.columns(2) + + with col1: + + st.write("## Select a map input") + m = 
folium.Map(location=[37.7749, -122.4194], zoom_start=13) # Example: San Francisco + draw = Draw( + draw_options={ + "polyline": True if choice=='Polyline' else False, + "rectangle": True if choice=='Rectangle' else False, + "circle": True if choice=='Circle' else False, + "marker": True if choice=='Points' else False , + "circlemarker": False, + "polygon": True if choice=='Geofences' else False, + }, + edit_options={"edit": True}, + ) + draw.add_to(m) + output = st_folium(m, width=700, height=500) + + st.write("For usage of the app, select a point on the map.") + with st.expander("Click to see the last active selected map input", expanded=False): + if output["last_active_drawing"] and "geometry" in output["last_active_drawing"]: + st.json(output["last_active_drawing"]["geometry"]) + + +table = [ + { + "type": "User Input for points", + "param": "", + "description": "User input for long, lat.", + "code": """ + ```python + m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) # Example: San Francisco + draw = Draw( + draw_options={ + "polyline": False, + "rectangle": False, + "circle": False, + "marker": True, + "circlemarker": False, + "polygon":False, + }, + edit_options={"edit": True}, + ) + draw.add_to(m) + output = st_folium(m, width=700, height=500) + ``` + """, + }, + { + "type": "User input for geofences", + "param": "", + "description": "Enable users to draw geo fences", + "code": """ + ```python + m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) # Example: San Francisco + draw = Draw( + draw_options={ + "polyline": False, + "rectangle": False, + "circle": False, + "marker": False, + "circlemarker": False, + "polygon":True, + }, + edit_options={"edit": True}, + ) + draw.add_to(m) + output = st_folium(m, width=700, height=500) + ``` + """, + }, + { + "type": "User input for polylines", + "param": "", + "description": "Enable users to draw polylines", + "code": """ + ```python + m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) # 
Example: San Francisco + draw = Draw( + draw_options={ + "polyline": True, + "rectangle": False, + "circle": False, + "marker": False, + "circlemarker": False, + "polygon":False, + }, + edit_options={"edit": True}, + ) + draw.add_to(m) + output = st_folium(m, width=700, height=500) + ``` + """, + }, + { + "type": "User input for rectangles", + "param": "", + "description": "Enable users to draw rectangles", + "code": """ + ```python + m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) # Example: San Francisco + draw = Draw( + draw_options={ + "polyline": False, + "rectangle": True, + "circle": False, + "marker": False, + "circlemarker": False, + "polygon":False, + }, + edit_options={"edit": True}, + ) + draw.add_to(m) + output = st_folium(m, width=700, height=500) + ``` + """, + }, + { + "type": "User input for circles", + "param": "", + "description": "Enable users to draw circles", + "code": """ + ```python + m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) # Example: San Francisco + draw = Draw( + draw_options={ + "polyline": False, + "rectangle": False, + "circle": True, + "marker": False, + "circlemarker": False, + "polygon":False, + }, + edit_options={"edit": True}, + ) + draw.add_to(m) + output = st_folium(m, width=700, height=500) + ``` + """, + }, + { + "type": "User input for markers", + "param": "", + "description": "Enable users to draw markers", + "code": """ + ```python + m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) # Example: San Francisco + draw = Draw( + draw_options={ + "polyline": False, + "rectangle": False, + "circle": False, + "marker": True, + "circlemarker": False, + "polygon":False, + }, + edit_options={"edit": True}, + ) + draw.add_to(m) + output = st_folium(m, width=700, height=500) + ``` + """, + }, + { + "type": "Output user input", + "param": "", + "description": "Display user input as json.", + "code": """ + ```python + st.write("For usage of the app, select a point on the map.") + with 
st.expander("Click to see the last active selected map input", expanded=False): + if output["last_active_drawing"] and "geometry" in output["last_active_drawing"]: + st.json(output["last_active_drawing"]["geometry"]) + + ``` + """, + }, + +] + +with tab_b: + for i, row in enumerate(table): + with st.expander(f"**{row['type']} ({row['param']})**", expanded=(i == 0)): + st.markdown(f"**Description**: {row['description']}") + st.markdown(row["code"]) From d1f4c08d664435658cd545843eb8705b9f63cde4 Mon Sep 17 00:00:00 2001 From: Robert-Ziegltrum Date: Sun, 23 Mar 2025 17:23:02 +0100 Subject: [PATCH 2/5] Update maps_display.py minor change, remove the no longer needed numpy import --- streamlit/views/maps_display.py | 1 - 1 file changed, 1 deletion(-) diff --git a/streamlit/views/maps_display.py b/streamlit/views/maps_display.py index 783dbe6..571817d 100644 --- a/streamlit/views/maps_display.py +++ b/streamlit/views/maps_display.py @@ -2,7 +2,6 @@ import streamlit as st from databricks import sql from databricks.sdk.core import Config -import numpy as np import pandas as pd From d5def605291851fa771393dfa85973f11aaa839f Mon Sep 17 00:00:00 2001 From: Robert Ziegltrum Date: Mon, 24 Mar 2025 12:39:26 +0100 Subject: [PATCH 3/5] Add list alerts --- streamlit/view_groups.py | 23 +++++++++++----------- streamlit/views/alerts.py | 41 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 12 deletions(-) create mode 100644 streamlit/views/alerts.py diff --git a/streamlit/view_groups.py b/streamlit/view_groups.py index 6a02024..c7cf253 100644 --- a/streamlit/view_groups.py +++ b/streamlit/view_groups.py @@ -106,6 +106,17 @@ ], }, + { + "title": "Alerts", + "views": [ + { + "label": "List alerts", + "help": "List all alerts.", + "page": "views/alerts.py", + "icon": ":material/notifications_none:", + }, + ], + }, { "title": "Geo Visualization", "views": [ @@ -152,16 +163,4 @@ }, ], }, - - { - "title": "Unity Catalog", - "views": [ - { - "label": "Get Catalogs", - 
"help": "Get meta data.", - "page": "views/unity_catalog_get.py", - "icon": ":material/lan:", - }, - ], - } ] diff --git a/streamlit/views/alerts.py b/streamlit/views/alerts.py new file mode 100644 index 0000000..b68fc25 --- /dev/null +++ b/streamlit/views/alerts.py @@ -0,0 +1,41 @@ +import streamlit as st +from databricks.sdk import WorkspaceClient +import pandas as pd + +w = WorkspaceClient() + +def get_all_alerts(): + alerts = [alert.__dict__ for alert in w.alerts.list()] + return pd.DataFrame(alerts) + +st.header("List alert", divider=True) +st.subheader("Enable users to display all alerts") +st.write("This receipt enables users to display all alerts.") + +tab_a, tab_b, tab_c = st.tabs(["**Try it**", "**Code snippet**", "**Requirements**"]) + +with tab_a: + st.write("""### See all alerts""") + st.dataframe(get_all_alerts()) + +table = [ + { + "type": "Get alerts", + "param": "", + "description": "Get all alerts", + "code": """ + ```python + def get_all_alerts(): + alerts = [alert.__dict__ for alert in w.alerts.list()] + return pd.DataFrame(alerts) + ``` + """, + }, +] + + +with tab_b: + for i, row in enumerate(table): + with st.expander(f"**{row['type']} ({row['param']})**", expanded=(i == 0)): + st.markdown(f"**Description**: {row['description']}") + st.markdown(row["code"]) From 82ae1db950d4afd7a9efdad3034affc533bf996e Mon Sep 17 00:00:00 2001 From: Pascal Vogel <100202393+pbv0@users.noreply.github.com> Date: Wed, 12 Nov 2025 20:59:18 +0100 Subject: [PATCH 4/5] Minor changes to maps + add charts visualizations + add docs --- .../streamlit/visualizations/_category_.json | 9 ++ .../visualizations/visualizations_charts.mdx | 131 ++++++++++++++++++ .../visualizations/visualizations_map.mdx | 112 +++++++++++++++ 3 files changed, 252 insertions(+) create mode 100644 docs/docs/streamlit/visualizations/_category_.json create mode 100644 docs/docs/streamlit/visualizations/visualizations_charts.mdx create mode 100644 
docs/docs/streamlit/visualizations/visualizations_map.mdx diff --git a/docs/docs/streamlit/visualizations/_category_.json b/docs/docs/streamlit/visualizations/_category_.json new file mode 100644 index 0000000..3fe64c2 --- /dev/null +++ b/docs/docs/streamlit/visualizations/_category_.json @@ -0,0 +1,9 @@ +{ + "label": "Visualizations", + "position": 5, + "link": { + "type": "generated-index", + "description": "Display data and collect user input using charts and maps." + } +} + diff --git a/docs/docs/streamlit/visualizations/visualizations_charts.mdx b/docs/docs/streamlit/visualizations/visualizations_charts.mdx new file mode 100644 index 0000000..138b509 --- /dev/null +++ b/docs/docs/streamlit/visualizations/visualizations_charts.mdx @@ -0,0 +1,131 @@ +--- +sidebar_position: 1 +--- + +# Charts + +Use this recipe to visualize data using Streamlit's built-in chart components: area charts, line charts, and bar charts. This example demonstrates loading data from a Unity Catalog table and creating various business insights through different chart visualizations. 
+ +## Code snippet + +### Load data from a table + +```python title="app.py" +import streamlit as st +from databricks import sql +from databricks.sdk.core import Config +from databricks.sdk import WorkspaceClient +import pandas as pd + +cfg = Config() +w = WorkspaceClient() + +# List available SQL warehouses +warehouses = w.warehouses.list() +warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses} + +# Connect to SQL warehouse +@st.cache_resource +def get_connection(http_path): + return sql.connect( + server_hostname=cfg.host, + http_path=http_path, + credentials_provider=lambda: cfg.authenticate, + ) + +# Read table +def read_table(table_name, conn): + with conn.cursor() as cursor: + cursor.execute(f"SELECT * FROM {table_name} LIMIT 1000") + return cursor.fetchall_arrow().to_pandas() + +# Get data +warehouse_name = "your_warehouse_name" +table_name = "samples.nyctaxi.trips" + +http_path = warehouse_paths[warehouse_name] +conn = get_connection(http_path) +df = read_table(table_name, conn) + +# Process datetime columns +df["tpep_pickup_datetime"] = pd.to_datetime(df["tpep_pickup_datetime"]) +df["tpep_dropoff_datetime"] = pd.to_datetime(df["tpep_dropoff_datetime"]) +df["pickup_hour"] = df["tpep_pickup_datetime"].dt.hour +df["trip_duration_minutes"] = (df["tpep_dropoff_datetime"] - df["tpep_pickup_datetime"]).dt.total_seconds() / 60 +``` + +### Demand analysis: Trips by hour + +```python title="app.py" +import streamlit as st + +# Count trips by hour to understand demand patterns +hourly_demand = df["pickup_hour"].value_counts().sort_index() +st.bar_chart(hourly_demand) + +peak_hour = hourly_demand.idxmax() +st.info(f"Peak demand hour: {peak_hour}:00 with {hourly_demand.max()} trips") +``` + +### Revenue analysis: Average fare by hour + +```python title="app.py" +import streamlit as st + +# Analyze when fares are highest +avg_fare_by_hour = df.groupby("pickup_hour")["fare_amount"].mean() +st.line_chart(avg_fare_by_hour) + +best_hour = 
avg_fare_by_hour.idxmax() +st.success(f"Best earning hour: {best_hour}:00") +``` + +### Location analysis: Top pickup zones + +```python title="app.py" +import streamlit as st + +# Identify high-demand pickup locations +top_pickups = df["pickup_zip"].value_counts().head(15) +st.bar_chart(top_pickups) +``` + +### Cumulative revenue over time + +```python title="app.py" +import streamlit as st + +# Track total revenue accumulation +revenue_df = df.set_index("tpep_pickup_datetime")[["fare_amount"]].sort_index() +revenue_df["cumulative_revenue"] = revenue_df["fare_amount"].cumsum() +st.area_chart(revenue_df["cumulative_revenue"]) +``` + +## Resources + +- [SQL warehouse](https://docs.databricks.com/aws/en/compute/sql-warehouse/) +- [Unity Catalog table](https://docs.databricks.com/aws/en/tables/) + +## Permissions + +Your [app service principal](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/#how-does-databricks-apps-manage-authorization) needs the following permissions: + +- `CAN USE` on the SQL warehouse +- `SELECT` on the Unity Catalog table + +See [Unity Catalog privileges and securable objects](https://docs.databricks.com/aws/en/data-governance/unity-catalog/manage-privileges/privileges) for more information. 
+ +## Dependencies + +- [Streamlit](https://pypi.org/project/streamlit/) - `streamlit` +- [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk` +- [Databricks SQL Connector](https://pypi.org/project/databricks-sql-connector/) - `databricks-sql-connector` +- [Pandas](https://pypi.org/project/pandas/) - `pandas` + +```python title="requirements.txt" +streamlit +databricks-sdk +databricks-sql-connector +pandas +``` + diff --git a/docs/docs/streamlit/visualizations/visualizations_map.mdx b/docs/docs/streamlit/visualizations/visualizations_map.mdx new file mode 100644 index 0000000..bd23c60 --- /dev/null +++ b/docs/docs/streamlit/visualizations/visualizations_map.mdx @@ -0,0 +1,112 @@ +--- +sidebar_position: 2 +--- + +# Map display and interaction + +This recipe enables you to display geographic data on a map and collect user geo input through interactive map drawing. You can load location data from Unity Catalog tables or use the drawing tools to capture points, polygons, and geofences from users. 
+ +## Code snippet + +### Display geo data from a table + +```python title="app.py" +import streamlit as st +from databricks import sql +from databricks.sdk.core import Config +from databricks.sdk import WorkspaceClient +import pandas as pd + +cfg = Config() +w = WorkspaceClient() + +# List available SQL warehouses +warehouses = w.warehouses.list() +warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses} + +# Connect to SQL warehouse +def get_connection(http_path): + return sql.connect( + server_hostname=cfg.host, + http_path=http_path, + credentials_provider=lambda: cfg.authenticate, + ) + +# Read table +def read_table(table_name, conn): + with conn.cursor() as cursor: + cursor.execute(f"SELECT * FROM {table_name}") + return cursor.fetchall_arrow().to_pandas() + +# Get data and display on map +warehouse_name = "your_warehouse_name" +table_name = "samples.accuweather.forecast_daily_calendar_metric" + +http_path = warehouse_paths[warehouse_name] +conn = get_connection(http_path) +df = read_table(table_name, conn) + +# Display map with latitude/longitude columns +st.map(df, latitude="latitude", longitude="longitude") +``` + +### Collect user geo input + +```python title="app.py" +import streamlit as st +from streamlit_folium import st_folium +import folium +from folium.plugins import Draw + +# Create a map centered on a location +m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) + +# Enable drawing tools (set True for the tools you want to enable) +draw = Draw( + draw_options={ + "marker": True, # For collecting points + "polygon": True, # For collecting geofences/polygons + "polyline": True, # For collecting polylines + "rectangle": True, # For collecting rectangles + "circle": True, # For collecting circles + "circlemarker": False, + }, + edit_options={"edit": True}, +) +draw.add_to(m) +output = st_folium(m, width=700, height=500) + +# Access the drawn geometry +if output["last_active_drawing"] and "geometry" in 
output["last_active_drawing"]: + geometry = output["last_active_drawing"]["geometry"] + st.json(geometry) +``` + +## Resources + +- [SQL warehouse](https://docs.databricks.com/aws/en/compute/sql-warehouse/) _(optional, only for reading table data)_ +- [Unity Catalog table](https://docs.databricks.com/aws/en/tables/) _(optional, only for reading table data)_ + +## Permissions + +Your [app service principal](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/#how-does-databricks-apps-manage-authorization) needs the following permissions: + +- `CAN USE` on the SQL warehouse _(only required if reading data from tables)_ +- `SELECT` on the Unity Catalog table _(only required if reading data from tables)_ + +See [Unity Catalog privileges and securable objects](https://docs.databricks.com/aws/en/data-governance/unity-catalog/manage-privileges/privileges) for more information. + +## Dependencies + +- [Streamlit](https://pypi.org/project/streamlit/) - `streamlit` +- [Streamlit Folium](https://pypi.org/project/streamlit-folium/) - `streamlit-folium` +- [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk` _(for table data)_ +- [Databricks SQL Connector](https://pypi.org/project/databricks-sql-connector/) - `databricks-sql-connector` _(for table data)_ + +```python title="requirements.txt" +streamlit +streamlit-folium +databricks-sdk +databricks-sql-connector +``` + From e61047d96008609561c454c6a4f1a6f1ffea329b Mon Sep 17 00:00:00 2001 From: Pascal Vogel <100202393+pbv0@users.noreply.github.com> Date: Wed, 12 Nov 2025 21:05:00 +0100 Subject: [PATCH 5/5] Remove alerts --- streamlit/requirements.txt | 1 + streamlit/view_groups.py | 47 ++- streamlit/views/alerts.py | 41 --- streamlit/views/maps_display.py | 130 -------- streamlit/views/maps_draw.py | 204 ------------- streamlit/views/visualizations_charts.py | 366 +++++++++++++++++++++++ streamlit/views/visualizations_map.py | 252 ++++++++++++++++ 7 files changed, 636 insertions(+), 405 
deletions(-) delete mode 100644 streamlit/views/alerts.py delete mode 100644 streamlit/views/maps_display.py delete mode 100644 streamlit/views/maps_draw.py create mode 100644 streamlit/views/visualizations_charts.py create mode 100644 streamlit/views/visualizations_map.py diff --git a/streamlit/requirements.txt b/streamlit/requirements.txt index a292a30..09e7cf8 100644 --- a/streamlit/requirements.txt +++ b/streamlit/requirements.txt @@ -3,3 +3,4 @@ databricks-sdk[openai]==0.46.0 databricks-sql-connector==4.0.0 pandas==2.2.3 streamlit==1.41.1 +streamlit-folium==0.25.3 diff --git a/streamlit/view_groups.py b/streamlit/view_groups.py index c7cf253..70c6bb7 100644 --- a/streamlit/view_groups.py +++ b/streamlit/view_groups.py @@ -105,36 +105,6 @@ }, ], }, - - { - "title": "Alerts", - "views": [ - { - "label": "List alerts", - "help": "List all alerts.", - "page": "views/alerts.py", - "icon": ":material/notifications_none:", - }, - ], - }, - { - "title": "Geo Visualization", - "views": [ - { - "label": "Display Geo Input", - "help": "Display geo information on a map.", - "page": "views/maps_display.py", - "icon": ":material/globe:", - }, - { - "label": "User Geo Input", - "help": "Enable users to select own geo input.", - "page": "views/maps_draw.py", - "icon": ":material/pin_drop:", - }, - ], - }, - { "title": "Unity Catalog", "views": [ @@ -163,4 +133,21 @@ }, ], }, + { + "title": "Visualizations", + "views": [ + { + "label": "Charts", + "help": "Visualize data using Streamlit's built-in chart components.", + "page": "views/visualizations_charts.py", + "icon": ":material/bar_chart:", + }, + { + "label": "Map display and interaction", + "help": "Display geo information on a map and allow users to draw on the map.", + "page": "views/visualizations_map.py", + "icon": ":material/globe:", + }, + ], + }, ] diff --git a/streamlit/views/alerts.py b/streamlit/views/alerts.py deleted file mode 100644 index b68fc25..0000000 --- a/streamlit/views/alerts.py +++ /dev/null @@ -1,41 
+0,0 @@ -import streamlit as st -from databricks.sdk import WorkspaceClient -import pandas as pd - -w = WorkspaceClient() - -def get_all_alerts(): - alerts = [alert.__dict__ for alert in w.alerts.list()] - return pd.DataFrame(alerts) - -st.header("List alert", divider=True) -st.subheader("Enable users to display all alerts") -st.write("This receipt enables users to display all alerts.") - -tab_a, tab_b, tab_c = st.tabs(["**Try it**", "**Code snippet**", "**Requirements**"]) - -with tab_a: - st.write("""### See all alerts""") - st.dataframe(get_all_alerts()) - -table = [ - { - "type": "Get alerts", - "param": "", - "description": "Get all alerts", - "code": """ - ```python - def get_all_alerts(): - alerts = [alert.__dict__ for alert in w.alerts.list()] - return pd.DataFrame(alerts) - ``` - """, - }, -] - - -with tab_b: - for i, row in enumerate(table): - with st.expander(f"**{row['type']} ({row['param']})**", expanded=(i == 0)): - st.markdown(f"**Description**: {row['description']}") - st.markdown(row["code"]) diff --git a/streamlit/views/maps_display.py b/streamlit/views/maps_display.py deleted file mode 100644 index 571817d..0000000 --- a/streamlit/views/maps_display.py +++ /dev/null @@ -1,130 +0,0 @@ - -import streamlit as st -from databricks import sql -from databricks.sdk.core import Config -import pandas as pd - - -st.header("Geo Visualization", divider=True) -st.subheader("Read a table and display as a map") -st.write("This receipt loads a table from a delta table and displays the data on a map.") - -cfg = Config() - -@st.cache_resource -def get_connection(http_path): - return sql.connect( - server_hostname=cfg.host, - http_path=http_path, - credentials_provider=lambda: cfg.authenticate, - ) - - -def read_table(table_name, conn): - with conn.cursor() as cursor: - query = f"SELECT * FROM {table_name}" - cursor.execute(query) - return cursor.fetchall_arrow().to_pandas() - -cities = [ - {"name": "New York", "latitude": 40.7128, "longitude": -74.0060}, - {"name": 
"Los Angeles", "latitude": 34.0522, "longitude": -118.2437}, - {"name": "London", "latitude": 51.5074, "longitude": -0.1278}, - {"name": "Tokyo", "latitude": 35.6895, "longitude": 139.6917}, - {"name": "Sydney", "latitude": -33.8688, "longitude": 151.2093}, - {"name": "Paris", "latitude": 48.8566, "longitude": 2.3522}, - {"name": "Dubai", "latitude": 25.276987, "longitude": 55.296249}, - {"name": "Rio de Janeiro", "latitude": -22.9068, "longitude": -43.1729}, - {"name": "Moscow", "latitude": 55.7558, "longitude": 37.6173}, - {"name": "Cape Town", "latitude": -33.9249, "longitude": 18.4241} -] - -data = pd.DataFrame(cities) - - -tab_a, tab_b, tab_c, tab_d = st.tabs(["**Try it**","**Try it with a delta table**", "**Code snippet**", "**Requirements**"]) - -with tab_a: - col1, col2 = st.columns(2) - with col1: - if st.button("Try It with a random sample"): - st.map(data, latitude="Latitude", longitude="Longitude") - st.dataframe(data) - - -with tab_b: - col1, col2 = st.columns(2) - with col1: - http_path_input = st.text_input( - "Enter your Databricks HTTP Path:", placeholder="/sql/1.0/warehouses/xxxxxx" - ) - - table_name = st.text_input( - "Specify a Unity Catalog table name:", placeholder="catalog.schema.table" - ) - st.info("For displaying a sample, please use the table samples.accuweather.forecast_daily_calendar_metric") - - if http_path_input and table_name: - conn = get_connection(http_path_input) - df = read_table(table_name, conn) - - st.dataframe(df) - - if 'latitude' in df.columns and 'longitude' in df.columns: - df['latitude'] = pd.to_numeric(df['latitude'], errors='coerce') - df['longitude'] = pd.to_numeric(df['longitude'], errors='coerce') - df = df.dropna(subset=['latitude', 'longitude']) - - if not df.empty: - st.map(df, latitude="latitude", longitude="longitude") - else: - st.warning("no longitude, latitude found in the table") - - -table = [ - { - "type": "Get Tables", - "param": "Get long lat from the tables", - "description": "Get long lat from the 
tables.", - "code": """ - ```python - def read_table(table_name, conn): - with conn.cursor() as cursor: - query = f"SELECT * FROM {table_name} LIMIT 1000" - cursor.execute(query) - return cursor.fetchall_arrow().to_pandas() - ``` - """, - }, - { - "type": "Display Maps", - "param": "Display pandas df as maü", - "description": "Display the streamlit map", - "code": """ - ```python - conn = get_connection(http_path_input) - df = read_table(table_name, conn) - - st.dataframe(df) - - if 'latitude' in df.columns and 'longitude' in df.columns: - df['latitude'] = pd.to_numeric(df['latitude'], errors='coerce') - df['longitude'] = pd.to_numeric(df['longitude'], errors='coerce') - df = df.dropna(subset=['latitude', 'longitude']) - - if not df.empty: - st.map(df, latitude="latitude", longitude="longitude") - else: - st.warning("no longitude, latitude found in the table") - - - ``` - """, - }, -] - -with tab_c: - for i, row in enumerate(table): - with st.expander(f"**{row['type']} ({row['param']})**", expanded=(i == 0)): - st.markdown(f"**Description**: {row['description']}") - st.markdown(row["code"]) diff --git a/streamlit/views/maps_draw.py b/streamlit/views/maps_draw.py deleted file mode 100644 index 6626691..0000000 --- a/streamlit/views/maps_draw.py +++ /dev/null @@ -1,204 +0,0 @@ - -import streamlit as st -from streamlit_folium import st_folium -import folium -from folium.plugins import Draw - - - -st.header("Collect user geo input", divider=True) -st.subheader("Enable user to select geo input") -st.write("This receipt enables users to pick geo points or draw geofences to futher be used.") - -tab_a, tab_b, tab_c = st.tabs(["**Try it**", "**Code snippet**", "**Requirements**"]) - -with tab_a: - choice=st.selectbox("### Select an option",['Points', 'Geofences','Polyline', 'Rectangle','Circle']) - - col1, col2 = st.columns(2) - - with col1: - - st.write("## Select a map input") - m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) # Example: San Francisco - draw = 
Draw( - draw_options={ - "polyline": True if choice=='Polyline' else False, - "rectangle": True if choice=='Rectangle' else False, - "circle": True if choice=='Circle' else False, - "marker": True if choice=='Points' else False , - "circlemarker": False, - "polygon": True if choice=='Geofences' else False, - }, - edit_options={"edit": True}, - ) - draw.add_to(m) - output = st_folium(m, width=700, height=500) - - st.write("For usage of the app, select a point on the map.") - with st.expander("Click to see the last active selected map input", expanded=False): - if output["last_active_drawing"] and "geometry" in output["last_active_drawing"]: - st.json(output["last_active_drawing"]["geometry"]) - - -table = [ - { - "type": "User Input for points", - "param": "", - "description": "User input for long, lat.", - "code": """ - ```python - m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) # Example: San Francisco - draw = Draw( - draw_options={ - "polyline": False, - "rectangle": False, - "circle": False, - "marker": True, - "circlemarker": False, - "polygon":False, - }, - edit_options={"edit": True}, - ) - draw.add_to(m) - output = st_folium(m, width=700, height=500) - ``` - """, - }, - { - "type": "User input for geofences", - "param": "", - "description": "Enable users to draw geo fences", - "code": """ - ```python - m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) # Example: San Francisco - draw = Draw( - draw_options={ - "polyline": False, - "rectangle": False, - "circle": False, - "marker": False, - "circlemarker": False, - "polygon":True, - }, - edit_options={"edit": True}, - ) - draw.add_to(m) - output = st_folium(m, width=700, height=500) - ``` - """, - }, - { - "type": "User input for polylines", - "param": "", - "description": "Enable users to draw polylines", - "code": """ - ```python - m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) # Example: San Francisco - draw = Draw( - draw_options={ - "polyline": True, - "rectangle": 
False, - "circle": False, - "marker": False, - "circlemarker": False, - "polygon":False, - }, - edit_options={"edit": True}, - ) - draw.add_to(m) - output = st_folium(m, width=700, height=500) - ``` - """, - }, - { - "type": "User input for rectangles", - "param": "", - "description": "Enable users to draw rectangles", - "code": """ - ```python - m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) # Example: San Francisco - draw = Draw( - draw_options={ - "polyline": False, - "rectangle": True, - "circle": False, - "marker": False, - "circlemarker": False, - "polygon":False, - }, - edit_options={"edit": True}, - ) - draw.add_to(m) - output = st_folium(m, width=700, height=500) - ``` - """, - }, - { - "type": "User input for circles", - "param": "", - "description": "Enable users to draw circles", - "code": """ - ```python - m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) # Example: San Francisco - draw = Draw( - draw_options={ - "polyline": False, - "rectangle": False, - "circle": True, - "marker": False, - "circlemarker": False, - "polygon":False, - }, - edit_options={"edit": True}, - ) - draw.add_to(m) - output = st_folium(m, width=700, height=500) - ``` - """, - }, - { - "type": "User input for markers", - "param": "", - "description": "Enable users to draw markers", - "code": """ - ```python - m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) # Example: San Francisco - draw = Draw( - draw_options={ - "polyline": False, - "rectangle": False, - "circle": False, - "marker": True, - "circlemarker": False, - "polygon":False, - }, - edit_options={"edit": True}, - ) - draw.add_to(m) - output = st_folium(m, width=700, height=500) - ``` - """, - }, - { - "type": "Output user input", - "param": "", - "description": "Display user input as json.", - "code": """ - ```python - st.write("For usage of the app, select a point on the map.") - with st.expander("Click to see the last active selected map input", expanded=False): - if 
output["last_active_drawing"] and "geometry" in output["last_active_drawing"]: - st.json(output["last_active_drawing"]["geometry"]) - - ``` - """, - }, - -] - -with tab_b: - for i, row in enumerate(table): - with st.expander(f"**{row['type']} ({row['param']})**", expanded=(i == 0)): - st.markdown(f"**Description**: {row['description']}") - st.markdown(row["code"]) diff --git a/streamlit/views/visualizations_charts.py b/streamlit/views/visualizations_charts.py new file mode 100644 index 0000000..e39e041 --- /dev/null +++ b/streamlit/views/visualizations_charts.py @@ -0,0 +1,366 @@ +import pandas as pd +from databricks import sql +from databricks.sdk import WorkspaceClient +from databricks.sdk.core import Config + +import streamlit as st + +st.header(body="Visualizations", divider=True) +st.subheader("Charts") +st.write( + "This recipe demonstrates how to visualize data using Streamlit's built-in chart components: area charts, line charts, and bar charts." +) + +cfg = Config() + +w = WorkspaceClient() + +warehouses = w.warehouses.list() + +warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses} + + +@st.cache_resource +def get_connection(http_path): + return sql.connect( + server_hostname=cfg.host, + http_path=http_path, + credentials_provider=lambda: cfg.authenticate, + ) + + +def read_table(table_name, conn): + with conn.cursor() as cursor: + query = f"SELECT * FROM {table_name} LIMIT 1000" + cursor.execute(query) + return cursor.fetchall_arrow().to_pandas() + + +tab_a, tab_b, tab_c = st.tabs(["**Try it**", "**Code snippet**", "**Requirements**"]) + +with tab_a: + st.markdown("### Load data and visualize with charts") + st.write( + "Select a warehouse and load data from a Unity Catalog table to visualize with different chart types." 
+ ) + + warehouse_selection = st.selectbox( + "Select a SQL Warehouse:", + options=[""] + list(warehouse_paths.keys()), + help="Warehouse list populated from your workspace using app service principal.", + ) + + st.markdown("**Table:** `samples.nyctaxi.trips`") + table_name = "samples.nyctaxi.trips" + + if st.button("Load Data", type="primary"): + if not warehouse_selection: + st.warning("Please select a SQL warehouse") + else: + with st.spinner("Loading data..."): + try: + http_path = warehouse_paths[warehouse_selection] + conn = get_connection(http_path) + df = read_table(table_name, conn) + + if df.empty: + st.warning("The query returned no data") + else: + st.success(f"Loaded {len(df)} rows from {table_name}") + + # Store data in session state for chart display + st.session_state.chart_data = df + + except Exception as e: + st.error(f"Error loading data: {str(e)}") + + # Display charts if data is loaded + if "chart_data" in st.session_state: + df = st.session_state.chart_data + + # Process data for business insights + try: + df["tpep_pickup_datetime"] = pd.to_datetime(df["tpep_pickup_datetime"]) + df["tpep_dropoff_datetime"] = pd.to_datetime(df["tpep_dropoff_datetime"]) + df["pickup_hour"] = df["tpep_pickup_datetime"].dt.hour + df["pickup_day"] = df["tpep_pickup_datetime"].dt.day_name() + df["trip_duration_minutes"] = ( + df["tpep_dropoff_datetime"] - df["tpep_pickup_datetime"] + ).dt.total_seconds() / 60 + except Exception as e: + st.error(f"Error processing datetime columns: {str(e)}") + + st.divider() + st.markdown("### Data Preview") + st.dataframe(df.head(10), use_container_width=True) + + st.divider() + + # Create sub-tabs for different business insights + chart_tab1, chart_tab2, chart_tab3, chart_tab4, chart_tab5 = st.tabs( + [ + "Demand Patterns", + "Revenue Analysis", + "Trip Characteristics", + "Popular Locations", + "Time Analysis", + ] + ) + + with chart_tab1: + st.markdown("#### Demand by Hour of Day") + st.write("Understand peak demand hours to 
optimize fleet deployment") + + if "pickup_hour" in df.columns: + # Count trips by hour + hourly_demand = df["pickup_hour"].value_counts().sort_index() + st.bar_chart(hourly_demand, use_container_width=True) + + peak_hour = hourly_demand.idxmax() + st.info( + f"🚕 Peak demand hour: {peak_hour}:00 with {hourly_demand.max()} trips" + ) + else: + st.warning("Required columns not found in the data") + + with chart_tab2: + st.markdown("#### Revenue Patterns") + st.write("Track revenue trends and identify high-earning periods") + + col1, col2 = st.columns(2) + + with col1: + if "pickup_hour" in df.columns and "fare_amount" in df.columns: + st.markdown("**Average Fare by Hour**") + avg_fare_by_hour = df.groupby("pickup_hour")["fare_amount"].mean() + st.line_chart(avg_fare_by_hour, use_container_width=True) + + best_hour = avg_fare_by_hour.idxmax() + st.success( + f"💰 Best earning hour: {best_hour}:00 (${avg_fare_by_hour.max():.2f} avg)" + ) + + with col2: + if "tpep_pickup_datetime" in df.columns and "fare_amount" in df.columns: + st.markdown("**Total Revenue Over Time**") + revenue_df = df.set_index("tpep_pickup_datetime")[ + ["fare_amount"] + ].sort_index() + revenue_df["cumulative_revenue"] = revenue_df[ + "fare_amount" + ].cumsum() + st.area_chart( + revenue_df["cumulative_revenue"], use_container_width=True + ) + + with chart_tab3: + st.markdown("#### Trip Characteristics") + st.write("Analyze typical trip patterns to improve service") + + col1, col2 = st.columns(2) + + with col1: + if "trip_distance" in df.columns: + st.markdown("**Trip Distance Distribution**") + # Create histogram-style data + distance_bins = pd.cut(df["trip_distance"], bins=20) + distance_counts = distance_bins.value_counts().sort_index() + # Convert interval index to strings for charting + distance_counts.index = distance_counts.index.astype(str) + st.bar_chart(distance_counts, use_container_width=True) + + avg_distance = df["trip_distance"].mean() + st.info(f"📏 Average trip distance: 
{avg_distance:.2f} miles") + + with col2: + if "trip_duration_minutes" in df.columns: + st.markdown("**Trip Duration Distribution**") + # Filter out outliers (trips > 120 minutes) + duration_df = df[df["trip_duration_minutes"] <= 120] + duration_bins = pd.cut( + duration_df["trip_duration_minutes"], bins=20 + ) + duration_counts = duration_bins.value_counts().sort_index() + # Convert interval index to strings for charting + duration_counts.index = duration_counts.index.astype(str) + st.bar_chart(duration_counts, use_container_width=True) + + avg_duration = df["trip_duration_minutes"].mean() + st.info(f"⏱️ Average trip duration: {avg_duration:.1f} minutes") + + with chart_tab4: + st.markdown("#### Popular Locations") + st.write("Identify high-demand zones for strategic positioning") + + col1, col2 = st.columns(2) + + with col1: + if "pickup_zip" in df.columns: + st.markdown("**Top 15 Pickup Locations**") + top_pickups = df["pickup_zip"].value_counts().head(15) + st.bar_chart(top_pickups, use_container_width=True) + + with col2: + if "dropoff_zip" in df.columns: + st.markdown("**Top 15 Dropoff Locations**") + top_dropoffs = df["dropoff_zip"].value_counts().head(15) + st.bar_chart(top_dropoffs, use_container_width=True) + + with chart_tab5: + st.markdown("#### Time-Based Analysis") + st.write("Understand how trip patterns vary throughout the day") + + col1, col2 = st.columns(2) + + with col1: + if "pickup_hour" in df.columns and "trip_distance" in df.columns: + st.markdown("**Average Trip Distance by Hour**") + avg_distance_by_hour = df.groupby("pickup_hour")[ + "trip_distance" + ].mean() + st.line_chart(avg_distance_by_hour, use_container_width=True) + + with col2: + if ( + "pickup_hour" in df.columns + and "trip_duration_minutes" in df.columns + ): + st.markdown("**Average Trip Duration by Hour**") + avg_duration_by_hour = df.groupby("pickup_hour")[ + "trip_duration_minutes" + ].mean() + st.line_chart(avg_duration_by_hour, use_container_width=True) + +with tab_b: + 
st.markdown("### Load data from a table") + st.code( + """ +import streamlit as st +from databricks import sql +from databricks.sdk.core import Config +from databricks.sdk import WorkspaceClient +import pandas as pd + +cfg = Config() +w = WorkspaceClient() + +# List available SQL warehouses +warehouses = w.warehouses.list() +warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses} + +# Connect to SQL warehouse +@st.cache_resource +def get_connection(http_path): + return sql.connect( + server_hostname=cfg.host, + http_path=http_path, + credentials_provider=lambda: cfg.authenticate, + ) + +# Read table +def read_table(table_name, conn): + with conn.cursor() as cursor: + cursor.execute(f"SELECT * FROM {table_name} LIMIT 1000") + return cursor.fetchall_arrow().to_pandas() + +# Get data +warehouse_name = "your_warehouse_name" +table_name = "samples.nyctaxi.trips" + +http_path = warehouse_paths[warehouse_name] +conn = get_connection(http_path) +df = read_table(table_name, conn) + +# Process datetime columns +df["tpep_pickup_datetime"] = pd.to_datetime(df["tpep_pickup_datetime"]) +df["tpep_dropoff_datetime"] = pd.to_datetime(df["tpep_dropoff_datetime"]) +df["pickup_hour"] = df["tpep_pickup_datetime"].dt.hour +df["trip_duration_minutes"] = (df["tpep_dropoff_datetime"] - df["tpep_pickup_datetime"]).dt.total_seconds() / 60 + """, + language="python", + ) + + st.markdown("### Demand analysis: Trips by hour") + st.code( + """ +import streamlit as st + +# Count trips by hour to understand demand patterns +hourly_demand = df["pickup_hour"].value_counts().sort_index() +st.bar_chart(hourly_demand) + +peak_hour = hourly_demand.idxmax() +st.info(f"Peak demand hour: {peak_hour}:00 with {hourly_demand.max()} trips") + """, + language="python", + ) + + st.markdown("### Revenue analysis: Average fare by hour") + st.code( + """ +import streamlit as st + +# Analyze when fares are highest +avg_fare_by_hour = df.groupby("pickup_hour")["fare_amount"].mean() 
+st.line_chart(avg_fare_by_hour) + +best_hour = avg_fare_by_hour.idxmax() +st.success(f"Best earning hour: {best_hour}:00") + """, + language="python", + ) + + st.markdown("### Location analysis: Top pickup zones") + st.code( + """ +import streamlit as st + +# Identify high-demand pickup locations +top_pickups = df["pickup_zip"].value_counts().head(15) +st.bar_chart(top_pickups) + """, + language="python", + ) + + st.markdown("### Cumulative revenue over time") + st.code( + """ +import streamlit as st + +# Track total revenue accumulation +revenue_df = df.set_index("tpep_pickup_datetime")[["fare_amount"]].sort_index() +revenue_df["cumulative_revenue"] = revenue_df["fare_amount"].cumsum() +st.area_chart(revenue_df["cumulative_revenue"]) + """, + language="python", + ) + +with tab_c: + col1, col2, col3 = st.columns(3) + + with col1: + st.markdown( + """ + **Permissions (app service principal)** + * `CAN USE` on the SQL warehouse + * `SELECT` on the Unity Catalog table + """ + ) + with col2: + st.markdown( + """ + **Databricks resources** + * SQL warehouse + * Unity Catalog table + """ + ) + with col3: + st.markdown( + """ + **Dependencies** + * [Streamlit](https://pypi.org/project/streamlit/) - `streamlit` + * [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk` + * [Databricks SQL Connector](https://pypi.org/project/databricks-sql-connector/) - `databricks-sql-connector` + * [Pandas](https://pypi.org/project/pandas/) - `pandas` + """ + ) diff --git a/streamlit/views/visualizations_map.py b/streamlit/views/visualizations_map.py new file mode 100644 index 0000000..351294d --- /dev/null +++ b/streamlit/views/visualizations_map.py @@ -0,0 +1,252 @@ +import folium +import pandas as pd +from databricks import sql +from databricks.sdk import WorkspaceClient +from databricks.sdk.core import Config +from folium.plugins import Draw +from streamlit_folium import st_folium + +import streamlit as st + +st.header(body="Visualizations", divider=True) 
+st.subheader("Map display and interaction") +st.write( + "This recipe enables you to display geographic data on a map and collect user geo input through interactive map drawing." +) + +cfg = Config() + +w = WorkspaceClient() + +warehouses = w.warehouses.list() + +warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses} + + +@st.cache_resource +def get_connection(http_path): + return sql.connect( + server_hostname=cfg.host, + http_path=http_path, + credentials_provider=lambda: cfg.authenticate, + ) + + +def read_table(table_name, conn): + with conn.cursor() as cursor: + query = f"SELECT * FROM {table_name}" + cursor.execute(query) + return cursor.fetchall_arrow().to_pandas() + + +# Sample cities data +cities = [ + {"name": "New York", "latitude": 40.7128, "longitude": -74.0060}, + {"name": "Los Angeles", "latitude": 34.0522, "longitude": -118.2437}, + {"name": "London", "latitude": 51.5074, "longitude": -0.1278}, + {"name": "Tokyo", "latitude": 35.6895, "longitude": 139.6917}, + {"name": "Sydney", "latitude": -33.8688, "longitude": 151.2093}, + {"name": "Paris", "latitude": 48.8566, "longitude": 2.3522}, + {"name": "Dubai", "latitude": 25.276987, "longitude": 55.296249}, + {"name": "Rio de Janeiro", "latitude": -22.9068, "longitude": -43.1729}, + {"name": "Moscow", "latitude": 55.7558, "longitude": 37.6173}, + {"name": "Cape Town", "latitude": -33.9249, "longitude": 18.4241}, +] + +tab_a, tab_b, tab_c = st.tabs(["**Try it**", "**Code snippet**", "**Requirements**"]) + +with tab_a: + # Sub-tabs for different functionalities + subtab1, subtab2 = st.tabs(["Display geo data", "Draw on the map"]) + + with subtab1: + st.markdown("### Display data on a map") + st.write( + "Load a table from a Delta table and display the geographic data on a map." 
+ ) + + display_option = st.radio( + "Choose data source:", + ["Sample data", "Load from a table"], + horizontal=True, + ) + + if display_option == "Sample data": + data = pd.DataFrame(cities) + if st.button("Display sample data on map"): + st.map(data, latitude="latitude", longitude="longitude") + st.dataframe(data) + else: + warehouse_selection = st.selectbox( + "Select a SQL Warehouse:", + options=[""] + list(warehouse_paths.keys()), + help="Warehouse list populated from your workspace using app service principal.", + ) + + table_name = st.text_input( + "Specify a Unity Catalog table name:", + value="samples.accuweather.forecast_daily_calendar_metric", + help="Use this example table or input your own", + ) + + if warehouse_selection and table_name: + http_path = warehouse_paths[warehouse_selection] + conn = get_connection(http_path) + df = read_table(table_name, conn) + + st.dataframe(df) + + if "latitude" in df.columns and "longitude" in df.columns: + df["latitude"] = pd.to_numeric(df["latitude"], errors="coerce") + df["longitude"] = pd.to_numeric(df["longitude"], errors="coerce") + df = df.dropna(subset=["latitude", "longitude"]) + + if not df.empty: + st.map(df, latitude="latitude", longitude="longitude") + else: + st.warning("No longitude, latitude found in the table") + + with subtab2: + st.markdown("### Draw on the map") + st.write("Enable users to pick geo points or draw geofences to be used.") + + choice = st.selectbox( + "Select an input type", + ["Points", "Geofences", "Polyline", "Rectangle", "Circle"], + ) + + st.write("Select points on the map below:") + m = folium.Map( + location=[37.7749, -122.4194], zoom_start=13 + ) # Example: San Francisco + draw = Draw( + draw_options={ + "polyline": True if choice == "Polyline" else False, + "rectangle": True if choice == "Rectangle" else False, + "circle": True if choice == "Circle" else False, + "marker": True if choice == "Points" else False, + "circlemarker": False, + "polygon": True if choice == 
"Geofences" else False, + }, + edit_options={"edit": True}, + ) + draw.add_to(m) + output = st_folium(m, width=700, height=500) + + with st.expander( + "Click to see the last active selected map input", expanded=False + ): + if ( + output["last_active_drawing"] + and "geometry" in output["last_active_drawing"] + ): + st.json(output["last_active_drawing"]["geometry"]) + +with tab_b: + st.markdown("### Display geo data from a table") + st.code( + """ +import streamlit as st +from databricks import sql +from databricks.sdk.core import Config +from databricks.sdk import WorkspaceClient +import pandas as pd + +cfg = Config() +w = WorkspaceClient() + +# List available SQL warehouses +warehouses = w.warehouses.list() +warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses} + +# Connect to SQL warehouse +def get_connection(http_path): + return sql.connect( + server_hostname=cfg.host, + http_path=http_path, + credentials_provider=lambda: cfg.authenticate, + ) + +# Read table +def read_table(table_name, conn): + with conn.cursor() as cursor: + cursor.execute(f"SELECT * FROM {table_name}") + return cursor.fetchall_arrow().to_pandas() + +# Get data and display on map +warehouse_name = "your_warehouse_name" +table_name = "samples.accuweather.forecast_daily_calendar_metric" + +http_path = warehouse_paths[warehouse_name] +conn = get_connection(http_path) +df = read_table(table_name, conn) + +# Display map with latitude/longitude columns +st.map(df, latitude="latitude", longitude="longitude") + """ + ) + + st.markdown("### Collect user geo input") + st.code( + """ +import streamlit as st +from streamlit_folium import st_folium +import folium +from folium.plugins import Draw + +# Create a map centered on a location +m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) + +# Enable drawing tools (set True for the tools you want to enable) +draw = Draw( + draw_options={ + "marker": True, # For collecting points + "polygon": True, # For collecting 
geofences/polygons + "polyline": True, # For collecting polylines + "rectangle": True, # For collecting rectangles + "circle": True, # For collecting circles + "circlemarker": False, + }, + edit_options={"edit": True}, +) +draw.add_to(m) +output = st_folium(m, width=700, height=500) + +# Access the drawn geometry +if output["last_active_drawing"] and "geometry" in output["last_active_drawing"]: + geometry = output["last_active_drawing"]["geometry"] + st.json(geometry) + """ + ) + +with tab_c: + col1, col2, col3 = st.columns(3) + + with col1: + st.markdown( + """ + **Permissions (app service principal)** + * `CAN USE` on the SQL warehouse + * `SELECT` on the Unity Catalog table + + _Note: Only required if reading data from tables_ + """ + ) + with col2: + st.markdown( + """ + **Databricks resources** + * SQL warehouse _(optional, only for reading table data)_ + * Unity Catalog table _(optional, only for reading table data)_ + """ + ) + with col3: + st.markdown( + """ + **Dependencies** + * [Streamlit](https://pypi.org/project/streamlit/) - `streamlit` + * [Streamlit Folium](https://pypi.org/project/streamlit-folium/) - `streamlit-folium` + * [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk` _(for table data)_ + * [Databricks SQL Connector](https://pypi.org/project/databricks-sql-connector/) - `databricks-sql-connector` _(for table data)_ + """ + )