diff --git a/docs/docs/streamlit/visualizations/_category_.json b/docs/docs/streamlit/visualizations/_category_.json
new file mode 100644
index 0000000..3fe64c2
--- /dev/null
+++ b/docs/docs/streamlit/visualizations/_category_.json
@@ -0,0 +1,9 @@
+{
+  "label": "Visualizations",
+  "position": 5,
+  "link": {
+    "type": "generated-index",
+    "description": "Display data and collect user input using charts and maps."
+  }
+}
+
diff --git a/docs/docs/streamlit/visualizations/visualizations_charts.mdx b/docs/docs/streamlit/visualizations/visualizations_charts.mdx
new file mode 100644
index 0000000..138b509
--- /dev/null
+++ b/docs/docs/streamlit/visualizations/visualizations_charts.mdx
@@ -0,0 +1,186 @@
+---
+sidebar_position: 1
+---
+
+# Charts
+
+Use this recipe to visualize data with Streamlit's built-in chart components: area charts, line charts, and bar charts. The example loads data from a Unity Catalog table and surfaces business insights through different chart types.
+
+## Code snippet
+
+### Load data from a table
+
+```python title="app.py"
+import streamlit as st
+from databricks import sql
+from databricks.sdk.core import Config
+from databricks.sdk import WorkspaceClient
+import pandas as pd
+
+cfg = Config()
+w = WorkspaceClient()
+
+# List available SQL warehouses
+warehouses = w.warehouses.list()
+warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses}
+
+# Connect to SQL warehouse
+@st.cache_resource
+def get_connection(http_path):
+    return sql.connect(
+        server_hostname=cfg.host,
+        http_path=http_path,
+        credentials_provider=lambda: cfg.authenticate,
+    )
+
+# Read table
+def read_table(table_name, conn):
+    with conn.cursor() as cursor:
+        cursor.execute(f"SELECT * FROM {table_name} LIMIT 1000")
+        return cursor.fetchall_arrow().to_pandas()
+
+# Get data
+warehouse_name = "your_warehouse_name"
+table_name = "samples.nyctaxi.trips"
+
+http_path = warehouse_paths[warehouse_name]
+conn = get_connection(http_path)
+df = read_table(table_name, conn)
+
+# Process datetime columns
+df["tpep_pickup_datetime"] = pd.to_datetime(df["tpep_pickup_datetime"])
+df["tpep_dropoff_datetime"] = pd.to_datetime(df["tpep_dropoff_datetime"])
+df["pickup_hour"] = df["tpep_pickup_datetime"].dt.hour
+df["trip_duration_minutes"] = (df["tpep_dropoff_datetime"] - df["tpep_pickup_datetime"]).dt.total_seconds() / 60
+```
+
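+The connection is cached with `st.cache_resource`, but the query itself still runs on every script rerun. If the table is large, you can additionally cache the query result with `st.cache_data`. This is a minimal sketch that reuses the helpers defined above; the `read_table_cached` name and the 10-minute `ttl` are illustrative:
+
+```python title="app.py"
+import streamlit as st
+
+# Cache the resulting DataFrame for 10 minutes, keyed on the function arguments,
+# so reruns within the TTL don't re-query the warehouse.
+@st.cache_data(ttl=600)
+def read_table_cached(table_name: str, http_path: str):
+    conn = get_connection(http_path)
+    return read_table(table_name, conn)
+```
+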
revenue_df["fare_amount"].cumsum() +st.area_chart(revenue_df["cumulative_revenue"]) +``` + +## Resources + +- [SQL warehouse](https://docs.databricks.com/aws/en/compute/sql-warehouse/) +- [Unity Catalog table](https://docs.databricks.com/aws/en/tables/) + +## Permissions + +Your [app service principal](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/#how-does-databricks-apps-manage-authorization) needs the following permissions: + +- `CAN USE` on the SQL warehouse +- `SELECT` on the Unity Catalog table + +See Unity [Catalog privileges and securable objects](https://docs.databricks.com/aws/en/data-governance/unity-catalog/manage-privileges/privileges) for more information. + +## Dependencies + +- [Streamlit](https://pypi.org/project/streamlit/) - `streamlit` +- [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk` +- [Databricks SQL Connector](https://pypi.org/project/databricks-sql-connector/) - `databricks-sql-connector` +- [Pandas](https://pypi.org/project/pandas/) - `pandas` + +```python title="requirements.txt" +streamlit +databricks-sdk +databricks-sql-connector +pandas +``` + diff --git a/docs/docs/streamlit/visualizations/visualizations_map.mdx b/docs/docs/streamlit/visualizations/visualizations_map.mdx new file mode 100644 index 0000000..bd23c60 --- /dev/null +++ b/docs/docs/streamlit/visualizations/visualizations_map.mdx @@ -0,0 +1,112 @@ +--- +sidebar_position: 2 +--- + +# Map display and interaction + +This recipe enables you to display geographic data on a map and collect user geo input through interactive map drawing. You can load location data from Unity Catalog tables or use the drawing tools to capture points, polygons, and geofences from users. + +## Code snippet + +### Display geo data from a table + +```python title="app.py" +import streamlit as st +from databricks import sql +from databricks.sdk.core import Config +from databricks.sdk import WorkspaceClient +import pandas as pd + +cfg = Config() +w = WorkspaceClient() + +# List available SQL warehouses +warehouses = w.warehouses.list() +warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses} + +# Connect to SQL warehouse +def get_connection(http_path): + return sql.connect( + server_hostname=cfg.host, + http_path=http_path, + credentials_provider=lambda: cfg.authenticate, + ) + +# Read table +def read_table(table_name, conn): + with conn.cursor() as cursor: + cursor.execute(f"SELECT * FROM {table_name}") + return cursor.fetchall_arrow().to_pandas() + +# Get data and display on map +warehouse_name = "your_warehouse_name" +table_name = "samples.accuweather.forecast_daily_calendar_metric" + +http_path = warehouse_paths[warehouse_name] +conn = get_connection(http_path) +df = read_table(table_name, conn) + +# Display map with latitude/longitude columns +st.map(df, latitude="latitude", longitude="longitude") +``` + +### Collect user geo input + +```python title="app.py" +import streamlit as st +from streamlit_folium import st_folium +import folium +from folium.plugins import Draw + +# Create a map centered on a location +m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) + +# Enable drawing tools (set True for the tools you want to enable) +draw = Draw( + draw_options={ + "marker": True, # For collecting points + "polygon": True, # For collecting geofences/polygons + "polyline": True, # For collecting polylines + "rectangle": True, # For collecting rectangles + "circle": True, # For collecting circles + "circlemarker": False, + }, + edit_options={"edit": True}, +) 
+### Revenue analysis: Average fare by hour
+
+```python title="app.py"
+import streamlit as st
+
+# Analyze when fares are highest
+avg_fare_by_hour = df.groupby("pickup_hour")["fare_amount"].mean()
+st.line_chart(avg_fare_by_hour)
+
+best_hour = avg_fare_by_hour.idxmax()
+st.success(f"Best earning hour: {best_hour}:00")
+```
+
+### Location analysis: Top pickup zones
+
+```python title="app.py"
+import streamlit as st
+
+# Identify high-demand pickup locations
+top_pickups = df["pickup_zip"].value_counts().head(15)
+st.bar_chart(top_pickups)
+```
+
+### Cumulative revenue over time
+
+```python title="app.py"
+import streamlit as st
+
+# Track total revenue accumulation
+revenue_df = df.set_index("tpep_pickup_datetime")[["fare_amount"]].sort_index()
+revenue_df["cumulative_revenue"] = revenue_df["fare_amount"].cumsum()
+st.area_chart(revenue_df["cumulative_revenue"])
+```
+
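+Because the chart index is a datetime, you can also resample before plotting, for example to show daily totals instead of a running sum (a sketch using pandas' calendar-day frequency `"D"`):
+
+```python title="app.py"
+import streamlit as st
+
+# Aggregate fares into daily totals and plot them as a line chart
+daily_revenue = revenue_df["fare_amount"].resample("D").sum()
+st.line_chart(daily_revenue)
+```
+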
+## Resources
+
+- [SQL warehouse](https://docs.databricks.com/aws/en/compute/sql-warehouse/)
+- [Unity Catalog table](https://docs.databricks.com/aws/en/tables/)
+
+## Permissions
+
+Your [app service principal](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/#how-does-databricks-apps-manage-authorization) needs the following permissions:
+
+- `CAN USE` on the SQL warehouse
+- `SELECT` on the Unity Catalog table
+
+See [Unity Catalog privileges and securable objects](https://docs.databricks.com/aws/en/data-governance/unity-catalog/manage-privileges/privileges) for more information.
+
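+You can grant the table privilege through the UI, SQL `GRANT` statements, or the Databricks SDK. The following sketch uses the SDK's Unity Catalog grants API; the table name and service principal application ID are placeholders, and the principal also needs `USE CATALOG` and `USE SCHEMA` on the parent objects:
+
+```python title="grant_select.py"
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.service import catalog
+
+w = WorkspaceClient()
+
+# Grant SELECT on a table to the app's service principal (placeholder values)
+w.grants.update(
+    securable_type=catalog.SecurableType.TABLE,
+    full_name="<catalog>.<schema>.<table>",
+    changes=[
+        catalog.PermissionsChange(
+            principal="<service-principal-application-id>",
+            add=[catalog.Privilege.SELECT],
+        )
+    ],
+)
+```
+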
+## Dependencies
+
+- [Streamlit](https://pypi.org/project/streamlit/) - `streamlit`
+- [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk`
+- [Databricks SQL Connector](https://pypi.org/project/databricks-sql-connector/) - `databricks-sql-connector`
+- [Pandas](https://pypi.org/project/pandas/) - `pandas`
+
+```text title="requirements.txt"
+streamlit
+databricks-sdk
+databricks-sql-connector
+pandas
+```
+
diff --git a/docs/docs/streamlit/visualizations/visualizations_map.mdx b/docs/docs/streamlit/visualizations/visualizations_map.mdx
new file mode 100644
index 0000000..bd23c60
--- /dev/null
+++ b/docs/docs/streamlit/visualizations/visualizations_map.mdx
@@ -0,0 +1,143 @@
+---
+sidebar_position: 2
+---
+
+# Map display and interaction
+
+This recipe shows how to display geographic data on a map and collect geographic input from users through interactive map drawing. You can load location data from Unity Catalog tables or use the drawing tools to capture points, polygons, and geofences.
+
+## Code snippet
+
+### Display geo data from a table
+
+```python title="app.py"
+import streamlit as st
+from databricks import sql
+from databricks.sdk.core import Config
+from databricks.sdk import WorkspaceClient
+import pandas as pd
+
+cfg = Config()
+w = WorkspaceClient()
+
+# List available SQL warehouses
+warehouses = w.warehouses.list()
+warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses}
+
+# Connect to SQL warehouse
+def get_connection(http_path):
+    return sql.connect(
+        server_hostname=cfg.host,
+        http_path=http_path,
+        credentials_provider=lambda: cfg.authenticate,
+    )
+
+# Read table
+def read_table(table_name, conn):
+    with conn.cursor() as cursor:
+        cursor.execute(f"SELECT * FROM {table_name}")
+        return cursor.fetchall_arrow().to_pandas()
+
+# Get data and display on map
+warehouse_name = "your_warehouse_name"
+table_name = "samples.accuweather.forecast_daily_calendar_metric"
+
+http_path = warehouse_paths[warehouse_name]
+conn = get_connection(http_path)
+df = read_table(table_name, conn)
+
+# Display map with latitude/longitude columns
+st.map(df, latitude="latitude", longitude="longitude")
+```
+
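+If coordinates are stored as strings, or some rows lack them, `st.map` can fail or plot nothing. A small cleaning step (the same one this recipe's "Try it" view uses) coerces the columns to numeric and drops rows that can't be plotted:
+
+```python title="app.py"
+import pandas as pd
+import streamlit as st
+
+# Coerce coordinates to numbers; invalid values become NaN and are dropped
+df["latitude"] = pd.to_numeric(df["latitude"], errors="coerce")
+df["longitude"] = pd.to_numeric(df["longitude"], errors="coerce")
+df = df.dropna(subset=["latitude", "longitude"])
+
+if not df.empty:
+    st.map(df, latitude="latitude", longitude="longitude")
+else:
+    st.warning("No plottable latitude/longitude values found in the table")
+```
+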
+### Collect user geo input
+
+```python title="app.py"
+import streamlit as st
+from streamlit_folium import st_folium
+import folium
+from folium.plugins import Draw
+
+# Create a map centered on a location
+m = folium.Map(location=[37.7749, -122.4194], zoom_start=13)
+
+# Enable drawing tools (set True for the tools you want to enable)
+draw = Draw(
+    draw_options={
+        "marker": True,  # For collecting points
+        "polygon": True,  # For collecting geofences/polygons
+        "polyline": True,  # For collecting polylines
+        "rectangle": True,  # For collecting rectangles
+        "circle": True,  # For collecting circles
+        "circlemarker": False,
+    },
+    edit_options={"edit": True},
+)
+draw.add_to(m)
+output = st_folium(m, width=700, height=500)
+
+# Access the drawn geometry
+if output["last_active_drawing"] and "geometry" in output["last_active_drawing"]:
+    geometry = output["last_active_drawing"]["geometry"]
+    st.json(geometry)
+```
+
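+The drawn geometry is plain GeoJSON, so any GeoJSON-aware library can consume it. As one example, this sketch checks whether a point falls inside a drawn geofence using `shapely`, an extra dependency that is not part of this recipe's requirements; the test point is a placeholder:
+
+```python title="app.py"
+import streamlit as st
+from shapely.geometry import Point, shape
+
+# Build a shapely geometry from the drawn GeoJSON and test a point against it.
+# GeoJSON coordinates use (longitude, latitude) order.
+if geometry["type"] == "Polygon":
+    geofence = shape(geometry)
+    point = Point(-122.4194, 37.7749)  # placeholder: downtown San Francisco
+    st.write(f"Point inside geofence: {geofence.contains(point)}")
+```
+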
+## Resources
+
+- [SQL warehouse](https://docs.databricks.com/aws/en/compute/sql-warehouse/) _(optional, only for reading table data)_
+- [Unity Catalog table](https://docs.databricks.com/aws/en/tables/) _(optional, only for reading table data)_
+
+## Permissions
+
+Your [app service principal](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/#how-does-databricks-apps-manage-authorization) needs the following permissions:
+
+- `CAN USE` on the SQL warehouse _(only required if reading data from tables)_
+- `SELECT` on the Unity Catalog table _(only required if reading data from tables)_
+
+See [Unity Catalog privileges and securable objects](https://docs.databricks.com/aws/en/data-governance/unity-catalog/manage-privileges/privileges) for more information.
+
+## Dependencies
+
+- [Streamlit](https://pypi.org/project/streamlit/) - `streamlit`
+- [Streamlit Folium](https://pypi.org/project/streamlit-folium/) - `streamlit-folium`
+- [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk` _(for table data)_
+- [Databricks SQL Connector](https://pypi.org/project/databricks-sql-connector/) - `databricks-sql-connector` _(for table data)_
+
+```text title="requirements.txt"
+streamlit
+streamlit-folium
+databricks-sdk
+databricks-sql-connector
+```
+
diff --git a/streamlit/requirements.txt b/streamlit/requirements.txt
index 55f0fad..d93a7cc 100644
--- a/streamlit/requirements.txt
+++ b/streamlit/requirements.txt
@@ -2,6 +2,7 @@ databricks-connect==16.0.0
 databricks-sdk[openai]==0.60.0
 databricks-sql-connector==4.0.0
 pandas==2.2.3
-streamlit==1.41.1
 psycopg[binary]==3.2.9
 psycopg-pool==3.2.6
+streamlit==1.41.1
+streamlit-folium==0.25.3
\ No newline at end of file
diff --git a/streamlit/view_groups.py b/streamlit/view_groups.py
index 9bf37d7..885e949 100644
--- a/streamlit/view_groups.py
+++ b/streamlit/view_groups.py
@@ -152,6 +152,23 @@
         ],
     },
     {
+        "title": "Visualizations",
+        "views": [
+            {
+                "label": "Charts",
+                "help": "Visualize data using Streamlit's built-in chart components.",
+                "page": "views/visualizations_charts.py",
+                "icon": ":material/bar_chart:",
+            },
+            {
+                "label": "Map display and interaction",
+                "help": "Display geo information on a map and allow users to draw on the map.",
+                "page": "views/visualizations_map.py",
+                "icon": ":material/globe:",
+            },
+        ],
+    },
+    {
         "title": "External services",
         "views": [
             {
diff --git a/streamlit/views/visualizations_charts.py b/streamlit/views/visualizations_charts.py
new file mode 100644
index 0000000..e39e041
--- /dev/null
+++ b/streamlit/views/visualizations_charts.py
@@ -0,0 +1,366 @@
+import pandas as pd
+from databricks import sql
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.core import Config
+
+import streamlit as st
+
+st.header(body="Visualizations", divider=True)
+st.subheader("Charts")
+st.write(
+    "This recipe demonstrates how to visualize data using Streamlit's built-in chart components: area charts, line charts, and bar charts."
+)
+
+cfg = Config()
+
+w = WorkspaceClient()
+
+warehouses = w.warehouses.list()
+
+warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses}
+
+
+@st.cache_resource
+def get_connection(http_path):
+    return sql.connect(
+        server_hostname=cfg.host,
+        http_path=http_path,
+        credentials_provider=lambda: cfg.authenticate,
+    )
+
+
+def read_table(table_name, conn):
+    with conn.cursor() as cursor:
+        query = f"SELECT * FROM {table_name} LIMIT 1000"
+        cursor.execute(query)
+        return cursor.fetchall_arrow().to_pandas()
+
+
+tab_a, tab_b, tab_c = st.tabs(["**Try it**", "**Code snippet**", "**Requirements**"])
+
+with tab_a:
+    st.markdown("### Load data and visualize with charts")
+    st.write(
+        "Select a warehouse and load data from a Unity Catalog table to visualize with different chart types."
+    )
+
+    warehouse_selection = st.selectbox(
+        "Select a SQL Warehouse:",
+        options=[""] + list(warehouse_paths.keys()),
+        help="Warehouse list populated from your workspace using app service principal.",
+    )
+
+    st.markdown("**Table:** `samples.nyctaxi.trips`")
+    table_name = "samples.nyctaxi.trips"
+
+    if st.button("Load Data", type="primary"):
+        if not warehouse_selection:
+            st.warning("Please select a SQL warehouse")
+        else:
+            with st.spinner("Loading data..."):
+                try:
+                    http_path = warehouse_paths[warehouse_selection]
+                    conn = get_connection(http_path)
+                    df = read_table(table_name, conn)
+
+                    if df.empty:
+                        st.warning("The query returned no data")
+                    else:
+                        st.success(f"Loaded {len(df)} rows from {table_name}")
+
+                        # Store data in session state for chart display
+                        st.session_state.chart_data = df
+
+                except Exception as e:
+                    st.error(f"Error loading data: {str(e)}")
+
+    # Display charts if data is loaded
+    if "chart_data" in st.session_state:
+        df = st.session_state.chart_data
+
+        # Process data for business insights
+        try:
+            df["tpep_pickup_datetime"] = pd.to_datetime(df["tpep_pickup_datetime"])
+            df["tpep_dropoff_datetime"] = pd.to_datetime(df["tpep_dropoff_datetime"])
+            df["pickup_hour"] = df["tpep_pickup_datetime"].dt.hour
+            df["pickup_day"] = df["tpep_pickup_datetime"].dt.day_name()
+            df["trip_duration_minutes"] = (
+                df["tpep_dropoff_datetime"] - df["tpep_pickup_datetime"]
+            ).dt.total_seconds() / 60
+        except Exception as e:
+            st.error(f"Error processing datetime columns: {str(e)}")
+
+        st.divider()
+        st.markdown("### Data Preview")
+        st.dataframe(df.head(10), use_container_width=True)
+
+        st.divider()
+
+        # Create sub-tabs for different business insights
+        chart_tab1, chart_tab2, chart_tab3, chart_tab4, chart_tab5 = st.tabs(
+            [
+                "Demand Patterns",
+                "Revenue Analysis",
+                "Trip Characteristics",
+                "Popular Locations",
+                "Time Analysis",
+            ]
+        )
+
+        with chart_tab1:
+            st.markdown("#### Demand by Hour of Day")
+            st.write("Understand peak demand hours to optimize fleet deployment")
+
+            if "pickup_hour" in df.columns:
+                # Count trips by hour
+                hourly_demand = df["pickup_hour"].value_counts().sort_index()
+                st.bar_chart(hourly_demand, use_container_width=True)
+
+                peak_hour = hourly_demand.idxmax()
+                st.info(
+                    f"🚕 Peak demand hour: {peak_hour}:00 with {hourly_demand.max()} trips"
+                )
+            else:
+                st.warning("Required columns not found in the data")
+
+        with chart_tab2:
+            st.markdown("#### Revenue Patterns")
+            st.write("Track revenue trends and identify high-earning periods")
+
+            col1, col2 = st.columns(2)
+
+            with col1:
+                if "pickup_hour" in df.columns and "fare_amount" in df.columns:
+                    st.markdown("**Average Fare by Hour**")
+                    avg_fare_by_hour = df.groupby("pickup_hour")["fare_amount"].mean()
+                    st.line_chart(avg_fare_by_hour, use_container_width=True)
+
+                    best_hour = avg_fare_by_hour.idxmax()
+                    st.success(
+                        f"💰 Best earning hour: {best_hour}:00 (${avg_fare_by_hour.max():.2f} avg)"
+                    )
+
+            with col2:
+                if "tpep_pickup_datetime" in df.columns and "fare_amount" in df.columns:
+                    st.markdown("**Total Revenue Over Time**")
+                    revenue_df = df.set_index("tpep_pickup_datetime")[
+                        ["fare_amount"]
+                    ].sort_index()
+                    revenue_df["cumulative_revenue"] = revenue_df[
+                        "fare_amount"
+                    ].cumsum()
+                    st.area_chart(
+                        revenue_df["cumulative_revenue"], use_container_width=True
+                    )
+
+        with chart_tab3:
+            st.markdown("#### Trip Characteristics")
+            st.write("Analyze typical trip patterns to improve service")
+
+            col1, col2 = st.columns(2)
+
+            with col1:
+                if "trip_distance" in df.columns:
+                    st.markdown("**Trip Distance Distribution**")
+                    # Create histogram-style data
+                    distance_bins = pd.cut(df["trip_distance"], bins=20)
+                    distance_counts = distance_bins.value_counts().sort_index()
+                    # Convert interval index to strings for charting
+                    distance_counts.index = distance_counts.index.astype(str)
+                    st.bar_chart(distance_counts, use_container_width=True)
+
+                    avg_distance = df["trip_distance"].mean()
+                    st.info(f"📏 Average trip distance: {avg_distance:.2f} miles")
+
+            with col2:
+                if "trip_duration_minutes" in df.columns:
+                    st.markdown("**Trip Duration Distribution**")
+                    # Filter out outliers (trips > 120 minutes)
+                    duration_df = df[df["trip_duration_minutes"] <= 120]
+                    duration_bins = pd.cut(
+                        duration_df["trip_duration_minutes"], bins=20
+                    )
+                    duration_counts = duration_bins.value_counts().sort_index()
+                    # Convert interval index to strings for charting
+                    duration_counts.index = duration_counts.index.astype(str)
+                    st.bar_chart(duration_counts, use_container_width=True)
+
+                    avg_duration = df["trip_duration_minutes"].mean()
+                    st.info(f"⏱️ Average trip duration: {avg_duration:.1f} minutes")
+
+        with chart_tab4:
+            st.markdown("#### Popular Locations")
+            st.write("Identify high-demand zones for strategic positioning")
+
+            col1, col2 = st.columns(2)
+
+            with col1:
+                if "pickup_zip" in df.columns:
+                    st.markdown("**Top 15 Pickup Locations**")
+                    top_pickups = df["pickup_zip"].value_counts().head(15)
+                    st.bar_chart(top_pickups, use_container_width=True)
+
+            with col2:
+                if "dropoff_zip" in df.columns:
+                    st.markdown("**Top 15 Dropoff Locations**")
+                    top_dropoffs = df["dropoff_zip"].value_counts().head(15)
+                    st.bar_chart(top_dropoffs, use_container_width=True)
+
+        with chart_tab5:
+            st.markdown("#### Time-Based Analysis")
+            st.write("Understand how trip patterns vary throughout the day")
+
+            col1, col2 = st.columns(2)
+
+            with col1:
+                if "pickup_hour" in df.columns and "trip_distance" in df.columns:
+                    st.markdown("**Average Trip Distance by Hour**")
+                    avg_distance_by_hour = df.groupby("pickup_hour")[
+                        "trip_distance"
+                    ].mean()
+                    st.line_chart(avg_distance_by_hour, use_container_width=True)
+
+            with col2:
+                if (
+                    "pickup_hour" in df.columns
+                    and "trip_duration_minutes" in df.columns
+                ):
+                    st.markdown("**Average Trip Duration by Hour**")
+                    avg_duration_by_hour = df.groupby("pickup_hour")[
+                        "trip_duration_minutes"
+                    ].mean()
+                    st.line_chart(avg_duration_by_hour, use_container_width=True)
+
+with tab_b:
+    st.markdown("### Load data from a table")
+    st.code(
+        """
+import streamlit as st
+from databricks import sql
+from databricks.sdk.core import Config
+from databricks.sdk import WorkspaceClient
+import pandas as pd
+
+cfg = Config()
+w = WorkspaceClient()
+
+# List available SQL warehouses
+warehouses = w.warehouses.list()
+warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses}
+
+# Connect to SQL warehouse
+@st.cache_resource
+def get_connection(http_path):
+    return sql.connect(
+        server_hostname=cfg.host,
+        http_path=http_path,
+        credentials_provider=lambda: cfg.authenticate,
+    )
+
+# Read table
+def read_table(table_name, conn):
+    with conn.cursor() as cursor:
+        cursor.execute(f"SELECT * FROM {table_name} LIMIT 1000")
+        return cursor.fetchall_arrow().to_pandas()
+
+# Get data
+warehouse_name = "your_warehouse_name"
+table_name = "samples.nyctaxi.trips"
+
+http_path = warehouse_paths[warehouse_name]
+conn = get_connection(http_path)
+df = read_table(table_name, conn)
+
+# Process datetime columns
+df["tpep_pickup_datetime"] = pd.to_datetime(df["tpep_pickup_datetime"])
+df["tpep_dropoff_datetime"] = pd.to_datetime(df["tpep_dropoff_datetime"])
+df["pickup_hour"] = df["tpep_pickup_datetime"].dt.hour
+df["trip_duration_minutes"] = (df["tpep_dropoff_datetime"] - df["tpep_pickup_datetime"]).dt.total_seconds() / 60
+    """,
+        language="python",
+    )
+
+    st.markdown("### Demand analysis: Trips by hour")
+    st.code(
+        """
+import streamlit as st
+
+# Count trips by hour to understand demand patterns
+hourly_demand = df["pickup_hour"].value_counts().sort_index()
+st.bar_chart(hourly_demand)
+
+peak_hour = hourly_demand.idxmax()
+st.info(f"Peak demand hour: {peak_hour}:00 with {hourly_demand.max()} trips")
+    """,
+        language="python",
+    )
+
+    st.markdown("### Revenue analysis: Average fare by hour")
+    st.code(
+        """
+import streamlit as st
+
+# Analyze when fares are highest
+avg_fare_by_hour = df.groupby("pickup_hour")["fare_amount"].mean()
+st.line_chart(avg_fare_by_hour)
+
+best_hour = avg_fare_by_hour.idxmax()
+st.success(f"Best earning hour: {best_hour}:00")
+    """,
+        language="python",
+    )
+
+    st.markdown("### Location analysis: Top pickup zones")
+    st.code(
+        """
+import streamlit as st
+
+# Identify high-demand pickup locations
+top_pickups = df["pickup_zip"].value_counts().head(15)
+st.bar_chart(top_pickups)
+    """,
+        language="python",
+    )
+
+    st.markdown("### Cumulative revenue over time")
+    st.code(
+        """
+import streamlit as st
+
+# Track total revenue accumulation
+revenue_df = df.set_index("tpep_pickup_datetime")[["fare_amount"]].sort_index()
+revenue_df["cumulative_revenue"] = revenue_df["fare_amount"].cumsum()
+st.area_chart(revenue_df["cumulative_revenue"])
+    """,
+        language="python",
+    )
+
+with tab_c:
+    col1, col2, col3 = st.columns(3)
+
+    with col1:
+        st.markdown(
+            """
+            **Permissions (app service principal)**
+            * `CAN USE` on the SQL warehouse
+            * `SELECT` on the Unity Catalog table
+            """
+        )
+    with col2:
+        st.markdown(
+            """
+            **Databricks resources**
+            * SQL warehouse
+            * Unity Catalog table
+            """
+        )
+    with col3:
+        st.markdown(
+            """
+            **Dependencies**
+            * [Streamlit](https://pypi.org/project/streamlit/) - `streamlit`
+            * [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk`
+            * [Databricks SQL Connector](https://pypi.org/project/databricks-sql-connector/) - `databricks-sql-connector`
+            * [Pandas](https://pypi.org/project/pandas/) - `pandas`
+            """
+        )
input") + st.code( + """ +import streamlit as st +from streamlit_folium import st_folium +import folium +from folium.plugins import Draw + +# Create a map centered on a location +m = folium.Map(location=[37.7749, -122.4194], zoom_start=13) + +# Enable drawing tools (set True for the tools you want to enable) +draw = Draw( + draw_options={ + "marker": True, # For collecting points + "polygon": True, # For collecting geofences/polygons + "polyline": True, # For collecting polylines + "rectangle": True, # For collecting rectangles + "circle": True, # For collecting circles + "circlemarker": False, + }, + edit_options={"edit": True}, +) +draw.add_to(m) +output = st_folium(m, width=700, height=500) + +# Access the drawn geometry +if output["last_active_drawing"] and "geometry" in output["last_active_drawing"]: + geometry = output["last_active_drawing"]["geometry"] + st.json(geometry) + """ + ) + +with tab_c: + col1, col2, col3 = st.columns(3) + + with col1: + st.markdown( + """ + **Permissions (app service principal)** + * `CAN USE` on the SQL warehouse + * `SELECT` on the Unity Catalog table + + _Note: Only required if reading data from tables_ + """ + ) + with col2: + st.markdown( + """ + **Databricks resources** + * SQL warehouse _(optional, only for reading table data)_ + * Unity Catalog table _(optional, only for reading table data)_ + """ + ) + with col3: + st.markdown( + """ + **Dependencies** + * [Streamlit](https://pypi.org/project/streamlit/) - `streamlit` + * [Streamlit Folium](https://pypi.org/project/streamlit-folium/) - `streamlit-folium` + * [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk` _(for table data)_ + * [Databricks SQL Connector](https://pypi.org/project/databricks-sql-connector/) - `databricks-sql-connector` _(for table data)_ + """ + )