How does GeoLLM compare to our location-encoder model?
- Generate a map with multiple layers
- Layer 1: Squirrel predictions (raster like?)
- Layer 2: Squirrel uncertainty 
- Layers 3 to X: LLM predictions (similarity scores & inference predictions)
	- with tooltip.

In [2]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
import folium

In [3]:
# LLM similarity scores per location (mean & max of vectors)
llm_preds_sim = pd.read_csv("/data/cher/geollm-bias/playground/llm-sim/Eastern Gray Squirrel/cos-sim-Meta-Llama-3.1-8B_STL.csv")
# Build one point per row from the longitude/latitude columns and tag the
# frame with EPSG:4326 directly at construction time.
sim_points = gpd.points_from_xy(llm_preds_sim['longitude'], llm_preds_sim['latitude'])
llm_preds_sim_gdf = gpd.GeoDataFrame(llm_preds_sim, geometry=sim_points, crs="EPSG:4326")

In [12]:
import re

def extract_coordinates(text):
    """Pull a ``(lat, lon)`` pair out of a ``Coordinates: (a, b)`` fragment.

    Args:
        text: String to search. Only the first ``Coordinates: (..., ...)``
            occurrence is considered.

    Returns:
        A ``(lat, lon)`` tuple of floats, where ``lat`` is the first captured
        number and ``lon`` the second. ``(None, None)`` is returned when no
        such fragment is present or when either captured token cannot be
        parsed as a number.
    """
    # The character class is permissive (digits, '-' and '.' in any order),
    # so a regex match alone does not guarantee a parseable number.
    match = re.search(r"Coordinates:\s*\(([-\d.]+),\s*([-\d.]+)\)", text)
    if match is None:
        return None, None

    try:
        return float(match.group(1)), float(match.group(2))
    except ValueError:
        # Tokens such as "--" or "1.2.3" match the pattern but are not
        # numbers; report "no coordinates" instead of crashing the caller.
        return None, None

In [15]:


# LLM predictions produced by prompting the model (inference)
llm_preds_inf = pd.read_csv("/data/cher/geollm-bias/playground/llm-responses/Eastern Gray Squirrel/STL/expert-incontext-Meta-Llama-3.1-8B-Instruct-numeric.csv")

coordinate_w_prompts = './prompts/st_louis_equal.jsonl'

# Each line of the prompt file is scanned for its coordinates; the zero-based
# line number becomes the row key. Lines without coordinates yield None/None.
with open(coordinate_w_prompts, "r", encoding="utf-8") as prompt_file:
    results = {
        line_no: dict(zip(('latitude', 'longitude'), extract_coordinates(prompt_line)))
        for line_no, prompt_line in enumerate(prompt_file)
    }

# reset_index() turns the line-number keys into an explicit 'index' column.
idx_latlon = pd.DataFrame.from_dict(results, orient='index').reset_index()

In [21]:
# Attach coordinates to the inference rows through the shared 'index' column,
# then keep only the rows that actually carry a prediction value.
llm_preds_inference = pd.merge(idx_latlon, llm_preds_inf, on=['index'])
has_prediction = llm_preds_inference['prediction'].notna()
llm_preds_inference = llm_preds_inference[has_prediction]

In [4]:
# sdm preds
all_stl_preds = np.load("/data/cher/StLouis-SDM/stl_species.npy")
stl_preds_latlon = pd.read_csv('/data/cher/HighResSDM/unconditional/uncond-loc(orig).csv', usecols=['lat', 'lon'])


# Eastern Gray Squirrel index?
# NOTE(review): the lookup below spells the name "Grey" while the LLM file
# paths in this notebook spell it "Gray" -- confirm which spelling
# unique_species.csv actually uses.
species_idxs = pd.read_csv("/data/cher/geollm-bias/playground/unique_species.csv")
species_name = 'Eastern Grey Squirrel'
species_index = species_idxs[species_idxs['CommonNames'] == species_name].index
if len(species_index) == 0:
    # Without this guard the slice below has zero columns, no 'prediction'
    # column is ever created, and the failure only shows up later as a
    # KeyError when the map is built.
    raise ValueError(f"No row in unique_species.csv has CommonNames == {species_name!r}")

# The matching row label(s) are used as column positions into the prediction
# array -- assumes the rows of unique_species.csv line up with the array's
# second axis; TODO confirm.
# Predictions and coordinates are joined on their positional row index.
squirrel_gdf = pd.DataFrame(all_stl_preds[:, species_index]).merge(stl_preds_latlon, right_index=True, left_index=True)
squirrel_gdf.rename(columns={0: "prediction"}, inplace=True)

In [32]:
# Randomly keep at most 300 points per LLM layer so the map stays readable.
# The sample size is clamped to the frame length because DataFrame.sample
# raises ValueError when n exceeds the number of rows (replace=False default).
# The two frames are sampled independently of each other.
n_points = 300
llm_preds_inference = llm_preds_inference.sample(n=min(n_points, len(llm_preds_inference)), random_state=42)
llm_preds_sim_gdf = llm_preds_sim_gdf.sample(n=min(n_points, len(llm_preds_sim_gdf)), random_state=42)

# Round the displayed values to two decimals.
llm_preds_inference['prediction'] = llm_preds_inference['prediction'].round(2)
sim_columns = ['normalized_similarity_mean', 'normalized_similarity_max']
llm_preds_sim_gdf[sim_columns] = llm_preds_sim_gdf[sim_columns].round(2)

In [34]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

# ---------------------------
# (Assumes the data is already loaded into these DataFrames by earlier cells:
# llm_preds_inference, llm_preds_sim_gdf, and squirrel_gdf)
# ---------------------------

# Centre the initial map view on the mean location of the SDM points.
sdm_lat = squirrel_gdf['lat']
sdm_lon = squirrel_gdf['lon']
center_lat, center_lon = sdm_lat.mean(), sdm_lon.mean()

# ---------------------------
# Create Plotly Scattermapbox Traces
# ---------------------------

# Trace for LLM Predictions - Inference
# Each point is drawn as a marker with its prediction value printed above it.
llm_preds_trace = go.Scattermapbox(
    lat=llm_preds_inference['latitude'],
    lon=llm_preds_inference['longitude'],
    mode='markers+text',  # Display both markers and text
    text=[f"{pred:.3f}" for pred in llm_preds_inference['prediction']],
    textposition="top center",
    marker=go.scattermapbox.Marker(
        size=8,
        color=llm_preds_inference['prediction'],  # Color markers by prediction
        colorscale='Reds',
        opacity=1,
        # This colorbar describes the inference predictions; it was previously
        # mislabelled "Sim Max" (copied from the similarity-max trace).
        colorbar=dict(title="Inference")
    ),
    name='GeoLLM Predictions - Inference'
)

# GeoLLM layer coloured and labelled by the normalized similarity mean.
sim_mean = llm_preds_sim_gdf['normalized_similarity_mean']
sim_mean_marker = go.scattermapbox.Marker(
    size=8,
    color=sim_mean,
    colorscale='Reds',
    opacity=1,
    colorbar={"title": "Sim Mean"},
)
llm_mean_trace = go.Scattermapbox(
    name='GeoLLM Predictions - Mean',
    lat=llm_preds_sim_gdf['latitude'],
    lon=llm_preds_sim_gdf['longitude'],
    mode='markers+text',  # marker plus a text label per point
    text=[format(value, ".3f") for value in sim_mean],
    textposition="top center",
    marker=sim_mean_marker,
)

# Trace for LLM Predictions - Max
# Each point is drawn as a marker with its similarity-max value printed above it.
llm_max_trace = go.Scattermapbox(
    lat=llm_preds_sim_gdf['latitude'],
    lon=llm_preds_sim_gdf['longitude'],
    mode='markers+text',  # Display both markers and text
    # Loop variable deliberately not named `max`, to avoid shadowing the builtin.
    text=[f"{sim_max:.3f}" for sim_max in llm_preds_sim_gdf['normalized_similarity_max']],
    textposition="top center",
    marker=go.scattermapbox.Marker(
        size=8,
        color=llm_preds_sim_gdf['normalized_similarity_max'],  # Color markers by normalized similarity max
        colorscale='Reds',
        opacity=1,
        colorbar=dict(title="Sim Max")
    ),
    name='GeoLLM Predictions - Max'
)

# SDM layer: small semi-transparent markers coloured by the SDM prediction.
# The slider defined further down rewrites this trace's lat/lon/colour data.
sdm_marker = go.scattermapbox.Marker(
    size=5,
    color=squirrel_gdf['prediction'],
    colorscale='Viridis',
    opacity=0.3,
    colorbar={"title": "Prediction"},
)
squirrel_trace = go.Scattermapbox(
    name='Squirrel Predictions - SDM',
    lat=squirrel_gdf['lat'],
    lon=squirrel_gdf['lon'],
    mode='markers',
    marker=sdm_marker,
)

# ---------------------------
# Assemble the figure and set the base-map layout
# ---------------------------
# Trace order matters: the slider addresses the squirrel trace by position 3.
map_traces = [llm_preds_trace, llm_mean_trace, llm_max_trace, squirrel_trace]
fig = go.Figure(data=map_traces)

fig.update_layout(
    # The open-street-map style needs no access token.
    mapbox=dict(
        style="open-street-map",
        zoom=12,
        center=dict(lat=center_lat, lon=center_lon),
    ),
    margin=dict(l=0, r=0, t=0, b=0),
    legend={"yanchor": "top", "y": 0.99, "xanchor": "left", "x": 0.01},
)

# ---------------------------
# Slider that filters the squirrel (SDM) trace by prediction threshold
# ---------------------------

def _squirrel_step(threshold):
    """Build one slider step that keeps SDM points with prediction > threshold."""
    kept = squirrel_gdf.loc[squirrel_gdf['prediction'] > threshold]
    return {
        "method": "restyle",
        "args": [
            # Replacement data for the squirrel trace only.
            {
                "lat": [kept['lat'].tolist()],
                "lon": [kept['lon'].tolist()],
                "marker.color": [kept['prediction'].tolist()],
            },
            [3],  # position of squirrel_trace in the figure's trace list
        ],
        "label": str(round(threshold, 2)),
    }


# Eleven evenly spaced thresholds: 0.0, 0.1, ..., 1.0
thresholds = np.linspace(0, 1, 11)
slider_steps = [_squirrel_step(t) for t in thresholds]

# Attach the slider to the figure layout.
fig.update_layout(
    sliders=[
        dict(
            active=0,
            currentvalue=dict(prefix="Squirrel SDM Prediction Threshold: "),
            pad=dict(t=50),
            steps=slider_steps,
        )
    ]
)

# ---------------------------
# Export the Map to an HTML File
# ---------------------------
# Keep the path in one place so the confirmation message always names the
# file that was actually written.
output_html = "Eastern_Gray_Squirrel_GeoLLM_Predictions-wslider3.html"
fig.write_html(output_html)
print(f"Map saved to {output_html}")


Map saved to Eastern_Gray_Squirrel_GeoLLM_Predictions.html
