In [604]:
import pandas as pd
from globals import BASE_DIR
import json
import geopandas as gpd
from shapely.geometry import Point
import folium
from folium import FeatureGroup
from folium.plugins import MarkerCluster

# dataset = "snowcard"
available_datasets = ["snowcard", "yelp", "foursquaretky", "brightkite", "gowalla"]
available_datasets = ["gowalla"]
top_k_eval = 10





In [605]:
def unstack_recommendations(df):
    # Repeat each user_id for the length of their item_id:token list
    unstacked_df = df.explode(["item_id:token"]).reset_index(drop=True)
    return unstacked_df

In [606]:
def top_k_to_df(recommender_dir, top_k_eval=top_k_eval):
    # Load the JSON data
    with open(recommender_dir) as f:
        data = json.load(f)

    # Initialize a list to store the recommendations
    base_recommendations = []

    # Iterate through the data and structure it for DataFrame
    for user, items in data.items():
        for item in items:
            base_recommendations.append({
                "user_id:token": user,
                "item_id:token": item
            })

    # Create a DataFrame from the structured recommendations
    base_df = pd.DataFrame(base_recommendations)

    # Assuming `unstack_recommendations` is a function you want to apply here:
    base_df = unstack_recommendations(base_df)

    df = base_df.groupby('user_id:token').head(top_k_eval)

    return df

In [607]:
def group_user_events(user_events, user_groups):
    stats = {}
    for group in user_groups.keys():
        stats[group] = {}
        user_events_group = user_events.copy()
        user_events_group = user_events_group.loc[user_events_group['user_id:token'].isin(user_groups[group])]
        user_dist = user_events_group["user_id:token"].value_counts()
        stats[group]["num_users"] = user_dist.shape[0]
        stats[group]["mean_checkins"] = user_events_group["user_id:token"].value_counts().mean()
        stats[group]["min_checkins"] = user_events_group["user_id:token"].value_counts().min()
        stats[group]["max_checkins"] = user_events_group["user_id:token"].value_counts().max()
        item_dist = user_events_group["item_id:token"].value_counts()
        stats[group]["num_items"] = item_dist.shape[0]
        stats[group]["sparsity"] = 1 - len(user_events_group) / (len(user_events_group["user_id:token"].unique()) * len(user_events_group["item_id:token"].unique()))

    return stats



In [608]:
full_stats = {}
for dataset in available_datasets:
    # Stays the same across all models 
    train_data = pd.read_csv(f"{BASE_DIR}{dataset}_dataset/processed_data_recbole/{dataset}_sample.train.inter", sep="\t")
    test_data = pd.read_csv(f"{BASE_DIR}{dataset}_dataset/processed_data_recbole/{dataset}_sample.test.inter", sep="\t")
    valid_data = pd.read_csv(f"{BASE_DIR}{dataset}_dataset/processed_data_recbole/{dataset}_sample.valid.inter", sep="\t")

    train_data = pd.concat([train_data, valid_data])
    user_group_dir = f"{BASE_DIR}{dataset}_dataset/{dataset}_user_id_popularity.json"
    with open(user_group_dir) as f:
        user_groups = json.load(f)

    poi_data = pd.read_csv(f"{BASE_DIR}{dataset}_dataset/processed_data_capri/poiCoos.txt", sep="\t", header=None, names=["item_id:token", "lat:float", "lon:float"])
    poi_data["item_id:token"] = poi_data["item_id:token"].astype(str) + "_x"

    all_user_ids = (
    set(user_groups["high"])
    | set(user_groups["medium"])
    | set(user_groups["low"])
    )

    value_counts = train_data["item_id:token"].value_counts().reset_index()
    value_counts.columns = ["item_id:token", "count"]
    value_counts["item_pop"] = value_counts["count"] / len(value_counts)
    train_data = train_data.merge(
        value_counts[["item_id:token", "item_pop"]],
        on="item_id:token",
        how="left",
    )

    user_events = pd.concat([train_data, test_data])

#### Turn the POI data into a Geodataframe and perform filtering

In [609]:
def geolocation_poi_filter(poi_data, coordinates=(18.0680, 59.3293) , radius=20000, train_data=train_data):

    gdf = gpd.GeoDataFrame(
        poi_data,
        geometry=gpd.points_from_xy(poi_data["lon:float"], poi_data["lat:float"]),
        crs="EPSG:4326"  # WGS84
    )

    gdf = gdf.to_crs("EPSG:3857")
    stockholm_point = gpd.GeoSeries(
        [Point(coordinates)], crs="EPSG:4326"
    ).to_crs("EPSG:3857")
    buffer = stockholm_point.buffer(radius)  # 20 km buffer default

    filtered_gdf = gdf[gdf.geometry.within(buffer.unary_union)]
    filtered_gdf = filtered_gdf.to_crs("EPSG:4326")

    relevant_items = filtered_gdf["item_id:token"].unique().tolist()

    relevant_train_data = train_data[train_data["item_id:token"].isin(relevant_items)]

    relevant_users = relevant_train_data["user_id:token"].unique().tolist()

    filtered_gdf = pd.merge(filtered_gdf, relevant_train_data[["item_id:token", "item_pop"]], on="item_id:token", how="left")
    filtered_gdf = filtered_gdf.drop_duplicates(subset=["item_id:token"])

    return filtered_gdf, relevant_train_data, relevant_users



    



In [610]:
coordinates=(18.0686, 59.3290)

filtered_gdf, relevant_train_data, relevant_users = geolocation_poi_filter(poi_data, coordinates, train_data=train_data)

  filtered_gdf = gdf[gdf.geometry.within(buffer.unary_union)]


# User Groups

### Stockholm Sample
(Gowalla + coordinates=(18.0686, 59.3293) & radius=20000)
* user g1 = 481_x
* user g2 = 1023_x
* user g3 = 130_x

* BPR: gowalla_sample-BPR-Dec-19-2024_20-23-30/baseline
* BPR CP: gowalla_sample-BPR-Dec-19-2024_20-23-30/cp
* BPR UPD: gowalla_sample-BPR-Dec-19-2024_20-23-30/upd
* LORE: gowalla_sample-contextpoi-LORE-Sep-16-2024_09-00-00/baseline

In [611]:
# training data for the 3 users

train_g1 = relevant_train_data.loc[relevant_train_data["user_id:token"] == "481_x"]
train_g2 = relevant_train_data.loc[relevant_train_data["user_id:token"] == "1023_x"]
train_g3 = relevant_train_data.loc[relevant_train_data["user_id:token"] == "130_x"]

In [612]:
# baseline recommendations for the 3 users
baseline = top_k_to_df(f"{BASE_DIR}{dataset}_dataset/recommendations/{dataset}_sample-BPR-Dec-19-2024_20-23-30/baseline/top_k_recommendations.json")
cp = top_k_to_df(f"{BASE_DIR}{dataset}_dataset/recommendations/{dataset}_sample-BPR-Dec-19-2024_20-23-30/cp/top_k_recommendations.json")
upd = top_k_to_df(f"{BASE_DIR}{dataset}_dataset/recommendations/{dataset}_sample-BPR-Dec-19-2024_20-23-30/upd/top_k_recommendations.json")
context = top_k_to_df(f"{BASE_DIR}{dataset}_dataset/recommendations/{dataset}_sample-contextpoi-LORE-Sep-16-2024_09-00-00/baseline/top_k_recommendations.json")


In [613]:
user_samples = {"g1": "481_x", "g2": "1023_x", "g3": "130_x"}
for key, item in user_samples.items():
    print(key)

g1
g2
g3


In [614]:
relevant_train_data

Unnamed: 0,user_id:token,item_id:token,checkin_count:float,item_pop
418,1016_x,1125_x,1,0.001357
419,1016_x,1652_x,1,0.001764
420,1016_x,1222_x,2,0.001629
421,1016_x,919_x,1,0.004615
422,1016_x,1207_x,1,0.000814
...,...,...,...,...
42330,99_x,4200_x,1,0.000679
42331,99_x,1685_x,1,0.003393
42339,99_x,5134_x,1,0.000407
42340,99_x,1818_x,1,0.000271


In [615]:
def data_prep_visualization(filtered_gdf, inters, user_samples):
    data_list = []
    for user_group, user_id in user_samples.items():
        user_inters = inters.loc[inters["user_id:token"] == user_id]
        relevant_items = filtered_gdf.loc[filtered_gdf["item_id:token"].isin(user_inters["item_id:token"])]
        relevant_items.reset_index(drop=True, inplace=True)
        data_list.append(relevant_items)
    return data_list


    



In [616]:
training_data_list = data_prep_visualization(filtered_gdf, relevant_train_data, user_samples)
baseline_list = data_prep_visualization(filtered_gdf, baseline, user_samples)
cp_list = data_prep_visualization(filtered_gdf, cp, user_samples)
upd_list = data_prep_visualization(filtered_gdf, upd, user_samples)
context_list = data_prep_visualization(filtered_gdf, context, user_samples)

In [617]:
# train_items_g1 = filtered_gdf.loc[filtered_gdf["item_id:token"].isin(train_g1["item_id:token"])]
# train_items_g2 = filtered_gdf.loc[filtered_gdf["item_id:token"].isin(train_g2["item_id:token"])]
# train_items_g3 = filtered_gdf.loc[filtered_gdf["item_id:token"].isin(train_g3["item_id:token"])]

In [618]:
user_samples

{'g1': '481_x', 'g2': '1023_x', 'g3': '130_x'}

In [619]:
list(user_samples.values())

['481_x', '1023_x', '130_x']

In [620]:
color_scheme = ["darkpurple", "blue", "green"]

In [621]:
# Initialize the map
m = folium.Map(location=(coordinates[1], coordinates[0]), zoom_start=12)

# Define feature groups for each user group and general data categories
layer_groups = {
    "g1": folium.FeatureGroup(name="Group 1 (g1)").add_to(m),
    "g2": folium.FeatureGroup(name="Group 2 (g2)").add_to(m),
    "g3": folium.FeatureGroup(name="Group 3 (g3)").add_to(m),
    "Training Data": folium.FeatureGroup(name="All Training Data").add_to(m),
    "Baseline Recommendations": folium.FeatureGroup(name="Baseline Recommendations").add_to(m),
    "CP Recommendations": folium.FeatureGroup(name="CP Recommendations").add_to(m),
    "UPD Recommendations": folium.FeatureGroup(name="UPD Recommendations").add_to(m),
    "Context-Aware Recommendations": folium.FeatureGroup(name="Context-Aware Recommendations").add_to(m),
}

# Function to add markers for a given dataset and group
def add_markers_to_layers(data_list, data_type, icon, color_scheme):
    for i, group in enumerate(data_list):
        user_group_key = list(user_samples.keys())[i]  # Get the user group key (e.g., 'g1')
        user_id = user_samples[user_group_key]         # Get the corresponding user ID (e.g., '481_x')

        for rank, row in group.iterrows():
            item_pop_percentage = f"{row['item_pop'] * 100:.4f}%"  # Format as a percentage
            popup_content = f"""
                <b>Recommendations for user:</b> {user_id} <br>
                <b>POI ID:</b> {row['item_id:token']}<br>
                <b>POI Popularity:</b> {item_pop_percentage}<br>
                <b>Data Type:</b> {data_type}<br>
                <b>User Group:</b> {user_group_key} <br>
                <b>Item Rank:</b> Position {rank} <br>
            """
            # Add marker to the specific user group layer
            folium.Marker(
                location=(row.geometry.y, row.geometry.x),  # (latitude, longitude)
                popup=folium.Popup(popup_content, max_width=300),
                icon=folium.Icon(color=color_scheme[i], icon=icon, prefix="glyphicon")
            ).add_to(layer_groups[user_group_key])

            # Also add marker to the general data layer (e.g., "Baseline Recommendations")
            folium.Marker(
                location=(row.geometry.y, row.geometry.x),  # (latitude, longitude)
                popup=folium.Popup(popup_content, max_width=300),
                icon=folium.Icon(color=color_scheme[i], icon=icon, prefix="glyphicon")
            ).add_to(layer_groups[data_type])

# Add training data markers
add_markers_to_layers(training_data_list, "Training Data", "glyphicon-user", color_scheme)

# Add baseline data markers
add_markers_to_layers(baseline_list, "Baseline Recommendations", "star", color_scheme)

# Add CP recommendations markers
add_markers_to_layers(cp_list, "CP Recommendations", "glyphicon-repeat", color_scheme)

# Add UPD recommendations markers
add_markers_to_layers(upd_list, "UPD Recommendations", "glyphicon-random", color_scheme)

# Add context-aware recommendations markers
add_markers_to_layers(context_list, "Context-Aware Recommendations", "glyphicon-eye-open", color_scheme)

# Add the 20 km buffer as a circle
folium.Circle(
    location=coordinates,  # Stockholm center
    radius=20000,  # 20 km in meters
    color="red",
    fill=True,
    fill_opacity=0.2
).add_to(m)

# Add LayerControl to toggle visibility of layers
folium.LayerControl().add_to(m)

# Save or display the map
m.save("stockholm_map_with_groups_and_filters.html")
m


In [622]:
import folium
from folium.plugins.treelayercontrol import TreeLayerControl

# Initialize the map
m = folium.Map(location=[coordinates[1], coordinates[0]], zoom_start=12)

def create_layer_structure(group_key, group_name, group_color, data_lists, data_labels, data_icons):
    group_layer = {"label": group_key, "select_all_checkbox": True, "children": []}
    
    # Iterate through each DataFrame in the provided lists for the current group
    for data_list, data_label, data_icon in zip(data_lists, data_labels, data_icons):
        sublayer = folium.FeatureGroup(name=f"{data_label} ({group_name})")
        
        # Iterate through the DataFrame corresponding to the current user group
        for df in data_list:  
            for _, row in df.iterrows():
                item_pop_percentage = f"{row['item_pop'] * 100:.4f}%"  # Format as a percentage
                popup_content = f"""
                    <b>Recommendations for user:</b> {user_samples[group_key]} <br>
                    <b>POI ID:</b> {row['item_id:token']}<br>
                    <b>POI Popularity:</b> {item_pop_percentage}<br>
                    <b>Data Type:</b> {data_label}<br>
                    <b>User Group:</b> {group_key} <br>
                """
                folium.Marker(
                    location=(row.geometry.y, row.geometry.x),
                    popup=folium.Popup(popup_content, max_width=300),
                    icon=folium.Icon(color=group_color, icon=data_icon, prefix="glyphicon"),
                ).add_to(sublayer)
        
        group_layer["children"].append({"label": data_label, "layer": sublayer})
        sublayer.add_to(m)
    
    return group_layer


# Define user groups, colors, and data
user_samples = {"g1": "481_x", "g2": "1023_x", "g3": "130_x"}
color_scheme = ["darkpurple", "blue", "green"]
data_lists = [training_data_list, baseline_list, cp_list, upd_list, context_list]
data_labels = ["Training Data", "Baseline", "CP", "UPD", "Context-Aware"]
data_icons = ["glyphicon glyphicon-cog", "glyphicon glyphicon-star-empty", "glyphicon-repeat", "glyphicon-random", "glyphicon-eye-open"]

# Create overlay tree structure
overlay_tree = {"label": "User Groups", "select_all_checkbox": "Select/Deselect All", "children": []}

# Iterate through each user group and process their corresponding data
for group_idx, (group_key, group_name, group_color) in enumerate(zip(user_samples.keys(), user_samples.values(), color_scheme)):
    # Select the DataFrame for the current user group from each list
    data_lists_for_group = [
        [training_data_list[group_idx]],  # Training data for the current group
        [baseline_list[group_idx]],      # Baseline recommendations for the current group
        [cp_list[group_idx]],            # CP recommendations for the current group
        [upd_list[group_idx]],           # UPD recommendations for the current group
        [context_list[group_idx]],       # Context-aware recommendations for the current group
    ]
    
    # Create and append the layer structure for this user group
    overlay_tree["children"].append(
        create_layer_structure(group_key, group_name, group_color, data_lists_for_group, data_labels, data_icons)
    )

# Add 20 km buffer as a circle (adjust coordinates as needed)
folium.Circle(
    location=coordinates,
    radius=20000,  # 20 km in meters
    color="red",
    fill=False,
    fill_opacity=0.2,
).add_to(m)

# Add TreeLayerControl for filtering layers
TreeLayerControl(overlay_tree=overlay_tree).add_to(m)

# Create and add a custom legend for user groups and algorithms
legend_html = """
<div style="position: fixed; 
            bottom: 50px; left: 50px; width: 190px; height: 250px;
            background-color: white; border: 2px solid grey; z-index: 9999; font-size: 14px;
            padding: 10px;">
    <b>User Groups</b><br>
    <div><i class="glyphicon glyphicon-user" style="color: purple;"></i> Group 1 - User 481_x</div>
    <div><i class="glyphicon glyphicon-user" style="color: blue;"></i> Group 2 - User 1023_x</div>
    <div><i class="glyphicon glyphicon-user" style="color: green;"></i> Group 3 - User 130_x</div><br>
    <b>Algorithms:</b><br>
    <div><i class="glyphicon glyphicon-cog" style="color: black;"></i> Training Data</div>
    <div><i class="glyphicon glyphicon-star-empty" style="color: black;"></i> Baseline</div>
    <div><i class="glyphicon glyphicon-repeat" style="color: black;"></i> CP</div>
    <div><i class="glyphicon glyphicon-random" style="color: black;"></i> UPD</div>
    <div><i class="glyphicon glyphicon-eye-open" style="color: black;"></i> Context-Aware</div>
</div>
"""

m.get_root().html.add_child(folium.Element(legend_html))

# Save or display the map
m.save("stockholm_map.html")
m
