In [1]:
# !uv pip install "dask[dataframe]<2025.0.1" numpy datamapplot umap-learn torch pandas ipykernel

In [2]:
import numpy as np
import datamapplot
import matplotlib.pyplot as plt
import umap
import torch
import pandas as pd

# NOTE(security): weights_only=False makes torch.load perform a full pickle
# deserialization, which can execute arbitrary code embedded in the file.
# Only load files you produced yourself or otherwise trust.
producer_embeddings = torch.load('producer_embeddings.pt', weights_only=False)
producer_communities = np.load('producer_communities.npy')

# Reuse the cached 2D projection only when it still matches the current
# embeddings; a stale cache built from a different set of producers would
# otherwise be plotted without any error and misplace every point.
expected_2d_shape = (len(producer_embeddings), 2)
try:
    embeddings_2d = np.load('producer_embeddings_2d.npy')
except FileNotFoundError:
    # No cache yet: fall through to the UMAP computation below.
    embeddings_2d = None

if embeddings_2d is None or embeddings_2d.shape != expected_2d_shape:
    # Reduce dimensionality to 2D using UMAP (seeded for reproducibility)
    reducer = umap.UMAP(n_components=2, random_state=42)
    embeddings_2d = reducer.fit_transform(producer_embeddings)
    # Save the 2D embeddings so later runs can skip the reduction
    np.save('producer_embeddings_2d.npy', embeddings_2d)

producer_df = pd.read_parquet('producer_profiles.parquet')
# Vectorised concatenation instead of a row-wise apply; a missing `did`
# stays missing rather than becoming the literal URL ".../profile/nan".
producer_df['bsky_url'] = "https://bsky.app/profile/" + producer_df['did']

# Embeddings, community labels and profile rows are combined positionally
# when plotting, so a length mismatch must fail loudly here.
n_embeddings = len(producer_embeddings)
n_labels = len(producer_communities)
n_profiles = len(producer_df)
if not (n_embeddings == n_labels == n_profiles):
    raise ValueError(
        "Row count mismatch: "
        f"{n_embeddings} embeddings, {n_labels} community labels, "
        f"{n_profiles} profiles"
    )

producer_df.head()

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,producer_idx,did,handle,display_name,description,followers,following,posts,joined,error,bsky_url
0,0,did:plc:nywb5oene54cllowkfwouxzz,chadloder.bsky.social,Chad Loder,"Community activist, cybersecurity expert, citi...",54792.0,1377.0,10515.0,2023-04-30T18:10:33.161Z,,https://bsky.app/profile/did:plc:nywb5oene54cl...
1,1,did:plc:mnfnfpykrohxbck6av3f7los,blkmatters3000.bsky.social,Michael E Hopson,Organizer. Abolitionist \nHe/him,134.0,23.0,5.0,2023-05-01T22:40:48.536Z,,https://bsky.app/profile/did:plc:mnfnfpykrohxb...
2,2,did:plc:kfdf3ncmu3ekd3yrorivypai,lolennui.bsky.social,Amy Ash,onion lady \n\nhttps://lolennui.com,52062.0,819.0,3329.0,2023-04-27T15:52:02.340Z,,https://bsky.app/profile/did:plc:kfdf3ncmu3ekd...
3,3,did:plc:rwbe4e7d7o3fwwcqkoyjvp4v,rui.bsky.social,Rui,,5196.0,43075.0,29.0,2023-04-11T18:05:18.291Z,,https://bsky.app/profile/did:plc:rwbe4e7d7o3fw...
4,4,did:plc:krt7ulietkhjowpctmv2iphb,paulio.bsky.social,Paulio 🥚,,1730.0,956.0,1138.0,2023-05-02T02:45:41.097Z,,https://bsky.app/profile/did:plc:krt7ulietkhjo...


In [3]:
# Community ids become string labels; the array is handed to the plot as
# the per-point label argument further down.
producer_communities = producer_communities.astype(np.str_)

In [4]:
# Sanity check: show the label array's shape together with a preview of its values.
producer_communities.shape, producer_communities

((37192,), array(['49', '27', '24', ..., '14', '93', '87'], dtype='<U11'))

In [5]:
# List the distinct community labels. The labels are strings at this point,
# so the sorted result is lexicographic ('10' comes before '2'), not numeric.
np.unique(producer_communities)

array(['0', '1', '10', '11', '12', '13', '14', '15', '16', '17', '18',
       '19', '2', '20', '21', '22', '23', '24', '25', '26', '27', '28',
       '29', '3', '30', '31', '32', '33', '34', '35', '36', '37', '38',
       '39', '4', '40', '41', '42', '43', '44', '45', '46', '47', '48',
       '49', '5', '50', '51', '52', '53', '54', '55', '56', '57', '58',
       '59', '6', '60', '61', '62', '63', '64', '65', '66', '67', '68',
       '69', '7', '70', '71', '72', '73', '74', '75', '76', '77', '78',
       '79', '8', '80', '81', '82', '83', '84', '85', '86', '87', '88',
       '89', '9', '90', '91', '92', '93', '94', '95', '96', '97', '98',
       '99'], dtype='<U11')

In [6]:
# Display the profile table (pandas abbreviates the middle rows of a long frame).
producer_df

Unnamed: 0,producer_idx,did,handle,display_name,description,followers,following,posts,joined,error,bsky_url
0,0,did:plc:nywb5oene54cllowkfwouxzz,chadloder.bsky.social,Chad Loder,"Community activist, cybersecurity expert, citi...",54792.0,1377.0,10515.0,2023-04-30T18:10:33.161Z,,https://bsky.app/profile/did:plc:nywb5oene54cl...
1,1,did:plc:mnfnfpykrohxbck6av3f7los,blkmatters3000.bsky.social,Michael E Hopson,Organizer. Abolitionist \nHe/him,134.0,23.0,5.0,2023-05-01T22:40:48.536Z,,https://bsky.app/profile/did:plc:mnfnfpykrohxb...
2,2,did:plc:kfdf3ncmu3ekd3yrorivypai,lolennui.bsky.social,Amy Ash,onion lady \n\nhttps://lolennui.com,52062.0,819.0,3329.0,2023-04-27T15:52:02.340Z,,https://bsky.app/profile/did:plc:kfdf3ncmu3ekd...
3,3,did:plc:rwbe4e7d7o3fwwcqkoyjvp4v,rui.bsky.social,Rui,,5196.0,43075.0,29.0,2023-04-11T18:05:18.291Z,,https://bsky.app/profile/did:plc:rwbe4e7d7o3fw...
4,4,did:plc:krt7ulietkhjowpctmv2iphb,paulio.bsky.social,Paulio 🥚,,1730.0,956.0,1138.0,2023-05-02T02:45:41.097Z,,https://bsky.app/profile/did:plc:krt7ulietkhjo...
...,...,...,...,...,...,...,...,...,...,...,...
37187,37187,did:plc:3u3pydxppxck3jntgs2wcwhh,arielsonline.bsky.social,ariel,that girl from the other blue app,103.0,37.0,6.0,2023-05-06T15:27:23.628Z,,https://bsky.app/profile/did:plc:3u3pydxppxck3...
37188,37188,did:plc:3zlq22qdm35eswu5nkbkwphc,handle.invalid,,,75.0,0.0,0.0,2023-05-06T22:13:05.448Z,,https://bsky.app/profile/did:plc:3zlq22qdm35es...
37189,37189,did:plc:uh2xhvsj4kcojgenqs5iweqx,haiverart.bsky.social,Haiver,Artistic Director at Alba.art ⏀ Generative Art...,376.0,272.0,103.0,2023-05-17T17:51:50.801Z,,https://bsky.app/profile/did:plc:uh2xhvsj4kcoj...
37190,37190,did:plc:bucblzi2mx576tqo5tuxwynq,thisisradinsky.bsky.social,RADINSKY 🪬🏳️‍⚧️,Anatoliy Osman-Douša 🏳️‍⚧️\nIllustrator &​ Aut...,9805.0,941.0,1792.0,2023-05-14T11:02:35.774Z,,https://bsky.app/profile/did:plc:bucblzi2mx576...


In [7]:
# HTML card shown when hovering a point. `{hover_text}` is filled from the
# `hover_text` argument; the other placeholders are filled from the
# same-named columns of `extra_point_data`.
hover_text_template = """
<div style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;">
    <p style="font-weight: 600; font-size: 14px; margin: 0 0 2px 0;">{hover_text}</p>
    <p style="font-size: 14px; color: #666; margin: 0 0 8px 0;">@{handle}</p>
    <p style="font-size: 14px; color: #4A4A4A; margin: 0 0 8px 0;">{description}</p>
    <div style="display: flex; gap: 16px; font-size: 13px; color: #666;">
        <span><b>{followers}</b> followers</span>
        <span><b>{following}</b> following</span>
        <span><b>{posts}</b> posts</span>
    </div>
</div>
"""

# Some profiles have no display name; fall back to the handle (and finally
# to an empty string) so the card title never renders a missing value.
hover_names = (
    producer_df['display_name']
    .fillna(producer_df['handle'])
    .fillna('')
    .to_list()
)

# Build the per-point payload. Counts are stored as floats in the profile
# table, so format them as integers with thousands separators; this also
# keeps each column a homogeneous string column instead of mixing floats
# with '' placeholders.
point_data = producer_df[
    ['handle', 'description', 'followers', 'following', 'bsky_url', 'posts']
].copy()
for count_column in ('followers', 'following', 'posts'):
    point_data[count_column] = point_data[count_column].map(
        lambda value: '' if pd.isna(value) else f"{int(value):,}"
    )
point_data = point_data.fillna('')

# NOTE(review): `description`, `handle` and `display_name` are user-supplied
# profile text that ends up inside the HTML template above. Confirm whether
# datamapplot escapes substituted values before sharing the saved HTML.

# Create the plot
plot = datamapplot.create_interactive_plot(
    embeddings_2d,
    producer_communities,
    hover_text=hover_names,
    extra_point_data=point_data,
    hover_text_html_template=hover_text_template,
    on_click="window.open(hoverData.bsky_url[index], '_blank')",
    enable_search=True,
    search_field="description"
)

# Print some basic statistics about the embeddings
print(f"Original embedding shape: {producer_embeddings.shape}")
print(f"2D embedding shape: {embeddings_2d.shape}")
# Each embedding row is one producer (account), not a post.
print(f"Number of producers: {len(producer_embeddings)}")
plot.save('producer_embeddings.html')

Original embedding shape: (37192, 64)
2D embedding shape: (37192, 2)
Number of posts: 37192
