In [1]:
from dotenv import load_dotenv
import os
from google import genai
from google.genai import types

# Load environment variables through python-dotenv before reading the key.
load_dotenv()

# Build the Gemini client from the key found in the environment.
api_key = os.environ.get("GOOGLE_API_KEY")
client = genai.Client(api_key=api_key)




In [8]:
# Embed one sentence as a retrieval document, asking for 768-dimensional output.
result = client.models.embed_content(
    model="gemini-embedding-exp-03-07",
    contents="I am a young man",
    config=types.EmbedContentConfig(
        task_type="RETRIEVAL_DOCUMENT",
        output_dimensionality=768,
    ),
)

In [9]:
# Number of values in the first returned embedding; the recorded output (768)
# matches the output_dimensionality requested in the embed_content call.
len(result.embeddings[0].values)

768

In [5]:
# NOTE(review): `data_url` is not defined in any cell visible here, and this
# cell's execution count (5) is out of order with its neighbours — presumably it
# relied on kernel state from a cell that no longer exists. Confirm or remove.
os.path.exists(data_url)

True

In [7]:
import os
from pathlib import Path
import boto3
from dotenv import load_dotenv

# Load environment variables through python-dotenv (also done in the first
# cell) before the os.getenv lookups further down in this cell.
load_dotenv()


def get_app_dir():
    """Return the ``.brain_in_a_vat`` directory under the user's home, creating it if absent."""
    home_dir = os.path.expanduser("~")
    target = Path(home_dir).joinpath(".brain_in_a_vat")
    # parents/exist_ok make repeated calls harmless.
    target.mkdir(parents=True, exist_ok=True)
    return target

# Identifier of the memory DB snapshot. It is used for both the local archive
# name and the S3 object key, so keeping it in one place stops them drifting.
MEMORY_DB_ID = "960e287d-a044-480c-bb6d-d4a91d7d4521"

app_dir = get_app_dir()

# Diagnostics: is the database directory present/readable, and can the archive's
# parent directory be written to?
db_path = str(app_dir / "memory_db")
print(f"db_path exists: {os.path.exists(db_path)}, readable: {os.access(db_path, os.R_OK)}")
zip_path = app_dir / f"memory_db_{MEMORY_DB_ID}.zip"
print(f"zip_path parent writable: {os.access(zip_path.parent, os.W_OK)}")

# Check up front so a missing archive is reported clearly before any S3 call.
if not zip_path.is_file():
    raise FileNotFoundError(f"Archive to upload not found: {zip_path}")

# Credentials and bucket name are read from the environment.
s3_client = boto3.client(
    's3',
    aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
)
bucket_name = os.getenv('AWS_BUCKET_NAME')
s3_client.upload_file(
    str(zip_path),
    bucket_name,
    f"memory_db/{MEMORY_DB_ID}/memory_db.zip",
)


db_path exists: True, readable: True
zip_path parent writable: True


In [16]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import datetime

# Synthetic data: two Gaussian blobs in 2-D, each point labelled and timestamped.
np.random.seed(42)  # fixed seed keeps the coordinates reproducible
num_points = 10

# The first blob sits around (3, 3) and takes half of the points ...
vectors_2d_c1 = np.random.normal(loc=[3, 3], scale=1, size=(num_points // 2, 2))
# ... the second, slightly wider blob around (7, 7) takes the remainder.
vectors_2d_c2 = np.random.normal(loc=[7, 7], scale=1.2, size=(num_points - num_points // 2, 2))
vectors_2d = np.concatenate((vectors_2d_c1, vectors_2d_c2), axis=0)

# Two-line label per point: "Topic N" followed by a lettered detail line.
contents = [
    f"Topic {n}\nDetail Line {chr(96 + n)}"
    for n in range(1, num_points + 1)
]
# Plotly annotations break lines on <br>, not on "\n".
contents_html = ['<br>'.join(label.split('\n')) for label in contents]

# Each point gets a timestamp a random 1-29 days before "now".
timestamps = [
    datetime.datetime.now() - datetime.timedelta(days=np.random.randint(1, 30))
    for _ in range(num_points)
]
timestamps_str = [moment.strftime('%Y-%m-%d %H:%M') for moment in timestamps]

df = pd.DataFrame(dict(
    x=vectors_2d[:, 0],
    y=vectors_2d[:, 1],
    content_html=contents_html,  # <br>-separated, for annotations
    content=contents,            # raw text, shown in the scatter hover
    timestamp=timestamps_str,
))

# Density backdrop. The bin count is kept low on purpose: with only a handful of
# points, coarse binning gives smoother contours.
fig = px.density_contour(df, x='x', y='y', nbinsx=5, nbinsy=5)

# Filled blue contours without value labels, slightly transparent.
fig.update_traces(
    selector={'type': 'histogram2dcontour'},
    contours_coloring='fill',
    contours_showlabels=False,
    colorscale='Blues',
    opacity=0.7,
)
# Drop the colour bar that belongs to the contour trace.
fig.data[0].showscale = False

# Overlay the raw points; hovering a marker shows its content and timestamp.
points_fig = px.scatter(
    df,
    x='x',
    y='y',
    hover_data=['content', 'timestamp'],
    opacity=0.7,
    color_discrete_sequence=['#222'],
)
fig.add_trace(points_fig.data[0])

# Minimal look: centred title, white template, no legend, both axes hidden.
hidden_axis = {'showgrid': False, 'zeroline': False, 'visible': False}
fig.update_layout(
    title={'text': 'Memory Embeddings Visualization', 'x': 0.5},
    template='plotly_white',
    showlegend=False,
    xaxis=hidden_axis,
    yaxis=hidden_axis,
    margin={'l': 20, 'r': 20, 't': 50, 'b': 20},
)

fig.show()

In [23]:
import textwrap

# import display html
from IPython.display import HTML
def wrap_text(text, width=100):
    """Wrap text to ``width`` columns, joining the lines with ``<br>`` tags.

    Meant for Plotly annotations / HTML output, where ``<br>`` is the line
    separator. Line breaks already present in ``text`` are preserved: each
    input line is wrapped on its own, because a single ``textwrap.wrap`` call
    over the whole string would turn the newlines into spaces.

    Args:
        text: The string to wrap; may contain newlines.
        width: Maximum number of characters per output line.

    Returns:
        The wrapped text as a single string with ``<br>`` between lines, or an
        empty string when ``text`` is empty.
    """
    wrapped_lines = []
    for line in text.splitlines():
        # textwrap.wrap returns [] for a blank line; keep it as an empty row so
        # paragraph spacing survives.
        wrapped_lines.extend(textwrap.wrap(line, width) or [""])
    return "<br>".join(wrapped_lines)
# Sample paragraph, long enough to need wrapping at the default width.
sample_text = 'FastMCP allows you to add specialized metadata to your tools through annotations. These annotations communicate how tools behave to client applications without consuming token context in LLM prompts.'

# Render the wrapped text as HTML so the <br> tags show up as line breaks.
display(HTML(wrap_text(sample_text)))


In [18]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import datetime
from sklearn.cluster import KMeans
from scipy.spatial import distance 


cluster_number = 5

# Partition the 2-D points with k-means; random_state pins the assignment.
kmeans = KMeans(n_clusters=cluster_number, random_state=42, n_init='auto')
df['cluster'] = kmeans.fit_predict(vectors_2d)
cluster_centers = kmeans.cluster_centers_

# For every point, the Euclidean distance to the centre of its own cluster,
# computed in one vectorised pass instead of re-filtering df for each cluster.
own_centers = cluster_centers[df['cluster'].to_numpy()]
dist_to_own_center = pd.Series(
    np.linalg.norm(df[['x', 'y']].to_numpy() - own_centers, axis=1),
    index=df.index,
)

# One representative per cluster: the index label of the member nearest to the
# centre. groupby orders the result by cluster label and omits any label that
# has no members, so no explicit empty-cluster check is needed.
closest_points_indices = dist_to_own_center.groupby(df['cluster']).idxmin().tolist()

df_closest_points = df.loc[closest_points_indices]
print("DataFrame of points closest to cluster centers:")
print(df_closest_points[['x', 'y', 'content', 'cluster']])

# Density backdrop; the bin count scales with the number of clusters.
fig = px.density_contour(
    df,
    x='x',
    y='y',
    nbinsx=cluster_number * 2,
    nbinsy=cluster_number * 2,
)

# Filled blue contours, translucent enough for the markers to stay visible.
fig.update_traces(
    selector={'type': 'histogram2dcontour'},
    contours_coloring='fill',
    contours_showlabels=False,
    colorscale='Blues',
    opacity=0.6,
)
# Hide the colour bar, but only when the first trace really is the contour.
if fig.data and isinstance(fig.data[0], go.Histogram2dContour):
    fig.data[0].showscale = False

# Every point is drawn as a dark marker; hovering shows content and timestamp.
fig.add_trace(
    go.Scatter(
        x=df['x'],
        y=df['y'],
        mode='markers',
        marker={'color': '#222222', 'size': 7, 'opacity': 0.7},
        customdata=df[['content', 'timestamp']],
        hovertemplate="<b>Content:</b> %{customdata[0]}<br><b>Timestamp:</b> %{customdata[1]}<extra></extra>",
    )
)

# Boxed text labels only for the point nearest to each cluster centre, placed
# just above and to the right of the marker.
label_style = dict(
    showarrow=False,
    xanchor="left",
    yanchor="bottom",
    xshift=7,
    yshift=7,
    font={'family': "Arial, Sans-serif", 'size': 10, 'color': "#111111"},
    align="left",
    bordercolor="#777777",
    borderwidth=1,
    borderpad=4,
    bgcolor="rgba(255, 255, 255, 0.9)",
    opacity=1,
)
for point in df_closest_points.itertuples(index=False):
    fig.add_annotation(x=point.x, y=point.y, text=point.content_html, **label_style)

# Minimal look: centred title, white template, no legend, both axes hidden.
# update_layout returns the figure, so leaving it as the cell's last expression
# is what makes the notebook render the plot.
hidden_axis = {'showgrid': False, 'zeroline': False, 'visible': False}
fig.update_layout(
    title={'text': 'Memory Embeddings Visualization', 'font': {'size': 16}, 'x': 0.5},
    template='plotly_white',
    showlegend=False,
    xaxis=hidden_axis,
    yaxis=hidden_axis,
    margin={'l': 20, 'r': 20, 't': 60, 'b': 20},
    hovermode='closest',
)

DataFrame of points closest to cluster centers:
          x         y                 content  cluster
3  4.579213  3.767435  Topic 4\nDetail Line d        0
6  7.290355  4.704064  Topic 7\nDetail Line g        1
2  2.765847  2.765863  Topic 3\nDetail Line c        2
5  6.443899  6.441124  Topic 6\nDetail Line f        3
7  4.930099  6.325255  Topic 8\nDetail Line h        4
