In [None]:
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "texture-viz",
#     "sentence_transformers"
# ]
# ///

In [1]:
import pandas as pd
import texture
from texture.models import DatasetSchema, Column, DerivedSchema

In [2]:
# Base URL of the folder that holds the example parquet files (vis_papers data).
P = "https://raw.githubusercontent.com/cmudig/Texture/main/examples/vis_papers/data/"

# Read each parquet file into its own DataFrame.
df_main = pd.read_parquet(f"{P}1_main.parquet")
df_words = pd.read_parquet(f"{P}2_words.parquet")
df_authors = pd.read_parquet(f"{P}3_authors.parquet")
df_keywords = pd.read_parquet(f"{P}4_keywords.parquet")

# Table name -> DataFrame mapping; the names here are the ones the schema
# refers to via ``name`` / ``table_name``.
load_tables = dict(
    main_table=df_main,
    words_table=df_words,
    authors_table=df_authors,
    keywords_table=df_keywords,
)

def _abstract_segment(column_name: str) -> Column:
    """Build a categorical segment column kept in ``words_table`` and derived from ``Abstract``."""
    return Column(
        name=column_name,
        type="categorical",
        derivedSchema=DerivedSchema(
            is_segment=True,
            table_name="words_table",
            derived_from="Abstract",
        ),
    )


def _linked_category(column_name: str, source_table: str) -> Column:
    """Build a categorical, non-segment column whose derived table is ``source_table``."""
    return Column(
        name=column_name,
        type="categorical",
        derivedSchema=DerivedSchema(is_segment=False, table_name=source_table),
    )


# Schema for ``main_table``: column order below is the declaration order
# passed to Texture, so keep it stable.
schema = DatasetSchema(
    name="main_table",
    primary_key=Column(name="id", type="number"),
    has_embeddings=True,
    has_projection=True,
    columns=[
        # Free-text fields.
        Column(name="Title", type="text"),
        Column(name="Abstract", type="text"),
        # Segment columns derived from Abstract (words_table).
        _abstract_segment("word"),
        _abstract_segment("pos"),
        # Hierarchical non-segment columns, each backed by its own table.
        _linked_category("author", "authors_table"),
        _linked_category("keyword", "keywords_table"),
        # Columns declared directly on the main table.
        Column(name="Year", type="number"),
        Column(name="Conference", type="categorical"),
        Column(name="PaperType", type="categorical"),
        Column(name="CitationCount_CrossRef", type="number"),
        Column(name="Award", type="categorical"),
    ],
)

# SentenceTransformer instances already built in this process, keyed by model
# name, so the model is constructed once rather than on every call.
_EMBEDDING_MODELS = {}


def get_embedding(value: str):
    """Embed ``value`` with the ``all-mpnet-base-v2`` sentence-transformers model.

    The model is built on the first call and reused on later calls in the
    same process; constructing it per call repeats the model load for every
    string that needs an embedding.

    Args:
        value: Text to embed.

    Returns:
        The result of ``SentenceTransformer.encode(value)``.
    """
    model_name = "all-mpnet-base-v2"
    model = _EMBEDDING_MODELS.get(model_name)
    if model is None:
        # Kept as a function-local import, as in the original, so the
        # dependency is only loaded where the function actually runs.
        import sentence_transformers

        model = sentence_transformers.SentenceTransformer(model_name)
        _EMBEDDING_MODELS[model_name] = model
    return model.encode(value)

In [3]:
# Start Texture with the schema, the loaded tables, and the callback used
# to create new embeddings.
texture.run(
    schema=schema,
    load_tables=load_tables,
    create_new_embedding_func=get_embedding,
)

# Texture should be running at http://localhost:8080

Running from a notebook, starting a new process


In [5]:
# Quick check of the embedding callback on a sample string; the bare name
# on the last line displays the result as the cell output.
sample_embedding = get_embedding("Hello Brown")
sample_embedding

array([ 4.81187971e-03,  3.40890065e-02,  3.83866904e-03, -3.45372707e-02,
        5.35184108e-02,  8.90741870e-03, -1.52280834e-02,  7.86621962e-03,
        7.25415489e-03, -4.68423888e-02, -3.26401070e-02, -2.62741093e-02,
        1.70146767e-02, -2.00641248e-02,  1.90118924e-02, -5.31631075e-02,
        1.38144894e-02,  2.41073705e-02, -4.46475111e-03,  9.76850186e-03,
       -1.38927046e-02,  2.59436443e-02,  1.95968021e-02,  3.88439186e-02,
       -3.67355421e-02,  3.20995525e-02, -6.37700362e-03, -1.43815076e-03,
       -1.54120391e-02,  7.41364509e-02, -1.54694626e-02,  8.16696696e-03,
        2.30414770e-03, -1.84669923e-02,  1.90941159e-06, -5.11167273e-02,
       -2.16183215e-02, -5.60364802e-04,  2.39021829e-04, -3.38688716e-02,
        2.45197043e-02, -1.03025474e-02, -2.45668627e-02,  1.23059349e-02,
       -3.90794426e-02,  5.87918796e-02,  3.66950296e-02,  2.90471278e-02,
       -2.33673095e-03,  1.49601605e-02,  2.26830933e-02, -1.26381526e-02,
       -2.27843923e-03, -