In [1]:
import pandas as pd
import texture
from texture.models import DatasetSchema, Column, DerivedSchema

In [2]:
# Read the three parquet inputs in one pass; the targets line up positionally
# with the paths (main table, words table, topics table).
df_main, df_word, df_topics = map(
    pd.read_parquet,
    (
        "data/1_main.parquet",
        "data/2_words.parquet",
        "data/3_topics.parquet",
    ),
)

In [3]:
def get_embedding(text):
    """Return the embedding of ``text`` from OpenAI's ``text-embedding-3-small`` model.

    Newline characters in ``text`` are replaced with spaces before the request
    is sent. A fresh ``OpenAI`` client is constructed on every call with no
    explicit arguments (presumably picking up credentials from the
    environment -- confirm against the openai client docs).

    Args:
        text: The string to embed.

    Returns:
        The ``embedding`` attribute of the first item in the response's ``data``.
    """
    # Imported at call time rather than at notebook level, so the function
    # carries its own dependency wherever it ends up being executed.
    from openai import OpenAI

    api = OpenAI()

    single_line = text.replace("\n", " ")
    response = api.embeddings.create(
        input=[single_line], model="text-embedding-3-small"
    )
    first_item = response.data[0]
    return first_item.embedding

In [4]:
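# Disabled alternative: obtain `schema` and `load_tables` from texture.preprocess
# instead of writing them out by hand as the next cell does.
# schema, load_tables = texture.preprocess(df_main)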

In [5]:
# Tables handed to texture, keyed by table name. "main" matches the schema
# name below; the other two keys match the `table_name` values used by the
# derived columns.
load_tables = {"main": df_main, "words_table": df_word, "topics_table": df_topics}

# Hand-written schema for the "main" table.
schema = DatasetSchema(
    name="main",
    columns=[
        Column(name="text", type="text"),
        # Categorical column backed by "words_table" and derived from "text".
        Column(
            name="word",
            type="categorical",
            derivedSchema=DerivedSchema(
                is_segment=True,
                table_name="words_table",
                derived_from="text",
                derived_how=None,
            ),
        ),
        # Categorical column backed by "topics_table"; no source column is given.
        Column(
            name="main_topics",
            type="categorical",
            derivedSchema=DerivedSchema(
                is_segment=False,
                table_name="topics_table",
                derived_from=None,
                derived_how=None,
            ),
        ),
        Column(name="number_topics", type="number"),
        # One numeric column per question, registered in Q0..Q13 order.
        *(
            Column(name=question, type="number")
            for question in (
                "Q0_Does this text discuss educational programs or social initiatives?",
                "Q1_Does this text condemn violence or advocate for peace?",
                "Q2_Is this text focused on remembering victims of violence or atrocities?",
                "Q3_Does this text advocate against injustice, racism, or hatred?",
                "Q4_Is this text about appreciating law enforcement officers?",
                "Q5_Does this text advocate for women's rights or choices?",
                "Q6_Is the text focused on the welfare or rights of children?",
                "Q7_Does this text discuss political advocacy, viewpoints, or accountability?",
                "Q8_Does this text address issues related to health, safety, welfare, or well-being?",
                "Q9_Is the focus of this text on environmental or energy issues and policies?",
                "Q10_Does this text deal with economic policies, financial matters, or strategies affecting communities or individuals?",
                "Q11_Is this text concerned with national security, defense policies, or military affairs?",
                "Q12_Does the text discuss healthcare improvements, welfare, or related issues?",
                "Q13_Does this text discuss legislative efforts, actions, or conflicts within political parties?",
            )
        ),
        # The Q0_rationale .. Q13_rationale text columns are currently not
        # registered. To enable them, uncomment:
        # *(Column(name=f"Q{i}_rationale", type="text") for i in range(14)),
    ],
    primary_key=Column(name="id", type="number"),
    origin="uploaded",
    has_embeddings=True,
    has_projection=True,
    search_result=None,
)

In [6]:
# Start texture with the schema and tables built earlier in the notebook;
# get_embedding is supplied as the callback for creating new embeddings.
texture.run(
    schema=schema,
    load_tables=load_tables,
    create_new_embedding_func=get_embedding,
)

Running from a notebook, starting a new process



[1mTexture[0m running on http://localhost:8080



In [7]:
# Show the main table as the cell's rich output.
df_main

Unnamed: 0,id,text,Q0_rationale,Q0_Does this text discuss educational programs or social initiatives?,Q1_rationale,Q1_Does this text condemn violence or advocate for peace?,Q2_rationale,Q2_Is this text focused on remembering victims of violence or atrocities?,Q3_rationale,"Q3_Does this text advocate against injustice, racism, or hatred?",...,Q11_rationale,"Q11_Is this text concerned with national security, defense policies, or military affairs?",Q12_rationale,"Q12_Does the text discuss healthcare improvements, welfare, or related issues?",Q13_rationale,"Q13_Does this text discuss legislative efforts, actions, or conflicts within political parties?",vector,umap_x,umap_y,number_topics
0,0,"New year, new chance to sign up for a quality ...",The text promotes signing up for a healthcare ...,0.75,The text encourages individuals to sign up for...,0.75,The text is about signing up for healthcare an...,0.00,"The text promotes signing up for healthcare, w...",0.50,...,The text discusses signing up for healthcare p...,0.00,The text discusses signing up for a healthcare...,1.0,The text encourages individuals to sign up for...,0.00,"[0.0010962822707369924, -0.0016538118943572044...",2.129538,1.749549,6
1,1,It’s been incredible to meet with so many New ...,The text discusses honoring the legacy of Mart...,1.00,The text honors Martin Luther King Jr.'s legac...,1.00,The text honors the legacy of Martin Luther Ki...,0.50,The text explicitly honors the legacy of Marti...,1.00,...,The text focuses on honoring Martin Luther Kin...,0.00,The text focuses on honoring Martin Luther Kin...,0.0,The text commemorates Martin Luther King Jr. D...,0.00,"[0.01976725272834301, -0.030230993404984474, 0...",4.054843,6.293632,5
2,2,Glad to be at the historic opening of the Alie...,The text describes the opening of a community ...,1.00,The text discusses the opening of a community ...,0.75,The text discusses the opening of a community ...,0.00,The text discusses a community service facilit...,0.50,...,The text describes the opening of a community ...,0.00,The text describes the opening of a community ...,0.0,The text describes the opening of a community ...,0.00,"[-0.0038568424060940742, -0.006571371573954821...",3.992207,5.389932,4
3,3,"I obtained $1,800,000 for the Roe Road Extensi...",The text discusses funding for infrastructure ...,1.00,The text focuses on improving evacuation route...,0.75,The text focuses on infrastructure improvement...,0.00,The text focuses on infrastructure improvement...,0.00,...,The text discusses funding for road projects a...,0.25,The text discusses funding for road projects a...,0.0,The text discusses funding for infrastructure ...,0.75,"[0.04909468814730644, -0.046484727412462234, 0...",4.327329,4.546385,6
4,4,The fatal beating of Tyre Nichols is horrifyin...,The text addresses the issue of police brutali...,1.00,The text explicitly condemns violence and call...,1.00,The text explicitly addresses the fatal beatin...,1.00,The text condemns the brutal beating of Tyre N...,1.00,...,The text expresses concern over police brutali...,0.00,The text expresses concern over police brutali...,0.0,The text expresses outrage over a violent inci...,0.00,"[0.03896353021264076, 0.054922595620155334, -0...",3.222133,6.252816,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,Reminder: every single Republican voted agains...,The text discusses a political issue related t...,0.00,The text criticizes Republican actions regardi...,0.50,The text discusses Republicans voting against ...,0.00,The text highlights the negative impact of Rep...,1.00,...,The text discusses healthcare policies and the...,0.00,The text discusses Republicans voting against ...,1.0,The text discusses how every Republican voted ...,1.00,"[0.005925746634602547, -0.0006595111335627735,...",1.499137,2.155650,6
96,96,"On International Holocaust Remembrance Day, we...",This text highlights a legislative effort to h...,1.00,The text commemorates Holocaust victims and ad...,1.00,This text explicitly focuses on remembering th...,1.00,This text commemorates the Holocaust and empha...,1.00,...,The text focuses on Holocaust remembrance and ...,0.00,The text focuses on remembrance and honoring i...,0.0,The text focuses on a legislative effort to ho...,1.00,"[-0.027606455609202385, 0.008372863754630089, ...",3.286500,7.092406,7
97,97,As Oregon and our country grapples with skyroc...,The text discusses an innovative housing proje...,1.00,The text discusses innovative housing solution...,1.00,The text addresses the housing crisis and inno...,0.00,The text discusses innovative housing solution...,0.75,...,The text addresses housing costs and innovativ...,0.00,The text discusses an innovative housing proje...,0.5,The text describes a project related to housin...,0.50,"[0.02520018257200718, -0.0113333435729146, 0.0...",4.434765,4.618957,7
98,98,Great visit at Henry County Medical Center spe...,The text mentions a visit related to a Rural H...,0.75,The text focuses on healthcare discussions and...,0.50,The text discusses a visit to a medical center...,0.00,"While the text focuses on rural health care, i...",0.50,...,The text discusses rural health care and commu...,0.00,The text mentions a Rural Health Agenda aimed ...,1.0,The text mentions a visit related to a Rural H...,0.50,"[0.005383396986871958, -0.007172237150371075, ...",3.839167,4.989978,5
