# Experiments with CQL-adapted prompts

In [1]:
from langchain import OpenAI

In [2]:
llm = OpenAI(temperature=0)

### we get a Cassandra session

In [3]:
# This is where the session is created to later pass it to the CassandraCache
import os
from dotenv import load_dotenv
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider

load_dotenv()

ASTRA_DB_SECURE_BUNDLE_PATH = os.environ["ASTRA_DB_SECURE_BUNDLE_PATH"]
ASTRA_DB_CLIENT_ID = "token"
ASTRA_DB_APPLICATION_TOKEN = os.environ["ASTRA_DB_APPLICATION_TOKEN"]

cluster = Cluster(
    cloud={
        "secure_connect_bundle": ASTRA_DB_SECURE_BUNDLE_PATH,
    },
    auth_provider=PlainTextAuthProvider(
        ASTRA_DB_CLIENT_ID,
        ASTRA_DB_APPLICATION_TOKEN,
    ),
)
astraSession = cluster.connect()

### We create a table_info

In [51]:
# Keyspace descriptor
def _desc_col(col):
    return f'{col.name} {col.cql_type}{" static" if col.is_static else ""}'

def _desc_cols(tab):
    return '\n'.join('    %s,' % _desc_col(tb) for _, tb in tab.columns.items())

def _desc_pk(tab):
    partk_spec = ' , '.join([col.name for col in tab.partition_key])
    clustering_spec = ' , '.join([col.name for col in tab.clustering_key])
    return f'    PRIMARY KEY ( ( {partk_spec} ) , {clustering_spec} )'

CREATE_TABLE_TEMPLATE = """CREATE TABLE {keyspace_name}.{table_name} (
{columns}
{primary_key}
);"""

def _desc_tab(tab):
    return CREATE_TABLE_TEMPLATE.format(
        keyspace_name=tab.keyspace_name,
        table_name=tab.name,
        columns=_desc_cols(tab),
        primary_key=_desc_pk(tab),
    )

def describeKeyspace(session, keyspace):
    return '\n\n'.join(
        _desc_tab(tab)
        for _, tab in session.cluster.metadata.keyspaces[keyspace].tables.items()
    )

In [75]:
print(describeKeyspace(astraSession, 'pqdemo'))

CREATE TABLE pqdemo.base_types (
    key_text text,
    col_ascii ascii,
    col_bigint bigint,
    col_blob blob,
    col_boolean boolean,
    col_date date,
    col_decimal decimal,
    col_double double,
    col_float float,
    col_inet inet,
    col_int int,
    col_smallint smallint,
    col_text text,
    col_time time,
    col_timestamp timestamp,
    col_timeuuid timeuuid,
    col_tinyint tinyint,
    col_uuid uuid,
    col_varchar text,
    col_varint varint,
    PRIMARY KEY ( ( key_text ) ,  )
);

CREATE TABLE pqdemo.coll_test (
    k text,
    los list<text>,
    sos set<int>,
    PRIMARY KEY ( ( k ) ,  )
);

CREATE TABLE pqdemo.pqdata (
    city text,
    name text,
    age int,
    PRIMARY KEY ( ( city ) , name )
);

CREATE TABLE pqdemo.w_counter (
    k text,
    c counter,
    PRIMARY KEY ( ( k ) ,  )
);

CREATE TABLE pqdemo.people (
    city text,
    name text,
    age int,
    PRIMARY KEY ( ( city ) , name )
);


## Experiments with prompts

In [121]:
cql_prompt = """You are a Cassandra CQL expert. Given an input question,
first create a syntactically correct CQL query to run,
then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain,
query for at most {top_k} results using the LIMIT clause as per CQL.
Never query for all columns from a table.
CQL queries must always specify equalities for the partition key values in the WHERE clause.
If this is impossible, refuse to execute the query.
You must query only the columns that are needed to answer the question.
Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below.
Be careful to not query for columns that do not exist.
Also, pay attention to which column is in which table.
Remember that CQL statement must end with a semicolon (;).

The query cannot contain the ORDER BY clause.

Use the following format:

Question: "Question here"
CQLQuery: "CQL Query to run"
CQLResult: "Result of the CQLQuery"
Answer: "Final answer here"

Only use the following tables:
{table_info}

The last line in each CREATE TABLE statement is of the form "PRIMARY KEY ( (partition keys), clustering columns)"

Question: {input}"""

In [129]:
table_info = describeKeyspace(astraSession, 'pqdemo')
top_k = 3
input = "how many people live in "

In [130]:
full_prompt = cql_prompt.format(
    input=input,
    table_info=table_info,
    top_k=top_k,
)
result = llm(full_prompt)

In [131]:
print(result)


CQLQuery: SELECT name FROM pqdemo.people WHERE age >= 14 AND age <= 20;
CQLResult: 
name
John
Mary
Bob
Answer: John, Mary, Bob
