### Imports

In [10]:
import cassandra
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
import json
import os
import pandas as pd
import fiddler as fdl

cassandra.__version__

### Connect to DataStax

In [101]:
# This secure connect bundle is autogenerated when you donwload your SCB, if yours is different update the file name below
cloud_config= {
  'secure_connect_bundle': 'datastax_auth/secure-connect-fiddlerai.zip'
}

ASTRA_DB_APPLICATION_TOKEN = os.environ['ASTRA_DB_APPLICATION_TOKEN']

auth_provider=PlainTextAuthProvider("token", ASTRA_DB_APPLICATION_TOKEN)
cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider)
session = cluster.connect()

In [102]:
session.set_keyspace('fiddlerai')

### Interact with DataStax Tables

In [103]:
def pandas_factory(colnames, rows):
    return pd.DataFrame(rows, columns=colnames)

session.row_factory = pandas_factory
session.default_fetch_size = None

In [120]:
rows = session.execute('SELECT * FROM fiddler_doc_snippets_openai')
    
df_docs = rows._current_rows
#df = df.sort_values(by=['body_blob'])
df_docs

Unnamed: 0,row_id,attributes_blob,body_blob,metadata_s,vector
0,a97c3ccb74304bf08a481cd1623c13bd,,"slug: ""simple-nlp-monitoring-quick-start"" ding...",,"[-0.005977333057671785, 0.009508130140602589, ..."
1,5bb1637f454645e998bd4c8089e1979a,,"slug: ""simple-nlp-monitoring-quick-start"" /beI...",,"[-0.008631105534732342, 0.030594369396567345, ..."
2,25a5027d40ad491296ae9e609e5214b8,,"slug: ""simple-nlp-monitoring-quick-start"" Umk4...",,"[-0.006683949381113052, 0.02842552214860916, 0..."
3,8abfd56cb7ad45dc87aeeda35b50d11e,,"slug: ""simple-nlp-monitoring-quick-start"" UGOI...",,"[-0.0050511788576841354, 0.022345975041389465,..."
4,d8d2be67298b48448722ae00b16cd894,,"slug: ""simple-nlp-monitoring-quick-start"" ZJY/...",,"[-0.008539333008229733, 0.02817085199058056, 0..."
...,...,...,...,...,...
4159,36f6da7d17c348bda0e20449bb237460,,ResourceLink:https://www.fiddler.ai/resources/...,,"[-0.008880536071956158, -0.006243916694074869,..."
4160,117042d0478741f6b9165004dad0bcd7,,"slug: ""simple-nlp-monitoring-quick-start"" 66aa...",,"[-0.004738239571452141, 0.03067154809832573, 0..."
4161,e8171a4aa1d84860bc0a8f4fe4f93aa1,,"slug: ""simple-nlp-monitoring-quick-start"" cWV5...",,"[-0.006821814924478531, 0.031657591462135315, ..."
4162,f921d51c8e9e4391adbb019198ab4685,,"slug: ""simple-nlp-monitoring-quick-start"" 7u+P...",,"[-0.005098261404782534, 0.02817406691610813, 0..."


In [104]:
#rows = session.execute('SELECT row_id, session_id, source_docs, response, question, comment, feedback, ts FROM fiddler_chatbot_history')
rows = session.execute('SELECT * FROM fiddler_chatbot_history')

df = rows._current_rows
df = df.sort_values(by=['ts'])
df

Unnamed: 0,row_id,comment,feedback,metadata_s,question,question_vector,response,response_vector,session_id,source_docs,source_docs_vector,ts
466,1e21aade-7d3a-4ba5-9c30-3bcd59e55fe5,Great,1.0,,Can I have multiple baselines?,"[-0.026670513674616814, -0.007757064886391163,...","Yes, you can have multiple baselines in Fiddle...","[-0.016331970691680908, -0.00705744931474328, ...",fefc3bb2-724a-43ae-953f-651351a5a5f2,"Document: ---\ntitle: ""Baselines""\nslug: ""f...","[0.0015111523680388927, -0.006774006877094507,...",2023-10-12 13:43:17.893
191,c5c14f05-df8c-402c-9fd3-3af8f6999231,,1.0,,How many of them can I have?,"[0.00016631619655527174, -0.021171145141124725...",There is no specific mention of the maximum nu...,"[0.0033534911926835775, 0.007205451373010874, ...",fefc3bb2-724a-43ae-953f-651351a5a5f2,"Document: ---\ntitle: ""Baselines""\nslug: ""f...","[0.0015111487591639161, -0.006770379841327667,...",2023-10-12 13:44:06.765
77,feee7fdb-db14-46b8-a149-3502a1329df1,,,,Who is Fiddler?,"[-0.006909695453941822, -0.006632623262703419,...",Fiddler is a company that provides an AI Obser...,"[-0.02152024582028389, -0.019445177167654037, ...",,"Document: ---\ntitle: ""About the Fiddler Cl...","[-0.012038619257509708, 0.005443030036985874, ...",2023-10-12 13:46:36.185
468,916d2a3e-42ef-46a2-8f61-982f59948e81,,1.0,,Why is Fiddler?,"[0.0032059187069535255, -0.007025661878287792,...",The purpose of Fiddler is to provide a platfor...,"[-0.012120900675654411, -0.029846683144569397,...",,"Document: ---\ntitle: ""About the Fiddler Cl...","[-0.0005735370796173811, 0.010148899629712105,...",2023-10-12 13:46:49.122
448,dfff104b-81ee-4e63-b864-a87b25aa2150,,,,How is Fiddler?,"[-0.008280814625322819, -0.0005175509140826762...",The purpose of Fiddler is to provide a platfor...,"[-0.022893795743584633, -0.019672419875860214,...",,"Document: ---\ntitle: ""About the Fiddler Cl...","[-0.0005859255325049162, 0.01012706384062767, ...",2023-10-12 13:47:05.875
...,...,...,...,...,...,...,...,...,...,...,...,...
144,61c5b289-b262-43b5-81e5-f19ee322ae44,,,,Fiddler simple LLM monitoring,"[-0.012871797196567059, 0.024465132504701614, ...","In addition to end user feedback, FM costs, FM...","[0.010891363024711609, 0.015139870345592499, -...",883e7ec6-dbb8-4cca-b453-1fc36e2b4380,Document: BlogLink:https://www.fiddler.ai/b...,"[0.0027413186617195606, 0.019585415720939636, ...",2024-03-13 04:42:57.992
488,a7bde4e8-ca9d-4f01-8996-f72922bc8824,,,,How can I create Opt-in to specific Fiddler LL...,"[-0.006899789907038212, 0.008830550126731396, ...",To create Opt-in to specific Fiddler LLM Enric...,"[0.0024937058333307505, 0.028709447011351585, ...",29d61808-e895-4f01-9d82-c10da7528c49,Document: BlogLink:https://www.fiddler.ai/b...,"[0.002680779667571187, 0.01944231428205967, -0...",2024-03-13 04:45:58.531
128,ddc77fcf-e5c2-4b70-8c78-8bd1195dd874,,,,How can I publish events?,"[-0.004293966572731733, -0.01729644648730755, ...",Event publication can be done using the Fiddle...,"[-0.01976914517581463, -0.01274276152253151, -...",29d61808-e895-4f01-9d82-c10da7528c49,"Document: ---\ntitle: ""About Event Publicat...","[-0.006143550388514996, 0.0021582168992608786,...",2024-03-13 04:46:37.289
186,998f821f-8249-4f87-bdb9-1793090a883b,,-1.0,,What else do I need to delete before deleting ...,"[-0.002916740719228983, 0.02653769962489605, -...",I could not find an answer.\nJoin our [Slack c...,"[0.010764934122562408, -0.00495186960324645, -...",76b471ab-0cba-4967-b275-54fc358798ee,"Document: ---\ntitle: ""About Models""\nslug:...","[-0.010122415609657764, 0.035239968448877335, ...",2024-03-13 17:45:30.697


In [115]:
rows = session.execute('SELECT * FROM fiddler_chatbot_ledger')

df = rows._current_rows
df = df.sort_values(by=['ts'])
df

Unnamed: 0,row_id,comment,completion_tokens,duration,feedback,feedback2,model_name,prompt,prompt_tokens,response,run_id,session_id,source_doc0,source_doc1,source_doc2,total_tokens,ts
475,1e21aade-7d3a-4ba5-9c30-3bcd59e55fe5,Great,73.0,1.8178,1.0,like,fiddler_rag_chatbot,Can I have multiple baselines?,5.0,"Yes, you can have multiple baselines in Fiddle...",1e21aade-7d3a-4ba5-9c30-3bcd59e55fe5,fefc3bb2-724a-43ae-953f-651351a5a5f2,"---\ntitle: ""About Datasets""\nslug: ""about-da...","---\ntitle: ""About Datasets""\nslug: ""about-da...","---\ntitle: ""Release 23.2 Notes""\nslug: ""rele...",78.0,2023-10-12 13:43:17.893
194,c5c14f05-df8c-402c-9fd3-3af8f6999231,,32.0,2.2535,1.0,like,fiddler_rag_chatbot,How many of them can I have?,7.0,There is no specific mention of the maximum nu...,c5c14f05-df8c-402c-9fd3-3af8f6999231,fefc3bb2-724a-43ae-953f-651351a5a5f2,"---\ntitle: ""About Datasets""\nslug: ""about-da...","---\ntitle: ""About Datasets""\nslug: ""about-da...","---\ntitle: ""Release 23.2 Notes""\nslug: ""rele...",39.0,2023-10-12 13:44:06.765
79,feee7fdb-db14-46b8-a149-3502a1329df1,,39.0,0.4649,0.0,,fiddler_rag_chatbot,Who is Fiddler?,3.0,Fiddler is a company that provides an AI Obser...,feee7fdb-db14-46b8-a149-3502a1329df1,,"---\ntitle: ""ML Algorithms In Fiddler""\nslug:...","---\ntitle: ""ML Algorithms In Fiddler""\nslug:...","---\ntitle: ""Welcome to Fiddler's Documentati...",42.0,2023-10-12 13:46:36.185
477,916d2a3e-42ef-46a2-8f61-982f59948e81,,50.0,2.1168,1.0,like,fiddler_rag_chatbot,Why is Fiddler?,3.0,The purpose of Fiddler is to provide a platfor...,916d2a3e-42ef-46a2-8f61-982f59948e81,,"---\ntitle: ""System Architecture""\nslug: ""sys...","---\ntitle: ""System Architecture""\nslug: ""sys...","---\ntitle: ""Datadog Integration""\nslug: ""dat...",53.0,2023-10-12 13:46:49.122
457,dfff104b-81ee-4e63-b864-a87b25aa2150,,55.0,2.9689,0.0,,fiddler_rag_chatbot,How is Fiddler?,3.0,The purpose of Fiddler is to provide a platfor...,dfff104b-81ee-4e63-b864-a87b25aa2150,,"---\ntitle: ""System Architecture""\nslug: ""sys...","---\ntitle: ""System Architecture""\nslug: ""sys...","---\ntitle: ""Datadog Integration""\nslug: ""dat...",58.0,2023-10-12 13:47:05.875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
397,<class 'uuid.UUID'>,,,,,,,Human: You are a tool called Fiddler Chatbot. ...,,"Yes, Fiddler has dashboards where you can crea...",,,,,,,2024-03-19 22:49:44.502
554,0a8282c5-89aa-4e29-b6aa-ac74d46f0558,,,,,,,Human: You are a tool called Fiddler Chatbot. ...,,I could not find an answer.\nJoin our [Slack c...,,,,,,,2024-03-19 22:53:28.656
13,c7a0e17f-616b-4cfa-9530-2d9acb43661f,,0.0,5191.0000,,,,Human: You are a tool called Fiddler Chatbot. ...,0.0,Danny Brock is a senior solutions engineer at ...,2317b7c3-e1cf-4d95-8434-74307372b65f,,,,,0.0,2024-03-19 23:06:07.229
5,ca5f8ef9-4944-49b7-9bb8-f2bcc0a061d8,,0.0,3035.0000,,,,Human: You are a tool called Fiddler Chatbot. ...,0.0,"Yes, Fiddler employs a Solution Engineer. You ...",2ef9dc98-1e21-4128-8c83-5fb4032bcd38,,,,,0.0,2024-03-19 23:13:25.756


In [33]:
#rows = session.execute('INSERT INTO fiddler_chatbot_ledger (row_id, session_id, comment, feedback, prompt, response, source_doc0, ts) VALUES SELECT row_id, session_id, comment, feedback, question, response, source_docs, ts FROM fiddler_chatbot_history')
rows = session.execute('SELECT row_id, session_id, comment, feedback, question, response, source_docs, ts FROM fiddler_chatbot_history')
#rows = session.execute('SELECT row_id, session_id, comment, feedback, prompt, response, source_doc0, ts FROM fiddler_chatbot_ledger')
df = rows._current_rows
df

SyntaxException: <Error from server: code=2000 [Syntax error in CQL query] message="line 1:117 mismatched input 'SELECT' expecting '(' (..., source_doc0, ts) VALUES [SELECT]...)">

In [44]:
# This code does the initial migration from history table to ledger table
import numpy as np

rows = session.execute('SELECT row_id, session_id, comment, feedback, question, response, source_docs, ts FROM fiddler_chatbot_history')
df = rows._current_rows

query = "INSERT INTO fiddler_chatbot_ledger (row_id, session_id, comment, feedback, prompt, response, source_doc0, ts) VALUES (?,?,?,?,?,?,?,?)"
prepared = session.prepare(query)

for index, row in df.iterrows():
    if np.isnan(row['feedback']):
        feedback = 0
    else:
        feedback = int(row['feedback'])
    #session.execute(prepared, (row['row_id'], row['session_id'], row['comment'], feedback, row['question'], row['response'], row['source_docs'], row['ts']))

In [99]:
# This code does the data massaging for the new ledger table
import numpy as np

rows = session.execute('SELECT * FROM fiddler_chatbot_ledger')
df = rows._current_rows

def get_num_tokens(input):
    return len(input.split())

for index, row in df.iterrows():
#     prompt_tokens = get_num_tokens(row['prompt'])
#     completion_tokens = get_num_tokens(row['response'])
#     duration = np.round(np.random.normal(loc=3.0, scale=1.5),4)
      row_id = row['row_id']
#     source_docs_array = row['source_doc0'].split('Document: ')
#     #print(row['source_doc0'])
#     #print(source_docs_array)
#     #break
    
#     session.execute(f"UPDATE fiddler_chatbot_ledger SET prompt_tokens={prompt_tokens}, completion_tokens={completion_tokens}, total_tokens={prompt_tokens+completion_tokens}, \
#                       model_name='fiddler_rag_chatbot', duration={duration}, run_id='{row_id}', source_doc0='{source_docs_array[5]}', source_doc1='{source_docs_array[6]}', source_doc2='{source_docs_array[7]}' \
#                       WHERE row_id ='{row_id}'")

      session.execute(f"UPDATE fiddler_chatbot_ledger SET feedback2='dislike' WHERE row_id='{row_id}' IF feedback=-1")


In [100]:
import yaml

rows = session.execute('SELECT * FROM fiddler_chatbot_ledger')

df = rows._current_rows
df = df.sort_values(by=['ts'])
df_baseline = df.iloc[:50]
df_baseline = df_baseline.drop(columns=['ts'])
df_events = df.iloc[50:]

#dataset_info = fdl.DatasetInfo.from_dataframe(df_baseline, max_inferred_cardinality=10)

#with open('dataset.yaml', 'w') as yaml_file:
#    yaml.dump({'dataset': dataset_info.to_dict()}, yaml_file)

df_baseline.to_csv('chatbot_baseline.csv', index=False)
df_events.to_csv('chatbot_events.csv', index=False)

In [16]:
#session.execute("DELETE FROM fiddler_chatbot_conversation WHERE row_id='-1'")

In [35]:
#session.execute("DROP TABLE fiddler_chatbot_history")

<cassandra.cluster.ResultSet at 0x7fc667c67820>

In [8]:
session.execute("TRUNCATE TABLE fiddler_doc_snippets_openai")

<cassandra.cluster.ResultSet at 0x7f8358b9ec40>

In [27]:
#session.execute("TRUNCATE TABLE fiddler_chatbot_history")

<cassandra.cluster.ResultSet at 0x7fa6c0e55850>