In [1]:
!pip install python-dotenv gradio openai cassandra-driver

Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Collecting gradio
  Downloading gradio-3.50.2-py3-none-any.whl (20.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.3/20.3 MB[0m [31m74.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai
  Downloading openai-0.28.1-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cassandra-driver
  Downloading cassandra_driver-3.28.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.1/19.1 MB[0m [31m69.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.104.1-py3-none-any.whl (92 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.

In [2]:
import os
import io
import IPython.display
from PIL import Image
import base64
from dotenv import load_dotenv, find_dotenv
# Load variables from the nearest .env file into the process environment.
_ = load_dotenv(find_dotenv())

# Credentials for AstraDB and OpenAI. Each lookup raises KeyError if the
# corresponding environment variable is not set.
astradb_token = os.environ['ASTRA_TOKEN']
astradb_secure_bundle_path = os.environ['SECURE_CONNECT_BUNDLE_PATH']
api_key = os.environ['OPENAI_API_KEY']

In [None]:
# Cluster and PlainTextAuthProvider come from cassandra-driver; they are
# imported in this cell so it runs on a fresh kernel.
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster

# Connect to Astra DB through the secure connect bundle, authenticating with
# the literal user name "token" and the Astra token as the password.
cluster = Cluster(
    cloud={
        "secure_connect_bundle": astradb_secure_bundle_path,
    },
    auth_provider=PlainTextAuthProvider(
        "token", astradb_token
    ),
)

session = cluster.connect()
# Switch the session to the "vector" keyspace for subsequent statements.
session.execute("use vector;")

In [None]:
import openai
# Configure the openai module with the key read from OPENAI_API_KEY.
openai.api_key = api_key
def generate_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector OpenAI computes for ``text``.

    Args:
        text: Value passed as ``input`` to ``openai.Embedding.create``.
        model: Name of the embedding model. Defaults to
            "text-embedding-ada-002", so one-argument calls are unaffected.

    Returns:
        The ``embedding`` field of the first entry in the response's ``data``.
    """
    response = openai.Embedding.create(model=model, input=text)
    # A single input yields a single entry in ``data``.
    return response.data[0]['embedding']

In [3]:
def get_completion_from_messages(messages, model="gpt-4", temperature=0):
    """Request Cassandra data-modelling advice from the chat model.

    The ``messages`` argument is interpolated into several of the system
    instructions below; no separate user-role message is sent.

    Args:
        messages: The prompt text to embed in the instructions (the Gradio
            callback in this notebook passes the user's textbox input).
        model: Chat model name forwarded to ``openai.ChatCompletion.create``.
        temperature: Degree of randomness of the model's output.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # One system instruction per entry, sent in this order.
    instructions = (
        f'You are a chatbot for giving recommendations for the best practices in data modelling in Cassandra and you provide best practices recommendations and you provide a CQL with the best possible partition key for their table in the prompt [{messages}].',
        'You need to ask questions related to how many rows do they expect in a partition and an initial draft for their table and you will calculate the partition size',
        "Here an example how to calculate the partition size: CREATE TABLE temperature_readings (device_id UUID,timestamp TIMESTAMP,temperature FLOAT,PRIMARY KEY (device_id, timestamp)); ",
        f'This is an example, you need to use the new values in the [{messages}].They will have 500K rows for each device per day: Using the  given data type size you can calculate the partition size:Partition size = (column1 type size + column2 type size + column3 type size) x Number of rows= (16+8+4)*500000=  14000000 bytes = 13MB',
        f'You need to check the partition key in the table definition in [{messages}] and calculate the partition size for each table depending on the row count. One of the best practice is to have a partition size less than 100MB and if it is above that you can recommend using another column in the partition key or add time bucketing.',
        "And you can also provide other best practices for Cassandra Data Modelling like using TTL , using a new denormalized table for each query access instead of using Secondary Index and Materialized Views",
    )
    system_messages = [
        {'role': 'system', 'content': instruction} for instruction in instructions
    ]
    reply = openai.ChatCompletion.create(
        model=model,
        messages=system_messages,
        temperature=temperature,
    )
    return reply.choices[0].message["content"]

In [4]:
import gradio as gr

def image_to_base64_str(pil_image):
    """Encode an image as a base64 string of its PNG bytes.

    Args:
        pil_image: Object exposing a PIL-style ``save(fp, format=...)`` method.

    Returns:
        The PNG data, base64-encoded and decoded to ``str`` as UTF-8.
    """
    with io.BytesIO() as buffer:
        pil_image.save(buffer, format='PNG')
        png_bytes = buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode('utf-8')

def datamodel(prompt):
    """Gradio callback: return the chat model's recommendation for ``prompt``."""
    return get_completion_from_messages(prompt)

# Shut down any Gradio servers still running so the port below is free.
gr.close_all()

# Single-textbox UI: the user's description goes to datamodel(), and the
# returned recommendation is shown in the output textbox.
demo = gr.Interface(
    fn=datamodel,
    inputs=[
        gr.Textbox(label="Can you give an explanation about your data model and initial table design please?"),
    ],
    outputs=[
        gr.Textbox(label="Recommendation"),
    ],
    title="Data Model Agent",
    description="Data Model Agent",
    allow_flagging="never",
)

# share=True also publishes a temporary public URL; the server uses port 1788.
demo.launch(share=True, server_port=1788)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://9120dae85491e3cb99.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [None]:
# Stop the running Gradio server(s) once the demo is no longer needed.
gr.close_all()

Closing server running on port: 1788
