# Setup 

In [None]:
voyage_api_key = ""  # Replace with your API key
gemini_api_key = ""


In [None]:
import sys
!{sys.executable} -m pip install voyageai
!{sys.executable} -m pip install -q -U google-generativeai



In [85]:
import psycopg2

# Replace `mysecretpassword` with the password you set in the Docker command
conn = psycopg2.connect(
    dbname="postgres",       # Default database name
    user="postgres",         # Default user
    password="mysecretpassword",  # Password from the Docker setup
    host="localhost",        # PostgreSQL is hosted locally
    port="5432"              # Default PostgreSQL port
)

# Test connection
try:
    print("Connected to PostgreSQL!")
except Exception as e:
    print(f"Error: {e}")


Connected to PostgreSQL!


# Split data into chunks

In [14]:
# Step 1: Load the EU AI Act text
def load_text(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

# Example usage
file_path = "act.md"  # Update with the path to your file
eu_ai_act_text = load_text(file_path)
print(f"Loaded {len(eu_ai_act_text)} characters from the file.")


Loaded 654850 characters from the file.


In [15]:
import re

# Step 2: Split text into manageable chunks
def split_text(text, max_words=200):
    # Split text into paragraphs
    paragraphs = re.split(r'\n+', text)
    chunks = []
    current_chunk = []
    current_word_count = 0

    for paragraph in paragraphs:
        words = paragraph.split()
        if current_word_count + len(words) > max_words:
            # Start a new chunk if the limit is exceeded
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_word_count = 0
        current_chunk.append(paragraph)
        current_word_count += len(words)

    if current_chunk:
        chunks.append(" ".join(current_chunk))  # Add the last chunk

    return chunks

# Example usage
chunks = split_text(eu_ai_act_text)
print(f"Split into {len(chunks)} chunks.")
print(chunks[13])

Split into 525 chunks.


# Embed Data

In [51]:
def generate_embedding(chunk, model="voyage-3"):
    documents_embeddings = vo.embed(
        chunk, model="voyage-3", input_type="document"
        ).embeddings

In [39]:
import requests

def prepare_data(chunks, api_key):
    data = []
    num_chunk = 0
    for chunk in chunks:
        num_chunk += 1
        try:
            embedding = generate_embedding(chunk)
            data.append({"text": chunk, "embedding": embedding})
        except:
          print("Reached chunk: " + num_chunk)
    return data

# Example usage
data = prepare_data(chunks, api_key)
print(f"Prepared {len(data)} chunks with embeddings.")

Prepared 525 chunks with embeddings.


In [87]:
print(len(data))

525


# Insert into database

In [80]:
import psycopg2
from psycopg2.extras import execute_batch

def insert_into_db(data, conn_params):
    conn = psycopg2.connect(**conn_params)
    cur = conn.cursor()

    query = """
    INSERT INTO embeddings (text, embedding)
    VALUES (%s, %s)
    """
    records = [(item["text"], item["embedding"]) for item in data]

    execute_batch(cur, query, records)
    conn.commit()

db_params = {
    "dbname": "postgres",
    "user": "postgres",
    "password": "mysecretpassword",
    "host": "localhost",
    "port": 5432
}
insert_into_db(data, db_params)
print("Data inserted into the database successfully.")


Data inserted into the database successfully.


# Chatbot Interface

In [77]:
def query_embeddings(query_vector):
    # Perform similarity search
    cur = conn.cursor()

    cur.execute("SELECT text FROM embeddings ORDER BY embedding <=> %s LIMIT 5", (query_vector,))
    results = cur.fetchall()
    return [result[0] for result in results]


In [81]:
import google.generativeai as genai

# Query Gemini Flash using the genai package
def query_gemini_flash(context, query, api_key=gemini_api_key):
    # Initialize the generative model
    model = genai.GenerativeModel("gemini-1.5-flash")

    # Prepare the prompt combining context and query
    prompt = f"Based on the information: {context}\n\n Generate a response of: {query}"
    print("Prompt: " + prompt)

    # Generate the response from the model
    response = model.generate_content(prompt)
    
    return response.text



In [86]:
def chatbot():
    print("Welcome to the EU AI Act Chatbot!")
    while True:
        question = input("\nAsk a question (or type 'exit' to quit): ")
        if question.lower() == "exit":
            print("Goodbye!")
            break
        
        # Generate embedding for the question
        question_vector = generate_embedding(question)
        
        # Retrieve relevant sections
        results = query_embeddings(question_vector)
        context = " ".join([row[0] for row in results])
        
        # Generate and display response
        answer = query_gemini_flash(context, question)
        print(f"\nAnswer: {answer}")

# Run the chatbot
chatbot()


Welcome to the EU AI Act Chatbot!



Ask a question (or type 'exit' to quit):  What information should the EU declaration of conformity contain?


Prompt: Based on the information:          

 Generate a response of: What information should the EU declaration of conformity contain?

Answer: Please provide the information you'd like me to base my response on. I need the text or a link to the information about EU Declarations of Conformity to accurately tell you what they should contain.




Ask a question (or type 'exit' to quit):  exit


Goodbye!


In [None]:
What information should the EU declaration of conformity contain?


In [None]:
db_params = {
        "dbname": "postgres",
        "user": "postgres",
        "password": "mysecretpassword",
        "host": "localhost",
        "port": 5432
    }

    # API keys
    gemini_api_key = "AIzaSyAvUbnNinMR1i5DwOZw1BiQRr2MZZvlRvY"

    # Start chatbot
    chatbot_interface(db_params, lambda query: openai_embedding_model(query, openai_api_key), gemini_api_key)