# Setup 

In [1]:
# Requirements:
# pip install openai langchain psycopg2-binary sqlalchemy google-generativeai voyageai

In [81]:
import os

# Read the API keys from the environment so real secrets are never saved in
# the notebook. An unset variable falls back to the empty string.
voyage_api_key = os.environ.get("VOYAGE_API_KEY", "")
gemini_api_key = os.environ.get("GEMINI_API_KEY", "")


In [3]:
import psycopg2

# Replace `mysecretpassword` with the password you set in the Docker command.
# The connect call sits inside the `try` because it is the statement that can
# actually fail; the error is printed and then re-raised so later cells never
# run against a missing `conn`.
try:
    conn = psycopg2.connect(
        dbname="postgres",       # Default database name
        user="postgres",         # Default user
        password="mysecretpassword",  # Password from the Docker setup
        host="localhost",        # PostgreSQL is hosted locally
        port="5432"              # Default PostgreSQL port
    )
    print("Connected to PostgreSQL!")
except Exception as e:
    print(f"Error: {e}")
    raise


Connected to PostgreSQL!


# Split data into chunks

In [4]:
# Step 1: Read the EU AI Act source document into memory
def load_text(file_path):
    """Return the entire contents of the UTF-8 text file at `file_path`."""
    with open(file_path, mode='r', encoding='utf-8') as source:
        contents = source.read()
    return contents

# Example usage
# "act.md" is a relative path, so it is resolved against the notebook's
# current working directory.
file_path = "act.md"  # Update with the path to your file
eu_ai_act_text = load_text(file_path)
# Quick sanity check: report how many characters were read.
print(f"Loaded {len(eu_ai_act_text)} characters from the file.")


Loaded 654850 characters from the file.


In [6]:
import re

# Step 2: Split text into manageable chunks
def split_text(text, max_words=200):
    """Group newline-separated paragraphs into chunks of about `max_words` words.

    Paragraphs are never split: they are appended to the current chunk until
    adding the next one would exceed `max_words`, at which point a new chunk
    is started. A single paragraph longer than `max_words` therefore becomes
    a chunk of its own that exceeds the limit.

    Parameters
    ----------
    text : str
        The full document text.
    max_words : int, optional
        Soft upper bound on whitespace-separated words per chunk.

    Returns
    -------
    list[str]
        The chunks, each being its paragraphs joined by a single space.
        Never contains empty chunks; empty input yields an empty list.
    """
    chunks = []
    current_chunk = []
    current_word_count = 0

    for paragraph in re.split(r'\n+', text):
        word_count = len(paragraph.split())
        if word_count == 0:
            # Blank or whitespace-only paragraphs (e.g. from a leading or
            # trailing newline) would only add stray spaces to a chunk.
            continue
        # Flush only a non-empty chunk; otherwise an oversized first
        # paragraph would emit an empty "" chunk ahead of itself.
        if current_chunk and current_word_count + word_count > max_words:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_word_count = 0
        current_chunk.append(paragraph)
        current_word_count += word_count

    if current_chunk:
        chunks.append(" ".join(current_chunk))  # Add the last chunk

    return chunks

# Example usage
# Chunk the whole document using split_text's default max_words limit.
chunks = split_text(eu_ai_act_text)
print(f"Split into {len(chunks)} chunks.")

Split into 525 chunks.


# Embed Data

In [26]:
import voyageai 

# Module-level Voyage AI client, created once and used by generate_embedding.
vo = voyageai.Client(voyage_api_key)

def generate_embedding(chunk, model="voyage-3", input_type="document"):
    """Embed `chunk` with the Voyage AI client and return the response embeddings.

    Parameters
    ----------
    chunk : str
        Text handed straight to `vo.embed`.
    model : str, optional
        Voyage model name. It is now forwarded to the API; previously the
        call hard-coded "voyage-3" and silently ignored this argument.
        NOTE(review): the table in this notebook is declared VECTOR(1024),
        so a model with a different output size would presumably not fit.
    input_type : str, optional
        Forwarded to `vo.embed`. Defaults to "document", the value that was
        previously hard-coded.

    Returns
    -------
    The `.embeddings` attribute of the `vo.embed` response. Callers in this
    notebook read the vector for `chunk` from index 0 of that value.
    """
    response = vo.embed(chunk, model=model, input_type=input_type)
    return response.embeddings

In [27]:
import requests

# Batched embedding requests hit the API rate limits, so each chunk is embedded with its own request instead.
def prepare_data(chunks, api_key):
    """Pair every chunk with its embedding.

    Calls `generate_embedding` once per chunk, in order, and returns a list
    of `{"text": ..., "embedding": ...}` dicts. `api_key` is accepted but
    not used by this function.
    """
    return [
        {"text": piece, "embedding": generate_embedding(piece)}
        for piece in chunks
    ]

# Example usage
# prepare_data embeds the chunks one at a time, so this cell issues one
# embedding call per chunk.
data = prepare_data(chunks, voyage_api_key)
print(f"Prepared {len(data)} chunks with embeddings.")

Prepared 525 chunks with embeddings.


# Insert into database

In [86]:
import psycopg2
from psycopg2.extras import execute_batch

def insert_into_db(data, conn_params):
    """Create the `embeddings_data` table and load every chunk into it.

    The extension setup, table creation and all inserts run in a single
    transaction on a dedicated connection. A failure part-way through
    therefore leaves nothing behind, and the connection is always closed.

    Parameters
    ----------
    data : iterable of dict
        Items with a "text" entry and an "embedding" entry. The first
        element of "embedding" (index 0) is stored as the row's vector.
    conn_params : dict
        Keyword arguments passed to `psycopg2.connect`.

    Raises
    ------
    psycopg2.Error
        On any database failure. This includes the table already existing,
        since the CREATE TABLE is deliberately not `IF NOT EXISTS`; that
        also prevents loading the same chunks twice.
    """
    conn = psycopg2.connect(**conn_params)
    try:
        with conn.cursor() as cur:
            # The VECTOR column type comes from the pgvector extension, which
            # has to be enabled in the database before the table is created.
            cur.execute("CREATE EXTENSION IF NOT EXISTS vector")

            # Create table
            cur.execute("""
            CREATE TABLE embeddings_data (
                id SERIAL PRIMARY KEY,
                text TEXT,
                embedding VECTOR(1024)
            )
            """)

            rows = [(d["text"], d["embedding"][0]) for d in data]
            execute_batch(
                cur,
                "INSERT INTO embeddings_data (text, embedding) VALUES (%s, %s)",
                rows,
            )
        # Single commit: either every row is stored or none is.
        conn.commit()
    finally:
        # Closing without a commit discards any half-finished transaction.
        conn.close()

# Connection settings handed to insert_into_db, which opens its own
# connection from them. The port is given as an int here, whereas the first
# connection cell passes it as the string "5432".
db_params = {
    "dbname": "postgres",
    "user": "postgres",
    "password": "mysecretpassword",
    "host": "localhost",
    "port": 5432
}
# Creates the table and writes all prepared chunks to the database.
insert_into_db(data, db_params)
print("Data inserted into the database successfully.")



Data inserted into the database successfully.


# Chatbot Interface

In [91]:
def query_embeddings(query_vector):
    """Return the text of the 5 stored chunks closest to `query_vector`.

    Parameters
    ----------
    query_vector : sequence
        Either a flat sequence of numbers, or a sequence whose first element
        is such a sequence (the nested form that `generate_embedding`
        returns). In the nested case the first element is used.

    Returns
    -------
    list of tuple
        Rows from `cur.fetchall()`, each a 1-tuple holding a chunk's text,
        ordered by pgvector's `<->` distance to the query vector.
    """
    # Unwrap the nested one-embedding form so that flat and nested inputs
    # both work. The original passed `(query_vector)`, which is not a tuple,
    # and only worked because the nested list happened to act as a
    # one-element parameter sequence.
    if len(query_vector) > 0 and isinstance(query_vector[0], (list, tuple)):
        query_vector = query_vector[0]

    # Perform similarity search; the cursor is closed when the block exits.
    with conn.cursor() as cur:
        cur.execute(
            "SELECT text FROM embeddings_data ORDER BY embedding <-> %s::vector LIMIT 5",
            (list(query_vector),),
        )
        return cur.fetchall()


In [84]:
import google.generativeai as genai

# Hand the Gemini API key to the genai package once, at module level.
genai.configure(api_key=gemini_api_key)

# Ask Gemini Flash (via the genai package) to answer a query from retrieved context
def query_gemini_flash(context, query, api_key=gemini_api_key):
    """Build a prompt from `context` and `query`, print it, and return the model's text reply.

    `api_key` is accepted but not used inside this function.
    """
    # Model handle is created first, before the prompt is built or printed
    flash_model = genai.GenerativeModel("gemini-1.5-flash")

    # Context and question are merged into a single prompt string
    prompt = f"Based on the information: {context}\n\n Generate a response of: {query}"
    print("Prompt: " + prompt)

    # Single generation call; only the text of the reply is returned
    return flash_model.generate_content(prompt).text

In [None]:
def chatbot():
    """Run an interactive question/answer loop over the stored document chunks.

    Each question is embedded with `generate_embedding`, matching chunks are
    fetched with `query_embeddings`, and their text is joined with spaces
    and passed as context to `query_gemini_flash`. The loop ends when the
    user types 'exit' (any case, surrounding whitespace ignored) or when
    input is interrupted or closed.
    """
    print("Welcome to the EU AI Act Chatbot!")
    while True:
        try:
            question = input("\nAsk a question (or type 'exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Interrupting the kernel or closing stdin ends the session
            # cleanly instead of leaving a traceback.
            print("\nGoodbye!")
            break

        if not question:
            # Nothing to embed; ask again rather than sending an empty query.
            continue
        if question.lower() == "exit":
            print("Goodbye!")
            break

        # Generate embedding for the question
        question_vector = generate_embedding(question)

        # Retrieve relevant sections
        results = query_embeddings(question_vector)
        context = " ".join(row[0] for row in results)

        # Generate and display response
        answer = query_gemini_flash(context, question)
        print(f"\nAnswer: {answer}")

# Run the chatbot (this cell blocks on input() until the loop is exited)
chatbot()


Welcome to the EU AI Act Chatbot!



Ask a question (or type 'exit' to quit):  What information should the EU declaration of conformity contain?


Prompt: Based on the information:         * [article 29 - Application of …](https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=OJ:L_202401689#art_29)         * [article 30 - Notification …](https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=OJ:L_202401689#art_30)         * [article 31 - Requirements …](https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=OJ:L_202401689#art_31)         * [article 32 - Presumption of …](https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=OJ:L_202401689#art_32)         * [article 33 - Subsidiaries of …](https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=OJ:L_202401689#art_33)         * [article 34 - Operational …](https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=OJ:L_202401689#art_34)         * [article 35 - Identification …](https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=OJ:L_202401689#art_35)         * [article 36 - Changes to …](https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=OJ:L_202401689#art_36) 

Sample questions:

What information should the EU declaration of conformity contain?
What parts of the EU AI Act are concerned with video recording?