# Final Setup for Automating DfE Code Reviews

> 3-Component agent

## Tasks


### Schema

In [None]:
-- Drops
-- IF EXISTS keeps this cell re-runnable: on a fresh database the tables do
-- not exist yet and a bare DROP TABLE would abort the cell with an error.
-- CASCADE also removes dependent objects (e.g. foreign keys pointing at tasks).

DROP TABLE IF EXISTS tasks CASCADE;
DROP TABLE IF EXISTS model_answers CASCADE;


-- Creates

-- One row per review task: its name, the task content, and the instructions.
CREATE TABLE tasks (
    id SERIAL PRIMARY KEY,
    task_name TEXT NOT NULL,
    task_content TEXT NOT NULL,
    task_instructions TEXT NOT NULL
);

-- Model answer files for a task. Rows are removed automatically when the
-- owning task is deleted (ON DELETE CASCADE).
CREATE TABLE model_answers (
    id SERIAL PRIMARY KEY,
    task_id INT NOT NULL,
    -- task_name is stored on the row itself (not only on tasks).
    task_name TEXT NOT NULL,
    file_content TEXT NOT NULL,
    metadata JSONB,
    FOREIGN KEY (task_id) REFERENCES tasks (id) ON DELETE CASCADE
);


In [None]:
-- Drops
-- IF EXISTS keeps this cell re-runnable: on a fresh database the tables do
-- not exist yet and a bare DROP TABLE would abort the cell with an error.

DROP TABLE IF EXISTS code_reviews CASCADE;
DROP TABLE IF EXISTS submissions CASCADE;


-- Creates

-- One row per code review written against a task. Rows are removed
-- automatically when the referenced task is deleted (ON DELETE CASCADE).
CREATE TABLE code_reviews (
    id SERIAL PRIMARY KEY,
    task_id INT NOT NULL,
    review_tone FLOAT,
    review_text TEXT NOT NULL,
    metadata JSONB,
    FOREIGN KEY (task_id) REFERENCES tasks (id) ON DELETE CASCADE
);

-- Submitted files attached to a code review. Rows are removed automatically
-- when the owning review is deleted (ON DELETE CASCADE).
CREATE TABLE submissions (
    id SERIAL PRIMARY KEY,
    review_id INT NOT NULL,
    file_name TEXT NOT NULL,
    file_content TEXT NOT NULL,
    FOREIGN KEY (review_id) REFERENCES code_reviews (id) ON DELETE CASCADE
);
BGE-M3

## Vectorisers


In [None]:
-- NOTE: the formatting templates below use E'...' strings so that \n is a real
-- newline. In a plain '...' literal (standard_conforming_strings = on, the
-- PostgreSQL default) \n would be embedded as a literal backslash followed by n.

-- Task Instructions Vectorizer
-- Chunks tasks.task_instructions and embeds each chunk with Ollama's
-- nomic-embed-text model (768 dimensions).
SELECT ai.create_vectorizer(
    'tasks'::regclass,
    formatting => ai.formatting_python_template(
        E'Task: $task_name\nInstructions: $chunk'
    ),
    destination => 'task_instruction_embedding',
    embedding => ai.embedding_ollama('nomic-embed-text', 768),
    chunking => ai.chunking_recursive_character_text_splitter(
        'task_instructions',
        chunk_size => 512,
        chunk_overlap => 256
    )
);

-- Model Answers Vectorizer
-- Chunks model_answers.file_content. No destination is given, so pgai picks
-- its default destination name for the model_answers table.
SELECT ai.create_vectorizer(
    'model_answers'::regclass,
    formatting => ai.formatting_python_template(
        E'Task: $task_name\nModel answer: $chunk'
    ),
    embedding => ai.embedding_ollama('nomic-embed-text', 768),
    chunking => ai.chunking_recursive_character_text_splitter(
        'file_content',
        chunk_size => 512,
        chunk_overlap => 256
    )
);

-- Task Content Vectorizer
-- Chunks tasks.task_content, using larger chunks than the other vectorizers.
SELECT ai.create_vectorizer(
    'tasks'::regclass,
    formatting => ai.formatting_python_template(
        E'Task: $task_name\nContent: $chunk'
    ),
    destination => 'task_content_embedding',
    embedding => ai.embedding_ollama('nomic-embed-text', 768),
    chunking => ai.chunking_recursive_character_text_splitter(
        'task_content',
        chunk_size => 800,
        chunk_overlap => 400
    )
);


-- Vectoriser execute
-- Vectorizer ids are assigned by the database and differ between
-- environments, so look them up in the ai.vectorizer catalog instead of
-- hard-coding the ids from one particular session.
SELECT
    v.id AS vectorizer_id,
    ai.execute_vectorizer(v.id)
FROM ai.vectorizer AS v
WHERE v.source_table IN ('tasks', 'model_answers')
ORDER BY v.id;

In [None]:
-- generate_rag_response_file_content(query_text)
--
-- Retrieval-augmented answer for a code-review question.
--   1. Embeds query_text with Ollama's nomic-embed-text model.
--   2. Takes the 3 model-answer chunks closest to the query (cosine distance,
--      the <=> operator) and pairs each with the single most relevant content
--      chunk of the same task.
--   3. Sends the query plus that context to the llama3 chat model and returns
--      the text of the reply.
--
-- Parameters:
--   query_text  the question / prompt to answer.
-- Returns:
--   the 'content' field of the chat completion's 'message' object.
--
-- Reads the views model_answers_embedding (presumably pgai's default view
-- name for the model_answers vectorizer -- confirm against ai.vectorizer) and
-- task_content_embedding (the destination given to the task-content vectorizer).
CREATE OR REPLACE FUNCTION generate_rag_response_file_content(query_text TEXT)
RETURNS TEXT AS $$
DECLARE
    query_embedding vector;
    context_chunks TEXT;
    user_prompt TEXT;
    response TEXT;
BEGIN
    -- Embed the query once and reuse it for both similarity orderings.
    query_embedding := ai.ollama_embed('nomic-embed-text', query_text);

    WITH relevant_task_answers AS (
        SELECT
            ma.chunk AS model_answer_chunk,
            tc.task_content_chunk,
            ma.embedding <=> query_embedding AS answer_distance
        FROM model_answers_embedding AS ma
        -- Pick exactly one task-content chunk per answer chunk. A plain join
        -- would pair every answer chunk with every content chunk of its task,
        -- letting the same answer chunk fill all three slots of the LIMIT.
        INNER JOIN LATERAL (
            SELECT tce.chunk AS task_content_chunk
            FROM task_content_embedding AS tce
            WHERE tce.id = ma.task_id
            ORDER BY tce.embedding <=> query_embedding
            LIMIT 1
        ) AS tc ON TRUE
        ORDER BY answer_distance
        LIMIT 3
    )
    SELECT string_agg(
        'Task: ' || task_content_chunk || E'\nModel Answer: ' || model_answer_chunk,
        E'\n\n'
        ORDER BY answer_distance
    )
    INTO context_chunks
    FROM relevant_task_answers;

    -- string_agg yields NULL when nothing was retrieved; concatenating NULL
    -- would turn the whole prompt into NULL, so only append context if present.
    user_prompt := query_text;
    IF context_chunks IS NOT NULL THEN
        user_prompt := user_prompt
            || E'\n\nUse the following context to respond:\n'
            || context_chunks;
    END IF;

    SELECT ai.ollama_chat_complete(
        'llama3',
        jsonb_build_array(
            jsonb_build_object(
                'role', 'system',
                'content', 'You are a helpful code reviewer. Provide accurate and concise answers based on the given context.'
            ),
            jsonb_build_object(
                'role', 'user',
                'content', user_prompt
            )
        )
    )->'message'->>'content' INTO response;

    RETURN response;
END;
$$ LANGUAGE plpgsql;