# Final Setup for Automating DfE Code Reviews

> 3-Component agent

## Tasks


### Schema

In [None]:
-- Drops

-- IF EXISTS keeps this script runnable on a database where `tasks` has not
-- been created yet. CASCADE also drops objects that depend on the table.
DROP TABLE IF EXISTS tasks CASCADE;


-- Creates

-- One row per task. The four text columns below are each chunked and embedded
-- by their own vectoriser later in this file.
CREATE TABLE tasks (
    id SERIAL PRIMARY KEY,
    task_name TEXT NOT NULL,
    task_content TEXT NOT NULL,
    task_instructions TEXT NOT NULL,
    model_answer_1 TEXT NOT NULL,
    model_answer_2 TEXT NOT NULL,
    -- Optional free-form JSON; the only nullable column.
    metadata JSONB
);


## Vectorisers


In [None]:
-- One vectoriser per text column of `tasks`. Each call names its destination,
-- the column to chunk, the Ollama embedding model, and the template applied
-- to every chunk before it is embedded.

-- task_content -> task_contents_embedding
SELECT ai.create_vectorizer(
    'tasks'::regclass,
    destination => 'task_contents_embedding',
    chunking => ai.chunking_recursive_character_text_splitter(
        'task_content',
        chunk_overlap => 512,
        chunk_size => 1024
    ),
    embedding => ai.embedding_ollama('nomic-embed-text', 768),
    formatting => ai.formatting_python_template(
        'Task Title: $task_name\nTask Content: $chunk'
    )
);

-- task_instructions -> task_instructions_embedding
SELECT ai.create_vectorizer(
    'tasks'::regclass,
    destination => 'task_instructions_embedding',
    chunking => ai.chunking_recursive_character_text_splitter(
        'task_instructions',
        chunk_overlap => 256,
        chunk_size => 512
    ),
    embedding => ai.embedding_ollama('nomic-embed-text', 768),
    formatting => ai.formatting_python_template(
        'Task Title: $task_name\nTask Instructions: $chunk'
    )
);

-- model_answer_1 -> task_model_answer_1_embedding
SELECT ai.create_vectorizer(
    'tasks'::regclass,
    destination => 'task_model_answer_1_embedding',
    chunking => ai.chunking_recursive_character_text_splitter(
        'model_answer_1',
        chunk_overlap => 256,
        chunk_size => 512
    ),
    embedding => ai.embedding_ollama('nomic-embed-text', 768),
    formatting => ai.formatting_python_template(
        'Task Title: $task_name\nModel Answer: $chunk'
    )
);

-- model_answer_2 -> task_model_answer_2_embedding
SELECT ai.create_vectorizer(
    'tasks'::regclass,
    destination => 'task_model_answer_2_embedding',
    chunking => ai.chunking_recursive_character_text_splitter(
        'model_answer_2',
        chunk_overlap => 256,
        chunk_size => 512
    ),
    embedding => ai.embedding_ollama('nomic-embed-text', 768),
    formatting => ai.formatting_python_template(
        'Task Title: $task_name\nModel Answer 2: $chunk'
    )
);

-- Vectoriser execute
-- Run every vectoriser registered against the `tasks` table. The ids are read
-- from the ai.vectorizer catalogue rather than hard-coded, because they are
-- assigned when ai.create_vectorizer runs and differ between databases.
SELECT ai.execute_vectorizer(v.id)
FROM ai.vectorizer AS v
WHERE v.source_table = 'tasks';

-- Vectoriser status
-- Ad-hoc inspection of the vectorisers' current state.
SELECT * FROM ai.vectorizer_status;


In [None]:
-- generate_rag_response_file_content(query_text)
--
-- Retrieval-augmented answer for a free-text query:
--   1. pairs task-content chunks with model-answer-1 chunks of the same task
--      and keeps the three pairs whose model-answer chunk is closest to the
--      query embedding;
--   2. sends the query plus those pairs to the `llama3` chat model via Ollama.
--
-- Parameters:
--   query_text  the question or text to answer.
-- Returns:
--   the `message.content` text of the chat completion.
--
-- NOTE(review): the join assumes both embedding views expose the source
-- table's `id` column — confirm against the installed pgai version.
-- NOTE(review): the earlier draft joined task_instructions_embedding but
-- ranked by an undefined `ma` alias and labelled the output "Model Answer";
-- model answer 1 is used here as the most likely intent — confirm.
CREATE OR REPLACE FUNCTION generate_rag_response_file_content(query_text TEXT)
RETURNS TEXT AS $$
DECLARE
    context_chunks TEXT;
    response TEXT;
BEGIN
    WITH relevant_task_answers AS (
        SELECT
            tce.task_name,
            tce.chunk AS task_content_chunk,
            ma.chunk AS model_answer_chunk,
            ma.embedding <=> ai.ollama_embed('nomic-embed-text', query_text) AS distance
        FROM task_contents_embedding AS tce
        INNER JOIN task_model_answer_1_embedding AS ma
            ON tce.id = ma.id
        ORDER BY distance
        LIMIT 3
    )
    SELECT string_agg(
        'Task: ' || task_content_chunk || E'\nModel Answer: ' || model_answer_chunk,
        E'\n\n'
        ORDER BY distance  -- closest match first; aggregate order is otherwise unspecified
    )
    INTO context_chunks
    FROM relevant_task_answers;

    SELECT ai.ollama_chat_complete(
        'llama3',
        jsonb_build_array(
            jsonb_build_object(
                'role', 'system',
                'content', 'You are a helpful code reviewer. Provide accurate and concise answers based on the given context.'
            ),
            jsonb_build_object(
                'role', 'user',
                -- string_agg yields NULL when nothing matched, and NULL would
                -- wipe out the whole concatenated prompt; substitute a marker.
                'content', query_text || E'\n\nUse the following context to respond:\n'
                           || COALESCE(context_chunks, 'No relevant context found.')
            )
        )
    )->'message'->>'content' INTO response;

    RETURN response;
END;
$$ LANGUAGE plpgsql;

In [None]:
-- generate_rag_response(query_text)
--
-- Generic retrieval-augmented answer: finds the three task-content chunks
-- closest to the query embedding and passes them, with the query, to the
-- `llama3` chat model via Ollama.
--
-- Parameters:
--   query_text  the question or text to answer.
-- Returns:
--   the `message.content` text of the chat completion.
--
-- NOTE(review): the earlier draft read `title` from `blogs_embedding` while the
-- outer query referenced `task_name`, so it could never run. It now searches
-- task_contents_embedding, which this file creates — confirm that is the
-- intended corpus.
CREATE OR REPLACE FUNCTION generate_rag_response(query_text TEXT)
RETURNS TEXT AS $$
DECLARE
   context_chunks TEXT;
   response TEXT;
BEGIN
   -- Perform similarity search to find relevant task content
   SELECT string_agg(task_name || ': ' || chunk, E'\n' ORDER BY distance)
   INTO context_chunks
   FROM
   (
       SELECT
           task_name,
           chunk,
           embedding <=> ai.ollama_embed('nomic-embed-text', query_text) AS distance
       FROM task_contents_embedding
       ORDER BY distance
       LIMIT 3
   ) AS relevant_tasks;

   -- Generate a response using llama3
   SELECT ai.ollama_chat_complete
   ( 'llama3'
   , jsonb_build_array
     ( jsonb_build_object('role', 'system', 'content', 'you are a helpful assistant')
     , jsonb_build_object
       ('role', 'user'
         -- string_agg yields NULL when nothing matched, and NULL would wipe
         -- out the whole concatenated prompt; substitute a marker.
       , 'content', query_text || E'\nUse the following context to respond.\n'
                    || COALESCE(context_chunks, 'No relevant context found.')
       )
     )
   )->'message'->>'content' INTO response;

   RETURN response;
END;
$$ LANGUAGE plpgsql;

In [None]:
-- generate_rag_response_code_review(query_text)
--
-- Reviews a piece of code against stored task material. Takes the single
-- closest chunk from each of the four embedding views (task content, task
-- instructions, model answer 1, model answer 2), labels each with its source
-- and task name, and asks the `llama3` chat model for a review.
--
-- Parameters:
--   query_text  the code to review; also used as the similarity-search query.
-- Returns:
--   the `message.content` text of the chat completion.
CREATE OR REPLACE FUNCTION generate_rag_response_code_review(query_text TEXT)
RETURNS TEXT AS $$
DECLARE
  context_chunks TEXT;
  response TEXT;
BEGIN
  WITH relevant_task_info AS (
    (SELECT
      'Task Content' AS source,
      task_name,
      chunk,
      embedding <=> ai.ollama_embed('nomic-embed-text', query_text) AS distance
    FROM
      task_contents_embedding
    ORDER BY distance
    LIMIT 1)

    UNION ALL

    (SELECT
      'Task Instructions' AS source,
      task_name,
      chunk,
      embedding <=> ai.ollama_embed('nomic-embed-text', query_text) AS distance
    FROM
      task_instructions_embedding
    ORDER BY distance
    LIMIT 1)

    UNION ALL

    -- This overload has a single parameter, so both model-answer searches
    -- use query_text (query_text_1 / query_text_2 exist only in the
    -- four-argument overload).
    (SELECT
      'Model Answer 1' AS source,
      task_name,
      chunk,
      embedding <=> ai.ollama_embed('nomic-embed-text', query_text) AS distance
    FROM
      task_model_answer_1_embedding
    ORDER BY distance
    LIMIT 1)

    UNION ALL

    (SELECT
      'Model Answer 2' AS source,
      task_name,
      chunk,
      embedding <=> ai.ollama_embed('nomic-embed-text', query_text) AS distance
    FROM
      task_model_answer_2_embedding
    ORDER BY distance
    LIMIT 1)
  )
  SELECT string_agg(
    source || E' (Task: ' || task_name || E'):\n' || chunk,
    E'\n\n'
    ORDER BY distance  -- closest source first; a CTE-level ORDER BY does not bind the aggregate
  )
  INTO context_chunks
  FROM relevant_task_info;

  SELECT ai.ollama_chat_complete(
    'llama3',
    jsonb_build_array(
      jsonb_build_object(
        'role', 'system',
        'content', 'You are an expert code reviewer. Your task is to review the given code (query_text) and ensure it matches the task instructions provided in the context. Provide a detailed review, highlighting strengths, areas for improvement, and any discrepancies between the given code and the task instructions.'
      ),
      jsonb_build_object(
        'role', 'user',
        -- E'' literals so that \n is a real newline, not a literal backslash-n.
        -- COALESCE keeps the prompt intact when no context row was found.
        'content', E'Code to review:\n' || query_text || E'\n\n' ||
                   E'Use the following context for your review:\n' ||
                   COALESCE(context_chunks, 'No relevant context found.') || E'\n\n' ||
                   'Please provide a comprehensive code review. Include suggestions related to the Task Content and Model Answer for improvement if necessary.'
      )
    )
  )->'message'->>'content' INTO response;

  RETURN response;
END;
$$ LANGUAGE plpgsql;



In [None]:
-- generate_rag_response_code_review(query_text_1, query_text_2, task_title, file_name)
--
-- Reviews two code fragments for a named task. Context is the closest chunk
-- of task content and of task instructions for the task called `task_title`,
-- plus the model-answer-1 chunk closest to query_text_1 and the
-- model-answer-2 chunk closest to query_text_2. The context and both
-- fragments are sent to the `llama3` chat model.
--
-- Parameters:
--   query_text_1  first code fragment; matched against model answer 1.
--   query_text_2  second code fragment; matched against model answer 2.
--   task_title    task name; filters task content/instructions by task_name.
--   file_name     file name quoted in the prompt.
-- Returns:
--   the `message.content` text of the chat completion. NULL if any argument
--   is NULL, because the prompt is built with `||`.
--
-- NOTE(review): the model-answer searches are not restricted to `task_title`,
-- so they can return chunks from another task — confirm that is intended.
CREATE OR REPLACE FUNCTION generate_rag_response_code_review(query_text_1 TEXT, query_text_2 TEXT, task_title TEXT, file_name TEXT)
RETURNS TEXT AS $$
DECLARE
  context_chunks TEXT;
  response TEXT;
BEGIN
  WITH relevant_task_info AS (
    (SELECT
      'Task Content' AS source,
      task_name,
      chunk,
      embedding <=> ai.ollama_embed('nomic-embed-text', task_title) AS distance
    FROM
      task_contents_embedding
    WHERE
      -- Filter by the task title parameter; the positional $2 used before is
      -- query_text_2, not the title.
      task_name = task_title
    ORDER BY distance
    LIMIT 1)

    UNION ALL

    (SELECT
      'Task Instructions' AS source,
      task_name,
      chunk,
      embedding <=> ai.ollama_embed('nomic-embed-text', task_title) AS distance
    FROM
      task_instructions_embedding
    WHERE
      task_name = task_title
    ORDER BY distance
    LIMIT 1)

    UNION ALL

    (SELECT
      'Model Answer 1' AS source,
      task_name,
      chunk,
      embedding <=> ai.ollama_embed('nomic-embed-text', query_text_1) AS distance
    FROM
      task_model_answer_1_embedding
    ORDER BY distance
    LIMIT 1)

    UNION ALL

    (SELECT
      'Model Answer 2' AS source,
      task_name,
      chunk,
      embedding <=> ai.ollama_embed('nomic-embed-text', query_text_2) AS distance
    FROM
      task_model_answer_2_embedding
    ORDER BY distance
    LIMIT 1)
  )
  SELECT string_agg(
    source || E':\n' || chunk,
    E'\n\n'
    ORDER BY distance  -- closest source first; a CTE-level ORDER BY does not bind the aggregate
  )
  INTO context_chunks
  FROM relevant_task_info;

  SELECT ai.ollama_chat_complete(
    'llama3',
    jsonb_build_array(
      jsonb_build_object(
        'role', 'system',
        'content', 'You are an expert code reviewer. Your task is to review the given code (query_text) and ensure it matches the task instructions. Provide a detailed review, highlighting strengths, areas for improvement, and any discrepancies between the code and the task requirements.'
      ),
      jsonb_build_object(
        'role', 'user',
        -- E'' literals so that \n is a real newline, not a literal backslash-n.
        -- The two fragments are separated by a blank line so they do not run
        -- together. COALESCE keeps the prompt intact when no context row
        -- was found.
        'content', 'Task Name: ' || task_title || E'\n\n' ||
                   E'File name:\n' || file_name || E'\n\n' ||
                   E'Code to review:\n' || query_text_1 || E'\n\n' || query_text_2 || E'\n\n' ||
                   E'Use the following context for your review:\n' ||
                   COALESCE(context_chunks, 'No relevant context found.') || E'\n\n' ||
                   'Please provide a comprehensive code review, focusing on how well the code matches the task instructions. Include specific suggestions for improvement if necessary.'
      )
    )
  )->'message'->>'content' INTO response;

  RETURN response;
END;
$$ LANGUAGE plpgsql;

