## Automating Code Reviews


### Schema


In [None]:
-- Task briefs that solutions are written against.
-- Every task has a first compulsory part; the second part is optional,
-- so compulsory_task_2 is left nullable on purpose.
CREATE TABLE task_instructions (
    id                SERIAL,
    title             TEXT NOT NULL,
    compulsory_task_1 TEXT NOT NULL,
    compulsory_task_2 TEXT,  -- nullable: not every task has a second part
    PRIMARY KEY (id)
);

-- Submitted code for a given task.
-- Each solution belongs to exactly one task_instructions row; deleting the
-- task removes its solutions as well (ON DELETE CASCADE).
CREATE TABLE code_solutions (
    id                   SERIAL PRIMARY KEY,
    task_instructions_id INTEGER NOT NULL
        REFERENCES task_instructions (id) ON DELETE CASCADE,
    title                TEXT NOT NULL,
    code                 TEXT NOT NULL
);

-- Human-written reviews of a code solution, split into three free-text
-- sections (positives, improvements, overall).
-- Each review belongs to exactly one code_solutions row; deleting the
-- solution removes its reviews as well (ON DELETE CASCADE).
CREATE TABLE code_reviews (
    id                  SERIAL PRIMARY KEY,
    code_solution_id    INTEGER NOT NULL
        REFERENCES code_solutions (id) ON DELETE CASCADE,
    title               TEXT NOT NULL,
    review_positives    TEXT NOT NULL,
    review_improvements TEXT NOT NULL,
    review_overall      TEXT NOT NULL
);


### Vectorisers

In [None]:
-- Vectorizers for task_instructions: one per text column, each chunked with
-- the recursive character text splitter and embedded with Ollama's
-- nomic-embed-text model (768 dimensions).
--
-- Each vectorizer needs its OWN destination. Previously both calls used
-- 'task_instruction_embeddings', so the second call collided with the
-- objects created by the first.

-- compulsory_task_1 -> task_instruction_embeddings
-- (name kept as-is: the similarity test query in this file reads from it)
SELECT ai.create_vectorizer(
    'task_instructions'::regclass,
    destination => 'task_instruction_embeddings',
    embedding => ai.embedding_ollama('nomic-embed-text', 768),
    chunking => ai.chunking_recursive_character_text_splitter('compulsory_task_1')
);

-- compulsory_task_2 -> compulsory_task_2_embeddings
-- NOTE(review): compulsory_task_2 is nullable; presumably rows where it is
-- NULL simply produce no chunks -- confirm against pgai behaviour.
SELECT ai.create_vectorizer(
    'task_instructions'::regclass,
    destination => 'compulsory_task_2_embeddings',
    embedding => ai.embedding_ollama('nomic-embed-text', 768),
    chunking => ai.chunking_recursive_character_text_splitter('compulsory_task_2')
);

-- Vectorizer for submitted code: splits code_solutions.code with the
-- recursive character text splitter and embeds each chunk with Ollama's
-- nomic-embed-text model (768 dimensions). The destination name,
-- code_solution_embeddings, is what generate_code_review() searches.
SELECT ai.create_vectorizer(
    CAST('code_solutions' AS regclass),
    chunking    => ai.chunking_recursive_character_text_splitter('code'),
    embedding   => ai.embedding_ollama('nomic-embed-text', 768),
    destination => 'code_solution_embeddings'
);

-- Vectorizers for code_reviews: one per review section, each with its own
-- destination. All three use the recursive character text splitter and
-- Ollama's nomic-embed-text model (768 dimensions).

-- review_positives -> positives_code_review_embeddings
SELECT ai.create_vectorizer(
    CAST('code_reviews' AS regclass),
    chunking    => ai.chunking_recursive_character_text_splitter('review_positives'),
    embedding   => ai.embedding_ollama('nomic-embed-text', 768),
    destination => 'positives_code_review_embeddings'
);

-- review_improvements -> improvements_code_review_embeddings
SELECT ai.create_vectorizer(
    CAST('code_reviews' AS regclass),
    chunking    => ai.chunking_recursive_character_text_splitter('review_improvements'),
    embedding   => ai.embedding_ollama('nomic-embed-text', 768),
    destination => 'improvements_code_review_embeddings'
);

-- review_overall -> overall_code_review_embeddings
SELECT ai.create_vectorizer(
    CAST('code_reviews' AS regclass),
    chunking    => ai.chunking_recursive_character_text_splitter('review_overall'),
    embedding   => ai.embedding_ollama('nomic-embed-text', 768),
    destination => 'overall_code_review_embeddings'
);


### RAG Function (pgai)

In [None]:
-- generate_code_review(query_code): retrieval-augmented code review.
--
-- Embeds `query_code` with nomic-embed-text, retrieves the 3 closest chunks
-- of stored code solutions and the 3 closest chunks of stored reviews
-- (across the positives / improvements / overall embeddings), and asks
-- llama3 to review the code with those chunks as context.
--
-- Parameters:
--   query_code  the source code to be reviewed.
-- Returns:
--   the assistant message text from the Ollama chat response.
--
-- Fixes relative to the previous version:
--   * chat messages use the 'content' key (Ollama ignores 'submission'), and
--     the reply is read from message.content instead of message.submission,
--     which was always NULL;
--   * reviews are read from the three *_code_review_embeddings destinations
--     created by the vectorizers; `code_review_embeddings` / `review_text`
--     are not created anywhere in this file;
--   * the query embedding is computed once, against the same Ollama host as
--     the chat call;
--   * an empty retrieval result no longer turns the whole prompt into NULL.
CREATE OR REPLACE FUNCTION generate_code_review(query_code TEXT)
RETURNS TEXT AS $$
DECLARE
   query_embedding vector;
   context_chunks TEXT;
   response TEXT;
BEGIN
   -- Embed the submitted code once and reuse it for both similarity searches
   query_embedding := ai.ollama_embed(
       'nomic-embed-text',
       query_code,
       host => 'http://ollama:11434'
   );

   -- Perform similarity search to find relevant code solutions
   WITH relevant_solutions AS (
       SELECT title, chunk
       FROM code_solution_embeddings
       ORDER BY embedding <=> query_embedding
       LIMIT 3
   ),
   -- Perform similarity search to find relevant code reviews across all
   -- three review sections
   relevant_reviews AS (
       SELECT title, chunk
       FROM (
           SELECT title, chunk, embedding FROM positives_code_review_embeddings
           UNION ALL
           SELECT title, chunk, embedding FROM improvements_code_review_embeddings
           UNION ALL
           SELECT title, chunk, embedding FROM overall_code_review_embeddings
       ) AS review_chunks
       ORDER BY embedding <=> query_embedding
       LIMIT 3
   )
   -- Combine the results into a single context
   SELECT string_agg(title || ': ' || chunk, E'\n')
   INTO context_chunks
   FROM (
       SELECT title, chunk FROM relevant_solutions
       UNION ALL
       SELECT title, chunk FROM relevant_reviews
   ) AS combined_context;

   -- Generate a review using llama3 with the combined context.
   -- string_agg yields NULL when nothing was retrieved; COALESCE keeps the
   -- concatenated prompt from collapsing to NULL in that case.
   SELECT ai.ollama_chat_complete(
       'llama3',
       jsonb_build_array(
           jsonb_build_object('role', 'system', 'content', 'You are a code mentor.'),
           jsonb_build_object(
               'role', 'user',
               'content', query_code || E'\nUse the following context to review the code:\n'
                          || COALESCE(context_chunks, 'No similar solutions or reviews were found.')
           )
       ),
       host => 'http://ollama:11434'
   ) -> 'message' ->> 'content' INTO response;

   RETURN response;
END;
$$ LANGUAGE plpgsql;


In [None]:
-- generate_code_review(query_code): retrieval-augmented code review with a
-- structured prompt.
--
-- Embeds `query_code` with nomic-embed-text, retrieves the 3 closest chunks
-- of stored code solutions and the 3 closest chunks of stored reviews
-- (across the positives / improvements / overall embeddings), and asks
-- llama3 for a six-part review using those chunks as context.
--
-- Parameters:
--   query_code  the source code to be reviewed.
-- Returns:
--   the assistant message text (message.content) from the Ollama chat
--   response.
--
-- Fixes relative to the previous version:
--   * every prompt literal containing \n is now an E'' escape string; the
--     plain '...' literals sent a literal backslash-n to the model;
--   * reviews are read from the three *_code_review_embeddings destinations
--     created by the vectorizers; `code_review_embeddings` / `review_text`
--     are not created anywhere in this file;
--   * the query embedding is computed once, against the same Ollama host as
--     the chat call;
--   * an empty retrieval result no longer turns the whole prompt into NULL.
CREATE OR REPLACE FUNCTION generate_code_review(query_code TEXT)
RETURNS TEXT AS $$
DECLARE
   query_embedding vector;
   context_chunks TEXT;
   response TEXT;
BEGIN
   -- Embed the submitted code once and reuse it for both similarity searches
   query_embedding := ai.ollama_embed(
       'nomic-embed-text',
       query_code,
       host => 'http://ollama:11434'
   );

   -- Perform similarity search to find relevant code solutions
   WITH relevant_solutions AS (
       SELECT title, chunk
       FROM code_solution_embeddings
       ORDER BY embedding <=> query_embedding
       LIMIT 3
   ),
   -- Perform similarity search to find relevant code reviews across all
   -- three review sections
   relevant_reviews AS (
       SELECT title, chunk
       FROM (
           SELECT title, chunk, embedding FROM positives_code_review_embeddings
           UNION ALL
           SELECT title, chunk, embedding FROM improvements_code_review_embeddings
           UNION ALL
           SELECT title, chunk, embedding FROM overall_code_review_embeddings
       ) AS review_chunks
       ORDER BY embedding <=> query_embedding
       LIMIT 3
   )
   -- Combine the results into a single context
   SELECT string_agg(title || ': ' || chunk, E'\n')
   INTO context_chunks
   FROM (
       SELECT title, chunk FROM relevant_solutions
       UNION ALL
       SELECT title, chunk FROM relevant_reviews
   ) AS combined_context;

   -- Generate a review using llama3 with the combined context.
   -- string_agg yields NULL when nothing was retrieved; COALESCE keeps the
   -- concatenated prompt from collapsing to NULL in that case.
   SELECT ai.ollama_chat_complete(
       'llama3',
       jsonb_build_array(
           jsonb_build_object(
               'role', 'system',
               'content', 'You are an expert code mentor providing detailed, constructive code reviews. Analyze the code thoroughly, considering best practices, potential improvements, and context.'
           ),
           jsonb_build_object(
               'role', 'user',
               'content',
               E'Code to Review:\n' || query_code ||
               E'\n\nReview Context:\n' ||
               COALESCE(context_chunks, 'No similar solutions or reviews were found.') ||
               E'\n\nProvide a comprehensive code review that includes:\n' ||
               E'1. Positive aspects of the code\n' ||
               E'2. Improvement aspects of the code\n' ||
               E'3. Best practices and design patterns\n' ||
               E'4. Performance and efficiency considerations\n' ||
               E'5. Suggestions for refactoring\n' ||
               '6. Overall summary of reviewed code'
           )
       ),
       host => 'http://ollama:11434'
   ) -> 'message' ->> 'content' INTO response;

   RETURN response;
END;
$$ LANGUAGE plpgsql;


### Test


In [None]:
-- Smoke test for the task-instruction embeddings: embeds the phrase
-- 'task objectives' via Ollama and lists every stored chunk with its
-- distance to that phrase, closest first.
SELECT
    tie.chunk,
    tie.embedding <=> ai.ollama_embed(
        'nomic-embed-text',
        'task objectives',
        host => 'http://ollama:11434'
    ) AS distance
FROM task_instruction_embeddings AS tie
ORDER BY distance ASC;