# Answering the 200 training problems from the Java250 dataset - Codellama

In [1]:
import os
import pandas as pd
import numpy as np
import time

from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from tqdm import tqdm


from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_community.llms import Ollama

## Import the models

### Vector store

In [2]:
# Embedding model served by Ollama; handed to Chroma below as its embedding function.
embeddings = OllamaEmbeddings(
    model="gemma:7b-instruct",
    num_gpu=2,
    num_thread=24,
)

# Load the vector store persisted on disk under "bp_chroma_db" and wrap it as a retriever.
db = Chroma(
    embedding_function=embeddings,
    persist_directory="bp_chroma_db",
)
retriever = db.as_retriever()

### LLMs

In [3]:
# CodeLlama model served through Ollama; used as the LLM step of the chain.
codellama = Ollama(
    model="codellama:latest",
    num_ctx=4096,
    num_gpu=2,
    num_thread=24,
)

## RAG Prompts:

In [4]:
# Instruction-format prompt ([INST] / <<SYS>> tags): the system part asks for Java
# code and `{question}` is the only placeholder filled in at invocation time.
codellama_template = """[INST]<<SYS>> Use Java code in your answer. <</SYS>>
                Question: {question}
            [/INST]"""

# Build a chat prompt consisting of a single human message from the template.
# `from_template` infers the input variables (`question`) from the placeholders,
# so there is no need to spell out the nested PromptTemplate constructors by hand.
java_prompt_codellama = ChatPromptTemplate.from_template(codellama_template)

### Chain making

In [5]:
def format_docs(docs):
    """Return the ``page_content`` of every document in ``docs`` as one string.

    Consecutive documents are separated by a blank line; an empty iterable
    yields an empty string.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

# LCEL pipeline: the invoked value is passed through unchanged as `question`,
# rendered into the prompt, sent to the model, and the reply is parsed to a string.
codellama_chain = {"question": RunnablePassthrough()} | java_prompt_codellama | codellama | StrOutputParser()

## QA

Read the questions in.

In [6]:
# Load the problem set. Later cells read its "id" column (problem file names such as
# "p02641.html") and its "desc" column (the question text sent to the model).
Java200 = pd.read_csv("Java200.csv")

In [11]:
# Preview the problem ids in positional rows 15 through 199.
Java200["id"].iloc[15:200]

15     p02641.html
16     p02257.html
17     p02259.html
18     p02415.html
19     p02410.html
          ...     
195    p02952.html
196    p02773.html
197    p03000.html
198    p02399.html
199    p03102.html
Name: id, Length: 185, dtype: object

Loop over the problems and collect the model's answers.

In [16]:
# Half-open positional range of rows in Java200 to answer in this run.
FIRST_ROW, LAST_ROW = 133, 200

# Slice the rows once so that each problem id is always paired with the
# description from the same row, instead of keeping a hand-incremented counter
# in sync with the slice start.
batch = Java200.iloc[FIRST_ROW:LAST_ROW]

codellama_answers = []
for i, problem, question in zip(batch.index, batch["id"], batch["desc"]):
    # One LLM call per problem; the description is the whole chain input.
    clm_a = codellama_chain.invoke(question)
    print("Codellama answer to: " + problem + "\n\n" + clm_a + "\n\n")
    codellama_answers.append(clm_a)
    # Row label of the problem just answered, printed as a progress marker.
    print(i)
    print("Complete \n")

Codellama answer to: p02957.html

 
import java.util.*;

public class Main {
    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        int A = sc.nextInt();
        int B = sc.nextInt();

        // Check if A and B are distinct
        if (A == B) {
            System.out.println("IMPOSSIBLE");
            return;
        }

        // Initialize K as the difference between A and B
        int K = A - B;

        // Check if |K| is equal to the absolute value of the difference between A and B
        if (Math.abs(A - K) == Math.abs(B - K)) {
            System.out.println(K);
        } else {
            System.out.println("IMPOSSIBLE");
        }
    }
}


133
Complete 

Codellama answer to: p02264.html

 
 
import java.util.*;
 
public class RoundRobinScheduling {
 
    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        int n = sc.nextInt();
        int q = sc.nextInt();
        Process[] processes = 

In [18]:
# Pair each problem id from rows 133-199 with the answer collected for it.
# The list of answers must have exactly as many entries as there are ids.
qa_java200_cl = pd.DataFrame(
    dict(
        problem=Java200["id"].iloc[133:200],
        codellama_answer=codellama_answers,
    )
)

In [19]:
# Persist the collected answers next to the notebook. With the default settings
# to_csv also writes the DataFrame index as the first column.
qa_java200_cl.to_csv("qa_java200_cl.csv")