# Answering the 250 train problems on the Java250 dataset - Codellama

In [1]:
import os
import pandas as pd
import numpy as np
import time

from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from tqdm import tqdm


from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_community.llms import Ollama

## Import the models

### Vector store

In [2]:
# Embedding model served through Ollama; handed to Chroma as its embedding function.
embeddings = OllamaEmbeddings(
    model="gemma:7b-instruct",
    num_gpu=2,
    num_thread=24,
)

# Open the Chroma store from its on-disk directory.
db = Chroma(
    persist_directory="bp_chroma_db",
    embedding_function=embeddings,
)

# Expose the store as a retriever using the default search settings.
retriever = db.as_retriever()

### LLMs

In [18]:
# Codellama served through Ollama; num_ctx sets the context window size.
codellama = Ollama(
    model="codellama:latest",
    num_gpu=2,
    num_thread=24,
    num_ctx=4096,
)

## RAG Prompts:

In [19]:
# Instruction-style prompt: a system directive asking for Java, followed by the question.
# NOTE: the leading whitespace inside the string is part of the text sent to the model.
codellama_template = """[INST]<<SYS>> Use Java code in your answer. <</SYS>>
                Question: {question}
            [/INST]"""

# Wrap the template as a single human message inside a chat prompt.
java_prompt_codellama = ChatPromptTemplate(
    input_variables=['question'],
    output_parser=None,
    partial_variables={},
    messages=[
        HumanMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=['question'],
                output_parser=None,
                partial_variables={},
                template=codellama_template,
                template_format='f-string',
                validate_template=True,
            ),
            additional_kwargs={},
        )
    ],
)

### Chain making

In [20]:
def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with the contents joined by two newlines
        (an empty string when ``docs`` is empty).
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

# LCEL pipeline: the raw input is passed through unchanged as the prompt's
# "question" variable, the filled prompt is sent to Codellama, and the model
# output is parsed into a plain string.
# NOTE(review): no retriever is part of this chain, so the answers do not draw
# on the vector store — confirm this is intended.
codellama_chain = (
    {"question": RunnablePassthrough()}
    | java_prompt_codellama
    | codellama
    | StrOutputParser()
)

## QA

### Initial QA of 200 questions May 2024

Read the questions in.

In [7]:
# Load the full problem set, then keep the first 200 rows for the initial QA run.
Java250 = pd.read_csv("Java250.csv")
Java200 = Java250.head(200)

In [11]:
# Preview the problem ids at positions 15 through 199.
Java200["id"].iloc[15:200]

15     p02641.html
16     p02257.html
17     p02259.html
18     p02415.html
19     p02410.html
          ...     
195    p02952.html
196    p02773.html
197    p03000.html
198    p02399.html
199    p03102.html
Name: id, Length: 185, dtype: object

Loop over the problems and collect the answers.

In [16]:
# Ask Codellama each problem at positions 133-199 and collect the answers in order.
# NOTE(review): the start offset of 133 presumably resumes an earlier interrupted
# run — confirm; this cell produces no answers for rows before 133.
codellama_answers = []
# Iterate ids and descriptions in lockstep so the description always belongs to
# the printed problem; `i` is derived from the slice start instead of a
# hand-maintained counter that had to be kept in sync with the slice bounds.
for i, (problem, question) in enumerate(
    zip(Java200["id"].iloc[133:200], Java200["desc"].iloc[133:200]), start=133
):
    clm_a = codellama_chain.invoke(question)
    print("Codellama answer to: " + problem + "\n\n" + clm_a + "\n\n")
    codellama_answers.append(clm_a)
    print(i)
    print("Complete \n")

Codellama answer to: p02957.html

 
import java.util.*;

public class Main {
    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        int A = sc.nextInt();
        int B = sc.nextInt();

        // Check if A and B are distinct
        if (A == B) {
            System.out.println("IMPOSSIBLE");
            return;
        }

        // Initialize K as the difference between A and B
        int K = A - B;

        // Check if |K| is equal to the absolute value of the difference between A and B
        if (Math.abs(A - K) == Math.abs(B - K)) {
            System.out.println(K);
        } else {
            System.out.println("IMPOSSIBLE");
        }
    }
}


133
Complete 

Codellama answer to: p02264.html

 
 
import java.util.*;
 
public class RoundRobinScheduling {
 
    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        int n = sc.nextInt();
        int q = sc.nextInt();
        Process[] processes = 

In [18]:
# Pair each problem id (positions 133-199) with the answer collected for it.
# The list of answers must have the same length as the id slice.
qa_java200_cl = pd.DataFrame(
    data={
        "problem": Java200["id"][133:200],
        "codellama_answer": codellama_answers,
    }
)

In [19]:
# Persist the answers; the DataFrame index is written as the first CSV column.
qa_java200_cl.to_csv(path_or_buf="qa_java200_cl.csv")

### Final 50 questions QA September 2024

In [8]:
# Remaining 50 problems (positions 200-249), re-indexed from 0.
Java50 = (
    Java250
    .iloc[200:250]
    .reset_index(drop=True)
)

In [21]:
# Ask Codellama each of the final 50 problems and collect the answers in order.
codellama_answers = []
# Iterate ids and descriptions together; `enumerate` supplies the progress
# counter, so no separate initialisation of `i` is needed.
for i, (problem, question) in enumerate(zip(Java50["id"], Java50["desc"])):
    cd_a = codellama_chain.invoke(question)
    print("Codellama answer to: " + problem + "\n\n" + cd_a + "\n\n")
    codellama_answers.append(cd_a)
    print(i)
    print("Complete \n")

Codellama answer to: p02835.html

 
package q10957;

import java.util.*;

public class BustOrWin {
   public static void main(String[] args) {
      Scanner scan = new Scanner(System.in);
      int a1 = scan.nextInt();
      int a2 = scan.nextInt();
      int a3 = scan.nextInt();
      
      if (a1 + a2 + a3 >= 22) {
         System.out.println("bust");
      } else {
         System.out.println("win");
      }
   }
}

This code reads three integers from the standard input, checks whether their sum is greater than or equal to 22, and prints "bust" if it is, or "win" otherwise.

The constraints of the problem are satisfied by this code, as all values in the input are integers between 1 and 13, inclusive.


0
Complete 

Codellama answer to: p02832.html


Input:
5
2 3 1 4 5

Output:
3

Explanation:
We can break the bricks as follows:
1. Break brick 1 and 2 to satisfy the condition that the 1st and 2nd bricks from the left have the integers 1 and 2 written on them, respectively.
2. Break 

In [22]:
# Pair each of the final 50 problem ids with the answer collected for it.
qa_java50_codellama = pd.DataFrame(
    data={
        "problem": Java50["id"],
        "codellama_answer": codellama_answers,
    }
)

In [23]:
# Show the first five rows as a sanity check.
qa_java50_codellama.head(5)

Unnamed: 0,problem,codellama_answer
0,p02835.html,\npackage q10957;\n\nimport java.util.*;\n\np...
1,p02832.html,\nInput:\n5\n2 3 1 4 5\n\nOutput:\n3\n\nExplan...
2,p04012.html,\nimport java.util.*;\n\npublic class Solutio...
3,p02719.html,\nimport java.util.*;\npublic class AokiAndAb...
4,p02981.html,\nimport java.util.*;\n\npublic class Main {\n...


In [25]:
# Persist the answers; the DataFrame index is written as the first CSV column.
qa_java50_codellama.to_csv(path_or_buf="qa_java50_codellama.csv")