# Code Generation Planning Phase

In [1]:
! pip install transformers openai==0.28 accelerate nltk datasets rouge_score

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl.metadata (19 kB)
Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting requests>=2.20 (from openai==0.28)
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12

In [2]:
from google.colab import drive
# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torch
import openai
import subprocess
import tempfile
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pandas as pd
from datasets import load_metric
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import jaccard_score

In [4]:
from huggingface_hub import login
access_token = "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"
login(token = access_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [5]:
# finetuned model name
model_name_1 = "Vinoth13/Finetuned_Llama2"
model_name_2 = "Vinoth13/Finetuned_CodeLlama"


## Decompose the Abstract into Sub Problems

In [6]:
# Load the entire model on the GPU 0
device_map = {"": 0}

model_1 = AutoModelForCausalLM.from_pretrained(
    model_name_1,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)

# Load LLaMA tokenizer
tokenizer_1 = AutoTokenizer.from_pretrained(model_name_1, trust_remote_code=True)
tokenizer_1.pad_token = tokenizer_1.eos_token
tokenizer_1.padding_side = "right" # Fix weird overflow issue with fp16 training


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/183 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/437 [00:00<?, ?B/s]

In [7]:
torch.cuda.empty_cache()

In [8]:
def decompose_into_subproblems(model_name, user_input):
    # Define the system prompt
    system_prompt = 'Below is an instruction that describes a complex problem. Decompose the problem into smaller, manageable subproblems only.\n\n'

    # Wrap the user input using the right chat template
    instruction = f"{system_prompt}\n ### Instruction:\n{user_input}\n\n### Response:\n"

    # Load the text generation pipeline
    pipe = pipeline(task="text-generation", model=model_1, tokenizer=tokenizer_1, max_length=1024, truncation=True)

    # Generate the subproblems
    result = pipe(instruction)

    # Trim the response and extract the subproblems
    generated_text = result[0]['generated_text'][len(instruction):].strip()

    # Split the generated text into subproblems, stopping at "###" or "Implementation"
    subproblems = []
    for line in generated_text.split('\n'):
        if '###' in line or 'Implementation' in line:
            break
        subproblems.append(line.strip())

    return subproblems, generated_text

In [9]:
# Calculate BLEU score
def calculate_bleu(reference, hypothesis):
    reference = [reference.split()]  # List of references for each sentence
    hypothesis = hypothesis.split()  # Hypothesis sentence
    smoothing_function = SmoothingFunction().method1
    return sentence_bleu(reference, hypothesis, smoothing_function=smoothing_function)

# Calculate ROUGE score
def calculate_rouge(reference, hypothesis):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference, hypothesis)
    return scores

In [11]:
# Load the test dataset
test_path = '/content/drive/My Drive/Final FYP/test_dataset.csv'
test_df = pd.read_csv(test_path)

# Extract instruction and response from the first row of the test dataset
sample_data = test_df.iloc[5]['text']
instruction_start = sample_data.find("### Instruction:") + len("### Instruction:")
instruction_end = sample_data.find("### Response:")
response_start = instruction_end + len("### Response:")

instruction = sample_data[instruction_start:instruction_end].strip()
reference_text = sample_data[response_start:].strip()

# Example usage
user_input = instruction

subproblems, generated_text = decompose_into_subproblems(model_1, user_input)

# Evaluate the predictions
bleu_score = calculate_bleu(reference_text, generated_text)
rouge_score = calculate_rouge(reference_text, generated_text)

In [12]:
# Print the subproblems in numbered format
print("Generated Subproblems:\n")
for i, subproblem in enumerate(subproblems, 1):
    print(f"{subproblem}")

Generated Subproblems:

1. Create a hash map/dictionary to store the count of each integer in the array.
2. Iterate through the array and increment the count for each integer.
3. If the sum of the count is not equal to the given target, return false.
4. If the count of the integer is 1, return true.
5. Iterate through the count of each integer in the hash map/dictionary. If the count is 1, return true.
6. If the count is not 1, return false.
7. If the iteration completes without returning true, return false.

The algorithm checks if there are n unique integers in the array with a sum equal to zero. If so, it returns true. Otherwise, it returns false.

The time complexity of the algorithm is O(n), as it iterates through the array once. The space complexity is also O(n), as it uses a hash map/dictionary to store the count of each integer.

The main idea of the algorithm is to check if there are n unique integers in the array that sum up to zero. If there are, return true; otherwise, retu

In [13]:
# Print BLEU and ROUGE scores
print("\nBLEU Score:", bleu_score)
print("\nROUGE Scores:", rouge_score)


BLEU Score: 0.008100995596730576

ROUGE Scores: {'rouge1': Score(precision=0.06857142857142857, recall=0.5106382978723404, fmeasure=0.12090680100755667), 'rouge2': Score(precision=0.015736766809728183, recall=0.11827956989247312, fmeasure=0.027777777777777776), 'rougeL': Score(precision=0.05, recall=0.3723404255319149, fmeasure=0.08816120906801007)}


In [15]:
# Calculate average ROUGE scores
avg_rouge_scores = {
    'rouge1': rouge_score['rouge1'].fmeasure,
    'rouge2': rouge_score['rouge2'].fmeasure,
    'rougeL': rouge_score['rougeL'].fmeasure
}

# Print evaluation results
print(f"BLEU Score: {bleu_score:.4f}")
print(f"Average ROUGE-1 F1 Score: {avg_rouge_scores['rouge1']:.4f}")
print(f"Average ROUGE-2 F1 Score: {avg_rouge_scores['rouge2']:.4f}")
print(f"Average ROUGE-L F1 Score: {avg_rouge_scores['rougeL']:.4f}")

BLEU Score: 0.0081
Average ROUGE-1 F1 Score: 0.1209
Average ROUGE-2 F1 Score: 0.0278
Average ROUGE-L F1 Score: 0.0882


In [16]:
# Save the subproblems to a file
with open('subproblems.txt', 'w') as f:
    for i, subproblem in enumerate(subproblems, 1):
        f.write(f"{i}. {subproblem}\n")

# Generate another sample if the score is not good
if bleu_score < 0.6 or avg_rouge_scores['rouge1'] < 0.5:
    subproblems, generated_text = decompose_into_subproblems(model_1, user_input)
    print("\nGenerated Another Sample:")
    for i, subproblem in enumerate(subproblems, 1):
        print(f"{subproblem}")


Generated Another Sample:
1. Create a hashmap called `counts` to store the count of integers with the same value.
2. Initialize an empty list called `result` to store the unique integers.
3. Iterate through the input array `nums`.
4. If the current integer is not in the hashmap, add it to the hashmap with a count of 1.
5. If the count of the current integer is greater than 1, remove the integer from the hashmap.
6. If the current integer is not in the hashmap, add it to the result list.
7. Return the result list.

The time complexity of this algorithm is O(n) because it iterates through the input array once. The space complexity is also O(n) because it uses a hashmap to store the counts.

This algorithm works because it checks if a number is in the hashmap. If it is, it increments the count and removes the number from the hashmap. If it is not in the hashmap, it adds the number to the result list. This way, we only keep the unique numbers in the result list.



In [17]:
torch.cuda.empty_cache()

In [18]:
subproblems

['1. Create a hashmap called `counts` to store the count of integers with the same value.',
 '2. Initialize an empty list called `result` to store the unique integers.',
 '3. Iterate through the input array `nums`.',
 '4. If the current integer is not in the hashmap, add it to the hashmap with a count of 1.',
 '5. If the count of the current integer is greater than 1, remove the integer from the hashmap.',
 '6. If the current integer is not in the hashmap, add it to the result list.',
 '7. Return the result list.',
 '',
 'The time complexity of this algorithm is O(n) because it iterates through the input array once. The space complexity is also O(n) because it uses a hashmap to store the counts.',
 '',
 'This algorithm works because it checks if a number is in the hashmap. If it is, it increments the count and removes the number from the hashmap. If it is not in the hashmap, it adds the number to the result list. This way, we only keep the unique numbers in the result list.',
 '']

## Generate Code Snippets for each Sub Problem

In [6]:
# Load the entire model on the GPU 0
device_map = {"": 0}

model_2 = AutoModelForCausalLM.from_pretrained(
    model_name_2,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)

# Load LLaMA tokenizer
tokenizer_2 = AutoTokenizer.from_pretrained(model_name_2, trust_remote_code=True)
tokenizer_2.pad_token = tokenizer_2.eos_token
tokenizer_2.padding_side = "right" # Fix weird overflow issue with fp16 training


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/658 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.78k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/126 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/196 [00:00<?, ?B/s]

In [19]:
# Code generation part
# Define the system prompt and input prompt for code generation
system_prompt_code = 'Below is an instruction that describes a subproblem. Generate only Java code snippets for the problem below. No comments for this.\n\n'

prompts = [
    '1. Create a hashmap called `counts` to store the count of integers with the same value.',
    '2. Initialize an empty list called `result` to store the unique integers.',
    '3. Iterate through the input array `nums`.',
    '4. If the current integer is not in the hashmap, add it to the hashmap with a count of 1.',
    '5. If the count of the current integer is greater than 1, remove the integer from the hashmap.',
    '6. If the current integer is not in the hashmap, add it to the result list.',
    '7. Return the result list.',
    '',
    'The time complexity of this algorithm is O(n) because it iterates through the input array once. The space complexity is also O(n) because it uses a hashmap to store the counts.',
    '',
    'This algorithm works because it checks if a number is in the hashmap. If it is, it increments the count and removes the number from the hashmap. If it is not in the hashmap, it adds the number to the result list. This way, we only keep the unique numbers in the result list.',
    ''
]


In [13]:

# Load the test dataset
test_path = '/content/drive/My Drive/Final FYP/test_code_snippet_dataset.csv'
test_df = pd.read_csv(test_path)

# Extract instruction and response from the first row of the test dataset
sample_data = test_df.iloc[0]['text']
instruction_start = sample_data.find("### Input:") + len("### Input:")
instruction_end = sample_data.find("### Response:")
response_start = instruction_end + len("### Response:")

instruction = sample_data[instruction_start:instruction_end].strip()
reference_text = sample_data[response_start:].strip()

# Load the text generation pipeline with the specified model and tokenizer
pipe = pipeline(task="text-generation", model=model_2, tokenizer=tokenizer_2, max_length=256, truncation=True)

# Function to generate code for each subproblem
def generate_code_for_subproblem(prompt):
    instruction = f"{system_prompt_code}\n ### Instruction:\n{prompt}\n\n### Response:\n"
    result = pipe(instruction)
    output_text = result[0]['generated_text'][len(instruction):]
    # Define possible stop tokens
    stop_tokens = ["//", "###"]

    # Find the first occurrence of any stop token
    stop_index = len(output_text)
    for token in stop_tokens:
        token_index = output_text.find(token)
        if token_index != -1 and token_index < stop_index:
            stop_index = token_index

    output_text = output_text[:stop_index].strip()
    return output_text

# Iterate over each subproblem prompt and generate code
for prompt in prompts:
    code_snippet = generate_code_for_subproblem(prompt)
    print(f"Subproblem: {prompt}")
    print(f"Code Snippet:\n{code_snippet}")
    print("\n")


Subproblem: 1. Create a hashmap called `counts` to store the count of integers with the same value.
Code Snippet:
import java.util.HashMap;

public class Main {
    public static void main(String[] args) {
        HashMap<Integer, Integer> counts = new HashMap<>();
    }
}


Subproblem: 2. Initialize an empty list called `result` to store the unique integers.
Code Snippet:
List<Integer> result = new ArrayList<>();


Subproblem: 3. Iterate through the input array `nums`.
Code Snippet:
int[] nums = {1, 2, 3, 4, 5};

for (int i = 0; i < nums.length; i++) {
    System.out.println(nums[i]);
}


Subproblem: 4. If the current integer is not in the hashmap, add it to the hashmap with a count of 1.
Code Snippet:
import java.util.HashMap;

public class Main {
    public static void main(String[] args) {
        HashMap<Integer, Integer> hashmap = new HashMap<>();
        int currentInteger = 5;

        if (!hashmap.containsKey(currentInteger)) {
            hashmap.put(currentInteger, 1);
     

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Subproblem: 
Code Snippet:
public class SubproblemSolver {

    public static void main(String[] args) {


Subproblem: This algorithm works because it checks if a number is in the hashmap. If it is, it increments the count and removes the number from the hashmap. If it is not in the hashmap, it adds the number to the result list. This way, we only keep the unique numbers in the result list.
Code Snippet:
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

public class UniqueNumbers {

    public static List<Integer> getUniqueNumbers(List<Integer> numbers) {
        List<Integer> result = new ArrayList<>();
        HashMap<Integer, Integer> map = new HashMap<>();

        for (int num : numbers) {
            if (map.containsKey(num)) {
                map.put(num, map.get(num) + 1);
            } else {
                result.add(num);
            }
        }

        return result;
    }

    public


Subproblem: 
Code Snippet:
public class SubproblemSolver {

In [14]:
import numpy as np

In [15]:
# Function to calculate Cosine Similarity
def calculate_cosine_similarity(true_responses, predictions):
    vectorizer = CountVectorizer().fit_transform(true_responses + predictions)
    vectors = vectorizer.toarray()
    cosine_similarities = []
    for i in range(len(true_responses)):
        cosine_sim = cosine_similarity([vectors[i]], [vectors[i + len(true_responses)]])
        cosine_similarities.append(cosine_sim[0][0])
    return np.mean(cosine_similarities)

# Function to calculate Jaccard Similarity
def calculate_jaccard_similarity(true_responses, predictions):
    vectorizer = CountVectorizer(binary=True).fit(true_responses + predictions)
    true_responses_vectors = vectorizer.transform(true_responses).toarray()
    predictions_vectors = vectorizer.transform(predictions).toarray()
    jaccard_similarities = []
    for i in range(len(true_responses)):
        jaccard_sim = jaccard_score(true_responses_vectors[i], predictions_vectors[i])
        jaccard_similarities.append(jaccard_sim)
    return np.mean(jaccard_similarities)

In [20]:
# Calculate Cosine and Jaccard Similarity
average_cosine_similarity = calculate_cosine_similarity([reference_text], [instruction])
average_jaccard_similarity = calculate_jaccard_similarity([reference_text], [instruction])

# Print Cosine and Jaccard Similarity
print(f"Average Cosine Similarity: {average_cosine_similarity}")
print(f"Average Jaccard Similarity: {average_jaccard_similarity}")

# Generate another sample if the scores are not good
if (average_cosine_similarity < 0.1 or average_jaccard_similarity < 0.1):
    # Iterate over each subproblem prompt and generate code
    for prompt in prompts:
        code_snippet = generate_code_for_subproblem(prompt)
        print(f"Subproblem: {prompt}")
        print(f"Code Snippet:\n{code_snippet}")
        print("\n")



Average Cosine Similarity: 0.12395595736018243
Average Jaccard Similarity: 0.09859154929577464
Subproblem: 1. Create a hashmap called `counts` to store the count of integers with the same value.
Code Snippet:
import java.util.HashMap;

public class Main {
    public static void main(String[] args) {
        HashMap<Integer, Integer> counts = new HashMap<>();
    }
}


Subproblem: 2. Initialize an empty list called `result` to store the unique integers.
Code Snippet:
List<Integer> result = new ArrayList<>();


Subproblem: 3. Iterate through the input array `nums`.
Code Snippet:
int[] nums = {1, 2, 3, 4, 5};

for (int i = 0; i < nums.length; i++) {
    System.out.println(nums[i]);
}


Subproblem: 4. If the current integer is not in the hashmap, add it to the hashmap with a count of 1.
Code Snippet:
import java.util.HashMap;

public class Main {
    public static void main(String[] args) {
        HashMap<Integer, Integer> hashmap = new HashMap<>();
        int currentInteger = 5;

      

In [25]:
# Initialize a list to collect all code snippets
all_code_snippets = []

# Iterate over each subproblem prompt and generate code
for prompt in prompts:
    code_snippet = generate_code_for_subproblem(prompt)
    all_code_snippets.append(code_snippet)

# Combine all code snippets into a single string
combined_code = "\n\n".join(all_code_snippets)

# Print the combined code snippets
print(f"Combined Code Snippets:\n{combined_code}")

Combined Code Snippets:
import java.util.HashMap;

public class Main {
    public static void main(String[] args) {
        HashMap<Integer, Integer> counts = new HashMap<>();
    }
}

List<Integer> result = new ArrayList<>();

int[] nums = {1, 2, 3, 4, 5};

for (int i = 0; i < nums.length; i++) {
    System.out.println(nums[i]);
}

import java.util.HashMap;

public class Main {
    public static void main(String[] args) {
        HashMap<Integer, Integer> hashmap = new HashMap<>();
        int currentInteger = 5;

        if (!hashmap.containsKey(currentInteger)) {
            hashmap.put(currentInteger, 1);
        }
    }
}

import java.util.HashMap;

public class Main {
    public static void main(String[] args) {
        HashMap<Integer, Integer> map = new HashMap<>();
        map.put(1, 2);
        map.put(2, 3);
        map.put(3, 1);

        for (int key : map.keySet()) {
            if (map.get(key) > 1) {
                map.remove(key);
            }
        }

        Syst

# Code Generation Implementation Phase

In [27]:
# Set up your OpenAI API key
openai.api_key = ""

## Merge the Code Snippets to Generate Complex Code

In [28]:
# Function to ask GPT-3.5 Turbo to combine the snippets into a cohesive function
def ask_gpt(prompt):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are an expert Java programmer. Give the only the cohesive function as output for the below.\n"},
            {"role": "user", "content": f"Please merge the following Java code snippets into a single, cohesive function only. No more extra things to do:\n\n{prompt}"}
        ]
    )
    return response.choices[0].message['content']

# Get the response from GPT-3.5 Turbo
response = ask_gpt(combined_code)

# Print the response
print(response)

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

public class Main {
    public static void main(String[] args) {
        HashMap<Integer, Integer> counts = new HashMap<>();
        
        List<Integer> result = new ArrayList<>();

        int[] nums = {1, 2, 3, 4, 5};

        for (int i = 0; i < nums.length; i++) {
            System.out.println(nums[i]);
        }

        HashMap<Integer, Integer> hashmap = new HashMap<>();
        int currentInteger = 5;

        if (!hashmap.containsKey(currentInteger)) {
            hashmap.put(currentInteger, 1);
        }

        HashMap<Integer, Integer> map = new HashMap<>();
        map.put(1, 2);
        map.put(2, 3);
        map.put(3, 1);

        for (int key : new HashMap<>(map).keySet()) {
            if (map.get(key) > 1) {
                map.remove(key);
            }
        }

        System.out.println(map);
    }
}
```  
