In [None]:
from google.colab import userdata
from huggingface_hub import login
import os, gc

# Read the Hugging Face access token from the Colab secret named
# 'HF_Read_Token' so the token itself never appears in the notebook.
hf_token = userdata.get('HF_Read_Token')
if not hf_token:
  # Fail here with a clear message: an empty/None value would otherwise only
  # surface later as an opaque error from os.environ or login().
  raise ValueError("Colab secret 'HF_Read_Token' is empty; store a Hugging Face access token in it.")

# Make the token available through the HF_TOKEN environment variable and
# through an explicit hub login.
os.environ["HF_TOKEN"] = hf_token
login(token=hf_token)

In [None]:
# Install/upgrade the libraries used by the cells below, quietly.
# NOTE(review): no versions are pinned, and this cell runs after the cell above
# has already imported huggingface_hub -- if the upgrade replaces an
# already-imported package the runtime presumably needs a restart. Consider
# pinning versions. TODO confirm.
%pip install --quiet --upgrade transformers accelerate torch

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, pipeline

In [None]:
def clear_cache():
  """Run a garbage-collection pass, then empty PyTorch's CUDA memory cache.

  Prints a confirmation line when done and returns nothing.
  """
  # Collect before emptying the cache, so objects that are no longer
  # referenced have been released by the time the cache is cleared.
  gc.collect()
  torch.cuda.empty_cache()

  print("\n\nCleared GPU Cache")

In [None]:
# Checkpoint used for code generation; other candidate checkpoints are left
# commented out so they can be switched back in.
# model_name: str = "Salesforce/codegen-350M-mono"
# model_name: str = "bigcode/santacoder"
model_name: str = "microsoft/Phi-3.5-mini-instruct"
# nlp_model_name: str = "google/flan-t5-large"
# nlp_model_name: str = "google/flan-t5-base"

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
is_cuda: bool = torch.cuda.is_available()
if is_cuda:
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
# nlp_device = torch.device("cpu")

print("Using device:", device)
print("Using model:", model_name)
# print("Using NLP model:", nlp_model_name)

In [None]:
# Drop any tokenizer/model left over from a previous run of this cell so their
# memory can be reclaimed before loading again. Each name is removed
# independently, and pop() with a default does nothing when the name does not
# exist yet (fresh kernel, or a previous load that failed half-way), so no
# blanket `except` is needed and real errors are no longer hidden.
globals().pop("tokenizer", None)
globals().pop("model", None)
# globals().pop("nlp_tokenizer", None)
# globals().pop("nlp_model", None)
clear_cache()

tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

print("Initialized Tokenizer")


# Load the causal-LM weights in half precision; device_map="auto" leaves the
# placement of the weights to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=hf_token,
    device_map="auto",
    dtype=torch.float16
)

print("Pretrained the Model")

# nlp_tokenizer = AutoTokenizer.from_pretrained(nlp_model_name, token=hf_token)

# print("Initialized NLP Tokenizer")

# nlp_model = AutoModelForSeq2SeqLM.from_pretrained(
#       nlp_model_name,
#       token=hf_token,
#       device_map=nlp_device,
#       dtype=torch.float16
# )

# print("Pretrained the NLP Model")

clear_cache()

**Code generation starts here** (the cells above only authenticate, install dependencies, and load the model)

In [None]:
language: str = "Python" # "C", "Python"

# The prompt ends inside an open ```<language> fence, so the model is expected
# to continue with code and close the fence itself. Adjacent f-strings are
# concatenated, which avoids backslash continuations inside the literal.
prompt: str = (
    f"You're a {language} Programmer who can write code in {language} language\n"
    f"Generate a perfectly working Standard {language} code for Bubble-Sort Algorithm for sorting a provided int array of size `n`.\n"
    f"Do not include code from any other language. Only return {language} code.\n"
    f"Sure, here is the code:\n```{language.lower()}\n"
)

tokenized_prompt = tokenizer(
    prompt,
    return_tensors="pt"
).to(device)

output_tokens = model.generate(
        **tokenized_prompt,
        do_sample=False,  # greedy decoding; the sampling knobs below stay disabled
        # temperature=0.7,
        # top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        # Per-language budget of new tokens; 150 for any other language.
        max_new_tokens={"Python": 100, "C": 250}.get(language.capitalize(), 150),
        min_new_tokens=10
)

# Full decoded sequence (prompt + continuation), shown for inspection.
print(tokenizer.decode(output_tokens[0], skip_special_tokens=True))

# Separate the continuation from the prompt by token count. Cutting the decoded
# text at len(prompt) is only correct when decoding reproduces the prompt
# character-for-character, which a tokenizer round-trip does not guarantee.
prompt_token_count: int = tokenized_prompt["input_ids"].shape[-1]
outputs = tokenizer.decode(output_tokens[0][prompt_token_count:], skip_special_tokens=True)
# print("\n\nRequired Code:\n", outputs, "\n\n")

# The first ``` in the continuation closes the fence opened by the prompt.
triple_backtick_index: int = outputs.find("```")
if triple_backtick_index >= 0:
  print(f"\n\n\nExpected code upto triple_backtick_index={triple_backtick_index}:\n", outputs[:triple_backtick_index])
print(f"\nThis was the length of the prompt={len(prompt)}")

del outputs
del output_tokens

In [None]:
# def generate_prompt(prompt: str, max_tokens: int=50, min_tokens: int=10) -> str:
#   nlp_prompt = f"""### Instruction:
# You are a prompt refiner.
# Rewrite the following prompt into a clear, strict instruction for a coding agent.
# Always include:
# - "You are a {{language}} programmer"
# - "Only return code in that language"
# Do not add explanations or commentary. Only return the improved prompt.

# ### Prompt:
# {prompt}

# ### Response:"""
#   tokenized_nlp_prompt = nlp_tokenizer(
#       nlp_prompt,
#       return_tensors="pt",
#       truncation=True,
#       padding=True
#   ).to(nlp_device)

#   nlp_output_tokens = nlp_model.generate(
#           **tokenized_nlp_prompt,
#           do_sample=True,
#           temperature=0.7,
#           top_p=0.9,
#           pad_token_id=nlp_tokenizer.eos_token_id,
#           eos_token_id=nlp_tokenizer.eos_token_id,
#           max_new_tokens=max_tokens,
#           min_new_tokens=min_tokens if min_tokens > 0 else 10
#   )

#   # print("Prompt passed=\n", nlp_prompt)

#   nlp_output = nlp_tokenizer.decode(nlp_output_tokens[0], skip_special_tokens=True)
#   # print("\n\nRefined prompt=\n", nlp_output)

#   return nlp_output

In [None]:
def _extract_code(completion: str, fence_already_open: bool) -> str:
  """Cut `completion` down to the code delimited by Markdown ``` fences."""
  fence = "```"
  if not fence_already_open:
    opening = completion.find(fence)
    if opening < 0:
      # No fence anywhere: there is nothing to cut, return the text untouched.
      return completion
    # Drop everything up to and including the opening fence line (the fence
    # plus its optional language tag, e.g. "```c").
    completion = completion[opening + len(fence):].partition("\n")[2]
  closing = completion.find(fence)
  return completion if closing < 0 else completion[:closing]


def generate_code(prompt: str, max_tokens: int=200, min_tokens: int=10) -> str:
  """Generate a continuation of `prompt` and return only the code part of it.

  Uses the notebook-level `tokenizer`, `model` and `device`, with greedy
  decoding (`do_sample=False`).

  Args:
    prompt: Text fed to the model. It may end inside an open ``` code fence
      (an odd number of ``` markers) or contain no open fence at all.
    max_tokens: Passed to `generate` as `max_new_tokens`.
    min_tokens: Passed to `generate` as `min_new_tokens`.

  Returns:
    The generated text without the prompt. If the prompt left a fence open,
    the text up to the first closing ```. Otherwise the contents of the first
    fenced block in the generated text. If no fence is found, the whole
    generated text.
  """
  tokenized_prompt = tokenizer(
      prompt,
      return_tensors="pt"
  ).to(device)

  output_tokens = model.generate(
          **tokenized_prompt,
          do_sample=False,
          # temperature=0.7,
          # top_p=0.95,
          pad_token_id=tokenizer.eos_token_id,
          eos_token_id=tokenizer.eos_token_id,
          max_new_tokens=max_tokens,
          min_new_tokens=min_tokens
  )

  # Separate the continuation from the prompt by token count. Cutting the
  # decoded text at len(prompt) is only correct when decoding reproduces the
  # prompt character-for-character, which a tokenizer round-trip does not
  # guarantee.
  prompt_token_count = tokenized_prompt["input_ids"].shape[-1]
  completion = tokenizer.decode(output_tokens[0][prompt_token_count:], skip_special_tokens=True)

  # An odd number of fence markers in the prompt means it ends inside an open
  # fence, so the first fence in the completion is the closing one.
  fence_already_open = prompt.count("```") % 2 == 1
  return _extract_code(completion, fence_already_open)

In [None]:
# Generation budgets (new tokens) per target language.
python_max_tokens: int = 200
c_max_tokens: int = 300

for language in ("C", "Python"):
  prompt: str = f"Write a function in {language} programming language for Bubble-Sort Algorithm"
  print("prompt=\n", prompt, end="\n\n")

  # nlp_prompt: str = generate_prompt(prompt)
  # print("nlp_prompt=\n", nlp_prompt, end="\n\n")

  # nlp_prompted_code: str = generate_code(prompt=nlp_prompt, max_tokens=python_max_tokens if language.capitalize() == "Python" else c_max_tokens if language.upper() == "C" else 150)
  # print("\nnlp_prompt_code=\n", nlp_prompted_code)

  # Pick the budget for this language; 150 is the fallback for any other one.
  if language.capitalize() == "Python":
    token_budget = python_max_tokens
  elif language.upper() == "C":
    token_budget = c_max_tokens
  else:
    token_budget = 150

  general_code: str = generate_code(prompt=prompt, max_tokens=token_budget)
  print("\ngeneral_code=\n", general_code)

In [None]:
# Remove this section's temporaries. Each name is dropped independently, and
# pop() with a default does nothing for a name that was never defined, so one
# missing name no longer skips the others and no blanket `except` is needed.
globals().pop("python_max_tokens", None)
globals().pop("c_max_tokens", None)
# globals().pop("nlp_prompt", None)
# globals().pop("nlp_prompted_code", None)
globals().pop("general_code", None)

clear_cache()

In [None]:
# Detailed, role-style variant of the prompt. It is not the active prompt; it
# has its own name so it is no longer assigned to `prompt` and then immediately
# overwritten. Adjacent literals are concatenated, so no source indentation
# leaks into the text and every instruction ends with a newline.
detailed_prompt: str = (
    "You're a Javascript Programmer who can write code in Javascript language\n"
    "Generate a perfectly working Vanilla-Javascript code a Program for traversing through all the HTML DOM elements and changing their CSS-style background to red.\n"
    "Do not include code from any other language. Only return Javascript code.\n"
    "Sure, here is the code:\n```javascript\n"
)

# Active prompt: a short instruction that ends inside an open code fence.
prompt: str = "Write a program in Javascript programming language to traverse through all HTML DOM elements and change their CSS-style background to red.\n```\n"

In [None]:
# Generation budget (new tokens) for the JavaScript prompt.
js_max_tokens: int = 100

print("prompt=\n", prompt, end="\n\n")

# nlp_prompt: str = generate_prompt(prompt)
# print("nlp_prompt=\n", nlp_prompt, end="\n\n")

# nlp_prompted_code: str = generate_code(prompt=nlp_prompt, max_tokens=100)
# print("\nnlp_prompt_code=\n", nlp_prompted_code)

# Generate code for the prompt shown above and display it.
general_code: str = generate_code(prompt=prompt, max_tokens=js_max_tokens)
print("\ngeneral_code=\n", general_code)

In [None]:
# Drop the generated text and release cached memory. pop() with a default
# keeps this cell re-runnable: unlike a bare `del`, it does not raise
# NameError when `general_code` is already gone or was never created.
# globals().pop("nlp_prompt", None)
# globals().pop("nlp_prompted_code", None)
globals().pop("general_code", None)
clear_cache()