In [1]:
%%capture
%pip install -U transformers datasets torch sentencepiece peft accelerate evaluate

In [2]:
import os
import json
import torch
import shutil
import logging
import transformers
import pandas as pd

In [3]:
from evaluate import load
from datasets import Dataset, load_dataset
from huggingface_hub import login, Repository

from transformers import (
    AdamW,
    RobertaTokenizer,
    T5ForConditionalGeneration,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    DataCollatorForSeq2Seq
)
from peft import (
    LoraConfig,
    get_peft_model,
    TaskType,
    PeftModel, 
    PeftConfig
)

# Define Variables

In [4]:
base_model = "Salesforce/codet5-base"

new_model = "codet5-base_CODEX"

model_path = "model"

tokenizer_path = "tokenizer"

dataset_path = "dataset"  # dataset dir path

dataset = "CodexAI/Eval4Deepseek-Coder"  # dataset name at huggingface

repo_url = f'https://huggingface.co/datasets/{dataset}'

In [6]:
# shutil.rmtree("wandb")

In [7]:
# if not os.path.exists(dataset_path):
#     os.makedirs(dataset_path)

# Get Dataset
Clone the dataset from HF, it's fast as fuck!

In [8]:
# from kaggle_secrets import UserSecretsClient
# user_secrets = UserSecretsClient()

In [9]:
# login(user_secrets.get_secret("HF_TOKEN"))

In [10]:
# print("Cloning Dataset...")

In [11]:
# repo = Repository(local_dir=dataset_dir,clone_from=repo_url)

# Playing with Dataset

In [12]:
def load_json_data(dir_name):
  """
  This function is used to load the json data from the given directory.
  After reading the data store them in a list
  After storing all the data in a list we can return the list.
  """

  data=[]
  for root_folder in os.listdir(dir_name):
    if root_folder!=".git" and root_folder!=".gitattributes":
      for files in os.listdir(os.path.join(dir_name,root_folder)):
        if files.endswith(".json"):
          with open(os.path.join(dir_name,root_folder,files),"r")as f:
            json_file=json.load(f)
            data.append(json_file)
  return data

In [13]:
print(f"Loading dataset from /{dataset_path}/...")

Loading dataset from /dataset/...


In [14]:
json_data=load_json_data(dataset_path)

In [15]:
print(f"Length of loaded dataset is: {len(json_data)}")

Length of loaded dataset is: 78534


In [16]:
tmp=json_data  # in case if this is required again

## Dataset Limit = 1000
Dataset limit is set to 1000 and this bcz of testing this script. For actual training change this value
`json_data[:1000]` to something greater or simply comment the cell below to use the complete dataset

In [18]:
json_data=json_data[:1000]

In [19]:
print(f"Length of dataset is: {len(json_data)}")

Length of dataset is: 1000


In [20]:
print("Loading dataset...")

Loading dataset...


In [21]:
df=Dataset.from_list(json_data)

In [22]:
df

Dataset({
    features: ['instruction', 'output'],
    num_rows: 1000
})

In [23]:
df.features

{'instruction': Value(dtype='string', id=None),
 'output': Value(dtype='string', id=None)}

In [24]:
df['instruction'][0]

'Generate a unit test case for the following Java method: Ignite implements Extension<IgniteBucketBuilder> { public <T extends Serializable> ProxyManager<T> proxyManagerForCache(IgniteCache<T, GridBucketState> cache) { return new IgniteProxyManager<>(cache); }  @Override IgniteBucketBuilder builder(); ProxyManager<T> proxyManagerForCache(IgniteCache<T, GridBucketState> cache);  }'

In [25]:
df['output'][0]

'The unit test case for the given Java method is: @Test(expected = IllegalArgumentException.class) @Override public void testThatImpossibleToPassNullCacheToProxyManagerConstructor() { Bucket4j.extension(getExtensionClass()).proxyManagerForCache(null); }'

In [26]:
print("Spliting dataset...")

Spliting dataset...


In [27]:
df=df.train_test_split(test_size=0.2)

In [28]:
print(df)

DatasetDict({
    train: Dataset({
        features: ['instruction', 'output'],
        num_rows: 800
    })
    test: Dataset({
        features: ['instruction', 'output'],
        num_rows: 200
    })
})


In [29]:
train=df['train']
test=df['test']

In [30]:
train

Dataset({
    features: ['instruction', 'output'],
    num_rows: 800
})

In [31]:
test

Dataset({
    features: ['instruction', 'output'],
    num_rows: 200
})

In [32]:
print("Checking dataset...")

Checking dataset...


In [33]:
tokenizer = RobertaTokenizer.from_pretrained(base_model)

tokenizer_config.json:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/703k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/294k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/12.5k [00:00<?, ?B/s]



In [34]:
instruction = tokenizer(train['instruction'][0])
print(instruction)

{'input_ids': [1, 4625, 279, 2836, 1842, 648, 364, 326, 3751, 5110, 707, 30, 8086, 2257, 4331, 3231, 4115, 48, 27574, 4331, 32, 2655, 2257, 653, 16, 1445, 2257, 653, 34, 288, 1071, 4055, 32, 620, 2257, 653, 34, 336, 12, 6385, 20496, 32, 12242, 34, 7827, 16, 727, 10034, 852, 1852, 11994, 16, 727, 1525, 498, 13, 288, 327, 13122, 2160, 12, 24790, 16, 852, 1852, 11994, 16, 498, 13, 263, 1458, 12, 1132, 827, 323, 2866, 18109, 1769, 289, 632, 15598, 225, 8086, 2257, 4331, 26964, 2610, 25444, 727, 5266, 32, 12242, 34, 328, 27574, 5494, 16, 203, 13491, 727, 7400, 2864, 21734, 2864, 16, 203, 13491, 727, 8086, 2257, 653, 827, 323, 498, 827, 323, 16, 203, 13491, 727, 1445, 2257, 653, 827, 323, 460, 827, 323, 1769, 4055, 32, 620, 2257, 653, 34, 336, 12, 6385, 20496, 32, 12242, 34, 7827, 16, 727, 10034, 852, 1852, 11994, 16, 727, 1525, 498, 1769, 4055, 32, 12242, 34, 13122, 2160, 12, 6385, 20496, 32, 12242, 34, 7827, 16, 727, 10034, 852, 1852, 11994, 16, 727, 1525, 498, 1769, 1250, 1914, 863, 1852,

In [35]:
tokens = tokenizer.convert_ids_to_tokens(instruction.input_ids)
print(tokens)

['<s>', 'Generate', 'Ġa', 'Ġunit', 'Ġtest', 'Ġcase', 'Ġfor', 'Ġthe', 'Ġfollowing', 'ĠJava', 'Ġmethod', ':', 'ĠRange', 'Store', 'Db', 'Ġextends', 'ĠAbstract', 'L', 'mdb', 'Db', '<', 'Range', 'Store', 'Key', ',', 'ĠValue', 'Store', 'Key', '>', 'Ġ{', 'Ġpublic', 'ĠOptional', '<', 'Value', 'Store', 'Key', '>', 'Ġget', '(', 'final', 'ĠTxn', '<', 'ByteBuffer', '>', 'Ġtxn', ',', 'Ġfinal', 'ĠUID', 'Ġmap', 'Definition', 'Uid', ',', 'Ġfinal', 'Ġlong', 'Ġkey', ')', 'Ġ{', 'Ġreturn', 'ĠgetAs', 'Bytes', '(', 'txn', ',', 'Ġmap', 'Definition', 'Uid', ',', 'Ġkey', ')', 'Ġ.', 'map', '(', 'value', 'Ser', 'de', '::', 'deserialize', ');', 'Ġ}', 'Ġ@', 'Inject', 'Ġ', 'ĠRange', 'Store', 'Db', '(@', 'Ass', 'isted', 'Ġfinal', 'ĠEnv', '<', 'ByteBuffer', '>', 'Ġl', 'mdb', 'Environment', ',', 'Ċ', 'ĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠ', 'Ġfinal', 'ĠByteBuffer', 'Pool', 'ĠbyteBuffer', 'Pool', ',', 'Ċ', 'ĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠ', 'Ġfinal', 'ĠRange', 'Store', 'Key', 'Ser', 'de', 'Ġkey', 'Ser', 'de', ',', 'Ċ', 'ĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠ

In [36]:
tokenizer.convert_tokens_to_string(tokens)

'<s>Generate a unit test case for the following Java method: RangeStoreDb extends AbstractLmdbDb<RangeStoreKey, ValueStoreKey> { public Optional<ValueStoreKey> get(final Txn<ByteBuffer> txn, final UID mapDefinitionUid, final long key) { return getAsBytes(txn, mapDefinitionUid, key) .map(valueSerde::deserialize); } @Inject  RangeStoreDb(@Assisted final Env<ByteBuffer> lmdbEnvironment,\n                        final ByteBufferPool byteBufferPool,\n                        final RangeStoreKeySerde keySerde,\n                        final ValueStoreKeySerde valueSerde); Optional<ValueStoreKey> get(final Txn<ByteBuffer> txn, final UID mapDefinitionUid, final long key); Optional<ByteBuffer> getAsBytes(final Txn<ByteBuffer> txn, final UID mapDefinitionUid, final long key); boolean containsMapDefinition(final Txn<ByteBuffer> txn, final UID mapDefinitionUid); void deleteMapEntries(final Txn<ByteBuffer> writeTxn,\n                                 final UID mapUid,\n                               

In [37]:
print(f"Vocab size : {tokenizer.vocab_size}")

Vocab size : 32100


In [38]:
print(f"max length : {tokenizer.model_max_length}")

max length : 512


In [39]:
print(f"model input : {tokenizer.model_input_names}")

model input : ['input_ids', 'attention_mask']


In [40]:
batch = tokenizer(train['instruction'][0],max_length=512,truncation=True,padding="max_length",return_tensors="pt")

In [41]:
batch

{'input_ids': tensor([[    1,  4625,   279,  2836,  1842,   648,   364,   326,  3751,  5110,
           707,    30,  8086,  2257,  4331,  3231,  4115,    48, 27574,  4331,
            32,  2655,  2257,   653,    16,  1445,  2257,   653,    34,   288,
          1071,  4055,    32,   620,  2257,   653,    34,   336,    12,  6385,
         20496,    32, 12242,    34,  7827,    16,   727, 10034,   852,  1852,
         11994,    16,   727,  1525,   498,    13,   288,   327, 13122,  2160,
            12, 24790,    16,   852,  1852, 11994,    16,   498,    13,   263,
          1458,    12,  1132,   827,   323,  2866, 18109,  1769,   289,   632,
         15598,   225,  8086,  2257,  4331, 26964,  2610, 25444,   727,  5266,
            32, 12242,    34,   328, 27574,  5494,    16,   203, 13491,   727,
          7400,  2864, 21734,  2864,    16,   203, 13491,   727,  8086,  2257,
           653,   827,   323,   498,   827,   323,    16,   203, 13491,   727,
          1445,  2257,   653,   827,  

In [42]:
print("Tokenizing dataset...")

Tokenizing dataset...


In [43]:
def tokenize_data(data):
  input_col=tokenizer(data['instruction'],max_length=512,truncation=True,padding="max_length",return_tensors="pt")
  target_col=tokenizer(data['output'],max_length=512,truncation=True,padding="max_length",return_tensors="pt")

  return {
      "input_ids":input_col["input_ids"],
      "attention_mask":input_col["attention_mask"],
      "labels":target_col["input_ids"]
  }

In [44]:
print("Mapping train data...")
train=train.map(tokenize_data,batched=True)

Mapping train data...


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

In [45]:
train

Dataset({
    features: ['instruction', 'output', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 800
})

In [46]:
print("Mappig test data...")
test=test.map(tokenize_data,batched=True)

Mappig test data...


Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [47]:
test

Dataset({
    features: ['instruction', 'output', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 200
})

In [48]:
train=train.remove_columns(["instruction","output"])
test=test.remove_columns(["instruction","output"])

In [49]:
train

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 800
})

# Fine-tuning

In [50]:
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name()}")
    device="cuda"
    torch_type=torch.bfloat16
else:
    device="cpu"
    torch_type=torch.bfloat16

CUDA device: Tesla P100-PCIE-16GB


In [51]:
# peft_config=LoraConfig(
#     task_type=TaskType.SEQ_2_SEQ_LM,
#     r=16,
#     lora_alpha=32,
#     lora_dropout=0.05,
#     bias="none",
#     target_modules=["q","v"]
# )

In [52]:
model = T5ForConditionalGeneration.from_pretrained(base_model,device_map=device)

config.json:   0%|          | 0.00/1.57k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/892M [00:00<?, ?B/s]

In [53]:
# peft_model=get_peft_model(model,peft_config)

In [54]:
print(f"BF16 support is {transformers.file_utils.is_torch_bf16_available()}")

BF16 support is True




In [57]:
training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    run_name ="./loggings",
    overwrite_output_dir=True,
    eval_strategy="epoch",
#     learning_rate=5e-5,
    gradient_accumulation_steps=1,
#     per_device_train_batch_size=8,
#     per_device_eval_batch_size=8,
    auto_find_batch_size = True,
    weight_decay=0.01,
    num_train_epochs=1,
    bf16=True,
    optim="adafactor",
    save_strategy="no",
    log_level="info",
    logging_first_step=True,
    ignore_data_skip =True,
    
#     torch_empty_cache_steps=1
)

In [58]:
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

In [59]:
trainer=Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=test,
    data_collator=data_collator
)

You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
Using auto half precision backend


In [60]:
print("Start trainer...")

Start trainer...


In [61]:
trainer.train()

***** Running training *****
  Num examples = 800
  Num Epochs = 1
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 100
  Number of trainable parameters = 222,882,048
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112720622223706, max=1.0…

Epoch,Training Loss,Validation Loss
1,4.3333,0.459644



***** Running Evaluation *****
  Num examples = 200
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=100, training_loss=0.7524083137512207, metrics={'train_runtime': 205.4643, 'train_samples_per_second': 3.894, 'train_steps_per_second': 0.487, 'total_flos': 487166312448000.0, 'train_loss': 0.7524083137512207, 'epoch': 1.0})

In [62]:
print("finished. Saving model...")

finished. Saving model...


In [64]:
model.save_pretrained(model_path)
tokenizer.save_pretrained(tokenizer_path)

Configuration saved in model/config.json
Configuration saved in model/generation_config.json
Model weights saved in model/model.safetensors
tokenizer config file saved in tokenizer/tokenizer_config.json
Special tokens file saved in tokenizer/special_tokens_map.json


('tokenizer/tokenizer_config.json',
 'tokenizer/special_tokens_map.json',
 'tokenizer/vocab.json',
 'tokenizer/merges.txt',
 'tokenizer/added_tokens.json')

In [85]:
# torch.cuda.empty_cache()

# Evaluation

In [66]:
# config = PeftConfig.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)
# model = PeftModel.from_pretrained(model,model_path,is_trainable=True)

loading configuration file model/config.json
Model config T5Config {
  "_name_or_path": "Salesforce/codet5-base",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "bos_token_id": 1,
  "classifier_dropout": 0.0,
  "d_ff": 3072,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dense_act_fn": "relu",
  "dropout_rate": 0.1,
  "eos_token_id": 2,
  "feed_forward_proj": "relu",
  "gradient_checkpointing": false,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": false,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 12,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 20

In [68]:
# check if it's working
# model.print_trainable_parameters()

In [69]:
tokenizer = RobertaTokenizer.from_pretrained(tokenizer_path)

loading file vocab.json
loading file merges.txt
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading file tokenizer.json


In [70]:
df

DatasetDict({
    train: Dataset({
        features: ['instruction', 'output'],
        num_rows: 800
    })
    test: Dataset({
        features: ['instruction', 'output'],
        num_rows: 200
    })
})

In [71]:
eval=df['test']

In [72]:
eval

Dataset({
    features: ['instruction', 'output'],
    num_rows: 200
})

## BLEU

In [73]:
bleu = load("bleu")

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

In [74]:
references = [[example['instruction']] for example in eval]
predictions = [example['output'] for example in eval]

In [75]:
bleu_score = bleu.compute(references=references, predictions=predictions)

In [76]:
print("BLEU score:", bleu_score['bleu'])

BLEU score: 0.030562210362714618


## code_eval

In [77]:
code_eval = load("code_eval")

Downloading builder script:   0%|          | 0.00/9.18k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/6.10k [00:00<?, ?B/s]

In [78]:
test_cases = [example['instruction'] for example in eval]
candidates = [[example['output']] for example in eval]

In [79]:
os.environ["HF_ALLOW_CODE_EVAL"] = "1"

pass_at_k, results = code_eval.compute(references=test_cases, predictions=candidates, k=[1, 2])

  self.pid = os.fork()


In [80]:
# Print the pass@1 and pass@2 scores
print("Pass@k:", pass_at_k)

Pass@k: {'pass@1': 0.0}


In [81]:
# Iterate over the results and print each one
for task_id, result_list in results.items():
    for result in result_list:
        idx, details = result
        print(f"Task ID: {details['task_id']}, Passed: {details['passed']}, Result: {details['result']}")

Task ID: 1, Passed: False, Result: failed: invalid syntax (<string>, line 1)
Task ID: 0, Passed: False, Result: failed: invalid syntax (<string>, line 1)
Task ID: 3, Passed: False, Result: failed: invalid syntax (<string>, line 1)
Task ID: 2, Passed: False, Result: failed: invalid syntax (<string>, line 1)
Task ID: 4, Passed: False, Result: failed: invalid syntax (<string>, line 1)
Task ID: 5, Passed: False, Result: failed: invalid syntax (<string>, line 1)
Task ID: 6, Passed: False, Result: failed: invalid syntax (<string>, line 1)
Task ID: 7, Passed: False, Result: failed: invalid decimal literal (<string>, line 2)
Task ID: 9, Passed: False, Result: failed: invalid syntax (<string>, line 1)
Task ID: 8, Passed: False, Result: failed: invalid syntax (<string>, line 1)
Task ID: 10, Passed: False, Result: failed: invalid syntax (<string>, line 1)
Task ID: 11, Passed: False, Result: failed: invalid syntax (<string>, line 1)
Task ID: 14, Passed: False, Result: failed: invalid syntax (<stri

# Inference

In [82]:
def generate_unit_tests(instruction):
    
  inputs = tokenizer(instruction, max_length=512, truncation=True, padding="max_length", return_tensors="pt")

  outputs = model.generate(
      input_ids=inputs["input_ids"],
      attention_mask=inputs["attention_mask"],
      max_length=512,
      num_beams=5,
      do_sample=True,  # Enable sampling for diverse output
      temperature=0.2,  # Control randomness
      top_k=100,  # Limit the sampling pool to top K tokens
      top_p=0.9,
      no_repeat_ngram_size=5,
      repetition_penalty=1.5,
      length_penalty=1.0,
      early_stopping=True
  )

  # Decode the generated output
  generated_test = tokenizer.decode(outputs[0], skip_special_tokens=True)

  return generated_test

In [83]:
instruction = """
public class SimpleCalculator {
    // Method to add two numbers
    public int add(int a, int b) {
        return a + b;
    }

    // Method to subtract two numbers
    public int subtract(int a, int b) {
        return a - b;
    }

    // Method to multiply two numbers
    public int multiply(int a, int b) {
        return a * b;
    }

    // Method to divide two numbers
    // Throws ArithmeticException if divisor is zero
    public double divide(int a, int b) {
        if (b == 0) {
            throw new ArithmeticException("Cannot divide by zero");
        }
        return (double) a / b;
    }
}
"""
prompt="Generate a unit test case for the following Java method: "+instruction
print(prompt)

Generate a unit test case for the following Java method: 
public class SimpleCalculator {
    // Method to add two numbers
    public int add(int a, int b) {
        return a + b;
    }

    // Method to subtract two numbers
    public int subtract(int a, int b) {
        return a - b;
    }

    // Method to multiply two numbers
    public int multiply(int a, int b) {
        return a * b;
    }

    // Method to divide two numbers
    // Throws ArithmeticException if divisor is zero
    public double divide(int a, int b) {
        if (b == 0) {
            throw new ArithmeticException("Cannot divide by zero");
        }
        return (double) a / b;
    }
}



In [84]:
generated_test = generate_unit_tests(prompt)
print(generated_test)

The unit test case for the given Java method is: @Test public void testDivideByZero() { int a = 1; int b = 2; int a2 = 2; int b3 = 3; int a4 = 4; int a5 = 5; int a6 = 6; int a7 = 0; int a8 = 10; int a9 = 9; int a10 = 11; int a11 = 10; int b12 = 10; double a9 = Math.round(Math.round(System.identityHashCode(a11)); assertThat(a11)).isEqualTo(1); assertThat(a12).isEqualTo(13); double a2 = Math.pow(a, a5); assertThat(b11).isNotEqualTo(10); double a3 = Math.min(a, a12); double a4 = Math.max(a, b12); double a5 = (double) Math.log(a-a-a+a+a12); double a15 = Math.log((double) Math.pow(10, a-a12); assertThat(i12).isGreaterThan(0); assertThat(j12).isLessThan(j12); }


In [86]:
torch.cuda.empty_cache()

# Push to HF

In [None]:
from huggingface_hub import HfApi, HfFolder, Repository

In [None]:
repo_name = "CodeT5"
organization_name = "CodexAI"
repo_url = f"{organization_name}/{repo_name}"

In [None]:
model.push_to_hub(repo_url)
tokenizer.push_to_hub(repo_url)