<a href="https://colab.research.google.com/github/VellummyilumVinoth/CPU_Usage_Memory_Usage_using_GPU/blob/main/CPU_Usage_Memory_Usage_using_GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install into the kernel's own environment (%pip, not a shell escape) and pin
# the version this notebook was run against so re-runs stay reproducible.
%pip install -q transformers==4.27.4

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.27.4-py3-none-any.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m71.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m103.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.4 tokenizers-0.13.3 transformers-4.27.4


In [None]:
import torch
import datetime
from transformers import AlbertForMaskedLM,RobertaTokenizerFast
from tabulate import tabulate
import os
import psutil

# Start CPU and memory usage monitoring for loading the model
process = psutil.Process()
# Resident set size of this process, converted from bytes to MB.
start_mem = process.memory_info().rss  / 1024 / 1024
# The first cpu_percent() call on a Process object has no previous sample to
# compare against: its return value (0.0) is not a measurement, the call only
# marks the start of the interval that the next cpu_percent() call reports on.
start_cpu = process.cpu_percent()

# Load the trained model and tokenizer
output_dir = "/content/drive/MyDrive/finetuned_albert"
if not os.path.isdir(output_dir):
    # Fail early with an actionable message instead of letting from_pretrained
    # fail on a path that does not exist on this runtime.
    raise FileNotFoundError(
        f"Fine-tuned model directory not found: {output_dir!r}. "
        "Mount Google Drive (or point output_dir at the saved model) before running this cell."
    )
model = AlbertForMaskedLM.from_pretrained(output_dir)
tokenizer = RobertaTokenizerFast.from_pretrained(output_dir)

def predict_masked_token(masked_statement, tokenizer, model, top_k=5):
    """Predict the most likely replacements for the first mask token in a statement.

    The statement is tokenized without special tokens, run through ``model``
    with gradients disabled, and the scores at the first mask position are
    turned into probabilities with a softmax. The best candidates are printed
    as a table and the table rows are returned.

    Args:
        masked_statement: Text containing at least one ``tokenizer.mask_token``.
        tokenizer: Tokenizer providing ``encode``, ``mask_token``,
            ``mask_token_id`` and ``convert_ids_to_tokens``.
        model: Callable model; the first element of its output is used as the
            per-position vocabulary scores.
        top_k: Number of candidates to report (default 5). Capped at the
            vocabulary size.

    Returns:
        A two-row list: the header row (label, mask token, candidate tokens
        lower-cased with the 'Ġ' marker removed) and the probability row
        (label, empty cell, probabilities formatted to four decimals).

    Raises:
        ValueError: If ``top_k`` is smaller than 1 or the statement contains
            no mask token.
    """
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k}")

    # Tokenize the masked statement
    input_ids = tokenizer.encode(masked_statement, add_special_tokens=False, return_tensors='pt')

    # Find the position of the (first) masked token; report a clear error
    # instead of an out-of-bounds index when the statement has no mask.
    mask_columns = torch.where(input_ids == tokenizer.mask_token_id)[1]
    if mask_columns.numel() == 0:
        raise ValueError(
            f"masked_statement must contain the mask token {tokenizer.mask_token!r}"
        )
    masked_token_index = mask_columns[0].item()

    # Keep the input on the same device as the model, when the model exposes one.
    device = getattr(model, "device", None)
    if device is not None:
        input_ids = input_ids.to(device)

    # Generate predictions for the masked token using the fine-tuned model
    with torch.no_grad():
        outputs = model(input_ids)
        predictions = outputs[0]

    # Get the top predictions and their probability scores from the fine-tuned model
    probs_ft = torch.nn.functional.softmax(predictions[0, masked_token_index], dim=-1)
    top_k_ft = torch.topk(probs_ft, k=min(top_k, probs_ft.shape[-1]))
    candidate_ids = top_k_ft.indices.tolist()
    candidate_probs = top_k_ft.values.tolist()

    # Create a table with the top predictions and their probabilities
    candidate_tokens = [
        token.replace('Ġ', '').lower()
        for token in tokenizer.convert_ids_to_tokens(candidate_ids)
    ]
    table = [["Fine-Tuned Model", f"{tokenizer.mask_token}"] + candidate_tokens,
             ["Probability", ""] + [f"{prob:.4f}" for prob in candidate_probs]]

    # Print the table
    print(tabulate(table, headers="firstrow", tablefmt="fancy_grid"))

    return table

# Define a sample masked statement
masked_statement = "int <mask> = getAge();"

# Per-process CPU figures can exceed 100% on multi-core machines; divide by the
# number of logical cores of this machine instead of a hard-coded core count.
cpu_count = psutil.cpu_count() or 1

# Baselines for the prediction step only. cpu_times() is cumulative, so the
# difference of two readings is the CPU time consumed in between.
start_mem1 = process.memory_info().rss  / 1024 / 1024
start_cpu_times = process.cpu_times()

# Get the current timestamp
start_time = datetime.datetime.now()

# Call the function to generate predictions for the masked token
table = predict_masked_token(masked_statement, tokenizer, model)

# Stop the clock immediately so the timing covers only the call itself
end_time = datetime.datetime.now()
end_cpu_times = process.cpu_times()

# cpu_percent() reports utilisation since the previous cpu_percent() call on
# this Process object. The previous call is the one made before the model was
# loaded (start_cpu), so this single reading covers load + predict. Readings
# must not be subtracted from each other: each one is already an interval average.
end_cpu = process.cpu_percent()
end_mem = process.memory_info().rss  / 1024 / 1024

# calculate elapsed time in milliseconds
elapsed_ms = (end_time - start_time).total_seconds() * 1000

# CPU utilisation of the prediction step: CPU seconds used / wall seconds elapsed
predict_cpu_seconds = ((end_cpu_times.user - start_cpu_times.user)
                       + (end_cpu_times.system - start_cpu_times.system))
predict_cpu = 100 * predict_cpu_seconds / (elapsed_ms / 1000) if elapsed_ms > 0 else 0.0

print(f"CPU usage for predict_masked_token: {predict_cpu / cpu_count :.2f}%")
print(f"CPU usage for load_model and predict_masked_token: {end_cpu / cpu_count :.2f}%")
print(f"Memory usage for predict_masked_token: {(end_mem - start_mem1) :.2f} MB")
print(f"Memory usage for load_model and predict_masked_token: {(end_mem - start_mem) :.2f} MB")

print(f"Time taken for predict_masked_token: {elapsed_ms:.2f} ms")


╒════════════════════╤══════════╤═════════╤════════╤════════╤════════╤══════════╕
│ Fine-Tuned Model   │ <mask>   │   check │    int │      s │     bb │   string │
╞════════════════════╪══════════╪═════════╪════════╪════════╪════════╪══════════╡
│ Probability        │          │  0.0979 │ 0.0768 │ 0.0236 │ 0.0206 │   0.0168 │
╘════════════════════╧══════════╧═════════╧════════╧════════╧════════╧══════════╛
CPU usage for predict_masked_token: 3.60%
CPU usage for load_model and predict_masked_token: 5.85%
Memory usage for predict_masked_token: 6.61 MB
Memory usage for load_model and predict_masked_token: 96.49 MB
Time taken for predict_masked_token: 151.57 ms
