<a href="https://colab.research.google.com/github/VellummyilumVinoth/CPU_Memory_Usage/blob/main/CPU_Memory_Usage.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
cpu_info = !lscpu
for inf_item in cpu_info.get_list():
  print(inf_item)

Architecture:                    x86_64
CPU op-mode(s):                  32-bit, 64-bit
Address sizes:                   39 bits physical, 48 bits virtual
Byte Order:                      Little Endian
CPU(s):                          8
On-line CPU(s) list:             0-7
Vendor ID:                       GenuineIntel
Model name:                      Intel(R) Core(TM) i5-10210U CPU @ 1.60GHz
CPU family:                      6
Model:                           142
Thread(s) per core:              2
Core(s) per socket:              4
Socket(s):                       1
Stepping:                        12
CPU max MHz:                     4200.0000
CPU min MHz:                     400.0000
BogoMIPS:                        4199.88
Flags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid ape

In [2]:
import torch
import datetime
from transformers import AlbertForMaskedLM,RobertaTokenizerFast
from tabulate import tabulate
import os
import psutil

# Start CPU and memory usage monitoring for load the model
process = psutil.Process()
start_mem = process.memory_info().rss  / 1024 / 1024
start_cpu = process.cpu_percent()

# Load the trained model and tokenizer
output_dir = os.path.expanduser("./finetuned_albert")
model = AlbertForMaskedLM.from_pretrained(output_dir)
tokenizer = RobertaTokenizerFast.from_pretrained(output_dir)

def predict_masked_token(masked_statement, tokenizer, model):

    # Tokenize the masked statement
    input_ids = tokenizer.encode(masked_statement, add_special_tokens=False, return_tensors='pt')

    # Find the position of the masked token
    masked_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1][0].item()

    # Generate predictions for the masked token using the fine-tuned model
    with torch.no_grad():
        outputs = model(input_ids)
        predictions = outputs[0]

    # Get the top 5 predictions and their probability scores from the fine-tuned model
    probs_ft = torch.nn.functional.softmax(predictions[0, masked_token_index], dim=-1)
    top_k_ft = torch.topk(probs_ft, k=5)

    # Create a table with the top predictions and their probabilities from both models
    table = [["Fine-Tuned Model", f"{tokenizer.mask_token}"] + [tokenizer.convert_ids_to_tokens([idx])[0].replace('Ġ', '').lower() for idx in top_k_ft.indices],
             ["Probability", ""] + [f"{probs_ft[idx].item():.4f}" for idx in top_k_ft.indices]]

    # Print the table
    print(tabulate(table, headers="firstrow", tablefmt="fancy_grid"))

    return table

# Define a sample masked statement
masked_statement = "int <mask> = getAge();"

# Get the current timestamp
start_time = datetime.datetime.now()

# Start CPU and memory usage monitoring for this function
start_cpu1 = process.cpu_percent()
start_mem1 = process.memory_info().rss  / 1024 / 1024

# Call the function to generate predictions for the masked token
table = predict_masked_token(masked_statement, tokenizer, model)

# Stop CPU and memory usage monitoring for this function and print the results
end_cpu = process.cpu_percent()
end_mem = process.memory_info().rss  / 1024 / 1024
print(f"CPU usage for predict_masked_token: {(end_cpu - start_cpu1)/8 :.2f}%")
print(f"CPU usage for load_model and predict_masked_token: {(end_cpu - start_cpu)/8 :.2f}%")
print(f"Memory usage for predict_masked_token: {(end_mem - start_mem1) :.2f} MB")
print(f"Memory usage for load_model and predict_masked_token: {(end_mem - start_mem) :.2f} MB")

end_time = datetime.datetime.now()

# calculate elapsed time in milliseconds
elapsed_ms = (end_time.timestamp() - start_time.timestamp()) * 1000

print(f"Time taken for predict_masked_token: {elapsed_ms:.2f} ms")


╒════════════════════╤══════════╤═════════╤════════╤════════╤════════╤══════════╕
│ Fine-Tuned Model   │ <mask>   │   check │    int │      s │     bb │   string │
╞════════════════════╪══════════╪═════════╪════════╪════════╪════════╪══════════╡
│ Probability        │          │  0.0979 │ 0.0768 │ 0.0236 │ 0.0206 │   0.0168 │
╘════════════════════╧══════════╧═════════╧════════╧════════╧════════╧══════════╛
CPU usage for predict_masked_token: 16.77%
CPU usage for load_model and predict_masked_token: 33.92%
Memory usage for predict_masked_token: 7.24 MB
Memory usage for load_model and predict_masked_token: 95.60 MB
Time taken for predict_masked_token: 66.73 ms
