# README
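This notebook checks that no GPU memory is leaked when the models are run one after another: each model is loaded, used to generate a few tokens, and then freed before the next one is loaded.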

In [1]:
# All imports for the notebook live in this cell. A progress message is printed
# before each group and once at the end (presumably so a slow import can be
# spotted from the cell output).
print("Importing torch...")
import torch
import torch.nn as nn
print("Importing HF...")
# Hugging Face `transformers` tokenizer / causal-LM classes.
from transformers import AutoTokenizer, AutoModelForCausalLM
print("Importing python modules...")
# `timehelp` and `model_wrapper` are presumably project-local modules (TODO
# confirm); `re` is the standard-library regex module.
from timehelp import time_start, time_end
from model_wrapper import Model, ModelFamily, MultipleChoiceStrategy
import re
print("Done!")

Importing torch...
Importing HF...
Importing python modules...
Done!


In [2]:
# Attempt to load all models in succession: for each entry of
# `ModelFamily.CodeGen1.mono`, build the model, configure it, generate up to 8
# new tokens from the prompt "#", print the decoded text, then free the model
# before moving on to the next one.
for key, model_name in ModelFamily.CodeGen1.mono.items():
    print("=== Loading", key, f"({model_name}) ===")
    # Release unoccupied cached CUDA memory before the next model is loaded.
    torch.cuda.empty_cache()
    model = Model(model_name)
    # Pre-bind both names so the `del` in the `finally` block is valid even
    # when a step raised before they were assigned.
    inputs = sample = None
    try:
        model.configure(time=True)
        inputs = model.tokenize("#")
        sample = model.generate(inputs, time=True, max_new_tokens=8)
        print(model.decode(sample, inputs))
        print()
    finally:
        # Always release the model, even when configure/tokenize/generate/decode
        # raised; otherwise the notebook kernel keeps `model` bound and whatever
        # it allocated stays alive after the failed cell.
        # NOTE(review): assumes `model.free()` is safe to call on a model whose
        # `configure()` did not complete -- confirm against model_wrapper.
        print("Freeing model...")
        del inputs, sample
        model.free()
    print("-" * 50)
    print()

=== Loading 350M (Salesforce/codegen-350M-mono) ===
[2024-05-03@18:51:57|model.device] Starting timer.
Configuring torch device...
Using device: cuda:0 aka cuda:0
[2024-05-03@18:51:58|model.device] Time elapsed: 69ms
[2024-05-03@18:51:58|model.tokenizer] Starting timer.
[2024-05-03@18:51:58|model.tokenizer] Time elapsed: 419ms
[2024-05-03@18:51:58|model.model] Starting timer.
Obtaining model...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[2024-05-03@18:52:09|model.model] Time elapsed: 11s 17ms
Token count in input: 1
Generating...
[2024-05-03@18:52:09|model.generate] Starting timer.
[2024-05-03@18:52:13|model.generate] Time elapsed: 4s 10ms
 -*- coding: utf-

Freeing model...
--------------------------------------------------

=== Loading 2B (Salesforce/codegen-2B-mono) ===
[2024-05-03@18:52:13|model.device] Starting timer.
Configuring torch device...
Using device: cuda:0 aka cuda:0
[2024-05-03@18:52:13|model.device] Time elapsed: ~0s
[2024-05-03@18:52:13|model.tokenizer] Starting timer.
[2024-05-03@18:52:13|model.tokenizer] Time elapsed: 162ms
[2024-05-03@18:52:13|model.model] Starting timer.
Obtaining model...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[2024-05-03@18:52:45|model.model] Time elapsed: 31s 604ms
Token count in input: 1
Generating...
[2024-05-03@18:52:45|model.generate] Starting timer.
[2024-05-03@18:52:45|model.generate] Time elapsed: 569ms
 -*- coding: utf-

Freeing model...
--------------------------------------------------

=== Loading 6B (Salesforce/codegen-6B-mono) ===
[2024-05-03@18:52:46|model.device] Starting timer.
Configuring torch device...
Using device: cuda:0 aka cuda:0
[2024-05-03@18:52:46|model.device] Time elapsed: ~0s
[2024-05-03@18:52:46|model.tokenizer] Starting timer.
[2024-05-03@18:52:46|model.tokenizer] Time elapsed: 167ms
[2024-05-03@18:52:46|model.model] Starting timer.
Obtaining model...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[2024-05-03@18:54:01|model.model] Time elapsed: 1min 14s
Token count in input: 1
Generating...
[2024-05-03@18:54:01|model.generate] Starting timer.
[2024-05-03@18:54:22|model.generate] Time elapsed: 20s 932ms
!/usr/bin/env python


Freeing model...
--------------------------------------------------

=== Loading 16B (Salesforce/codegen-16B-mono) ===
[2024-05-03@18:54:22|model.device] Starting timer.
Configuring torch device...
Using device: cuda:0 aka cuda:0
[2024-05-03@18:54:22|model.device] Time elapsed: ~0s
[2024-05-03@18:54:22|model.tokenizer] Starting timer.
[2024-05-03@18:54:22|model.tokenizer] Time elapsed: 183ms
[2024-05-03@18:54:22|model.model] Starting timer.
Obtaining model...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[2024-05-03@18:57:09|model.model] Time elapsed: 2min 46s
Token count in input: 1
Generating...
[2024-05-03@18:57:09|model.generate] Starting timer.
[2024-05-03@18:57:50|model.generate] Time elapsed: 41s 52ms
 -*- coding: utf-

Freeing model...
--------------------------------------------------

