In [2]:
import torch
import wandb
from datetime import datetime
import shutil
import importlib
import os
from importlib.machinery import SourceFileLoader
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler
import random
from torch.nn.utils.rnn import pad_sequence
from torch import nn

from GPUtil import showUtilization as gpu_usage
from numba import cuda

In [3]:
from transformers import (
    MODEL_WITH_LM_HEAD_MAPPING,
    WEIGHTS_NAME,
    AdamW,
    AutoConfig,
    AutoModelWithLMHead,
    AutoTokenizer,
    PreTrainedModel,
    PreTrainedTokenizer,
    AutoModel,
    GPT2LMHeadModel,
    AutoModelForCausalLM,
    get_linear_schedule_with_warmup,
)

In [4]:
# Markup tags used to delimit the persona and the two speakers. Each tag maps
# to itself, so the rest of the notebook can look a tag up by its own text.
SPECIAL_TOKENS = {
    tag: tag
    for tag in (
        "<sp_1>",
        "</sp_1>",
        "<sp_2>",
        "</sp_2>",
        "<persona>",
        "</persona>",
    )
}

# Configuration and tokenizer of the pretrained DialoGPT-medium checkpoint.
config = AutoConfig.from_pretrained("microsoft/DialoGPT-medium")
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")

# Register the tags with the tokenizer as special tokens. This stays the last
# expression of the cell, so its return value is what the cell displays.
tokenizer.add_tokens([*SPECIAL_TOKENS.values()], special_tokens=True)

6

In [6]:
class BaseExperiment:
    """Base class for a fine-tuning experiment tracked with Weights & Biases.

    The constructor stores its collaborators, then calls ``setup`` (moves the
    model to the device, instantiates optimizer / scheduler / loss objects and
    creates the W&B model artifact) and, when ``do_unit_tests`` is true,
    ``unit_tests``. Subclasses must implement ``train_steps``, ``valid_steps``
    and ``test``.

    Args:
        model: model to train; called as ``model(X, labels=...)`` and expected
            to return an object with a ``loss`` attribute.
        tokenizer: tokenizer saved next to the model in ``wandb_save_model``.
        dataloader_train / dataloader_valid / dataloader_test: iterables that
            yield ``(X, y)`` batches.
        loss_func_class / estimate_func_class: classes instantiated without
            arguments in ``setup``.
        experiment_config: dict with at least ``'optimizer'`` (kwargs for the
            optimizer) and ``'epochs'``; ``'sheduler'`` is read only when
            ``sheduler_class`` is given.
        optimizer_class: called as ``optimizer_class(params, **config['optimizer'])``.
        sheduler_class: optional, called as
            ``sheduler_class(optimizer, **config['sheduler'])``.
        project_name: W&B project used by ``train``.
        notebook_name: path of the notebook file copied into the run (required).
        name_run: W&B run name, also used as the artifact name.
        model_description: artifact description.
        do_unit_tests: run ``unit_tests`` at the end of construction.
        pretrained_model_name: stored on the instance; not used by this class.
    """

    def __init__(self,
        model=None,
        tokenizer=None,
        dataloader_train=None,
        dataloader_valid=None,
        dataloader_test=None,
        loss_func_class=None,
        estimate_func_class=None,
        experiment_config=None,
        optimizer_class=None,
        sheduler_class=None,
        project_name=None,
        notebook_name=None,
        name_run="",
        model_description="",
        do_unit_tests=True,
        pretrained_model_name=None
        ):
        assert notebook_name is not None, f"notebook_name should be valid filename, but get {notebook_name}"

        # datasets
        self.dataloader_train = dataloader_train
        self.dataloader_valid = dataloader_valid
        self.dataloader_test = dataloader_test

        # wandb
        self.notebook_name = notebook_name
        self.project_name = project_name
        self.experiment_config = experiment_config
        self.wandb_run = None
        self.name_run = name_run
        self.model_description = model_description
        self.model_name = "pytorch_model"
        self.pure_model_name = "pytorch_model"
        self.model_artifact = None
        self.pretrained_model_name = pretrained_model_name

        # factories, instantiated in setup()
        self.optimizer_class = optimizer_class
        self.sheduler_class = sheduler_class
        self.loss_func_class = loss_func_class
        self.estimate_func_class = estimate_func_class

        self.model = model
        self.tokenizer = tokenizer
        self.optimizer = None
        self.sheduler = None
        self.loss_func = None
        self.estimate_func = None
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device {self.device}")

        # prepare for experiment
        self.setup()
        if do_unit_tests:
            self.unit_tests()

    def setup(self):
        """Move the model to the device and build optimizer, scheduler, losses and artifact."""
        self.model.to(self.device)

        self.optimizer = self.optimizer_class(self.model.parameters(), **self.experiment_config['optimizer'])

        if self.sheduler_class is not None:
            self.sheduler = self.sheduler_class(self.optimizer, **self.experiment_config['sheduler'])

        self.loss_func = self.loss_func_class()
        self.estimate_func = self.estimate_func_class()

        # The artifact is created up front; files are attached and it is logged
        # in wandb_save_model() once training has finished.
        self.model_artifact = wandb.Artifact(
            self.name_run, type="model",
            description=self.model_description,
            metadata=self.experiment_config)

    def get_date(self):
        """Return the current local time formatted as ``MM_DD_YYYY__HH:MM:SS``."""
        now = datetime.now()
        date_time = now.strftime("%m_%d_%Y__%H:%M:%S")
        return date_time

    def unit_tests(self):
        """Smoke-test the pipeline on one training and one validation batch.

        NOTE: this performs a real optimizer step, so the model weights are
        already updated once before ``train`` starts, and the model is left in
        eval mode (``train_steps`` implementations are expected to switch it
        back with ``model.train()``).
        """
        # one full forward / backward / update cycle on a training batch
        X, y = next(iter(self.dataloader_train))
        X, y = X.to(self.device), y.to(self.device)
        loss = self.model(X, labels=X).loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # one forward pass on a validation batch; no graph is needed here
        self.model.eval()
        with torch.no_grad():
            X, y = next(iter(self.dataloader_valid))
            X, y = X.to(self.device), y.to(self.device)
            self.model(X, labels=X).loss

        print("tests ok")

    def train(self):
        """Run ``train_steps`` + ``valid_steps`` for every epoch inside a W&B run, then save."""
        # https://colab.research.google.com/github/wandb/examples/blob/master/colabs/wandb-artifacts/Pipeline_Versioning_with_W%26B_Artifacts.ipynb#scrollTo=qrAWbBV1rd4I
        # Moving the wandb.init(...) call into a variable to tidy this method up
        # led to a wandb error, so the call is kept inline.
        with wandb.init(project=self.project_name, entity="dimweb",
                        settings=wandb.Settings(
                            start_method="thread",
                            ),
                        reinit=True,
                        name=self.name_run,
                        config=self.experiment_config,
                        ) as run:

            # keep both attribute names pointing at the active run
            self.wandb_run = run
            self.run = run

            epochs = self.experiment_config['epochs']
            for i in range(epochs):
                print(f"Epoch: {i}")
                self.train_steps()
                self.valid_steps()

            # must happen while the run is still open: it uses wandb.run.dir
            self.wandb_save_model()

            print(f"train end")

    def save_model_class(self):
        """Copy ``./models/<model_class_name>.py`` into the run directory and attach it to the artifact."""
        model_class_name = self.experiment_config['model_class_name']
        class_script_path_dest = f"{os.path.join(wandb.run.dir, model_class_name)}.py"
        class_script_path_src = f"./models/{model_class_name}.py"
        shutil.copy2(class_script_path_src, class_script_path_dest)
        self.model_artifact.add_file(class_script_path_dest)
        wandb.save(class_script_path_dest)

    def wandb_save_model(self):
        """Save model + tokenizer locally, attach the notebook to the artifact and log it.

        The weights are NOT uploaded: only the local folder they were saved to
        is recorded (``saved_path``), so the artifact can be restored only on a
        machine where that folder exists.
        """
        # wandb uses symlinks to store files; permission problems made that
        # fail, so the model is written into a folder derived from the run
        # directory instead ("<run>/files" -> "<run>_local").
        # https://docs.wandb.ai/guides/track/advanced/save-restore#example-of-saving-a-file-to-the-wandb-run-directory
        saved_path = str(wandb.run.dir).replace("/files", "_local")
        self.model.save_pretrained(saved_path)
        self.tokenizer.save_pretrained(saved_path)

        self.experiment_config['saved_path'] = saved_path
        # The artifact was created in setup() and may hold its own copy of the
        # config, so record the path on the artifact metadata explicitly;
        # load_model() / test() read it from there.
        self.model_artifact.metadata['saved_path'] = saved_path

        # save notebook
        notebook_path = os.path.join(wandb.run.dir, self.notebook_name)
        shutil.copy2(self.notebook_name, notebook_path)
        self.model_artifact.add_file(notebook_path)
        wandb.save(notebook_path)

        wandb.log_artifact(self.model_artifact)

    def train_steps(self):
        """One training epoch; must be provided by the subclass."""
        raise NotImplementedError("You need specify training steps")

    def valid_steps(self):
        """One validation pass; must be provided by the subclass."""
        raise NotImplementedError("You need specify valid steps")

    def load_model(self, artifact_name=""):
        """Replace ``self.model`` / ``self.tokenizer`` with the ones referenced by a W&B artifact.

        The artifact metadata must contain ``saved_path``, a local folder
        written by ``wandb_save_model``. The optimizer created in ``setup`` is
        not rebuilt, so it still refers to the previous model's parameters.
        """
        with wandb.init(project="gpt_persona_bot", job_type="inference"):
            model_artifact = wandb.use_artifact(artifact_name)
            model_config = model_artifact.metadata
            model_folder = model_config['saved_path']
            self.model = AutoModelForCausalLM.from_pretrained(model_folder)
            self.tokenizer = AutoTokenizer.from_pretrained(model_folder)
            self.model.to(self.device)
            # Only release PyTorch's cached blocks here. free_gpu_cache() closes
            # the CUDA context and must not run once the fresh model is on the GPU.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    def free_gpu_cache(self, device_id=0):
        """Print GPU utilisation, release GPU memory, print utilisation again.

        NOTE(review): besides ``torch.cuda.empty_cache()`` this closes and
        re-selects the CUDA device through numba. Tensors and models that are
        already on the GPU should be assumed unusable afterwards, so call it
        only between experiments.

        Args:
            device_id: index of the CUDA device to reset (default 0).
        """
        if not torch.cuda.is_available():
            print("CUDA is not available, nothing to free")
            return

        print("Initial GPU Usage")
        gpu_usage()

        torch.cuda.empty_cache()

        cuda.select_device(device_id)
        cuda.close()
        cuda.select_device(device_id)

        print("GPU Usage after emptying the cache")
        gpu_usage()

    def test(self, artifact_name=""):
        """Inference entry point; must be provided by the subclass."""
        raise NotImplementedError("You need specify test steps")


class Experiment(BaseExperiment):
    """Causal-LM fine-tuning experiment: training / validation loops and a chat test."""

    def __init__(self, **kwargs):
        super(Experiment, self).__init__(**kwargs)

    def train_steps(self):
        """Run one epoch over ``dataloader_train``.

        Every ``experiment_config['check_interval']`` batches the current batch
        loss and its perplexity (``exp(loss)``) are logged to W&B.
        """
        self.model.train()
        interval = self.experiment_config['check_interval']

        for batch, (X, y) in enumerate(self.dataloader_train):
            # Send data to training device
            X, y = X.to(self.device), y.to(self.device)

            # The model computes the language-modelling loss itself when
            # labels are passed; the inputs double as labels.
            loss = self.model(X, labels=X).loss

            # Backpropagation
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            if self.sheduler is not None:
                self.sheduler.step()

            # Progress output (perplexity is only needed when it is logged)
            if batch % interval == 0:
                loss_value = loss.item()
                perplexity = torch.exp(torch.tensor(loss_value))
                wandb.log({"train_loss": loss_value})
                wandb.log({"train_perplexity": perplexity})

    def valid_steps(self):
        """Log mean loss and perplexity over ``dataloader_valid`` to W&B."""
        self.model.eval()
        test_loss = 0
        num_batches = len(self.dataloader_valid)

        with torch.no_grad():
            for X, y in self.dataloader_valid:
                X, y = X.to(self.device), y.to(self.device)
                loss = self.model(X, labels=y).loss
                test_loss += loss.item()

        test_loss /= num_batches
        perplexity = torch.exp(torch.tensor(test_loss))

        wandb.log({"val_loss": test_loss})
        wandb.log({"valid_perplexity": perplexity})

    @staticmethod
    def test(artifact_name="", persona="", user_inputs=None, interact=False, cuda=False):
        """Chat with a model restored from a W&B artifact and print the dialogue.

        Args:
            artifact_name: W&B artifact whose metadata holds ``saved_path``,
                the local folder with the saved model and tokenizer.
            persona: persona text; it is wrapped in the persona tags.
            user_inputs: scripted user utterances, one per turn. Ignored when
                ``interact`` is true; ``None`` means no scripted turns.
            interact: read every user turn from ``input()`` (15 turns).
            cuda: run on the GPU instead of the CPU. (Inside this function the
                name shadows the module-level numba ``cuda`` import.)
        """
        interactive_turns = 15
        scripted_inputs = list(user_inputs) if user_inputs is not None else []

        with wandb.init(project="gpt_persona_bot", job_type="inference"):
            model_artifact = wandb.use_artifact(artifact_name)

            # the files of the artifact are fetched, but the model itself is
            # read from the local folder recorded in the metadata
            model_artifact.download()
            model_folder = model_artifact.metadata['saved_path']
            print(model_folder)
            model = AutoModelForCausalLM.from_pretrained(model_folder)
            model.eval()
            tokenizer = AutoTokenizer.from_pretrained(model_folder)
            device = torch.device("cuda") if cuda else torch.device("cpu")
            model = model.to(device)
            print("Start conversation")
            print(persona)
            persona = f"{SPECIAL_TOKENS['<persona>']}{persona}{SPECIAL_TOKENS['</persona>']}"
            persona_ids = tokenizer.encode(persona, return_tensors='pt').to(device)
            # id of the tag that closes a bot reply
            end_of_reply_id = tokenizer.get_added_vocab()['</sp_2>']

            steps = interactive_turns if interact else len(scripted_inputs)
            history = []
            for step in range(steps):
                user_input = input() if interact else scripted_inputs[step]
                print(f"User: {user_input}")

                # user turn followed by the tag that opens the bot reply
                user_input = f"{SPECIAL_TOKENS['<sp_1>']}{user_input}{SPECIAL_TOKENS['</sp_1>']}{SPECIAL_TOKENS['<sp_2>']}"
                history.append(user_input)
                new_user_input_ids = tokenizer.encode(user_input, return_tensors='pt').to(device)

                # Show the last three exchanges. max() keeps the start index
                # non-negative; a negative start would slice from the end.
                history_chat = "".join(history[max(0, step - 2):])
                print("-"*100)
                print(history_chat)
                print("-"*100)

                # The prompt is the persona plus the current user turn only;
                # earlier turns are displayed above but not fed to the model.
                bot_input_ids = torch.cat([persona_ids, new_user_input_ids], dim=-1)

                # sample a reply; max_length bounds prompt + reply tokens
                model_response = model.generate(
                    bot_input_ids,
                    max_length=250,
                    pad_token_id=tokenizer.eos_token_id,
                    no_repeat_ngram_size=3,
                    do_sample=True,
                    top_k=100,
                    top_p=0.7,
                    temperature = 0.8,
                )

                # Keep only the newly generated tokens and cut the reply at the
                # first closing tag. The prompt is rebuilt on every turn, so the
                # search must start at the prompt end, not at a position
                # remembered from the previous turn. If no closing tag was
                # produced, the whole continuation is used.
                generated_ids = model_response[0, bot_input_ids.shape[-1]:].tolist()
                if end_of_reply_id in generated_ids:
                    generated_ids = generated_ids[:generated_ids.index(end_of_reply_id)]

                bot_response_decode = tokenizer.decode(generated_ids, skip_special_tokens=True)
                history[-1] = f"{history[-1]}{bot_response_decode}{SPECIAL_TOKENS['</sp_2>']}"
                print(f"Bot: {bot_response_decode}")
                print("-"*100)
                print(history)
                print("-"*100)


## Создаем датасет

In [7]:
# Load the raw persona-chat table from a CSV file located next to the notebook.
# The dataset code below reads its 'Persona' and 'chat' columns.
persona_chat_original = pd.read_csv("./persona_chat.csv")
# Uncomment to work on a subset of the rows while debugging:
# persona_chat_original = persona_chat_original[:3000]
# persona_chat_original.head(3)

In [12]:
class PersonaChatGenerator:
    """Turns raw persona-chat rows into ``(persona, history)`` training samples.

    ``initial_dataset`` must provide the columns ``'Persona'`` and ``'chat'``;
    a chat is a newline-separated list of utterances in which the speakers
    alternate, starting with speaker 1. The processed frame is built once in
    the constructor and kept in ``processed_dataset``.

    Args:
        initial_dataset: DataFrame with the raw dialogues.
        tokenizer: stored on the instance; not used by this class.
    """

    def __init__(self,
        initial_dataset=None,
        tokenizer=None
    ):
        self.initial_dataset = initial_dataset
        self.tokenizer = tokenizer
        # keep the result instead of throwing it away
        self.processed_dataset = self.process_dataset()

    def process_dataset(self):
        """Build one sample per speaker-2 reply.

        Each sample pairs the dialogue's persona with the concatenation of the
        tagged utterances up to and including that reply, limited to the last
        five utterances.

        Returns:
            A new DataFrame with the columns ``'persona'`` and ``'history'``.
        """
        sp_1_start = SPECIAL_TOKENS['<sp_1>']
        sp_1_end = SPECIAL_TOKENS['</sp_1>']
        sp_2_start = SPECIAL_TOKENS['<sp_2>']
        sp_2_end = SPECIAL_TOKENS['</sp_2>']

        personas = []
        histories = []
        rows = zip(self.initial_dataset['Persona'], self.initial_dataset['chat'])
        for persona, chat in rows:
            # The last split element is dropped unconditionally - presumably
            # every chat ends with a trailing newline (TODO confirm).
            utterances = chat.split("\n")[:-1]
            history = []
            for j, reply in enumerate(utterances):
                if (j + 1) % 2 == 0:
                    # speaker 2 (the bot): close the window with this reply
                    history.append(f"{sp_2_start}{reply}{sp_2_end}")
                    window = history[max(0, j - 4):j + 1]
                    personas.append(persona)
                    histories.append("".join(window))
                else:
                    # speaker 1 (the user) only extends the history
                    history.append(f"{sp_1_start}{reply}{sp_1_end}")

        return pd.DataFrame(data={"persona": personas, "history": histories})

# Hold out 1% of the dialogues for validation. The split is seeded so that the
# same rows end up in the validation set on every run; later cells pick a
# validation persona by row number and would otherwise see a different row
# after each kernel restart.
train_dataset_csv, valid_dataset_csv = train_test_split(
    persona_chat_original,
    test_size=0.01,
    random_state=42,
)
# Renumber the rows of both splits from 0 (the old index is kept as a column).
train_dataset_csv, valid_dataset_csv = train_dataset_csv.reset_index(), valid_dataset_csv.reset_index()

train_dataset_generator = PersonaChatGenerator(
    initial_dataset=train_dataset_csv,
    tokenizer=tokenizer
)

valid_dataset_generator = PersonaChatGenerator(
    initial_dataset=valid_dataset_csv,
    tokenizer=tokenizer
)

In [13]:
class PersonaChatDataset(Dataset):
    """Torch dataset that tokenizes ``(persona, history)`` rows on the fly.

    Args:
        initial_dataset: DataFrame with the string columns ``'persona'`` and
            ``'history'``.
        tokenizer: object with an ``encode(text)`` method returning token ids.
        is_validation: when false, the persona sentences are shuffled on every
            access; when true their order is kept.
    """

    def __init__(self,
        initial_dataset=None,
        tokenizer=None,
        is_validation=False
    ):
        self.initial_dataset = initial_dataset
        self.tokenizer = tokenizer
        self.is_validation = is_validation

    def __len__(self):
        return len(self.initial_dataset)

    def __getitem__(self, idx):
        """Return ``{'feature': ids, 'target': ids}`` for row ``idx``.

        The ids are the tagged persona followed by the dialogue history; both
        keys refer to the same 1-D tensor.
        """
        row = self.initial_dataset.iloc[idx]

        # Split the persona into sentences. Strip first and filter afterwards,
        # so whitespace-only fragments (e.g. after a trailing ". ") are dropped
        # instead of turning into an empty sentence.
        sentences = [part.strip() for part in row['persona'].split(".")]
        sentences = [sentence for sentence in sentences if sentence]
        if not self.is_validation:
            random.shuffle(sentences)

        pieces = [sentence + ". " for sentence in sentences]
        if pieces:
            # no trailing space before the closing tag
            pieces[-1] = pieces[-1][:-1]
        pieces = [SPECIAL_TOKENS['<persona>']] + pieces + [SPECIAL_TOKENS['</persona>']]

        # Every piece is encoded on its own and the ids are concatenated.
        persona = torch.cat([
            torch.tensor(self.tokenizer.encode(piece), dtype=torch.long).flatten()
            for piece in pieces
        ])

        history = torch.tensor(self.tokenizer.encode(row['history']), dtype=torch.long).flatten()

        feature = torch.cat([persona, history])

        return {
            "feature": feature,
            "target": feature
        }

# Training samples: persona sentences are shuffled on every access.
train_dataset = PersonaChatDataset(
    tokenizer=tokenizer,
    initial_dataset=train_dataset_generator.process_dataset(),
    is_validation=False,
)

# Validation samples: persona sentences keep their original order.
valid_dataset = PersonaChatDataset(
    tokenizer=tokenizer,
    initial_dataset=valid_dataset_generator.process_dataset(),
    is_validation=True,
)

def collate(examples):
    """Pad a list of dataset items into two ``(batch, max_len)`` long tensors.

    Args:
        examples: list of dicts with 1-D tensors under ``'feature'`` and
            ``'target'``.

    Returns:
        ``(features, target)``: both right-padded with 0 to the longest
        sequence of their own list and cast to ``torch.long``.

    NOTE(review): 0 is the default padding value of ``pad_sequence`` and is
    presumably also a regular vocabulary id, so padded positions are not
    excluded from a loss computed on these tensors - consider padding the
    targets with the loss's ignore index instead.
    """
    features = pad_sequence([item['feature'] for item in examples], batch_first=True)
    # pad the collected targets themselves, not the already padded features
    target = pad_sequence([item['target'] for item in examples], batch_first=True)
    return features.to(torch.long), target.to(torch.long)

# Training batches: 4 shuffled samples each; an incomplete final batch is dropped.
train_dataloader = DataLoader(
    dataset=train_dataset,
    collate_fn=collate,
    batch_size=4,
    shuffle=True,
    drop_last=True,
)

# Validation batches: 8 samples each, in dataset order; the final partial batch is kept.
valid_dataloader = DataLoader(
    dataset=valid_dataset,
    collate_fn=collate,
    batch_size=8,
    shuffle=False,
    drop_last=False,
)

In [14]:
# Sanity check: decode one training sample back to text to inspect the
# persona / speaker markup produced by the dataset.
tokenizer.decode(train_dataset[4]['feature'])

'<persona>i am the youngest sibling of four. chocolate is my favorite food. i geocache in my spare time. i am learning how to play the piano.</persona><sp_2>i love chocolate, my sisters hate it.</sp_2><sp_1>i stay away from the sun also. as a fair skinned caucasian i burn.</sp_1><sp_2>do you play the piano? i am learning.</sp_2><sp_1>i have tried but i cannot keep my interest in it. do you like it?</sp_1><sp_2>so far, i do. i am not a music person, but my sisters play.</sp_2>'

## Тестируем dumb модель

In [15]:
# Run settings. "optimizer" is forwarded as keyword arguments to the optimizer
# class, "check_interval" is the logging period in batches, and "saved_path"
# is filled in when the model is saved.
exp_config = dict(
    batch_size=4,
    check_interval=100,
    epochs=1,
    optimizer=dict(lr=1e-3),
    model_name="pytorch_model",
    saved_path="",
)

# Scheduler keyword arguments; they are only consumed when a scheduler class
# is passed to the experiment (none is passed below).
exp_config["sheduler"] = dict(
    max_lr=0.01,
    steps_per_epoch=len(train_dataloader),
    epochs=exp_config["epochs"],
)

# Pretrained DialoGPT-medium, with the embedding matrix resized to cover the
# tokens that were added to the tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/DialoGPT-medium",
    config=config,
    ignore_mismatched_sizes=True,
)
model.resize_token_embeddings(len(tokenizer))

# Everything the experiment needs is handed over through one dict of keyword
# arguments rather than being read from notebook globals.
exp_params = dict(
    model=model,
    tokenizer=tokenizer,
    dataloader_train=train_dataloader,
    dataloader_valid=valid_dataloader,
    dataloader_test=valid_dataloader,
    loss_func_class=nn.CrossEntropyLoss,
    estimate_func_class=nn.CrossEntropyLoss,
    experiment_config=exp_config,
    optimizer_class=torch.optim.Adam,
    sheduler_class=None,
    notebook_name="gpt_persona_v1.ipynb",
    project_name="gpt_persona_bot",
    name_run="persona_gpt",
    model_description="тренирую на полном датасете",
    do_unit_tests=True,
)

experiment_test = Experiment(**exp_params)

Using device cuda
tests ok


In [16]:
# Start the W&B run: train and validate for the configured number of epochs,
# then save the model and log the artifact.
experiment_test.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdimweb[0m. Use [1m`wandb login --relogin`[0m to force relogin


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 0


In [25]:
# Persona for the chat test: the row labelled 18 of the validation split.
# Which persona that is depends on how train_test_split partitioned the data.
# persona = train_dataset_csv['Persona'][1]
persona = valid_dataset_csv['Persona'][18]
# persona = "i like catsi like to travelfavorite color is greeni got a new jobi like cars"
# Scripted user turns, sent to the model one per step.
user_inputs = [
    "Hi. What is your name?",
    "What do you like?",
    "What is your job?",
	"Where is your mom?",
	"Fuck you leather man!"
]

# Restore the model referenced by the given artifact version and print the
# resulting dialogue (scripted mode, on the GPU).
Experiment.test(
	artifact_name="persona_gpt:v9",
	persona=persona,
	user_inputs=user_inputs,
	interact=False,
	cuda=True
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


/home/dimweb/sandbox/persona_bot/gpt_persona_v1/wandb/run-20220709_022541-cnmyn0dz_local
Start conversation
 i have six siblings. the future scares me. i was adopted when i was a baby. my adopted dad works at hp.
User: Hi. What is your name?
----------------------------------------------------------------------------------------------------
<sp_1>Hi. What is your name?</sp_1><sp_2>
----------------------------------------------------------------------------------------------------
Bot: i am not sure if i have a good name, what is it?
----------------------------------------------------------------------------------------------------
['<sp_1>Hi. What is your name?</sp_1><sp_2>i am not sure if i have a good name, what is it?</sp_2>']
----------------------------------------------------------------------------------------------------
User: What do you like?
----------------------------------------------------------------------------------------------------
<sp_1>What do you like?</sp_1><s

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

In [17]:
import torch
from GPUtil import showUtilization as gpu_usage
from numba import cuda

def free_gpu_cache(device_id=0):
    """Print GPU utilisation, release GPU memory, then print utilisation again.

    Does nothing except print a notice when CUDA is not available.

    NOTE(review): besides ``torch.cuda.empty_cache()`` this closes and
    re-selects the CUDA device through numba. Tensors and models that are
    already on the GPU should be assumed unusable afterwards, so run it only
    when nothing on the GPU is needed any more.

    Args:
        device_id: index of the CUDA device to reset (default 0).
    """
    if not torch.cuda.is_available():
        print("CUDA is not available, nothing to free")
        return

    print("Initial GPU Usage")
    gpu_usage()

    torch.cuda.empty_cache()

    cuda.select_device(device_id)
    cuda.close()
    cuda.select_device(device_id)

    print("GPU Usage after emptying the cache")
    gpu_usage()

# Release the GPU memory held by this kernel; the utilisation table is printed
# before and after.
free_gpu_cache()

Initial GPU Usage
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
| ID | GPU | MEM |
------------------
|  0 |  0% | 99% |
GPU Usage after emptying the cache
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
| ID | GPU | MEM |
------------------
|  0 |  1% |  1% |
