In [2]:
import torch
import os
import re
import json
from copy import deepcopy
from datasets import load_dataset
os.environ["HF_Home"] = "cache"
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
with open(".env") as f:
    token = f.read().strip()
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
quantization_config = BitsAndBytesConfig(load_in_8bit=False)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct", token=token, use_fast=True, cache_dir="cache")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct", token=token, cache_dir="cache", torch_dtype=torch.bfloat16, device_map="cuda:0", quantization_config=quantization_config)
# model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B", load_in_4bit=True, token=token, cache_dir="cache", device_map="auto")

2025-04-07 08:02:57.154342: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Downloading shards: 100%|██████████| 4/4 [00:31<00:00,  7.83s/it]
Loading checkpoint shards: 100%|██████████| 4/4 [00:09<00:00,  2.28s/it]


In [3]:
batch_size = 2
group_batch_size = 4
num_steps = 1000
max_length = 512

In [4]:
tokenizer.model_max_length = max_length
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

In [5]:
# Generate a prompt
prompt = "Write me some code"
inputs = tokenizer(prompt, return_tensors="pt", padding_side="left")
# Generate text
outputs = model.generate(input_ids=inputs["input_ids"].cuda(), attention_mask=inputs["attention_mask"].cuda(), max_length=100, do_sample=True, temperature=0.7)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Write me some code for a simple class that can generate a random number between two specified values, and also calculate the average of a list of numbers.

```python
import random

class NumberGenerator:
    def __init__(self, min_val, max_val):
        self.min_val = min_val
        self.max_val = max_val

    def generate_random_number(self):
        return random.randint(self.min_val, self.max_val)

    def calculate_average(self, numbers):
        return sum


In [11]:
system_prompt = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You will be given a question. First reason about the question and answer. Then answer the question.
The final solution must be in json format:
{
    "solution": "put your solution here"
}
Your solution should only include the final answer in the format above<|eot_id|><|start_header_id|>user<|end_header_id|>

Question:  What is the square root of 531?<|eot_id|><|start_header_id|>assistant<|end_header_id|>


""".strip()

# Generate a prompt
inputs = tokenizer(system_prompt, return_tensors="pt", padding_side="left")
# Generate text
outputs = model.generate(input_ids=inputs["input_ids"].cuda(), attention_mask=inputs["attention_mask"].cuda(), max_length=512, do_sample=True, temperature=0.7)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


system

You will be given a question. First reason about the question and answer. Then answer the question.
The final solution must be in json format:
{
    "solution": "put your solution here"
}
Your solution should only include the final answer in the format aboveuser

Question:  What is the square root of 531?assistant

To solve this, I would first consider the factors of 531. A perfect square is a number that is a product of an even power of a prime factor. The prime factorization of 531 is 3 x 177. Since neither 3 nor 177 is a perfect square, we can't easily find the square root of 531 by taking the square root of its factors.

However, we can try to find the nearest perfect squares around 531. The perfect square immediately below 531 is 529, which is 23^2, and the perfect square immediately above 531 is 529 is 24^2.

The square root of 531 would then be between 23 and 24, and would be approximately 23.16 (or 23 and a fraction). 

{
    "solution": 23.16
}


In [5]:
# Load dataset
dataset = load_dataset("openai/gsm8k", "main", cache_dir="./cache", )

In [6]:
dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'answer'],
        num_rows: 7473
    })
    test: Dataset({
        features: ['question', 'answer'],
        num_rows: 1319
    })
})

In [7]:
dataset_train = dataset["train"]
dataset_test = dataset["test"]

In [8]:
# Convert data to torch
dataset_train.set_format(type="torch", columns=["question", "answer"])
dataset_test.set_format(type="torch", columns=["question", "answer"])

# PyTorch random sampler
random_sampler_train = torch.utils.data.RandomSampler(dataset_train, replacement=True, num_samples=batch_size*group_batch_size*num_steps)

system_prompt = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You will be given a question. First reason about the question and answer. Then answer the question.
The final solution must be in json format:
```{json}
{
    "solution": "put your solution here"
}
```
Your solution should only include the final answer in json format.<|eot_id|><|start_header_id|>user<|end_header_id|>

Question: 
""".strip()

def collate_fn(batch):
    
    questions = tokenizer([system_prompt + " " + batch[i]["question"] + "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" for i in range(0, len(batch))], truncation=True, padding="longest", padding_side="left", return_tensors="pt", max_length=tokenizer.model_max_length)
    answers = tokenizer([batch[i]["answer"].split("#### ")[-1] for i in range(0, len(batch))], truncation=True, padding="longest", padding_side="right", return_tensors="pt", max_length=tokenizer.model_max_length)
    return {
        "question_ids": questions["input_ids"],
        "question_mask": questions["attention_mask"],
        "answer": [batch[i]["answer"].split("#### ")[-1] for i in range(0, len(batch))],
        "answer_ids": answers["input_ids"],
        "answer_mask": answers["attention_mask"],
    }


# PyTorch data loader
data_loader_train = torch.utils.data.DataLoader(
    dataset_train, 
    sampler=random_sampler_train,
    batch_size=batch_size*group_batch_size, 
    collate_fn=collate_fn,
    
    num_workers=10,
    prefetch_factor=10,
    persistent_workers=True,
)

In [9]:
search_string = """
```{json}
{
    "solution": ".*"
}
```
""".strip()

# For the normal reward:
#   Returns 1 if the answer is correct, -1 otherwise
# For the length reward:
#   L_max is like a "desired length"
#   L_cache is the max length we are considering
#   Returns 0 if generation length is under the desired length (under L_max - L_cache)
#           a small negative penalty if the length is between L_max - L_cache and L_max
#           -1 if generation length is way too long (over L_max)
# Also returns True if correct and False if not
@torch.no_grad()
def get_reward(string, answer, length, L_max, L_cache):
    if type(string) != str:
        string = tokenizer.decode(string)
    rew = 0
    correct = False

    # Get the normal reward
    try:
        v = json.loads(re.findall(search_string, string)[-1][10:-4])["solution"] == answer
        if v:
            rew += 1
            correct = True
        else:
            rew += -1
    except:
        rew += -1

    # Get the length reward
    if length <= L_max - L_cache:
        rew += 0
    elif length < L_max:
        rew += ((L_max-L_cache)-length)/L_cache
    else:
        rew += -1

    return rew, correct
    


In [None]:
policy = model
G = 16
min_batch_size = 4
eps_low = 0.2
eps_high = 0.28
lr = 1e-6
desired_length = 400
max_length = 512
warmup_steps = 20

assert G % group_batch_size == 0, f"Group size ({G}) must be divisible by group batch size ({group_batch_size})"

# Batch buffer. This allows us to resample batches until we get a large enough batch size
batch_buffer_rewards = []
batch_buffer_sequences = []
batch_buffer_masks = []
batch_buffer_loss_masks = []
batch_buffer_tokens = []

# Optimizer on the policy
optim = torch.optim.AdamW(policy.parameters(), lr=lr)

grad_scaler = torch.amp.GradScaler("cuda")

total_batch_group_size = batch_size*group_batch_size

for step, batch in enumerate(data_loader_train):
    # We want to do all this without grad. We will do a trick to
    # do this with grad later.
    with torch.no_grad():
        # Update the old policy model with the current model
        old_policy = deepcopy(policy).cpu()
        old_policy.eval()
        policy.eval()

        # Sample G outputs from the policy for
        rewards_by_prompt = [[] for i in range(0, total_batch_group_size)]
        text_by_prompt = [[] for i in range(0, total_batch_group_size)]
        masks_by_prompt = [[] for i in range(0, total_batch_group_size)]
        loss_masks_by_prompt = [[] for i in range(0, total_batch_group_size)]
        numcorrect_by_prompt = [0 for i in range(0, total_batch_group_size)]
        tokens_by_prompt = [[] for i in range(0, total_batch_group_size)]
        outputs = []
        for i in range(0, G//group_batch_size):
            # Get output on batch
            batch_output = model.generate(input_ids=batch["question_ids"].cuda(), attention_mask=batch["question_mask"].cuda(), max_length=max_length, do_sample=True, temperature=0.4)

            # Pad the output
            batch_output = tokenizer.pad({"input_ids": batch_output}, padding="max_length", padding_side="left", max_length=max_length)["input_ids"]

            # Decode each of the outputs
            text_outputs = [tokenizer.decode(batch_output[j], skip_special_tokens=True) for j in range(0, total_batch_group_size)]

            # Get masks (True to attend. False to not attend)
            masks = (batch_output != tokenizer.pad_token_id)

            # Lengths of all outputs
            lengths = masks.int().sum(-1)

            # Loss masks will mask the prompt as well as the other padding tokens
            loss_masks = masks.clone()
            loss_masks[:, :batch["question_mask"].shape[1]] = False

            # Get all rewards - normal and length
            for j in range(0, total_batch_group_size):
                # This just takes the global batch size and converts
                # it to a batchsize within groups.
                batch_idx = j % batch_size

                # Get the reward
                rew, correct = get_reward(text_outputs[batch_idx], batch["answer"][batch_idx], lengths[batch_idx].item(), max_length, desired_length)
                rewards_by_prompt[batch_idx].append(rew)
                numcorrect_by_prompt[batch_idx] += correct

                # Add masks and text outputs
                text_by_prompt[batch_idx].append(text_outputs[batch_idx])
                masks_by_prompt[batch_idx].append(masks[batch_idx].cpu())
                loss_masks_by_prompt[batch_idx].append(loss_masks[batch_idx].cpu())
                tokens_by_prompt[batch_idx].append(batch_output[batch_idx].cpu())

        # Look for prompts where the reward is either all correct or all wrong
        for i in range(0, batch_size):
            # Skip if all wrong or all correct. Otherwise add to the buffer
            if step <= warmup_steps or (numcorrect_by_prompt[i] > 0 and numcorrect_by_prompt[i] < len(batch_output)):
                batch_buffer_rewards.append(rewards_by_prompt[i])
                batch_buffer_sequences.append(text_by_prompt[i])
                batch_buffer_masks.append(masks_by_prompt[i])
                batch_buffer_loss_masks.append(loss_masks_by_prompt[i])
                batch_buffer_tokens.append(tokens_by_prompt[i])

    # Get more data if the batch isn't large enough
    if len(batch_buffer_rewards) < min_batch_size:
        continue

    # Put policy in train mode
    policy.train()

    # Convert the tokens and masks into tensors (batch_size, group_size)
    batch_buffer_rewards = torch.tensor(batch_buffer_rewards, dtype=torch.float)
    # batch_buffer_masks = torch.stack(batch_buffer_masks)
    batch_buffer_masks = torch.stack([torch.stack(row) for row in batch_buffer_masks])
    batch_buffer_loss_masks = torch.stack([torch.stack(row) for row in batch_buffer_loss_masks])
    batch_buffer_tokens = torch.stack([torch.stack(row) for row in batch_buffer_tokens])

    # Compute advantages by group
    with torch.no_grad():
        advantages = (batch_buffer_rewards - batch_buffer_rewards.mean(-1, keepdim=True)) / (batch_buffer_rewards.std(-1, keepdim=True) + 1e-8)

    # Weight for the entire batch for all groups is the
    # total number of tokens
    loss_weight = 1/batch_buffer_loss_masks.sum()

    # Iterate over all batches
    for batch_num in range(0, G):
        # Get the advantages, masks, and tokens for this batch
        adv = advantages[:, batch_num].cuda()
        masks = batch_buffer_masks[:, batch_num].cuda()
        loss_masks = batch_buffer_loss_masks[:, batch_num].cuda()
        tokens = batch_buffer_tokens[:, batch_num].cuda()

        # We can get the outputs of both the old model and current model by doing a forward pass on each
        # with the given prompt.
        # Note that the output is just the probability of the selected token from each distribution
        labels = tokens[:, 1:]
        with torch.no_grad():
            old_policy = old_policy.to(torch.device("cuda:0"))
            old_probs = old_policy(tokens, attention_masks=masks).logits.softmax(-1)[:, :-1]
            old_probs = torch.gather(old_probs, 2, labels.unsqueeze(2)).squeeze(2)
            old_policy = old_policy.cpu()
        new_probs = policy(tokens, attention_masks=masks).logits.softmax(-1)[:, :-1]
        new_probs = torch.gather(new_probs, 2, labels.unsqueeze(2)).squeeze(2)

        # Relative probs
        rel_probs = new_probs / old_probs

        # Clip the probabilities and multiply by the advantage
        adv = adv[:, None].to(rel_probs.device)
        rel_probs = torch.min(rel_probs*adv, rel_probs.clamp(min=1-eps_low, max=1+eps_high)*adv)

        # Mask loss
        rel_probs = rel_probs * loss_masks[:, :-1]

        # Sum up all values. Take the negative to maximize. Weight for averaging
        reward = -(rel_probs.sum() * loss_weight)

        # Backprop
        reward.backward()

        print(reward.item())
        del reward, rel_probs, adv, new_probs, old_probs, masks, loss_masks, tokens

    # Step optimizer
    optim.step()
    optim.zero_grad()

    # Reset buffers
    batch_buffer_rewards = []
    batch_buffer_sequences = []
    batch_buffer_masks = []
    batch_buffer_loss_masks = []
    batch_buffer_tokens = []

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

0.06401962786912918
0.06401962786912918
0.06401962786912918
0.06401962786912918
0.02232591062784195
0.02232591062784195
0.02232591062784195
0.02232591062784195
-0.00847596675157547
-0.00847596675157547
-0.00847596675157547
-0.00847596675157547
0.09490115195512772
0.09490115195512772
0.09490115195512772
0.09490115195512772


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


AttributeError: 'Tensor' object has no attribute 'append'

In [32]:
old_policy = deepcopy(policy)

# Iterate over all batches
for batch_num in range(0, G):
    # Get the advantages, masks, and tokens for this batch
    adv = advantages[:, batch_num].cuda()
    masks = batch_buffer_masks[:, batch_num].cuda()
    loss_masks = batch_buffer_loss_masks[:, batch_num].cuda()
    tokens = batch_buffer_tokens[:, batch_num].cuda()

    # We can get the outputs of both the old model and current model by doing a forward pass on each
    # with the given prompt.
    # Note that the output is just the probability of the selected token from each distribution
    labels = tokens[:, 1:]
    with torch.no_grad():
        old_policy = old_policy.to(torch.device("cuda:0"))
        old_probs = old_policy(tokens, attention_masks=masks).logits.softmax(-1)[:, :-1]
        old_probs = torch.gather(old_probs, 2, labels.unsqueeze(2)).squeeze(2)
        old_policy = old_policy.cpu()
    new_probs = policy(tokens, attention_masks=masks).logits.softmax(-1)[:, :-1]
    new_probs = torch.gather(new_probs, 2, labels.unsqueeze(2)).squeeze(2)

    # Relative probs
    rel_probs = (new_probs / old_probs).nan_to_num(0)

    # Clip the probabilities and multiply by the advantage
    adv = adv[:, None].to(rel_probs.device)
    rel_probs = torch.min(rel_probs*adv, rel_probs.clamp(min=1-eps_low, max=1+eps_high)*adv)
    break

    # Mask loss
    rel_probs = rel_probs * loss_masks[:, :-1]

    # Sum up all values. Take the negative to maximize. Weight for averaging
    reward = -(rel_probs.sum() * loss_weight)

    # Backprop
    reward.backward()

    print(reward.item())
    # del reward, rel_probs, adv, new_probs, old_probs, masks, loss_masks, tokens

# Step optimizer
optim.step()
optim.zero_grad()

In [36]:
batch_buffer_rewards.std(-1, keepdim=True) + 1e-8

tensor([[3.4393e-01],
        [4.2759e-02],
        [1.0000e-08],
        [4.1060e-01]])

In [None]:
shp

torch.Size([3, 768])

In [None]:
labels.dtype

torch.int64

In [None]:
shp = old_probs.shape[:-1]
(old_probs.cpu()[:, :-1].flatten(0, 1)[labels.cpu().flatten(0, 1)]).shape

IndexError: index 128001 is out of bounds for dimension 0 with size 2301

In [None]:
batch_buffer_loss_masks.sum()

tensor(50, device='cuda:0')

In [16]:
eps_high

0.28

In [13]:
torch.min(rel_probs, rel_probs.clamp(min=eps_low, max=eps_high))

tensor([[[0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793],
         [0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793],
         [0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793],
         ...,
         [0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793],
         [0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793],
         [0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793]],

        [[0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793],
         [0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793],
         [0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793],
         ...,
         [0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793],
         [0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793],
         [0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793]],

        [[0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793],
         [0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.2793],
         [0.2793, 0.2793, 0.2793,  ..., 0.2793, 0.2793, 0.

In [None]:

# Load all helpfulness/harmless subsets (share the same schema)
dataset = load_dataset("Anthropic/hh-rlhf", cache_dir="./cache")
dataset

In [7]:
dataset_train = dataset["train"]
dataset_test = dataset["test"]

In [27]:
batch[0]

{'chosen': "\n\nHuman: I'd like to offer support to a friend, who is considering hiring a financial planner. Do you have any thoughts?\n\nAssistant: Yes! Offering support to a friend who is considering hiring a financial planner can be very helpful, as it can help them make an informed decision and feel confident in their choice. You can recommend financial planning as a good way to help them meet their long-term financial goals, as well as reduce their stress around money. You could also offer to accompany them to meet with a financial planner, so they have support during the process. Additionally, sharing your own experience with financial planning can help your friend feel more confident in the process.",
 'rejected': "\n\nHuman: I'd like to offer support to a friend, who is considering hiring a financial planner. Do you have any thoughts?\n\nAssistant: The friend's goals seem broad, and vague. Perhaps the following is a helpful way to provide that support:\n\n“Given the parameters 

In [None]:
# Convert data to torch
dataset_train.set_format(type="torch", columns=["chosen", "rejected"])
dataset_test.set_format(type="torch", columns=["chosen", "rejected"])

# PyTorch random sampler
random_sampler_train = torch.utils.data.RandomSampler(dataset_train, replacement=True, num_samples=batch_size*num_steps)

def colate_fn(batch):
    chosen = tokenizer([batch[i]["chosen"] for i in range(0, len(batch))], truncation=True, padding=True, return_tensors="pt", max_length=tokenizer.model_max_length)
    rejected = tokenizer([batch[i]["rejected"] for i in range(0, len(batch))], truncation=True, padding=True, return_tensors="pt", max_length=tokenizer.model_max_length)
    return {
        "chosen_ids": [chosen]
    }


# PyTorch data loader
data_loader_train = torch.utils.data.DataLoader(
    dataset_train, 
    sampler=random_sampler_train,
    batch_size=batch_size, 
    collate_fn=lambda x: x,
    
    num_workers=10,
    prefetch_factor=10,
    persistent_workers=True,
)

In [18]:
batch

[{'chosen': "\n\nHuman: I'd like to offer support to a friend, who is considering hiring a financial planner. Do you have any thoughts?\n\nAssistant: Yes! Offering support to a friend who is considering hiring a financial planner can be very helpful, as it can help them make an informed decision and feel confident in their choice. You can recommend financial planning as a good way to help them meet their long-term financial goals, as well as reduce their stress around money. You could also offer to accompany them to meet with a financial planner, so they have support during the process. Additionally, sharing your own experience with financial planning can help your friend feel more confident in the process.",
  'rejected': "\n\nHuman: I'd like to offer support to a friend, who is considering hiring a financial planner. Do you have any thoughts?\n\nAssistant: The friend's goals seem broad, and vague. Perhaps the following is a helpful way to provide that support:\n\n“Given the parameter

In [17]:
for batch in data_loader_train:
    break

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Process Process-8:
Process Process-10:
Process Process-9:
Process Process-7:
Process Process-3:
Process Process-1:
Process Process-4:
Process Process-2:
Process Process-5:
Process Process-6:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/users/gmongaras/miniconda3/lib/python3.10/multiprocessing/process.py", line 317, in _bootstrap
    util._exit_function()
  File "/users/gmongaras/miniconda3/lib/python3.10/multiprocessing/process.py", line 317, in _bootstrap
    util._exit_function()
  File "/users/gmongaras/miniconda3/lib/python3.10/multiprocessing/process.py", line 317, in _bootstrap
    util._exit_function()
  File "/users/gmongaras/miniconda3/lib/python3.10/multiprocessing/pr