### **Llama 3.2 1B + Behavioral Cloning**

Replicated by [Shashank Dubey](https://github.com/catchshashank) to the [stopping-agents](https://github.com/catchshashank/optimal-stopping) repository.

1. **Import Dependencies**

In [None]:
import math
import os

import datasets
import huggingface_hub # needed for Llama models
import numpy as np
import pandas as pd
import torch
import transformers

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from tqdm.auto import tqdm

2. **Set parameters**

In [None]:
# Hugging Face access token. Read it from the HF_TOKEN environment variable so
# a real token never has to be pasted into (and committed with) the notebook.
# Falls back to the literal placeholder "HF_TOKEN" when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "HF_TOKEN")

# Economic parameters of the stopping problem.
COST_PER_UNIT_TIME = 0.1
BENEFIT_PER_POSITIVE_OUTCOME = 10.0

# Times (in seconds) at which the agent can decide to quit or wait.
# The code is tailored to exactly 2 decision opportunities right now.
DECISION_OPPORTUNITIES = [45, 60]

3. **Load Data + Define df**

In [None]:
# NOTE(review): this URL carries a `?token=` query parameter, which looks like
# a short-lived GitHub raw-content token. If so, it will expire and should not
# be committed; confirm and switch to a public URL or a local copy of the CSV.
dataset_url = "https://raw.githubusercontent.com/emaadmanzoor/stopping-agents/refs/heads/main/datasets/synthetic_sales_conversations.csv?token=GHSAT0AAAAAADBUAD4WOA6XRF2GSIX5UC4Y2EEF66Q"

# Read the CSV and derive both extra columns in a single chain, so re-running
# this cell always rebuilds the frame from scratch.
diarized_conversations = pd.read_csv(dataset_url).assign(
    # "is_sale": 1 for "sale", 0 for "no sale"; any other outcome value is not
    # in the mapping and therefore becomes NaN.
    is_sale=lambda frame: frame["outcome"].map({"sale": 1, "no sale": 0}),
    # "duration": the largest end_time within each conversation, broadcast
    # back onto every row of that conversation.
    duration=lambda frame: (
        frame.groupby("conversation_id")["end_time"].transform("max")
    ),
)

diarized_conversations.head()

4. **Split into train, validation, and test conversations**

In [None]:
# The split is done on conversations, not on individual rows: build the
# de-duplicated (conversation_id, is_sale) pairs once and reuse them for both
# the ids and their outcomes so the two arrays stay aligned.
conversation_outcomes =\
    diarized_conversations[["conversation_id", "is_sale"]].drop_duplicates()
all_conversation_ids = conversation_outcomes["conversation_id"].values
all_outcomes = conversation_outcomes["is_sale"].values

# Hold out 25% of the conversations as the test set, stratified on the outcome
# so the class balance is preserved
train_conversation_ids, test_conversation_ids, train_outcomes, test_outcomes =\
    train_test_split(all_conversation_ids, all_outcomes, test_size=0.25, random_state=42,
                     stratify=all_outcomes)

# Further split the remaining 75% into train + val (25% of it goes to val),
# i.e. roughly 56% train / 19% val / 25% test of all conversations
train_conversation_ids, val_conversation_ids, train_outcomes, val_outcomes =\
    train_test_split(train_conversation_ids, train_outcomes, test_size=0.25, random_state=42,
                     stratify=train_outcomes)

# Row-level datasets: every row whose conversation_id belongs to the split
diarized_conversations_train =\
    diarized_conversations[diarized_conversations["conversation_id"].isin(train_conversation_ids)]
diarized_conversations_val =\
    diarized_conversations[diarized_conversations["conversation_id"].isin(val_conversation_ids)]
diarized_conversations_test =\
    diarized_conversations[diarized_conversations["conversation_id"].isin(test_conversation_ids)]

# Report the number of distinct conversations per split. len() of the frames
# would count rows instead, which overstates the figure whenever a
# conversation spans several rows.
print(diarized_conversations_train["conversation_id"].nunique(), "train conversations.")
print(diarized_conversations_val["conversation_id"].nunique(), "validation conversations.")
print(diarized_conversations_test["conversation_id"].nunique(), "test conversations.")