
# Generative AI with Machine Learning Models for Psychological Chatbots

In [1]:
from IPython.display import clear_output
!pip install -U bitsandbytes
!pip install datasets
clear_output()

Below is a step-by-step Python implementation for fine-tuning https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B  using the Hugging Face transformers library.

1. Prepare the Dataset
The dataset includes examples of Evaluative Words, Feeling(s), and corresponding Need(s).

In [2]:
import csv

# Data to be saved in the CSV file
data = [
    ["Evaluative Word", "Feeling(s)", "Need(s)"],
    ["abandoned", "terrified, hurt, bewildered, sad, frightened, lonely", "nurturing, connection, belonging, support, caring"],
    ["abused", "angry, frustrated, frightened", "caring, nurturing, support, emotional or physical well-being, consideration, for all living things to flourish"],
    ["(not) accepted", "upset, scared, lonely", "inclusion, connection, community, belonging, contribution, peer respect"],
    ["attacked", "scared, angry", "safety"],
    ["belittled", "angry, frustrated, tense, distressed", "respect, autonomy, to be seen, acknowledgment, appreciation"],
    ["betrayed", "angry, hurt, disappointed, enraged", "trust, dependability, honesty, honor, commitment, clarity"],
    ["blamed", "angry, scared, confused, antagonistic, hostile, bewildered, hurt", "accountability, causality, fairness, justice"],
    ["bullied", "angry, scared, pressured", "autonomy, choice, safety, consideration"],
    ["caged/boxed in", "angry, thwarted, scared, anxious", "autonomy, choice, freedom"],
    ["cheated", "resentful, hurt, angry", "honesty, fairness, justice, trust, reliability"],
    ["coerced", "angry, frustrated, frightened, thwarted, scared", "choice, autonomy, freedom (to act freely, to choose freely)"],
    ["cornered", "angry, scared, anxious, thwarted", "autonomy, freedom"],
    ["criticized", "in pain, scared, anxious, frustrated, humiliated, angry, embarrassed", "understanding, acknowledgment, recognition, accountability, nonjudgmental communication"],
    ["discounted/diminished", "hurt, angry, embarrassed, frustrated", "acknowledgment, inclusion, recognition, respect, to matter"],
    ["disliked", "sad, lonely, hurt", "connection, appreciation, understanding, acknowledgment, friendship, inclusion"],
    ["distrusted", "sad, frustrated", "trust, honesty"],
    ["dumped on", "angry, overwhelmed", "respect, consideration"],
    ["harassed", "angry, frustrated, pressured, frightened", "respect, space, consideration, peace"],
    ["hassled", "irritated, distressed, angry, frustrated", "serenity, autonomy, to do things at my own pace and in my own way, calm, space"],
    ["ignored", "lonely, scared, hurt, sad, embarrassed", "connection, belonging, inclusion, community, participation"],
    ["insulted", "angry, embarrassed", "respect, consideration, acknowledgment, recognition"],
    ["interrupted", "angry, frustrated, resentful, hurt", "respect, to be heard, consideration"],
    ["intimidated", "scared, anxious", "safety, equality, empowerment"],
    ["invalidated", "angry, hurt, resentful", "appreciation, respect, acknowledgment, recognition"],
    ["invisible", "sad, angry, lonely, scared", "to be seen and heard, inclusion, belonging, community"],
    ["isolated", "lonely, afraid, scared", "community, inclusion, belonging, contribution"],
    ["left out", "sad, lonely, anxious", "inclusion, belonging, community, connection"],
    ["let down", "sad, disappointed, frightened", "consistency, trust, dependability, consistency"],
    ["manipulated", "angry, scared, powerless, thwarted, frustrated", "autonomy, empowerment, trust, equality, freedom, free choice, connection, genuineness"],
    ["mistrusted", "sad, angry", "trust"],
    ["misunderstood", "upset, angry, frustrated", "to be heard, understanding, clarity"],
    ["neglected", "lonely, scared", "connection, inclusion, participation, community, care, to matter, consideration"],
    ["overpowered", "angry, impotent, helpless, confused", "equality, justice, autonomy, freedom"],
    ["overworked", "angry, tired, frustrated", "respect, consideration, rest, caring"],
    ["patronized", "angry, frustrated, resentful", "recognition, equality, respect, mutuality"],
    ["pressured", "anxious, resentful, overwhelmed", "relaxation, clarity, space, consideration"],
    ["provoked", "angry, frustrated, hostile, antagonistic, resentful", "respect, consideration"],
    ["put down", "angry, sad, embarrassed", "respect, acknowledgment, understanding"],
    ["rejected", "hurt, scared, angry, defiant", "belonging, inclusion, closeness, to be seen, acknowledgment, connection"],
    ["ripped off/screwed", "anger, resentment, disappointed", "consideration, justice, fairness, acknowledgment, trust"],
    ["smothered/suffocated", "frustrated, fear, desperation", "space, freedom, autonomy, authenticity, self-expression"],
    ["taken for granted", "sad, angry, hurt, disappointed", "appreciation, acknowledgment, recognition, consideration"],
    ["threatened", "scared, frightened, alarmed, agitated, defiant", "safety, autonomy"],
    ["trampled", "angry, frustrated, overwhelmed", "empowerment, connection, community, to be seen, consideration, equality, respect, acknowledgment"],
    ["tricked", "embarrassed, angry, resentful", "integrity, trust, honesty"],
    ["unappreciated", "sad, angry, hurt, frustrated", "appreciation, respect, acknowledgment, consideration"],
    ["unheard", "sad, hostile, frustrated", "understanding, consideration, empathy"],
    ["unloved", "sad, bewildered, frustrated", "love, appreciation, empathy, connection, community"],
    ["unseen", "sad, anxious, frustrated", "acknowledgment, appreciation, to be seen and heard"],
    ["unsupported", "sad, hurt, resentful", "support, understanding"],
    ["unwanted", "sad, anxious, frustrated", "belonging, inclusion, caring"],
    ["used", "sad, angry, resentful", "autonomy, equality, consideration, mutuality"],
    ["victimized", "frightened, helpless", "empowerment, mutuality, safety, justice"],
    ["violated", "sad, agitated, anxious", "privacy, safety, trust, space, respect"],
    ["wronged", "angry, hurt, resentful, irritated", "respect, justice, trust, safety, fairness"]
]

# Specify the CSV file name
csv_file = 'feelings_needs.csv'

# Write the data to the CSV file
with open(csv_file, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(data)

print(f"Data saved to {csv_file}")

Data saved to feelings_needs.csv


In [3]:
import pandas as pd
from datasets import Dataset

# Step 1: Read the CSV file
df = pd.read_csv('feelings_needs.csv')
df.head()

Unnamed: 0,Evaluative Word,Feeling(s),Need(s)
0,abandoned,"terrified, hurt, bewildered, sad, frightened, ...","nurturing, connection, belonging, support, caring"
1,abused,"angry, frustrated, frightened","caring, nurturing, support, emotional or physi..."
2,(not) accepted,"upset, scared, lonely","inclusion, connection, community, belonging, c..."
3,attacked,"scared, angry",safety
4,belittled,"angry, frustrated, tense, distressed","respect, autonomy, to be seen, acknowledgment,..."


In [4]:
import pandas as pd
from datasets import Dataset

# Load CSV
csv_file = 'feelings_needs.csv'
df = pd.read_csv(csv_file)

# Function to generate storytelling-style input-output pairs
def create_input_output(row):
    # Generate a storytelling-style input
    input_text = (
        f"Given the evaluative word: \"{row['Evaluative Word']}\", "
        f"describe the feelings and needs associated with it."
    )
    # Generate a logical, conversational-style output
    output_text = (
        f"The evaluative word \"{row['Evaluative Word']}\" expresses feelings such as "
        f"\"{row['Feeling(s)']}\" and points to underlying needs like \"{row['Need(s)']}\"."
    )
    return input_text, output_text

# Apply the function to create input-output pairs
df[['input', 'output']] = df.apply(create_input_output, axis=1, result_type="expand")

# Convert to Hugging Face Dataset
dataset = Dataset.from_pandas(df[['input', 'output']])
train_test_split = dataset.train_test_split(test_size=0.2)
train_dataset = train_test_split["train"]
test_dataset = train_test_split["test"]

# Display the first entry of the new training dataset
print(train_dataset[:1])




{'input': ['Given the evaluative word: "let down", describe the feelings and needs associated with it.'], 'output': ['The evaluative word "let down" expresses feelings such as "sad, disappointed, frightened" and points to underlying needs like "consistency, trust, dependability, consistency".']}


In [5]:
# Display the first entry of the new training dataset
print(train_dataset[:5])

{'input': ['Given the evaluative word: "let down", describe the feelings and needs associated with it.', 'Given the evaluative word: "distrusted", describe the feelings and needs associated with it.', 'Given the evaluative word: "misunderstood", describe the feelings and needs associated with it.', 'Given the evaluative word: "left out", describe the feelings and needs associated with it.', 'Given the evaluative word: "violated", describe the feelings and needs associated with it.'], 'output': ['The evaluative word "let down" expresses feelings such as "sad, disappointed, frightened" and points to underlying needs like "consistency, trust, dependability, consistency".', 'The evaluative word "distrusted" expresses feelings such as "sad, frustrated" and points to underlying needs like "trust, honesty".', 'The evaluative word "misunderstood" expresses feelings such as "upset, angry, frustrated" and points to underlying needs like "to be heard, understanding, clarity".', 'The evaluative wo

In [11]:
# Step 1: Install Required Libraries
# !pip install transformers datasets peft accelerate bitsandbytes torch pandas --upgrade

# Step 2: Import Libraries
import os
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from transformers import BitsAndBytesConfig, EarlyStoppingCallback
from huggingface_hub import login
import numpy as np

# Step 3: Set Up Environment Variables
#MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct"
MODEL_NAME="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
OUTPUT_DIR = "./fine_tuned_llama_deepseek"
USE_4BIT = True


In [12]:

# Check if running on Google Colab
is_colab = False
try:
    from google.colab import drive, userdata
    is_colab = True
except ImportError:
    pass

if is_colab:
    # Mount Google Drive
    try:
        drive.mount('/content/drive', force_remount=True)
    except Exception as e:
        print(f"Error mounting Google Drive: {e}")

    # Create model cache directory in Google Drive
    MODEL_CACHE_DIR = f"/content/drive/MyDrive/models/{MODEL_NAME.replace('/', '_')}"
    os.makedirs(MODEL_CACHE_DIR, exist_ok=True)

    # Create fine-tuned model directory in Google Drive
    FINETUNED_MODEL_DIR = f"/content/drive/MyDrive/models/finetuned/{MODEL_NAME.replace('/', '_')}"
    os.makedirs(FINETUNED_MODEL_DIR, exist_ok=True)

    print(f"Model cache directory set to: {MODEL_CACHE_DIR}")
    print(f"Fine-tuned model directory set to: {FINETUNED_MODEL_DIR}")
else:
    MODEL_CACHE_DIR = None
    FINETUNED_MODEL_DIR = None
    print("Not running on Google Colab. Using default cache directory.")

Mounted at /content/drive
Model cache directory set to: /content/drive/MyDrive/models/deepseek-ai_DeepSeek-R1-Distill-Llama-8B
Fine-tuned model directory set to: /content/drive/MyDrive/models/finetuned/deepseek-ai_DeepSeek-R1-Distill-Llama-8B


In [13]:


# Hugging Face authentication
if is_colab:
    HF_TOKEN = userdata.get('HF_TOKEN')
    if not HF_TOKEN:
        raise ValueError("Please set the 'HF_TOKEN' environment variable in Colab.")
else:
    HF_TOKEN = os.getenv('HF_TOKEN')
    if not HF_TOKEN:
        raise ValueError("Please set the 'HF_TOKEN' environment variable.")

login(token=HF_TOKEN)

# Step 5: Load Tokenizer and Model
cache_dir = MODEL_CACHE_DIR if is_colab else None

# Load the tokenizer
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, cache_dir=cache_dir)

# Set Bits and Bytes configuration if using 4-bit quantization
bnb_config = BitsAndBytesConfig(load_in_4bit=USE_4BIT) if USE_4BIT else None

# Load the model
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    cache_dir=cache_dir
)

# Update OUTPUT_DIR to save fine-tuned model to Google Drive if on Colab
if is_colab:
    OUTPUT_DIR = FINETUNED_MODEL_DIR

print(f"Model will be fine-tuned and saved to: {OUTPUT_DIR}")

print("Model loaded successfully.")

# Step 6: Prepare Tokenizer
print("Preparing tokenizer...")
tokenizer.pad_token = tokenizer.eos_token
model.resize_token_embeddings(len(tokenizer))

# Step 7: Tokenize Dataset
def tokenize_function(examples):
    tokenized_inputs = tokenizer(
        examples["input"], padding="max_length", truncation=True, max_length=512
    )
    tokenized_outputs = tokenizer(
        examples["output"], padding="max_length", truncation=True, max_length=512
    )
    tokenized_inputs["labels"] = tokenized_outputs["input_ids"]
    tokenized_inputs["labels"] = [
        [-100 if token == tokenizer.pad_token_id else token for token in label]
        for label in tokenized_inputs["labels"]
    ]
    return tokenized_inputs

print("Tokenizing datasets...")
tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_test = test_dataset.map(tokenize_function, batched=True)

tokenized_train = tokenized_train.remove_columns(["input", "output"])
tokenized_test = tokenized_test.remove_columns(["input", "output"])

# Step 8: Set Up LoRA Configuration

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.2,  # Increased dropout
    bias="none",
    task_type="CAUSAL_LM"
)




model = get_peft_model(model, lora_config)

# Step 9: Define Training Arguments
'''
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=10,  # Reduced epochs
    learning_rate=1e-4,  # Reduced learning rate
    weight_decay=0.01,  # Added weight decay
    logging_dir=f"{OUTPUT_DIR}/logs",
    logging_steps=10,
    eval_strategy="steps",
    save_strategy="steps",
    save_steps=50,
    eval_steps=50,
    save_total_limit=2,
    load_best_model_at_end=True,
    report_to="none",
    fp16=True,
    lr_scheduler_type="cosine",  # Learning rate scheduler
    push_to_hub=False,
)
'''
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=4,  # Consider reducing if out-of-memory errors occur
    per_device_eval_batch_size=4,  # Consider reducing if out-of-memory errors occur
    num_train_epochs=5,  # Reduced epochs further to prevent overfitting
    learning_rate=5e-5,  # Further reduced learning rate
    weight_decay=0.01,  # Added weight decay
    logging_dir=f"{OUTPUT_DIR}/logs",
    logging_steps=10,
    eval_strategy="steps",
    save_strategy="steps",
    save_steps=50,
    eval_steps=50,
    save_total_limit=2,
    load_best_model_at_end=True,
    report_to="none",
    fp16=True,
    lr_scheduler_type="cosine",  # Learning rate scheduler
    push_to_hub=False,
    gradient_accumulation_steps=4, # Added gradient accumulation to increase effective batch size
)


# Step 10: Custom compute metrics for display
def compute_metrics(eval_preds):
    predictions, labels = eval_preds
    predictions = np.argmax(predictions, axis=-1)
    labels = labels.flatten()
    predictions = predictions.flatten()
    validation_loss = ((predictions - labels) ** 2).mean()
    return {
        "eval_loss": validation_loss,
    }

# Step 11: Custom Trainer to Log Training and Validation Loss
class CustomTrainer(Trainer):
    def log_metrics(self, logs):
        training_loss = logs.get("loss", None)
        if training_loss is not None:
            print(f"Step: {logs.get('step', 'N/A')}, Training Loss: {training_loss:.4f}")

        validation_loss = logs.get("eval_loss", None)
        if validation_loss is not None:
            print(f"Step: {logs.get('step', 'N/A')}, Validation Loss: {validation_loss:.4f}")

    def on_log(self, logs):
        super().on_log(logs)
        print("Logs:", logs)
        self.log_metrics(logs)

# Step 12: Initialize Custom Trainer
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],  # Add EarlyStoppingCallback
)

# Step 13: Train the Model
print("Starting training...")
trainer.train()

# Display Final Metrics
metrics = trainer.evaluate()
print("Final Evaluation Metrics:", metrics)

# Step 14: Save the Fine-Tuned Model
print("Saving model and tokenizer...")
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print(f"Model and tokenizer saved to {OUTPUT_DIR}")


Loading tokenizer...


tokenizer_config.json:   0%|          | 0.00/3.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

Loading model...


config.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/24.2k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-000002.safetensors:   0%|          | 0.00/8.67G [00:00<?, ?B/s]

model-00002-of-000002.safetensors:   0%|          | 0.00/7.39G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

Model will be fine-tuned and saved to: /content/drive/MyDrive/models/finetuned/deepseek-ai_DeepSeek-R1-Distill-Llama-8B
Model loaded successfully.
Preparing tokenizer...
Tokenizing datasets...


Map:   0%|          | 0/44 [00:00<?, ? examples/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

  trainer = CustomTrainer(


Starting training...




Step,Training Loss,Validation Loss


config.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

Final Evaluation Metrics: {'eval_loss': 11.686188697814941, 'eval_runtime': 3.2202, 'eval_samples_per_second': 3.416, 'eval_steps_per_second': 0.932, 'epoch': 3.3636363636363638}
Saving model and tokenizer...
Model and tokenizer saved to /content/drive/MyDrive/models/finetuned/deepseek-ai_DeepSeek-R1-Distill-Llama-8B


In [14]:
OUTPUT_DIR

'/content/drive/MyDrive/models/finetuned/deepseek-ai_DeepSeek-R1-Distill-Llama-8B'

In [15]:
MODEL_NAME

'deepseek-ai/DeepSeek-R1-Distill-Llama-8B'

In [16]:
# Verification and Comparison Script
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch
# Set the output directory for the fine-tuned model
#OUTPUT_DIR = "/content/drive/MyDrive/models/finetuned/meta-llama_Meta-Llama-3.1-8B-Instruct" if "google.colab" in str(get_ipython()) else "./fine_tuned_llama"
#MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct"
#USE_4BIT = True

# Load the tokenizer
try:
    print("Loading tokenizer...")
    tokenizer_finetuned = AutoTokenizer.from_pretrained(OUTPUT_DIR)
    #tokenizer_finetuned = AutoTokenizer.from_pretrained(MODEL_NAME)
    tokenizer_base = AutoTokenizer.from_pretrained(MODEL_NAME)
except OSError as e:
    raise ValueError(f"Error loading tokenizer. Ensure the OUTPUT_DIR and MODEL_NAME are correct:\n{e}")

# Load the models
try:
    # Check if LoRA configuration exists
    if os.path.exists(os.path.join(OUTPUT_DIR, "adapter_config.json")):
        # Load the base model
        print("Loading the base model...")
        base_model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            device_map="auto",
            torch_dtype="auto",  # Use appropriate dtype for your hardware
            cache_dir=f"/content/drive/MyDrive/models/{MODEL_NAME.replace('/', '_')}" if "google.colab" in str(get_ipython()) else None
        )

        # Load the LoRA configuration
        print("Applying LoRA configuration...")
        model_finetuned = PeftModel.from_pretrained(base_model, OUTPUT_DIR)
    else:
        # Load the fully fine-tuned model directly if no LoRA configuration
        print("Loading the fully fine-tuned model...")
        model_finetuned = AutoModelForCausalLM.from_pretrained(
            OUTPUT_DIR,
            device_map="auto",
            cache_dir=f"/content/drive/MyDrive/models/finetuned/{MODEL_NAME.replace('/', '_')}" if "google.colab" in str(get_ipython()) else None
        )


    # Load the base model for comparison with 4-bit quantization
    print("Loading the base model for comparison with 4-bit quantization...")
    bnb_config = BitsAndBytesConfig(load_in_4bit=USE_4BIT) if USE_4BIT else None
    model_base = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map={"": "cuda"},  # Ensure proper mapping to GPU
        torch_dtype="auto",
        quantization_config=bnb_config,

        cache_dir=f"/content/drive/MyDrive/models/{MODEL_NAME.replace('/', '_')}" if "google.colab" in str(get_ipython()) else None
    )

    print("Models and tokenizer loaded successfully!")
except Exception as e:
    raise RuntimeError(f"Error loading models: {e}")

# Generate evaluation questions
evaluation_questions = []
for row in train_dataset:
    evaluation_questions.append({
        "prompt": row['input'],
        "expected_output": row['output']
    })

# Display the first few evaluation questions for verification
for question in evaluation_questions[:1]:
    print(f"Prompt:\n{question['prompt']}\n")
    print(f"Expected Output:\n{question['expected_output']}\n")
    print("-" * 80)



Loading tokenizer...


tokenizer_config.json:   0%|          | 0.00/3.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

Loading the base model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Applying LoRA configuration...
Loading the base model for comparison with 4-bit quantization...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Models and tokenizer loaded successfully!
Prompt:
Given the evaluative word: "let down", describe the feelings and needs associated with it.

Expected Output:
The evaluative word "let down" expresses feelings such as "sad, disappointed, frightened" and points to underlying needs like "consistency, trust, dependability, consistency".

--------------------------------------------------------------------------------


In [17]:
# Iterate through evaluation questions
for question in evaluation_questions[:5]:  # Test with the first evaluation question
    prompt = question["prompt"]
    expected_keywords = question["expected_output"]

    # Generate response from the fine-tuned model
    inputs_finetuned = tokenizer_finetuned(prompt, return_tensors="pt").to("cuda")
    outputs_finetuned = model_finetuned.generate(
        **inputs_finetuned,
        max_length=150,  # Increase max_length if necessary for larger outputs
        pad_token_id=tokenizer_finetuned.eos_token_id  # Set pad_token_id to eos_token_id for consistency
    )
    generated_finetuned = tokenizer_finetuned.decode(outputs_finetuned[0], skip_special_tokens=True)

    # Remove the prompt text from the generated response
    if generated_finetuned.startswith(prompt):
        generated_finetuned = generated_finetuned[len(prompt):].strip()

    # Generate response from the base model using 4-bit quantization for faster inference
    inputs_base = tokenizer_base(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        outputs_base = model_base.generate(
            **inputs_base,
            max_length=150,
            pad_token_id=tokenizer_base.eos_token_id,
            num_beams=2,  # Use beam search for faster inference
            early_stopping=True  # Stop generation when an EOS token is generated
        )
    generated_base = tokenizer_base.decode(outputs_base[0], skip_special_tokens=True)

    # Remove the prompt text from the generated response
    if generated_base.startswith(prompt):
        generated_base = generated_base[len(prompt):].strip()

    # Display prompt, generated responses, and expected output
    print(f"Prompt: {prompt}")
    print(f"Generated Response (Fine-Tuned): {generated_finetuned}")
    print(f"Generated Response (Base): {generated_base}")
    print(f"Expected output: {expected_keywords}")
    print("-" * 80)

Prompt: Given the evaluative word: "let down", describe the feelings and needs associated with it.
Generated Response (Fine-Tuned): Then, think about how to respond effectively.
Okay, so I need to figure out how to respond when someone says "let me down." Hmm, first, I should understand what that phrase really means. I think "let me down" is when someone feels like they were expecting something better from another person or a situation, but it didn't live up to their expectations. It can make the person feel disappointed or maybe even frustrated. 

So, the feelings associated with "let me down" might include things like disappointment, maybe even a bit of sadness or frustration. The person might feel like their hopes or expectations weren't met, which can lead
Generated Response (Base): Then, create a character-driven story that reflects these feelings and needs.

First, let's break down the word "let down". What emotions does it evoke? What does it mean to feel "let down"? How does th

TO tesst

```python
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=2,  # Reduced batch size further
    per_device_eval_batch_size=2,  # Reduced batch size further
    num_train_epochs=3,  # Reduced epochs further
    learning_rate=2e-5,  # Reduced learning rate further
    weight_decay=0.005,  # Reduced weight decay slightly
    logging_dir=f"{OUTPUT_DIR}/logs",
    logging_steps=10,
    eval_strategy="steps",
    save_strategy="steps",
    save_steps=50,
    eval_steps=50,
    save_total_limit=2,
    load_best_model_at_end=True,
    report_to="none",
    fp16=True,
    lr_scheduler_type="cosine",
    push_to_hub=False,
    gradient_accumulation_steps=8,  # Increased gradient accumulation steps
    warmup_ratio=0.1,  # Added warmup ratio for learning rate
)
```
Key changes and explanations:

Reduced per_device_train_batch_size and per_device_eval_batch_size: Decreased from 4 to 2 to further reduce memory consumption and potentially improve training stability.
Reduced num_train_epochs: Decreased from 5 to 3 to prevent overfitting, as the model showed signs of overfitting in the previous analysis.
Reduced learning_rate: Decreased from 5e-5 to 2e-5 to promote more stable training and prevent the model from overshooting optimal weights.
Reduced weight_decay: Slightly decreased from 0.01 to 0.005 to potentially improve generalization. You can experiment with different values for this parameter.
Increased gradient_accumulation_steps: Increased from 4 to 8 to further simulate a larger batch size without increasing memory consumption.
Added warmup_ratio: Set to 0.1 to gradually increase the learning rate at the beginning of training. This can help stabilize the training process.

In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.9.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.5.2 (from gradio)
  Downloading gradio_client-1.5.2-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.19-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [None]:
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Define a system prompt
SYSTEM_PROMPT = """
You are a compassionate assistant trained to help users explore their emotions and unmet needs.
When users share evaluative words or experiences, respond empathetically by identifying associated feelings and unmet needs. Use clear, concise, and natural responses.

Example of Answer:
1. User: "I feel betrayed by my friend."
   Response: "It sounds like you're feeling betrayed. This might involve emotions like hurt or disappointment. Do you think this could be related to a need for trust or honesty in your friendship?"

2. User: "No one respects my ideas at work."
   Response: "I hear that you're feeling unheard or unimportant. It seems like this might be connected to a need for respect or acknowledgment. Can you tell me more about how this situation has been affecting you?"

3. User: "I feel invisible in my family."
   Response: "It seems like you're feeling invisible, which could bring up emotions such as sadness or loneliness. This might point to needs like being seen and heard, inclusion, or belonging. Would you like to share more about how this affects you?"

4. User: Given the evaluative word: "distrusted," describe the feelings and needs associated with it.
   Response: "The evaluative word 'distrusted' expresses feelings such as sadness or frustration and points to underlying needs like trust or honesty."

Stay concise and empathetic in all your responses, focusing on validating the user’s emotions and encouraging reflection.
"""

# Initialize the history
history = []

def clean_repeated_phrases(response):
    """Remove repeated phrases in the response."""
    sentences = response.split('. ')
    seen = set()
    cleaned_sentences = []
    for sentence in sentences:
        if sentence not in seen:
            seen.add(sentence)
            cleaned_sentences.append(sentence)
    return '. '.join(cleaned_sentences)

def chat_with_model(prompt):
    global history
    try:
        # Construct the conversational context
        conversation = "\n".join([f"User: {u}\nAI: {a}" for u, a in history])
        full_context = f"{SYSTEM_PROMPT}\n\n{conversation}\nUser: {prompt}\nAI:"

        # Tokenize the input prompt
        inputs = tokenizer_finetuned(full_context, return_tensors="pt", padding=True, truncation=True)
        inputs = inputs.to(model_finetuned.device)

        # Generate response using the fine-tuned model
        output = model_finetuned.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=200,
            num_return_sequences=1,
            temperature=0.4,
            top_p=0.8,
            repetition_penalty=1.2,
            do_sample=True,
            pad_token_id=tokenizer_finetuned.eos_token_id
        )

        # Decode the response and clean the output
        response = tokenizer_finetuned.decode(output[0], skip_special_tokens=True).strip()
        response_cleaned = clean_repeated_phrases(response.split("\nAI:")[-1].strip())

        # Update history with user input and AI response
        history.append((prompt, response_cleaned))
        return history
    except Exception as e:
        error_message = f"Error: {str(e)}"
        history.append((prompt, error_message))
        return history

# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Chat with Empathetic AI")

    with gr.Row():
        chat_window = gr.Chatbot(label="Chat History")

    with gr.Row():
        user_input = gr.Textbox(label="Your Prompt", placeholder="Type your message here...")

    with gr.Row():
        submit_button = gr.Button("Submit")

    def update_chat_window(prompt):
        updated_history = chat_with_model(prompt)
        return updated_history

    # Define button click behavior
    submit_button.click(fn=update_chat_window, inputs=user_input, outputs=chat_window)

# Launch the application
demo.launch()




Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e0b9e091c1df864b1a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Examples:

User: Given the evaluative word: "distrusted," describe the feelings and needs associated with it.

User: Given the evaluative word: "wronged," describe the feelings and needs associated with it.

User: Given the evaluative word: "invisible," describe the feelings and needs associated with it.

User: Given the evaluative word: "caged/boxed in," describe the feelings and needs associated with it.

User: Given the evaluative word: "coerced," describe the feelings and needs associated with it.

Your primary goal is to promote emotional understanding and support the user in exploring their feelings and needs constructively.


In [19]:
from huggingface_hub import HfApi, upload_folder

def upload_finetuned_model_to_huggingface(output_dir, model_name, token, private=True):
    """
    Upload a fine-tuned model to Hugging Face Hub.

    Parameters:
    - output_dir (str): Directory containing the fine-tuned model and tokenizer.
    - model_name (str): Name of the model repository on Hugging Face Hub.
    - token (str): Your Hugging Face token.
    - private (bool): Whether to make the repository private (default: True).
    """
    # Authenticate with Hugging Face
    api = HfApi()
    user = api.whoami(token=token)
    user_name=user['name']
    user_name="ai-medical"
    repo_id = f"{user_name}/{model_name}"

    # Create a new model repository (if it doesn't exist)
    print(f"Creating repository '{repo_id}'...")
    api.create_repo(repo_id=repo_id, token=token, private=private, exist_ok=True)

    # Upload model files to the repository
    print(f"Uploading model files from '{output_dir}' to Hugging Face Hub...")
    upload_folder(
        folder_path=output_dir,
        repo_id=repo_id,
        repo_type="model",
        token=token,
        commit_message="Upload fine-tuned model"
    )
    print(f"Model successfully uploaded to Hugging Face Hub: https://huggingface.co/{repo_id}")

# Example usage:
# output_dir = "/path/to/fine_tuned_model"
# model_name = "fine_tuned_llama"
# token = "your_huggingface_token"
# upload_finetuned_model_to_huggingface(output_dir, model_name, token)


In [20]:
#OUTPUT_DIR = "/content/drive/MyDrive/models/finetuned/meta-llama_Meta-Llama-3.1-8B-Instruct" if "google.colab" in str(get_ipython()) else "./fine_tuned_llama"
model_name = "fine_tuned_deepseek_v1_empathy"  # Desired name for the repository on Hugging Face
hf_token = HF_TOKEN  # Your Hugging Face token


In [24]:
HF=""

In [25]:
# Upload the fine-tuned model
upload_finetuned_model_to_huggingface(OUTPUT_DIR, model_name, HF)

Creating repository 'ai-medical/fine_tuned_deepseek_v1_empathy'...
Uploading model files from '/content/drive/MyDrive/models/finetuned/deepseek-ai_DeepSeek-R1-Distill-Llama-8B' to Hugging Face Hub...


scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

Upload 9 LFS files:   0%|          | 0/9 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/13.6M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/13.6M [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/27.4M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.37k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.37k [00:00<?, ?B/s]

Model successfully uploaded to Hugging Face Hub: https://huggingface.co/ai-medical/fine_tuned_deepseek_v1_empathy
