In [1]:
!pip uninstall -y pyarrow datasets
!pip install -U pyarrow datasets
!pip install -q -U peft
!pip install -q -U trl
!pip install -q -U bitsandbytes
!pip install -U transformers accelerate

Found existing installation: pyarrow 18.1.0
Uninstalling pyarrow-18.1.0:
  Successfully uninstalled pyarrow-18.1.0
Found existing installation: datasets 4.0.0
Uninstalling datasets-4.0.0:
  Successfully uninstalled datasets-4.0.0
Collecting pyarrow
  Downloading pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.0 kB)
Collecting datasets
  Downloading datasets-4.5.0-py3-none-any.whl.metadata (19 kB)
Downloading pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl (47.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.6/47.6 MB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading datasets-4.5.0-py3-none-any.whl (515 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m515.2/515.2 kB[0m [31m25.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyarrow, datasets
Successfully installed datasets-4.5.0 pyarrow-23.0.0
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m530.9/530.9 kB[0m [31m28.9 MB/s[0m e

In [None]:
import os
os.kill(os.getpid(), 9)

# Synthetic Data Generation and Prompt Construction

In [1]:
import numpy as np
import pandas as pd
import random

# =========================================================
# Reproducibility
# =========================================================
np.random.seed(42)
random.seed(42)

# =========================================================
# Configuration
# =========================================================
N_STUDENTS = 30
SAMPLES_PER_STUDENT = 20
SUBJECTS = ["English", "Health", "Language", "Maths", "Politics", "Science"]
LABELS = ["engagement", "neutral", "disengagement"]

# =========================================================
# Temporal generator
# =========================================================
def generate_temporal_values(mean, std, low, high):
    values = np.random.normal(mean, std, 10)
    return np.clip(values, low, high)

# =========================================================
# Label-dependent parameter settings
# =========================================================
PARAMS = {
    "engagement": {
        "ACC": (38, 10),
        "HR": (88, 10),
        "EDA": (0.45, 0.10),
        "TEMP": (30.3, 0.25)
    },
    "neutral": {
        "ACC": (28, 14),
        "HR": (80, 12),
        "EDA": (0.55, 0.12),
        "TEMP": (30.2, 0.30)
    },
    "disengagement": {
        "ACC": (18, 18),
        "HR": (72, 12),
        "EDA": (0.75, 0.15),  # ← Disengagement has higher EDA
        "TEMP": (30.1, 0.35)
    }
}

# =========================================================
# Data generation
# =========================================================
statistical_rows = []

for pid in range(1, N_STUDENTS + 1):
    gender = random.choice(["Male", "Female"])
    age = random.randint(15, 18)

    for _ in range(SAMPLES_PER_STUDENT):
        class_id = random.randint(100, 160)
        subject = random.choice(SUBJECTS)
        label = random.choice(LABELS)

        key = f"{class_id}_{pid}"
        p = PARAMS[label]

        # Temporal values (10-min window)
        ACC = generate_temporal_values(p["ACC"][0], p["ACC"][1], -50, 80)
        HR  = generate_temporal_values(p["HR"][0],  p["HR"][1],  60, 120)
        EDA = generate_temporal_values(p["EDA"][0], p["EDA"][1], 0.2, 1.2)
        TEMP = generate_temporal_values(p["TEMP"][0], p["TEMP"][1], 29.5, 31.0)

        statistical_rows.append({
            "Key": key,
            "ACC_mean": ACC.mean(),
            "ACC_sd": ACC.std(),
            "ACC_max": ACC.max(),
            "ACC_min": ACC.min(),
            "HR_mean": HR.mean(),
            "HR_sd": HR.std(),
            "HR_max": HR.max(),
            "HR_min": HR.min(),
            "EDA_mean": EDA.mean(),
            "EDA_sd": EDA.std(),
            "EDA_max": EDA.max(),
            "EDA_min": EDA.min(),
            "TEMP_mean": TEMP.mean(),
            "TEMP_sd": TEMP.std(),
            "TEMP_max": TEMP.max(),
            "TEMP_min": TEMP.min(),
            "Class_id": class_id,
            "Subject": subject,
            "Pid": pid,
            "Gender": gender,
            "Age": age,
            "label": label
        })

# =========================================================
# Save dataset
# =========================================================
df = pd.DataFrame(statistical_rows)
df.to_csv("data_statistical.csv", index=False)

print("data_statistical.csv generated successfully.")


data_statistical.csv generated successfully.


In [2]:
import pandas as pd

# =========================================================
# Load statistical feature dataset
# =========================================================
df = pd.read_csv('data_statistical.csv')

# =========================================================
# Construct natural language prompts for LLM input
# =========================================================
df['text'] = df.apply(
    lambda row: (
        f"The following data were obtained from one {row['Gender']} student {row['Pid']} "
        f"of {row['Subject']} class {row['Class_id']} in any 10 minutes of 80. "
        f"The statistics (mean, standard deviation, maximum, and minimum values) "
        f"of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), "
        f"and skin temperature (TEMP) per minute are as follows:\n"
        f"HR: mean:{row['HR_mean']}, sd:{row['HR_sd']}, max:{row['HR_max']}, min:{row['HR_min']}\n"
        f"ACC: mean:{row['ACC_mean']}, sd:{row['ACC_sd']}, max:{row['ACC_max']}, min:{row['ACC_min']}\n"
        f"EDA: mean:{row['EDA_mean']}, sd:{row['EDA_sd']}, max:{row['EDA_max']}, min:{row['EDA_min']}\n"
        f"TEMP: mean:{row['TEMP_mean']}, sd:{row['TEMP_sd']}, max:{row['TEMP_max']}, min:{row['TEMP_min']}"
    ),
    axis=1
)

# =========================================================
# Select columns required for LLM-based classification
# =========================================================
df_llm = df[['label', 'text', 'Key']]
df_llm.to_csv('data_statistical_llm.csv', index=False)

df_llm.head()


Unnamed: 0,label,text,Key
0,engagement,The following data were obtained from one Male...,147_1
1,disengagement,The following data were obtained from one Male...,114_1
2,disengagement,The following data were obtained from one Male...,106_1
3,engagement,The following data were obtained from one Male...,157_1
4,engagement,The following data were obtained from one Male...,137_1


# LLM-Based Classification of Student Engagement from Physiological Features

In [1]:
# Suppress unnecessary warnings for cleaner output
import warnings
warnings.filterwarnings("ignore")
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig
import torch
import numpy as np
import pandas as pd
import os
# from tqdm import tqdm
import bitsandbytes as bnb
# import torch
# import torch.nn as nn
# import transformers
from datasets import Dataset
# from trl import SFTTrainer
# from transformers import (
#     AutoModelForCausalLM,
#     AutoTokenizer,
#     BitsAndBytesConfig,
#     TrainingArguments,
#     pipeline,
#     logging
# )
# from sklearn.metrics import (
#     accuracy_score,
#     classification_report,
#     confusion_matrix
# )
from sklearn.model_selection import train_test_split


# =========================
# Load dataset
# =========================
filename = "data_statistical_llm.csv"
df = pd.read_csv(filename, encoding="utf-8", encoding_errors="replace")

# Name of the target label used in prompts
label_name = "engagement level"

# Containers for train / test / evaluation splits
X_train = []
X_test = []
X_eval = []


# =========================
# Stratified split by Key to avoid data leakage
# Each Key appears in only one split
# =========================
for sentiment in ["engagement", "disengagement", "neutral"]:
    subset = df[df.label == sentiment]
    unique_keys = subset["Key"].unique()

    # Randomly shuffle and split keys
    train_keys, temp_keys = train_test_split(
        unique_keys, train_size=0.5, random_state=42
    )
    test_keys, eval_keys = train_test_split(
        temp_keys, test_size=0.5, random_state=42
    )

    # Select data based on Key
    train_data = subset[subset["Key"].isin(train_keys)]
    test_data = subset[subset["Key"].isin(test_keys)]
    eval_data = subset[subset["Key"].isin(eval_keys)]

    X_train.append(train_data)
    X_test.append(test_data)
    X_eval.append(eval_data)

# Concatenate and shuffle training data
X_train = pd.concat(X_train).sample(frac=1, random_state=10).reset_index(drop=True)
X_test = pd.concat(X_test).reset_index(drop=True)
X_eval = pd.concat(X_eval).reset_index(drop=True)


# =========================
# Prompt generation functions
# =========================
def generate_prompt(data_point):
    """
    Generate a supervised fine-tuning prompt including the correct label.
    """
    return f"""The {label_name} of the following phrase: '{data_point["text"]}' is
            \n\n Engagement
            \n Disengagement
            \n Neutral
            \n Cannot be determined
            \n\nSolution: The correct option is {data_point["label"]}""".strip()


def generate_test_prompt(data_point):
    """
    Generate a prompt for inference without revealing the correct label.
    """
    return f"""The {label_name} of the following phrase: '{data_point["text"]}' is
            \n\n Engagement
            \n Disengagement
            \n Neutral
            \n Cannot be determined
            \n\nSolution: The correct option is""".strip()


# Apply prompts
X_train = pd.DataFrame(X_train.apply(generate_prompt, axis=1), columns=["text"])
X_eval = pd.DataFrame(X_eval.apply(generate_prompt, axis=1), columns=["text"])

# Ground truth labels for evaluation
y_true = X_test["label"]

X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["text"])


# =========================
# Convert to HuggingFace Dataset format
# =========================
train_data = Dataset.from_pandas(X_train)
test_data = Dataset.from_pandas(X_test)
eval_data = Dataset.from_pandas(X_eval)

print(train_data)
print(test_data)
print(eval_data)


# =========================
# Evaluation function
# =========================
def evaluate(y_true, y_pred):
    """
    Evaluate predictions using accuracy, per-class accuracy,
    classification report, and confusion matrix.
    """
    labels = ['neutral', 'engagement', 'disengagement']
    mapping = {'neutral': 0, 'engagement': 1, 'disengagement': 2, 'none': 0}

    def map_func(x):
        return mapping.get(x, 1)

    y_true = np.vectorize(map_func)(y_true)
    y_pred = np.vectorize(map_func)(y_pred)

    # Overall accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')

    # Per-class accuracy
    for label in set(y_true):
        indices = [i for i in range(len(y_true)) if y_true[i] == label]
        acc = accuracy_score(
            [y_true[i] for i in indices],
            [y_pred[i] for i in indices]
        )
        print(f'Accuracy for label {label}: {acc:.3f}')

    # Detailed classification report
    print('\nClassification Report:')
    print(classification_report(y_true, y_pred, digits=4))

    # Confusion matrix
    print('\nConfusion Matrix:')
    print(confusion_matrix(y_true, y_pred, labels=[0, 1, 2]))


# =========================
# Prediction function using text generation
# =========================
def predict(X_test, model, tokenizer):
    """
    Generate predictions by prompting the LLM and parsing its output.
    """
    y_pred = []

    for i in tqdm(range(len(X_test))):
        prompt = X_test.iloc[i]["text"]

        pipe = pipeline(
            task="text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=10,
            temperature=0.0,
            use_cache=False,
            do_sample=False
        )

        result = pipe(prompt, pad_token_id=pipe.tokenizer.eos_token_id)
        print(result)
        # Extract the predicted label from generated text
        answer = result[0]['generated_text'].split(
            "The correct option is"
        )[-1].strip().lower()

        if "disengagement" in answer:
            y_pred.append("disengagement")
        elif "engagement" in answer:
            y_pred.append("engagement")
        elif "neutral" in answer:
            y_pred.append("neutral")
        else:
            y_pred.append("none")

    return y_pred


# =========================
# Load quantized Phi-3 model
# =========================
model_name = "microsoft/Phi-3-mini-128k-instruct"


# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
#     bnb_4bit_use_double_quant=True,
# )

# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="cuda",
#     torch_dtype=torch.float16,   # ← 明示！
#     trust_remote_code=True,
#     quantization_config=bnb_config,
# )

compute_dtype = getattr(torch, "float16")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)


model.config.use_cache = False
model.config.pretraining_tp = 1

# =========================
# Tokenizer setup
# =========================
max_seq_length = 2048

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    max_seq_length=max_seq_length,
)

tokenizer.pad_token = tokenizer.eos_token


Dataset({
    features: ['text'],
    num_rows: 300
})
Dataset({
    features: ['text'],
    num_rows: 152
})
Dataset({
    features: ['text'],
    num_rows: 148
})


This model config has set a `rope_parameters['original_max_position_embeddings']` field, to be used together with `max_position_embeddings` to determine a scaling factor. Please set the `factor` field of `rope_parameters`with this ratio instead -- we recommend the use of this field over `original_max_position_embeddings`, as it is compatible with most model architectures.


Loading weights:   0%|          | 0/195 [00:00<?, ?it/s]

# LoRA-Based Fine-Tuning and Evaluation of an LLM for Engagement Prediction

In [15]:
import peft, trl
print(peft.__version__)
print(trl.__version__)

0.18.1
0.27.2


In [None]:
from peft import LoraConfig, PeftConfig, get_peft_model
from trl import SFTTrainer
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          pipeline,
                          logging)
# from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
import bitsandbytes as bnb
from tqdm import tqdm



from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix
)

# =========================
# LoRA configuration
# =========================
peft_config = LoraConfig(
    r=16,                   # Rank of LoRA decomposition (default: 8)
    lora_alpha=16,          # LoRA scaling factor (default: 32)
    target_modules="all-linear",  # Target modules to which LoRA is applied
    lora_dropout=0.00,      # Dropout rate for LoRA layers
    bias="none",            # Bias handling ("none", "all", "lora_only")
    task_type="CAUSAL_LM",  # Task type (e.g., CAUSAL_LM, SEQ_2_SEQ_LM, TOKEN_CLS)
)


# =========================
# Training configuration
# =========================
training_arguments = TrainingArguments(
    output_dir="logs",
    num_train_epochs=2,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,  # Effective batch size via gradient accumulation
    optim="adamw_torch",
    save_steps=0,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    eval_strategy="epoch"
)


# =========================
# Initialize Supervised Fine-Tuning Trainer
# =========================
trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
    # dataset_text_field="text",
    # tokenizer=tokenizer,
    # max_seq_length=max_seq_length,
    args=training_arguments,
    # packing=False,
)




# =========================
# Train the model
# =========================
trainer.train()


# =========================
# Save the fine-tuned model
# =========================
trainer.model.save_pretrained("trained-model-20250204")


# =========================
# Run inference on the test set
# =========================
y_pred = predict(X_test, trainer.model, tokenizer)


# =========================
# Evaluate prediction performance
# =========================
evaluate(y_true, y_pred)


# =========================
# Save prediction results
# =========================
import pandas as pd

# Create a DataFrame with ground truth and predictions
df = pd.DataFrame({
    'y_true': y_true,
    'y_pred': y_pred
})

# Reset indices to align rows correctly
X_test = X_test.reset_index(drop=True)
df = df.reset_index(drop=True)

# Concatenate input prompts and prediction results
result_df = pd.concat([X_test, df], axis=1)

# Save results to CSV
result_df.to_csv("output.csv", index=False)

result_df


warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.


Adding EOS to train dataset:   0%|          | 0/300 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/300 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/300 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/148 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/148 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/148 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,7.113607,2.300419


In [14]:
# =========================
# Prediction function using text generation
# =========================
def predict(X_test, model, tokenizer):
    """
    Generate predictions by prompting the LLM and parsing its output.
    """
    y_pred = []

    for i in tqdm(range(len(X_test))):
        prompt = X_test.iloc[i]["text"]

        pipe = pipeline(
            task="text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=3,
            temperature=0.6,
            use_cache=False,
            do_sample=True
        )
        print(prompt)
        result = pipe(prompt, pad_token_id=pipe.tokenizer.eos_token_id)
        print(result[0]['generated_text'])
        # Extract the predicted label from generated text
        answer = result[0]['generated_text'].split(
            "The correct option is"
        )[-1].strip().lower()

        if "disengagement" in answer:
            y_pred.append("disengagement")
        elif "engagement" in answer:
            y_pred.append("engagement")
        elif "neutral" in answer:
            y_pred.append("neutral")
        else:
            y_pred.append("none")

    return y_pred

# =========================
# Run inference on the test set
# =========================
y_pred = predict(X_test, trainer.model, tokenizer)

  0%|          | 0/152 [00:00<?, ?it/s]Both `max_new_tokens` (=3) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The engagement level of the following phrase: 'The following data were obtained from one Male student 1 of Language class 147 in any 10 minutes of 80. The statistics (mean, standard deviation, maximum, and minimum values) of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), and skin temperature (TEMP) per minute are as follows:
HR: mean:80.09341765417585, sd:7.165639736308028, max:91.14247332595274, min:68.86719755342202
ACC: mean:42.48061111698756, sd:6.85905930294495, max:53.79212815507391, min:33.30525614065048
EDA: mean:0.4278156435330545, sd:0.0772131251784213, max:0.5965648768921554, min:0.3075251813786543
TEMP: mean:30.22247333347034, sd:0.2716162427618175, max:30.763069546127237, min:29.810082469030057' is
            

 Engagement
            
 Disengagement
            
 Neutral
            
 Cannot be determined
            

Solution: The correct option is


  1%|          | 1/152 [00:01<02:34,  1.02s/it]Both `max_new_tokens` (=3) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The engagement level of the following phrase: 'The following data were obtained from one Male student 1 of Language class 147 in any 10 minutes of 80. The statistics (mean, standard deviation, maximum, and minimum values) of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), and skin temperature (TEMP) per minute are as follows:
HR: mean:80.09341765417585, sd:7.165639736308028, max:91.14247332595274, min:68.86719755342202
ACC: mean:42.48061111698756, sd:6.85905930294495, max:53.79212815507391, min:33.30525614065048
EDA: mean:0.4278156435330545, sd:0.0772131251784213, max:0.5965648768921554, min:0.3075251813786543
TEMP: mean:30.22247333347034, sd:0.2716162427618175, max:30.763069546127237, min:29.810082469030057' is
            

 Engagement
            
 Disengagement
            
 Neutral
            
 Cannot be determined
            

Solution: The correct option is537
The engagement level of the following phrase: 'The following data were obtained from one Male stude

  1%|▏         | 2/152 [00:01<02:28,  1.01it/s]Both `max_new_tokens` (=3) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The engagement level of the following phrase: 'The following data were obtained from one Male student 1 of Maths class 137 in any 10 minutes of 80. The statistics (mean, standard deviation, maximum, and minimum values) of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), and skin temperature (TEMP) per minute are as follows:
HR: mean:91.58464790156452, sd:10.313692573505506, max:115.2016916658962, min:79.10485570374476
ACC: mean:41.24620211263972, sd:9.236887256071196, max:56.96792982653948, min:28.253183297726785
EDA: mean:0.4270939518645944, sd:0.07603876627216, max:0.5214000494092093, min:0.2985152775314135
TEMP: mean:30.25304608009036, sd:0.1671603699814124, max:30.514099698580868, min:29.988565305322005' is
            

 Engagement
            
 Disengagement
            
 Neutral
            
 Cannot be determined
            

Solution: The correct option is

2
The engagement level of the following phrase: 'The following data were obtained from one Male student

  2%|▏         | 3/152 [00:02<02:23,  1.04it/s]Both `max_new_tokens` (=3) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The engagement level of the following phrase: 'The following data were obtained from one Male student 1 of Health class 117 in any 10 minutes of 80. The statistics (mean, standard deviation, maximum, and minimum values) of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), and skin temperature (TEMP) per minute are as follows:
HR: mean:84.2799709917904, sd:7.691732547122824, max:98.0629280921444, min:71.3847993773104
ACC: mean:31.98973694909413, sd:8.724159562159793, max:43.70598668593159, min:19.95117899335481
EDA: mean:0.4355000572817598, sd:0.1083699161061483, max:0.59415686206579, min:0.202835549987271
TEMP: mean:30.308303610066517, sd:0.1247610502500044, max:30.591945515414952, min:30.17809844398188' is
            

 Engagement
            
 Disengagement
            
 Neutral
            
 Cannot be determined
            

Solution: The correct option is



The engagement level of the following phrase: 'The following data were obtained from one Female student 2 

  3%|▎         | 4/152 [00:03<02:23,  1.03it/s]Both `max_new_tokens` (=3) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The engagement level of the following phrase: 'The following data were obtained from one Female student 2 of Language class 114 in any 10 minutes of 80. The statistics (mean, standard deviation, maximum, and minimum values) of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), and skin temperature (TEMP) per minute are as follows:
HR: mean:93.96376427260424, sd:6.62012516780421, max:101.82158991037528, min:79.83064329012764
ACC: mean:45.00337840868062, sd:9.49513810950633, max:63.26932425873622, min:32.69131227079679
EDA: mean:0.4871520312775755, sd:0.0899262285072419, max:0.6663254723305461, min:0.3140143859020081
TEMP: mean:30.361602426519728, sd:0.1326272319867664, max:30.69436331994087, min:30.18839159636237' is
            

 Engagement
            
 Disengagement
            
 Neutral
            
 Cannot be determined
            

Solution: The correct option is01

The engagement level of the following phrase: 'The following data were obtained from one Male stud

  3%|▎         | 5/152 [00:04<02:24,  1.02it/s]Both `max_new_tokens` (=3) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The engagement level of the following phrase: 'The following data were obtained from one Male student 4 of Language class 156 in any 10 minutes of 80. The statistics (mean, standard deviation, maximum, and minimum values) of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), and skin temperature (TEMP) per minute are as follows:
HR: mean:90.87299525108048, sd:8.434023502311543, max:103.19283923641768, min:77.08066099604687
ACC: mean:35.35896308162607, sd:9.218309720640177, max:56.0930634973311, min:23.94432980821358
EDA: mean:0.4246786602099028, sd:0.0884117429192824, max:0.5348174212666669, min:0.3132862193539754
TEMP: mean:30.181310450983595, sd:0.2007316876314514, max:30.445506641771185, min:29.80934357708185' is
            

 Engagement
            
 Disengagement
            
 Neutral
            
 Cannot be determined
            

Solution: The correct option is7:

The engagement level of the following phrase: 'The following data were obtained from one Male stud

  4%|▍         | 6/152 [00:06<02:29,  1.03s/it]Both `max_new_tokens` (=3) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The engagement level of the following phrase: 'The following data were obtained from one Male student 4 of English class 160 in any 10 minutes of 80. The statistics (mean, standard deviation, maximum, and minimum values) of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), and skin temperature (TEMP) per minute are as follows:
HR: mean:94.4703819630082, sd:9.414453321531647, max:107.8706111169944, min:79.20185646497946
ACC: mean:42.96259829030597, sd:11.130842903196818, max:57.625871905307946, min:27.410924129433997
EDA: mean:0.429553607026949, sd:0.0905218635016423, max:0.5058139514518506, min:0.2
TEMP: mean:30.32787709762952, sd:0.2258321124808102, max:30.738868924578878, min:30.017346166265614' is
            

 Engagement
            
 Disengagement
            
 Neutral
            
 Cannot be determined
            

Solution: The correct option is
58
The engagement level of the following phrase: 'The following data were obtained from one Male student 4 of Politi

  5%|▍         | 7/152 [00:07<02:32,  1.05s/it]Both `max_new_tokens` (=3) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The engagement level of the following phrase: 'The following data were obtained from one Male student 4 of Politics class 148 in any 10 minutes of 80. The statistics (mean, standard deviation, maximum, and minimum values) of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), and skin temperature (TEMP) per minute are as follows:
HR: mean:90.62891822765678, sd:10.542156629864778, max:106.325570434792, min:68.72327420654389
ACC: mean:37.80309722214048, sd:11.583354914819989, max:64.93033663980496, min:20.237536665335483
EDA: mean:0.4753371857529263, sd:0.0873609341137984, max:0.6204105516255491, min:0.3094810153196425
TEMP: mean:30.3926791616014, sd:0.3188661483958663, max:30.854695058362037, min:29.87403210329332' is
            

 Engagement
            
 Disengagement
            
 Neutral
            
 Cannot be determined
            

Solution: The correct option is722
The engagement level of the following phrase: 'The following data were obtained from one Male stud

  5%|▌         | 8/152 [00:08<02:27,  1.02s/it]Both `max_new_tokens` (=3) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The engagement level of the following phrase: 'The following data were obtained from one Male student 4 of Politics class 160 in any 10 minutes of 80. The statistics (mean, standard deviation, maximum, and minimum values) of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), and skin temperature (TEMP) per minute are as follows:
HR: mean:90.34302811730804, sd:15.042756820136438, max:120.0, min:66.694335201928
ACC: mean:31.10471209807856, sd:12.50126596865335, max:51.88337778208448, min:10.524951572747652
EDA: mean:0.427842053144262, sd:0.0788844352658778, max:0.5757149216862573, min:0.2982826303328321
TEMP: mean:30.406780771453725, sd:0.1896307645423296, max:30.762152208324103, min:30.117260931071133' is
            

 Engagement
            
 Disengagement
            
 Neutral
            
 Cannot be determined
            

Solution: The correct option is692
The engagement level of the following phrase: 'The following data were obtained from one Male student 4 of Eng

  6%|▌         | 9/152 [00:09<02:23,  1.00s/it]Both `max_new_tokens` (=3) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The engagement level of the following phrase: 'The following data were obtained from one Male student 4 of English class 128 in any 10 minutes of 80. The statistics (mean, standard deviation, maximum, and minimum values) of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), and skin temperature (TEMP) per minute are as follows:
HR: mean:84.88949707802594, sd:8.845620035289143, max:97.75312458477978, min:73.33214809384323
ACC: mean:33.04505783798656, sd:5.978818242195819, max:42.55904195170703, min:20.48171193248281
EDA: mean:0.4803596449481327, sd:0.1019350715150625, max:0.6329620201716087, min:0.3303211353573688
TEMP: mean:30.23615378500049, sd:0.3150923651525177, max:30.87041295913471, min:29.82921244798536' is
            

 Engagement
            
 Disengagement
            
 Neutral
            
 Cannot be determined
            

Solution: The correct option is

2
The engagement level of the following phrase: 'The following data were obtained from one Male student

  7%|▋         | 10/152 [00:10<02:20,  1.01it/s]Both `max_new_tokens` (=3) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The engagement level of the following phrase: 'The following data were obtained from one Male student 5 of English class 114 in any 10 minutes of 80. The statistics (mean, standard deviation, maximum, and minimum values) of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), and skin temperature (TEMP) per minute are as follows:
HR: mean:86.97822769393063, sd:13.70514440146313, max:104.76393027208204, min:63.1219134346492
ACC: mean:34.37676725988452, sd:10.080135239880333, max:53.03419828428999, min:16.4761645539082
EDA: mean:0.4840823366713415, sd:0.082375608142342, max:0.6261188124867374, min:0.3474247316359722
TEMP: mean:30.36998901910088, sd:0.2969584463136865, max:30.880010180504318, min:30.0045314686448' is
            

 Engagement
            
 Disengagement
            
 Neutral
            
 Cannot be determined
            

Solution: The correct option is,
1
The engagement level of the following phrase: 'The following data were obtained from one Male student 

  7%|▋         | 11/152 [00:10<02:17,  1.02it/s]Both `max_new_tokens` (=3) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The engagement level of the following phrase: 'The following data were obtained from one Male student 5 of English class 114 in any 10 minutes of 80. The statistics (mean, standard deviation, maximum, and minimum values) of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), and skin temperature (TEMP) per minute are as follows:
HR: mean:85.81204489634484, sd:10.955429585338862, max:106.07196643637886, min:71.17341305877315
ACC: mean:33.43763843169445, sd:10.933764416741267, max:57.53259137526568, min:20.0610750213211
EDA: mean:0.5039974023398026, sd:0.0852300817101107, max:0.6665056067806603, min:0.3683782755458035
TEMP: mean:30.347971237818296, sd:0.3199846547495235, max:30.8678624363557, min:29.905750879452253' is
            

 Engagement
            
 Disengagement
            
 Neutral
            
 Cannot be determined
            

Solution: The correct option is
65
The engagement level of the following phrase: 'The following data were obtained from one Female st

  8%|▊         | 12/152 [00:11<02:15,  1.03it/s]Both `max_new_tokens` (=3) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The engagement level of the following phrase: 'The following data were obtained from one Female student 6 of Maths class 113 in any 10 minutes of 80. The statistics (mean, standard deviation, maximum, and minimum values) of heart rate (HR), acceleration (ACC), electrodermal activity (EDA), and skin temperature (TEMP) per minute are as follows:
HR: mean:83.35921461218322, sd:8.397013363860847, max:100.06608154863916, min:72.72832143410352
ACC: mean:41.98073290928864, sd:10.445176290550542, max:58.93197932209945, min:22.881165804367864
EDA: mean:0.3947527580479025, sd:0.0767171970938196, max:0.5231955502509846, min:0.2902127865505162
TEMP: mean:30.289794051438825, sd:0.2133736755752162, max:30.581977248313397, min:29.86357314178509' is
            

 Engagement
            
 Disengagement
            
 Neutral
            
 Cannot be determined
            

Solution: The correct option is



The engagement level of the following phrase: 'The following data were obtained from one Female 

  8%|▊         | 12/152 [00:12<02:29,  1.07s/it]


KeyboardInterrupt: 

In [23]:
# =========================================
# Extract original feature information from text
# =========================================
import re
import pandas as pd
import os
import glob

# Reload the saved CSV file
output_path = "output.csv"
df = pd.read_csv(output_path)
df

# Define regex pattern for feature extraction
pattern = re.compile(
    r"(?P<Gender>Male|Female) student (?P<Pid>\d+) of (?P<Subject>\w+) class (?P<Class_id>\d+).*?"
    r"HR: mean:(?P<HR_mean>[\d\.\-]+), sd:(?P<HR_sd>[\d\.\-]+), max:(?P<HR_max>[\d\.\-]+), min:(?P<HR_min>[\d\.\-]+).*?"
    r"ACC: mean:(?P<ACC_mean>[\d\.\-]+), sd:(?P<ACC_sd>[\d\.\-]+), max:(?P<ACC_max>[\d\.\-]+), min:(?P<ACC_min>[\d\.\-]+).*?"
    r"EDA: mean:(?P<EDA_mean>[\d\.\-]+), sd:(?P<EDA_sd>[\d\.\-]+), max:(?P<EDA_max>[\d\.\-]+), min:(?P<EDA_min>[\d\.\-]+).*?"
    r"TEMP: mean:(?P<TEMP_mean>[\d\.\-]+), sd:(?P<TEMP_sd>[\d\.\-]+), max:(?P<TEMP_max>[\d\.\-]+), min:(?P<TEMP_min>[\d\.\-]+)",
    re.DOTALL
)

# Extract feature values from the text column
def extract_info(text):
    match = pattern.search(text)
    if match:
        return match.groupdict()
    else:
        return {key: None for key in [
            "Gender", "Pid", "Subject", "Class_id",
            "HR_mean", "HR_sd", "HR_max", "HR_min",
            "ACC_mean", "ACC_sd", "ACC_max", "ACC_min",
            "EDA_mean", "EDA_sd", "EDA_max", "EDA_min",
            "TEMP_mean", "TEMP_sd", "TEMP_max", "TEMP_min"
        ]}

# Create a new DataFrame with extracted features
df_new = df["text"].apply(extract_info).apply(pd.Series)

# Convert numerical columns to appropriate data types
num_cols = [col for col in df_new.columns if col not in ["Gender", "Subject"]]
for col in num_cols:
    df_new[col] = pd.to_numeric(df_new[col].astype(str), errors="coerce")

# Concatenate original predictions with extracted features
df_2 = pd.concat([df, df_new], axis=1)

df_2


Unnamed: 0,text,y_true,y_pred,Gender,Pid,Subject,Class_id,HR_mean,HR_sd,HR_max,...,ACC_max,ACC_min,EDA_mean,EDA_sd,EDA_max,EDA_min,TEMP_mean,TEMP_sd,TEMP_max,TEMP_min
0,The engagement level of the following phrase: ...,engagement,none,Male,1,Maths,137,91.584648,10.313693,115.201692,...,56.967930,28.253183,0.427094,0.076039,0.521400,0.298515,30.253046,0.167160,30.514100,29.988565
1,The engagement level of the following phrase: ...,engagement,none,Male,1,English,151,84.193853,6.654104,97.551423,...,44.696725,17.325579,0.464463,0.076244,0.646473,0.380027,30.386951,0.358989,31.000000,29.724520
2,The engagement level of the following phrase: ...,engagement,none,Male,1,Health,117,84.279971,7.691733,98.062928,...,43.705987,19.951179,0.435500,0.108370,0.594157,0.202836,30.308304,0.124761,30.591946,30.178098
3,The engagement level of the following phrase: ...,engagement,none,Male,1,Politics,129,91.343460,9.518220,104.654744,...,45.432641,20.898316,0.384859,0.070191,0.501766,0.265913,30.300148,0.296621,30.958096,29.962079
4,The engagement level of the following phrase: ...,engagement,none,Male,1,Maths,159,88.373415,7.734443,97.725544,...,63.600845,30.968236,0.460357,0.085885,0.558078,0.283904,30.307606,0.170803,30.563288,29.980813
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,The engagement level of the following phrase: ...,neutral,none,Male,32,Maths,105,81.389655,8.853922,91.546612,...,51.026943,0.016275,0.508131,0.086477,0.622517,0.349736,30.044320,0.217737,30.479246,29.704327
194,The engagement level of the following phrase: ...,neutral,none,Male,33,English,132,77.314942,7.892414,93.740018,...,52.641156,12.519076,0.564439,0.101832,0.691897,0.364296,30.139404,0.230883,30.603864,29.915274
195,The engagement level of the following phrase: ...,neutral,none,Female,34,Science,141,85.606772,10.227166,110.622081,...,44.300865,9.484469,0.595224,0.100745,0.843184,0.493474,30.087860,0.401906,30.747717,29.612012
196,The engagement level of the following phrase: ...,neutral,none,Female,38,Politics,125,78.932416,14.687006,100.615368,...,42.407756,6.262133,0.540766,0.106344,0.742356,0.351523,30.147036,0.345304,30.747467,29.632293


In [24]:
# =========================================
# SHAP analysis for Engagement vs. Disengagement
# =========================================
import pandas as pd
import lightgbm as lgb
import matplotlib.pyplot as plt
import shap
import numpy as np

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix
)

# Prepare data (exclude Neutral predictions)
df_3 = df_2[df_2["y_pred"] != "neutral"]

# Binary label mapping
mapping = {"engagement": 1, "disengagement": 0}

feature_cols = [
    "Gender", "Subject",
    "HR_mean", "HR_sd", "HR_max", "HR_min",
    "ACC_mean", "ACC_sd", "ACC_max", "ACC_min",
    "EDA_mean", "EDA_sd", "EDA_max", "EDA_min",
    "TEMP_mean", "TEMP_sd", "TEMP_max", "TEMP_min"
]

target_col = "y_pred"

df_3_train = df_3.copy()
df_3_train["y_pred"] = df_3_train["y_pred"].map(mapping)

X = df_3_train[feature_cols]
y = df_3_train[target_col]

# One-hot encoding for categorical variables
X = pd.get_dummies(X, columns=["Gender", "Subject"], prefix=["Gender", "Subject"])

# Train LightGBM model
train_data = lgb.Dataset(X, y)

params = {
    "objective": "binary",
    "metric": "binary_error",
    "boosting_type": "gbdt",
    "learning_rate": 0.1,
    "num_leaves": 31,
    "verbose": -1
}

model = lgb.train(params, train_data, num_boost_round=200)

# Model evaluation
y_pred_prob = model.predict(X)
y_pred_binary = (y_pred_prob >= 0.5).astype(int)

accuracy = accuracy_score(y, y_pred_binary)
precision = precision_score(y, y_pred_binary)
recall = recall_score(y, y_pred_binary)
f1 = f1_score(y, y_pred_binary)

print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1 Score:  {f1:.4f}")

print("\nConfusion Matrix:")
print(confusion_matrix(y, y_pred_binary))

print("\nClassification Report:")
print(classification_report(
    y, y_pred_binary,
    target_names=["Disengagement", "Engagement"]
))

# =========================================
# SHAP visualization (correct predictions only)
# =========================================
mask_correct = (y_pred_binary == y)
X_true = X[mask_correct]
y_true = y[mask_correct]

explainer = shap.TreeExplainer(model)
shap_values = explainer(X_true)

# SHAP summary plot
plt.figure(figsize=(10, 6))
shap.summary_plot(shap_values, X_true, show=True)

# SHAP bar plot
plt.figure(figsize=(10, 6))
shap.summary_plot(shap_values, X_true, plot_type="bar", show=True)


ValueError: Input y_true contains NaN.