In [1]:
import ast
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression
from sentence_transformers import SentenceTransformer
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df_sbert_classifier = pd.read_csv("cleaned_dataframe_for_sbert.csv")

grouped_all_responses_by_goal_sbert_classifier = pd.read_csv("grouped_all_responses_by_goal_for_sbert.csv")

human_llm_processed_responses_for_sbert = pd.read_csv("final_human_llm_processed_responses_for_sbert.csv")

In [3]:
# make all the rows of the "llm_responses" and "human_responses" columns into a list
human_responses = []
for index, row in human_llm_processed_responses_for_sbert.iterrows():
    human_responses.append(ast.literal_eval(row["human_responses"]))

llm_responses = []
for index, row in human_llm_processed_responses_for_sbert.iterrows():
    llm_responses.append(ast.literal_eval(row["llm_responses"]))

human_responses = [item for sublist in human_responses for item in sublist]
llm_responses = [item for sublist in llm_responses for item in sublist]


In [4]:
preferred_response_list = df_sbert_classifier["preferred_response"].tolist()
preferred_response_list = [x.strip() for x in preferred_response_list]
# merge the whole preferred_response_list into a single string

non_preferred_response_list = df_sbert_classifier["non_preferred_response"].tolist()
non_preferred_response_list = [x.strip() for x in non_preferred_response_list]

In [5]:
len(preferred_response_list), len(non_preferred_response_list)

(3120, 3120)

In [6]:
def load_data_for_sbert(responses_group1, responses_group2):
    # Create labels: 1 for preferred, 0 for non-preferred
    texts = responses_group1 + responses_group2
    labels = [1] * len(responses_group1) + [0] * len(responses_group2)
    
    return texts, labels

In [19]:
texts, labels = load_data_for_sbert(human_responses, llm_responses)

X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42, stratify=labels
)

print("Loading SBERT model...")
model_name = 'all-MiniLM-L6-v2'  # You can choose other models like 'paraphrase-distilroberta-base-v1'
sbert_model = SentenceTransformer(model_name)

print("Creating embeddings...")
X_train_embeddings = sbert_model.encode(X_train)
X_test_embeddings = sbert_model.encode(X_test)

# Step 4: Train a classifier on the embeddings
print("Training classifier...")
# We use multi_class='multinomial' for multiclass classification
classifier = LogisticRegression(multi_class='multinomial', max_iter=1000, solver='lbfgs')
classifier.fit(X_train_embeddings, y_train)

# Step 5: Evaluate the classifier
print("Evaluating classifier...")
y_pred = classifier.predict(X_test_embeddings)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["Non-preferred", "Preferred"]))

cv_scores = cross_val_score(classifier, X_train_embeddings, y_train, cv=5)
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean CV score: {cv_scores.mean():.4f}")



Loading SBERT model...
Creating embeddings...
Training classifier...




Evaluating classifier...
Accuracy: 0.8200

Classification Report:
               precision    recall  f1-score   support

Non-preferred       0.79      0.88      0.83       125
    Preferred       0.86      0.76      0.81       125

     accuracy                           0.82       250
    macro avg       0.82      0.82      0.82       250
 weighted avg       0.82      0.82      0.82       250





Cross-validation scores: [0.82       0.82       0.855      0.86432161 0.83919598]
Mean CV score: 0.8397


In [20]:
# Alternative classifiers
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

# Random Forest
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_embeddings, y_train)

In [21]:


# SVM
svm_classifier = SVC(probability=True, random_state=42)
svm_classifier.fit(X_train_embeddings, y_train)

In [22]:


# Neural Network
nn_classifier = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
nn_classifier.fit(X_train_embeddings, y_train)

In [12]:
# Step 5: Evaluate the classifier
print("Evaluating classifier...")
y_pred = rf_classifier.predict(X_test_embeddings)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("\nRandom Forest Classifier Report:")
print(classification_report(y_test, y_pred, target_names=["Non-preferred", "Preferred"]))

# Step 5: Evaluate the classifier
print("Evaluating classifier...")
y_pred = svm_classifier.predict(X_test_embeddings)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("\nSVM Classification Report:")
print(classification_report(y_test, y_pred, target_names=["Non-preferred", "Preferred"]))

# Step 5: Evaluate the classifier
print("Evaluating classifier...")
y_pred = nn_classifier.predict(X_test_embeddings)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("\nMLP Classification Report:")
print(classification_report(y_test, y_pred, target_names=["Non-preferred", "Preferred"]))



Evaluating classifier...
Accuracy: 0.5240

Random Forest Classifier Report:
               precision    recall  f1-score   support

Non-preferred       0.52      0.55      0.53       624
    Preferred       0.53      0.50      0.51       624

     accuracy                           0.52      1248
    macro avg       0.52      0.52      0.52      1248
 weighted avg       0.52      0.52      0.52      1248

Evaluating classifier...
Accuracy: 0.5264

SVM Classification Report:
               precision    recall  f1-score   support

Non-preferred       0.53      0.54      0.53       624
    Preferred       0.53      0.52      0.52       624

     accuracy                           0.53      1248
    macro avg       0.53      0.53      0.53      1248
 weighted avg       0.53      0.53      0.53      1248

Evaluating classifier...
Accuracy: 0.5272

MLP Classification Report:
               precision    recall  f1-score   support

Non-preferred       0.52      0.63      0.57       624
    Pre

In [94]:
grouped_all_responses_by_goal_sbert_classifier

Unnamed: 0,goal,all_raw_responses_list,claude_responses_list,gpt4o_responses_list,llama3_responses_list,LLM_responses_list,human_responses_list
0,both,"['While your presentation had potential, I not...","['While your presentation had potential, I not...","['Your presentation had potential, but there a...",[' I thought your presentation had some good p...,"['While your presentation had potential, I not...","['It could use some work.', ""Bob, let's go to ..."
1,default,"[""I think there's a lot of room for improvemen...","[""I think there's a lot of room for improvemen...",['Your presentation needs a lot of improvement...,[' I thought it could be improved in several a...,"[""I think there's a lot of room for improvemen...","['It was terrible.', ""The presentation wasn't ..."
2,informative,['Your presentation had significant issues wit...,['Your presentation had significant issues wit...,"[""Your presentation wasn't good and needs sign...",[' I thought your presentation was pretty weak...,['Your presentation had significant issues wit...,['You could really work on your public speakin...
3,social,"['You did a great job up there!', 'Your presen...","['You did a great job up there!', 'You did a n...",['Your presentation was interesting and had so...,[' I thought your presentation was very good.'...,"['You did a great job up there!', 'Your presen...","['It was not good.', ""I thought your presentat..."


In [13]:
both_goal_all_responses_list = ast.literal_eval(grouped_all_responses_by_goal_sbert_classifier['all_raw_responses_list'].iloc[0])
default_goal_all_responses_list = ast.literal_eval(grouped_all_responses_by_goal_sbert_classifier['all_raw_responses_list'].iloc[1])
informative_goal_all_responses_list = ast.literal_eval(grouped_all_responses_by_goal_sbert_classifier['all_raw_responses_list'].iloc[2])
social_goal_all_responses_list = ast.literal_eval(grouped_all_responses_by_goal_sbert_classifier['all_raw_responses_list'].iloc[3])


# for each item which is a string in above four lists, i want to do the strip() to remove whitespaces 
both_goal_all_responses_list = [x.strip() for x in both_goal_all_responses_list]
default_goal_all_responses_list = [x.strip() for x in default_goal_all_responses_list]
informative_goal_all_responses_list = [x.strip() for x in informative_goal_all_responses_list]
social_goal_all_responses_list = [x.strip() for x in social_goal_all_responses_list]

In [14]:
both_goal_human_responses_list = ast.literal_eval(grouped_all_responses_by_goal_sbert_classifier['human_responses_list'].iloc[0])
default_goal_human_responses_list = ast.literal_eval(grouped_all_responses_by_goal_sbert_classifier['human_responses_list'].iloc[1])
informative_goal_human_responses_list = ast.literal_eval(grouped_all_responses_by_goal_sbert_classifier['human_responses_list'].iloc[2])
social_goal_human_responses_list = ast.literal_eval(grouped_all_responses_by_goal_sbert_classifier['human_responses_list'].iloc[3])

both_goal_human_responses_list = [x.strip() for x in both_goal_human_responses_list]
default_goal_human_responses_list = [x.strip() for x in default_goal_human_responses_list]
informative_goal_human_responses_list = [x.strip() for x in informative_goal_human_responses_list]
social_goal_human_responses_list = [x.strip() for x in social_goal_human_responses_list]


both_goal_llm_responses_list = ast.literal_eval(grouped_all_responses_by_goal_sbert_classifier['LLM_responses_list'].iloc[0])
default_goal_llm_responses_list = ast.literal_eval(grouped_all_responses_by_goal_sbert_classifier['LLM_responses_list'].iloc[1])
informative_goal_llm_responses_list = ast.literal_eval(grouped_all_responses_by_goal_sbert_classifier['LLM_responses_list'].iloc[2])
social_goal_llm_responses_list = ast.literal_eval(grouped_all_responses_by_goal_sbert_classifier['LLM_responses_list'].iloc[3])

both_goal_llm_responses_list = [x.strip() for x in both_goal_llm_responses_list]
default_goal_llm_responses_list = [x.strip() for x in default_goal_llm_responses_list]
informative_goal_llm_responses_list = [x.strip() for x in informative_goal_llm_responses_list]
social_goal_llm_responses_list = [x.strip() for x in social_goal_llm_responses_list]

In [15]:
len(both_goal_human_responses_list), len(default_goal_all_responses_list), len(informative_goal_all_responses_list), len(social_goal_all_responses_list)

(156, 312, 312, 312)

In [18]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sentence_transformers import SentenceTransformer
import torch
from sklearn.preprocessing import LabelEncoder

# Step 1: Load your data - updated for multiple groups
def load_data(group1_responses, group2_responses, group3_responses, group4_responses):
    
    # Combine all texts and create corresponding labels
    texts = group1_responses + group2_responses + group3_responses + group4_responses
    
    # Create labels using the group names (or any meaningful label)
    labels = (["group1"] * len(group1_responses) + 
              ["group2"] * len(group2_responses) +
              ["group3"] * len(group3_responses) +
              ["group4"] * len(group4_responses))
    
    return texts, labels

# Step 2: Load data and split into train/test sets
texts, labels = load_data(both_goal_human_responses_list, default_goal_human_responses_list, informative_goal_human_responses_list, social_goal_human_responses_list)

# Convert categorical labels to numerical values
label_encoder = LabelEncoder()
numeric_labels = label_encoder.fit_transform(labels)

# Store label mapping for later reference
label_mapping = {i: label for i, label in enumerate(label_encoder.classes_)}
print("Label mapping:", label_mapping)

X_train, X_test, y_train, y_test = train_test_split(
    texts, numeric_labels, test_size=0.2, random_state=42, stratify=numeric_labels
)

# Step 3: Load SBERT model and create embeddings
print("Loading SBERT model...")
model_name = 'all-MiniLM-L6-v2'  # You can choose other models
sbert_model = SentenceTransformer(model_name)

# Generate embeddings for train and test sets
print("Creating embeddings...")
X_train_embeddings = sbert_model.encode(X_train)
X_test_embeddings = sbert_model.encode(X_test)

# Step 4: Train a classifier on the embeddings
print("Training classifier...")
# We use multi_class='multinomial' for multiclass classification
classifier = LogisticRegression(multi_class='multinomial', max_iter=1000, solver='lbfgs')
classifier.fit(X_train_embeddings, y_train)

# Step 5: Evaluate the classifier
print("Evaluating classifier...")
y_pred = classifier.predict(X_test_embeddings)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Show confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

# Print classification report with proper class names
target_names = [label_mapping[i] for i in range(len(label_mapping))]
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=target_names))


# Step 6: Cross-validation
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(classifier, X_train_embeddings, y_train, cv=5)
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean CV score: {cv_scores.mean():.4f}")


# do the random forest classifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
# Random Forest
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_embeddings, y_train)
# Step 5: Evaluate the classifier
print("Evaluating Random Forest classifier...")
y_pred = rf_classifier.predict(X_test_embeddings)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("\nRandom Forest Classifier Report:")
print(classification_report(y_test, y_pred, target_names=target_names))
# SVM
svm_classifier = SVC(probability=True, random_state=42)
svm_classifier.fit(X_train_embeddings, y_train)
# Step 5: Evaluate the classifier
print("Evaluating SVM classifier...")
y_pred = svm_classifier.predict(X_test_embeddings)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("\nSVM Classification Report:")
print(classification_report(y_test, y_pred, target_names=target_names))
# Neural Network
nn_classifier = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
nn_classifier.fit(X_train_embeddings, y_train)
# Step 5: Evaluate the classifier
print("Evaluating MLP classifier...")
y_pred = nn_classifier.predict(X_test_embeddings)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("\nMLP Classification Report:")
print(classification_report(y_test, y_pred, target_names=target_names))



Label mapping: {0: 'group1', 1: 'group2', 2: 'group3', 3: 'group4'}
Loading SBERT model...
Creating embeddings...
Training classifier...
Evaluating classifier...
Accuracy: 0.3920

Confusion Matrix:
[[12  6  6  7]
 [ 8  8  8  7]
 [ 5  6 16  5]
 [ 5 11  2 13]]

Classification Report:
              precision    recall  f1-score   support

      group1       0.40      0.39      0.39        31
      group2       0.26      0.26      0.26        31
      group3       0.50      0.50      0.50        32
      group4       0.41      0.42      0.41        31

    accuracy                           0.39       125
   macro avg       0.39      0.39      0.39       125
weighted avg       0.39      0.39      0.39       125





Cross-validation scores: [0.45       0.36       0.32       0.35       0.36363636]
Mean CV score: 0.3687
Evaluating Random Forest classifier...
Accuracy: 0.3360

Random Forest Classifier Report:
              precision    recall  f1-score   support

      group1       0.22      0.19      0.21        31
      group2       0.23      0.32      0.27        31
      group3       0.57      0.41      0.47        32
      group4       0.42      0.42      0.42        31

    accuracy                           0.34       125
   macro avg       0.36      0.34      0.34       125
weighted avg       0.36      0.34      0.34       125

Evaluating SVM classifier...
Accuracy: 0.3680

SVM Classification Report:
              precision    recall  f1-score   support

      group1       0.31      0.35      0.33        31
      group2       0.24      0.26      0.25        31
      group3       0.54      0.44      0.48        32
      group4       0.42      0.42      0.42        31

    accuracy             