In [36]:
import pandas as pd
import json

# Load the arguments data (assuming it has columns 'id', 'premise')
arguments_df = pd.read_csv('/kaggle/input/projectvalues/Project_Data/arguments-training.tsv', delimiter='\t')

# Load the value labels data (assuming it has columns 'id', 'value_label')
labels_df = pd.read_csv('/kaggle/input/projectvalues/Project_Data/labels-training.tsv', delimiter='\t')

# Load the value descriptions from a JSON file
with open('/kaggle/input/value-categories-2/value-categories.json', 'r') as file:
    value_descriptions = json.load(file)

labels_long_df = labels_df.melt(id_vars='Argument ID', var_name='value_category', value_name='label')
labels_long_df['label'] = labels_long_df['label'].replace({0: 2, 1: 0})
# labels_long_df = labels_long_df[labels_long_df['value_category'] != 'Universalism: objectivity']
labels_long_df['value_description'] = labels_long_df['value_category'].apply(lambda x: value_descriptions[x.lower().replace(": ","-")]['personal-motivation'])
combined_df = pd.merge(arguments_df, labels_long_df, left_on='Argument ID', right_on='Argument ID')

In [37]:
labels_long_df

Unnamed: 0,Argument ID,value_category,label,value_description
0,A01002,Self-direction: thought,2,"It is important to be creative, forming own op..."
1,A01005,Self-direction: thought,2,"It is important to be creative, forming own op..."
2,A01006,Self-direction: thought,2,"It is important to be creative, forming own op..."
3,A01007,Self-direction: thought,2,"It is important to be creative, forming own op..."
4,A01008,Self-direction: thought,2,"It is important to be creative, forming own op..."
...,...,...,...,...
107855,E08016,Universalism: objectivity,2,"Having a better hold on numbers, relying on sc..."
107856,E08017,Universalism: objectivity,0,"Having a better hold on numbers, relying on sc..."
107857,E08018,Universalism: objectivity,2,"Having a better hold on numbers, relying on sc..."
107858,E08019,Universalism: objectivity,0,"Having a better hold on numbers, relying on sc..."


In [38]:
final_df=combined_df
df_majority = final_df[final_df.label == 2]
df_minority = final_df[final_df.label == 0]

# Determine the number of instances you want to keep from the majority class
# For example, you might want to have a 1:1 ratio
number_of_instances = len(df_minority)

# Downsample the majority class
df_majority_downsampled = df_majority.sample(n=number_of_instances)

# Combine the downsampled majority class with the minority class to get a balanced dataset
balanced_df = pd.concat([df_majority_downsampled, df_minority])

# Shuffle the dataset to mix the two classes well
balanced_df = balanced_df.sample(frac=1).reset_index(drop=True)

In [39]:
from torch.utils.data import DataLoader, TensorDataset, random_split
from transformers import AdamW, get_linear_schedule_with_warmup
from torch.nn.functional import cross_entropy
from transformers import AutoModelForSequenceClassification, AutoTokenizer,AutoConfig
import torch
from sklearn.metrics import accuracy_score
model_name = 'pepa/roberta-base-snli'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model=model.to(device)
inputs = tokenizer(list(balanced_df['Premise']), list(balanced_df['value_description']), padding=True, truncation=True, return_tensors="pt")
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']
labels = torch.tensor(balanced_df['label'].values)
print(labels)

tensor([2, 0, 0,  ..., 2, 2, 2])


In [40]:
balanced_df

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,value_category,label,value_description
0,A27196,We should subsidize embryonic stem cell research,against,we should not subsidize embryonic stem cell re...,Stimulation,2,"Always looking for something new to do, doing ..."
1,A24307,We should cancel pride parades,against,"they shouldnt be banned, it's just people comi...",Universalism: concern,0,"Protecting the weak and vulnerable, care about..."
2,A30210,We should prohibit school prayer,against,school prayer adds structure to a students lif...,Tradition,0,"Maintain traditional beliefs and values, follo..."
3,E07080,We need an inclusive and pluralistic European ...,against,Europe is full and cannot accept more illegal ...,Self-direction: action,2,It is important to make own decisions about li...
4,A12065,We should stop the development of autonomous cars,in favor of,they can be more dangerous than human drivers,Face,2,"Does not want to be shamed by others, protecti..."
...,...,...,...,...,...,...,...
36735,A18328,We should subsidize journalism,in favor of,keeping journalist in work helps the nation ke...,Universalism: nature,2,"Care about nature for nature's sake, protect t..."
36736,A25289,We should prohibit school prayer,against,every religious person has the right to pray,Achievement,2,"Being ambitious, successful and being admired ..."
36737,A28324,We should adopt a zero-tolerance policy in sch...,against,a zero-tolerance policy in schools shouldn't b...,Stimulation,2,"Always looking for something new to do, doing ..."
36738,A29347,We should introduce compulsory voting,against,the penalties on not voting would bring backla...,Power: dominance,2,"Want people to follow you, being the most infl..."


In [59]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import AdamW, get_linear_schedule_with_warmup, AutoModelForSequenceClassification, AutoTokenizer
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# Define contrastive loss function
class ContrastiveLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        euclidean_distance = F.pairwise_distance(output1, output2, keepdim=True)
        contrastive_loss = torch.mean((1-label) * torch.pow(euclidean_distance, 2) +
                                      label * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2))
        return contrastive_loss

# Load the data

# Load tokenizer and model
model_name = 'pepa/roberta-base-snli'  # Assuming you're using RoBERTa
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Tokenize input data
inputs = tokenizer(list(balanced_df['Premise']), list(balanced_df['value_description']),
                   padding=True, truncation=True, return_tensors="pt")
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']
labels = torch.tensor(balanced_df['label'].values)

# Create dataset and dataloader
train_dataset = TensorDataset(input_ids, attention_mask, labels)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=8)

# Define optimizer and scheduler
optimizer = AdamW(model.parameters(), lr=2e-5)
epochs = 3
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

# Define a training loop
for epoch in range(epochs):
    model.train()
    total_loss = 0
    steps = 0

    for batch in train_dataloader:
        b_input_ids, b_attention_mask, b_labels = [t.to(device) for t in batch]
        
        optimizer.zero_grad()
        
        # Forward pass through RoBERTa base model
        outputs = model(input_ids=b_input_ids, attention_mask=b_attention_mask, labels=b_labels)
        logits = outputs.logits
        
        # Calculate classification loss
        classification_loss = outputs.loss
        
        # Generate contrastive learning targets
        with torch.no_grad():
            shuffled_indices = torch.randperm(b_input_ids.size(0))
            shuffled_input_ids = b_input_ids[shuffled_indices]
            shuffled_attention_mask = b_attention_mask[shuffled_indices]
        
        # Forward pass through RoBERTa base model with shuffled inputs
        shuffled_outputs = model(input_ids=shuffled_input_ids, attention_mask=shuffled_attention_mask)
        shuffled_logits = shuffled_outputs.logits
        
        # Calculate contrastive loss
        contrastive_criterion = ContrastiveLoss()
        contrastive_loss = contrastive_criterion(logits, shuffled_logits, label=torch.ones_like(logits[:, 0]))
        
        # Total combined loss
        combined_loss = classification_loss + contrastive_loss

        combined_loss.backward()
        optimizer.step()
        scheduler.step()

        total_loss += combined_loss.item()
        steps += 1

        if steps % 200 == 0:
            print(f"Epoch {epoch + 1}, Step {steps}, Loss: {total_loss / steps:.4f}")

print("Training complete")




Epoch 1, Step 200, Loss: 0.8288
Epoch 1, Step 400, Loss: 0.7563
Epoch 1, Step 600, Loss: 0.7235
Epoch 1, Step 800, Loss: 0.7022
Epoch 1, Step 1000, Loss: 0.6867
Epoch 1, Step 1200, Loss: 0.6695
Epoch 1, Step 1400, Loss: 0.6576
Epoch 1, Step 1600, Loss: 0.6474
Epoch 1, Step 1800, Loss: 0.6371
Epoch 1, Step 2000, Loss: 0.6309
Epoch 1, Step 2200, Loss: 0.6244
Epoch 1, Step 2400, Loss: 0.6193
Epoch 1, Step 2600, Loss: 0.6134
Epoch 1, Step 2800, Loss: 0.6081
Epoch 1, Step 3000, Loss: 0.6047
Epoch 1, Step 3200, Loss: 0.5996
Epoch 1, Step 3400, Loss: 0.5948
Epoch 1, Step 3600, Loss: 0.5905
Epoch 1, Step 3800, Loss: 0.5874
Epoch 1, Step 4000, Loss: 0.5834
Epoch 1, Step 4200, Loss: 0.5801
Epoch 1, Step 4400, Loss: 0.5773
Training complete


In [60]:
# Define the file path to save the model
model_path = "contrastive_entailment_model.pth"

# Save the model state dictionary
torch.save(model, model_path)


In [61]:
import pandas as pd
import json

# Load the arguments data (assuming it has columns 'id', 'premise')
arguments_df = pd.read_csv('/kaggle/input/projectvalues/Project_Data/arguments-validation.tsv', delimiter='\t')

# Load the value labels data (assuming it has columns 'id', 'value_label')
labels_df = pd.read_csv('/kaggle/input/projectvalues/Project_Data/labels-validation.tsv', delimiter='\t')

# Load the value descriptions from a JSON file
with open('/kaggle/input/value-categories-2/value-categories.json', 'r') as file:
    value_descriptions = json.load(file)
    
labels_long_df = labels_df.melt(id_vars='Argument ID', var_name='value_category', value_name='label')
labels_long_df['label'] = labels_long_df['label'].replace({0: 2, 1: 0})
# labels_long_df = labels_long_df[labels_long_df['value_category'] != 'Universalism: objectivity']
labels_long_df['value_description'] = labels_long_df['value_category'].apply(lambda x: value_descriptions[x.lower().replace(": ","-")]['personal-motivation'])
combined_df_val = pd.merge(arguments_df, labels_long_df, left_on='Argument ID', right_on='Argument ID')
combined_df_val['Argument'] = combined_df_val.apply(
    lambda row: f"{row['Stance']} {row['Conclusion']} by saying {row['Premise']}",
    axis=1
)

In [62]:
test_inputs = tokenizer(list(combined_df_val['Argument']), list(combined_df_val['value_description']), padding=True, truncation=True, return_tensors="pt")
test_input_ids = test_inputs['input_ids'].to(device)
test_attention_mask = test_inputs['attention_mask'].to(device)
test_labels = torch.tensor(combined_df_val['label'].values).to(device)

test_dataset = TensorDataset(test_input_ids, test_attention_mask, test_labels)
test_dataloader = DataLoader(test_dataset, batch_size=16)

# Function to evaluate the model on the test set
def evaluate_model(model, dataloader):
    model.eval()  # Set the model to evaluation mode
    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in dataloader:
            b_input_ids, b_attention_mask, b_labels = [b.to(device) for b in batch]

            outputs = model(b_input_ids, attention_mask=b_attention_mask)
            logits = outputs.logits

            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            batch_predictions = np.argmax(logits, axis=1)
            predictions.extend(batch_predictions)
            true_labels.extend(label_ids)

    return predictions, true_labels


In [67]:
from torch.utils.data import Dataset,DataLoader
model=model.to(device)
class CategoryDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len=512):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data.iloc[idx]
        premise = item['Argument']
        description = item['value_description']
        label = item['label']
        category = item['value_category']
        
        # Tokenize the text pair
        encoding = self.tokenizer(premise, description, add_special_tokens=True, 
                                  max_length=self.max_len, padding='max_length', 
                                  truncation=True, return_tensors="pt")
        
        input_ids = encoding['input_ids'].squeeze(0)  # Remove the batch dimension
        attention_mask = encoding['attention_mask'].squeeze(0)
        
        return input_ids, attention_mask, torch.tensor(label, dtype=torch.float), category
    
dataset = CategoryDataset(combined_df_val,tokenizer)
dataloader = DataLoader(dataset, batch_size=16, shuffle=False)

def evaluate_model(model, dataloader, device):
    model.eval()
    all_logits = []
    all_labels = []
    all_categories = []
    all_predictions = []
    print(len(dataloader))

    with torch.no_grad():
        for (j,(input_ids, attention_mask, labels, categories)) in enumerate(dataloader):
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            # Calculate probabilities using softmax for multi-class
            probabilities = torch.softmax(logits, dim=1)
            predictions = torch.argmax(probabilities, dim=1)  # Initial predictions

            # Adjust predictions where class '1' should be ignored
            for i, pred in enumerate(predictions):
                if pred == 1:
                    # Get probabilities for classes 0 and 2
                    prob_0 = probabilities[i, 0]
                    prob_2 = probabilities[i, 2]
                    # Choose the class (either 0 or 2) with the highest probability excluding class 1
                    predictions[i] = 0 if prob_0 > prob_2 else 2

            # Store the results
            all_logits.append(logits.cpu())
            all_labels.append(labels.cpu())
            all_predictions.append(predictions.cpu())
            all_categories.append(categories)  # Assuming categories can be batched directly
            print(j)

    # Concatenate results from all batches
    all_logits = torch.cat(all_logits).numpy()
    all_labels = torch.cat(all_labels).numpy()
    all_predictions = torch.cat(all_predictions).numpy()
    all_categories = np.concatenate(all_categories)

    return all_logits, all_labels, all_predictions, all_categories

# Assuming model, dataloader, and device are defined and properly setup
logits, labels, predictions, categories = evaluate_model(model, dataloader, device)





1
2370
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275

In [69]:
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
labels = labels.astype(int)
f1_scores = {}
accuracy_scores = {}
precision_scores = {}
recall_scores = {}
unique_categories = np.unique(categories)  # Ensure this matches the category data type returned

for category in unique_categories:
    category_mask = (categories == category)
    cat_labels = labels[category_mask]
    cat_predictions = predictions[category_mask]  # Use predictions here, not logits

    # Ensure that the shape of cat_labels and cat_predictions is one-dimensional
#     cat_labels = np.squeeze(cat_labels)
#     cat_predictions = np.squeeze(cat_predictions)

    # Calculate F1 score, accuracy, precision, and recall for the category
    f1 = f1_score(cat_labels, cat_predictions, average='macro')
    acc = accuracy_score(cat_labels, cat_predictions)
    precision = precision_score(cat_labels, cat_predictions, average='macro')
    recall = recall_score(cat_labels, cat_predictions, average='macro')

    f1_scores[category] = f1
    accuracy_scores[category] = acc
    precision_scores[category] = precision
    recall_scores[category] = recall

# Print scores for each category
avg_f1 = 0
avg_acc = 0
avg_precision = 0
avg_recall = 0

for category in unique_categories:
    print(f"Metrics for {category}:")
    print(f"  F1 Score: {f1_scores[category]:.4f}")
    print(f"  Accuracy: {accuracy_scores[category]:.4f}")
    print(f"  Precision: {precision_scores[category]:.4f}")
    print(f"  Recall: {recall_scores[category]:.4f}")
    
    avg_f1 += f1_scores[category]
    avg_acc += accuracy_scores[category]
    avg_precision += precision_scores[category]
    avg_recall += recall_scores[category]

# Calculate and print average scores
num_categories = len(unique_categories)
avg_f1 /= num_categories
avg_acc /= num_categories
avg_precision /= num_categories
avg_recall /= num_categories

print(f"Average F1 Score: {avg_f1:.4f}")
print(f"Average Accuracy: {avg_acc:.4f}")
print(f"Average Precision: {avg_precision:.4f}")
print(f"Average Recall: {avg_recall:.4f}")

Metrics for Achievement:
  F1 Score: 0.7155
  Accuracy: 0.7342
  Precision: 0.7138
  Recall: 0.7493
Metrics for Benevolence: caring:
  F1 Score: 0.4395
  Accuracy: 0.4557
  Precision: 0.6228
  Recall: 0.5757
Metrics for Benevolence: dependability:
  F1 Score: 0.4540
  Accuracy: 0.5243
  Precision: 0.5243
  Recall: 0.5500
Metrics for Conformity: interpersonal:
  F1 Score: 0.5966
  Accuracy: 0.9509
  Precision: 0.5974
  Recall: 0.5958
Metrics for Conformity: rules:
  F1 Score: 0.4519
  Accuracy: 0.4525
  Precision: 0.5892
  Recall: 0.5970
Metrics for Face:
  F1 Score: 0.5330
  Accuracy: 0.9151
  Precision: 0.5655
  Recall: 0.5269
Metrics for Hedonism:
  F1 Score: 0.6983
  Accuracy: 0.9420
  Precision: 0.7132
  Recall: 0.6856
Metrics for Humility:
  F1 Score: 0.5186
  Accuracy: 0.8792
  Precision: 0.5185
  Recall: 0.5187
Metrics for Power: dominance:
  F1 Score: 0.6273
  Accuracy: 0.8618
  Precision: 0.6129
  Recall: 0.6511
Metrics for Power: resources:
  F1 Score: 0.7003
  Accuracy: 0.88

In [70]:
import pandas as pd
import json

# Load the arguments data (assuming it has columns 'id', 'premise')
arguments_df = pd.read_csv('/kaggle/input/projectvalues/Project_Data/arguments-test.tsv', delimiter='\t')

# Load the value labels data (assuming it has columns 'id', 'value_label')
labels_df = pd.read_csv('/kaggle/input/projectvalues/Project_Data/labels-test.tsv', delimiter='\t')

# Load the value descriptions from a JSON file
with open('/kaggle/input/value-categories-2/value-categories.json', 'r') as file:
    value_descriptions = json.load(file)
    
labels_long_df = labels_df.melt(id_vars='Argument ID', var_name='value_category', value_name='label')
labels_long_df['label'] = labels_long_df['label'].replace({0: 2, 1: 0})
# labels_long_df = labels_long_df[labels_long_df['value_category'] != 'Universalism: objectivity']
labels_long_df['value_description'] = labels_long_df['value_category'].apply(lambda x: value_descriptions[x.lower().replace(": ","-")]['personal-motivation'])
combined_df_test = pd.merge(arguments_df, labels_long_df, left_on='Argument ID', right_on='Argument ID')
combined_df_test['Argument'] = combined_df_test.apply(
    lambda row: f"{row['Stance']} {row['Conclusion']} by saying {row['Premise']}",
    axis=1
)

In [71]:
test_inputs = tokenizer(list(combined_df_test['Argument']), list(combined_df_test['value_description']), padding=True, truncation=True, return_tensors="pt")
test_input_ids = test_inputs['input_ids'].to(device)
test_attention_mask = test_inputs['attention_mask'].to(device)
test_labels = torch.tensor(combined_df_test['label'].values).to(device)

test_dataset = TensorDataset(test_input_ids, test_attention_mask, test_labels)
test_dataloader = DataLoader(test_dataset, batch_size=16)

# Function to evaluate the model on the test set
def evaluate_model(model, dataloader):
    model.eval()  # Set the model to evaluation mode
    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in dataloader:
            b_input_ids, b_attention_mask, b_labels = [b.to(device) for b in batch]

            outputs = model(b_input_ids, attention_mask=b_attention_mask)
            logits = outputs.logits

            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            batch_predictions = np.argmax(logits, axis=1)
            predictions.extend(batch_predictions)
            true_labels.extend(label_ids)

    return predictions, true_labels

# Evaluate the model
# predictions, true_labels = evaluate_model(model, test_dataloader)

# # Calculate accuracy and F1 score
# accuracy = accuracy_score(true_labels, predictions)
# f1 = f1_score(true_labels, predictions, average='binary')


In [75]:
from torch.utils.data import Dataset,DataLoader
print(1)
class CategoryDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len=512):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data.iloc[idx]
        premise = item['Argument']
        description = item['value_description']
        label = item['label']
        category = item['value_category']
        
        # Tokenize the text pair
        encoding = self.tokenizer(premise, description, add_special_tokens=True, 
                                  max_length=self.max_len, padding='max_length', 
                                  truncation=True, return_tensors="pt")
        
        input_ids = encoding['input_ids'].squeeze(0)  # Remove the batch dimension
        attention_mask = encoding['attention_mask'].squeeze(0)
        
        return input_ids, attention_mask, torch.tensor(label, dtype=torch.float), category
    
dataset = CategoryDataset(combined_df_test,tokenizer)
dataloader = DataLoader(dataset, batch_size=16, shuffle=False)

def evaluate_model(model, dataloader, device):
    model.eval()
    all_logits = []
    all_labels = []
    all_categories = []
    all_predictions = []
    print(len(dataloader))

    with torch.no_grad():
        for (j,(input_ids, attention_mask, labels, categories)) in enumerate(dataloader):
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            # Calculate probabilities using softmax for multi-class
            probabilities = torch.softmax(logits, dim=1)
            predictions = torch.argmax(probabilities, dim=1)  # Initial predictions

            # Adjust predictions where class '1' should be ignored
            for i, pred in enumerate(predictions):
                if pred == 1:
                    # Get probabilities for classes 0 and 2
                    prob_0 = probabilities[i, 0]
                    prob_2 = probabilities[i, 2]
                    # Choose the class (either 0 or 2) with the highest probability excluding class 1
                    predictions[i] = 0 if prob_0 > prob_2 else 2

            # Store the results
            all_logits.append(logits.cpu())
            all_labels.append(labels.cpu())
            all_predictions.append(predictions.cpu())
            all_categories.append(categories)  # Assuming categories can be batched directly
            if(j%100==0):
                print(j)

    # Concatenate results from all batches
    all_logits = torch.cat(all_logits).numpy()
    all_labels = torch.cat(all_labels).numpy()
    all_predictions = torch.cat(all_predictions).numpy()
    all_categories = np.concatenate(all_categories)

    return all_logits, all_labels, all_predictions, all_categories

# Get the predictions, true labels, and categories from the evaluation
logits, labels, predictions, categories = evaluate_model(model, dataloader, device)

1
1970
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900


In [78]:
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
labels = labels.astype(int)
f1_scores = {}
accuracy_scores = {}
precision_scores = {}
recall_scores = {}
unique_categories = np.unique(categories)  # Ensure this matches the category data type returned

for category in unique_categories:
    category_mask = (categories == category)
    cat_labels = labels[category_mask]
    cat_predictions = predictions[category_mask]  # Use predictions here, not logits

    # Ensure that the shape of cat_labels and cat_predictions is one-dimensional
#     cat_labels = np.squeeze(cat_labels)
#     cat_predictions = np.squeeze(cat_predictions)

    # Calculate F1 score, accuracy, precision, and recall for the category
    f1 = f1_score(cat_labels, cat_predictions, average='macro')
    acc = accuracy_score(cat_labels, cat_predictions)
    precision = precision_score(cat_labels, cat_predictions, average='macro')
    recall = recall_score(cat_labels, cat_predictions, average='macro')

    f1_scores[category] = f1
    accuracy_scores[category] = acc
    precision_scores[category] = precision
    recall_scores[category] = recall

# Print scores for each category
avg_f1 = 0
avg_acc = 0
avg_precision = 0
avg_recall = 0

for category in unique_categories:
    print(f"Metrics for {category}:")
    print(f"  F1 Score: {f1_scores[category]:.4f}")
    print(f"  Accuracy: {accuracy_scores[category]:.4f}")
    print(f"  Precision: {precision_scores[category]:.4f}")
    print(f"  Recall: {recall_scores[category]:.4f}")
    
    avg_f1 += f1_scores[category]
    avg_acc += accuracy_scores[category]
    avg_precision += precision_scores[category]
    avg_recall += recall_scores[category]

# Calculate and print average scores
num_categories = len(unique_categories)
avg_f1 /= num_categories
avg_acc /= num_categories
avg_precision /= num_categories
avg_recall /= num_categories
print(num_categories)

print(f"Average F1 Score: {avg_f1:.4f}")
print(f"Average Accuracy: {avg_acc:.4f}")
print(f"Average Precision: {avg_precision:.4f}")
print(f"Average Recall: {avg_recall:.4f}")

Metrics for Achievement:
  F1 Score: 0.6690
  Accuracy: 0.7246
  Precision: 0.6619
  Recall: 0.6850
Metrics for Benevolence: caring:
  F1 Score: 0.4023
  Accuracy: 0.4023
  Precision: 0.5980
  Recall: 0.5985
Metrics for Benevolence: dependability:
  F1 Score: 0.5263
  Accuracy: 0.6567
  Precision: 0.5589
  Recall: 0.6485
Metrics for Conformity: interpersonal:
  F1 Score: 0.5546
  Accuracy: 0.9651
  Precision: 0.6844
  Recall: 0.5358
Metrics for Conformity: rules:
  F1 Score: 0.3936
  Accuracy: 0.3947
  Precision: 0.6009
  Recall: 0.6178
Metrics for Face:
  F1 Score: 0.5245
  Accuracy: 0.9143
  Precision: 0.5368
  Recall: 0.5209
Metrics for Hedonism:
  F1 Score: 0.6029
  Accuracy: 0.9772
  Precision: 0.6183
  Recall: 0.5913
Metrics for Humility:
  F1 Score: 0.5176
  Accuracy: 0.9327
  Precision: 0.5272
  Recall: 0.5150
Metrics for Power: dominance:
  F1 Score: 0.5978
  Accuracy: 0.8610
  Precision: 0.5820
  Recall: 0.6380
Metrics for Power: resources:
  F1 Score: 0.7146
  Accuracy: 0.90