# Apply BERT and 20-Fold Cross-Validation

In [68]:
import torch

# If there's a GPU available...
if torch.cuda.is_available():

    # Tell PyTorch to use the GPU.
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 2 GPU(s) available.
We will use the GPU: Quadro RTX 8000


In [69]:
#!pip install transformers

## 1. Readand prepare data

In [70]:
import pandas as pd
df = pd.read_excel('Crowdanno_Datenbereinigung_done.xlsx')
df.head()

Unnamed: 0,id_beitrag (mb),id_mb,komm_id (mf),id_mf,Kommentar_id_rtl,id_rtl,comment_id_zon,id_zon,id_crowd,c_text,...,Tatsache_total,unangemessen_total_median,unangemessen_edulow_median,unangemessen_edumed_median,unangemessen_eduhigh_median,bereichernd_total_median,bereichernd_edulow_median,bereichernd_edumed_median,bereichernd_eduhigh_median,Tatsache_total_median
0,831.0,1.0,,,,,,,1,"Tolle Idee. Ich denke, dass dieses Projekt Tei...",...,0.111111,0,0,0,0,1,1,1,1,0
1,841.0,3.0,,,,,,,2,Wohnungstausch sollte auch in belegungsgebunde...,...,0.111111,0,0,0,0,1,0,1,0,0
2,843.0,4.0,,,,,,,3,Lebensbedingungen vor Ort könnten sogar geziel...,...,0.333333,0,0,0,0,1,1,1,1,0
3,850.0,5.0,,,,,,,4,"Super Ideen, da kommt Freude auf mitzumachen! ...",...,0.111111,0,0,0,0,1,1,1,1,0
4,852.0,6.0,,,,,,,5,Wenn der Staat schon Steuermittel ausgeben wil...,...,0.777778,0,0,0,0,1,1,1,1,1


In [71]:
#list(df)

In [72]:
#Get median/majorty vote for class Mixed
df["unangemessen_mixed_median"] = df[['unangemessen_1', "unangemessen_4", "unangemessen_7"]].median(axis=1)
df["unangemessen_mixed_median"].value_counts()

unangemessen_mixed_median
0.0    12135
1.0     1542
Name: count, dtype: int64

In [73]:
df['unangemessen_mixed_median'] = df.unangemessen_mixed_median.astype(int)

In [74]:
df["unangemessen_mixed_median"].value_counts()

unangemessen_mixed_median
0    12135
1     1542
Name: count, dtype: int64

In [75]:
df.shape

(13677, 68)

In [78]:
df.dropna(subset="c_text", inplace=True)

In [79]:
len(df)

13674

In [80]:
df.head()

Unnamed: 0,id_beitrag (mb),id_mb,komm_id (mf),id_mf,Kommentar_id_rtl,id_rtl,comment_id_zon,id_zon,id_crowd,c_text,...,unangemessen_total_median,unangemessen_edulow_median,unangemessen_edumed_median,unangemessen_eduhigh_median,bereichernd_total_median,bereichernd_edulow_median,bereichernd_edumed_median,bereichernd_eduhigh_median,Tatsache_total_median,unangemessen_mixed_median
0,831.0,1.0,,,,,,,1,"Tolle Idee. Ich denke, dass dieses Projekt Tei...",...,0,0,0,0,1,1,1,1,0,0
1,841.0,3.0,,,,,,,2,Wohnungstausch sollte auch in belegungsgebunde...,...,0,0,0,0,1,0,1,0,0,0
2,843.0,4.0,,,,,,,3,Lebensbedingungen vor Ort könnten sogar geziel...,...,0,0,0,0,1,1,1,1,0,0
3,850.0,5.0,,,,,,,4,"Super Ideen, da kommt Freude auf mitzumachen! ...",...,0,0,0,0,1,1,1,1,0,0
4,852.0,6.0,,,,,,,5,Wenn der Staat schon Steuermittel ausgeben wil...,...,0,0,0,0,1,1,1,1,1,0


## 2. Transform text for BERT

In [10]:
sentences = df.c_text.values

In [11]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-uncased", use_fast = False, do_lower_case=True)

In [12]:
# Tokenize all of the sentences and map the tokens to thier word IDs.
input_ids = []

# For every sentence...
for sent in sentences:
    # `encode` will:
    #   (1) Tokenize the sentence.
    #   (2) Prepend the `[CLS]` token to the start.
    #   (3) Append the `[SEP]` token to the end.
    #   (4) Map tokens to their IDs.
    encoded_sent = tokenizer.encode(
                        sent,                      # Sentence to encode.
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'

                        # This function also supports truncation and conversion
                        # to pytorch tensors, but we need to do padding, so we
                        # can't use these features :( .
                        #max_length = 128,          # Truncate all sentences.
                        #return_tensors = 'pt',     # Return pytorch tensors.
                   )

    # Add the encoded sentence to the list.
    input_ids.append(encoded_sent)

# Print sentence 0, now as a list of IDs.
print('Original: ', sentences[0])
print('Token IDs:', input_ids[0])
print("Legth:", len(input_ids[0]))

Token indices sequence length is longer than the specified maximum sequence length for this model (594 > 512). Running this sequence through the model will result in indexing errors


Original:  Tolle Idee. Ich denke, dass dieses Projekt Teil des Stadtforums werden sollte, damit wir darüber weiter nachdenken können.
Token IDs: [102, 11524, 4407, 552, 260, 5327, 806, 347, 971, 2354, 582, 210, 649, 30515, 318, 1370, 806, 865, 228, 449, 10360, 30940, 490, 18226, 367, 316, 552, 103]
Legth: 28


In [13]:
df["sequence_legth"] = [len(sen) for sen in input_ids]

In [14]:
#import matplotlib.pyplot as plt
#plt.bar(df.index, df["sequence_legth"], width=20)

In [15]:
import statistics
print('Max sentence length: ', max([len(sen) for sen in input_ids]))
print('Min sentence length: ', min([len(sen) for sen in input_ids]))
print('Mean sentence length: ', statistics.mean([len(sen) for sen in input_ids]))
print('Median sentence length: ', statistics.median([len(sen) for sen in input_ids]))

Max sentence length:  3546
Min sentence length:  3
Mean sentence length:  77.44771098434987
Median sentence length:  44.0


## 3. Run Models

### 3.0 Initialize BERT

In [16]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from transformers import AutoTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, Dataset

In [17]:
# Initialize BERT tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-uncased", use_fast = False, do_lower_case=True)
model = BertForSequenceClassification.from_pretrained(
    "dbmdz/bert-base-german-uncased", # deepset ai
    num_labels = 2, # The number of output labels, which is 2 for binary classification.
    output_attentions = False, # Whether the model returns attentions weights.
    output_hidden_states = False, # Whether the model returns all hidden-states.
)
# Define a custom PyTorch dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = tokenizer(text, padding='max_length', truncation=True, max_length=512, return_tensors='pt')
        input_ids = encoding['input_ids'].squeeze()
        attention_mask = encoding['attention_mask'].squeeze()
        return {'input_ids': input_ids, 'attention_mask': attention_mask, 'labels': torch.tensor(label)}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-german-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### 3.1 Low Education

#### 3.1.1 Low on low

In [18]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_edulow_median'].tolist()) #Low in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_edulow_median'].tolist()) #Low in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_edulow_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)


Training Fold 1/20


  attn_output = torch.nn.functional.scaled_dot_product_attention(


Accuracy for Fold 1: 0.8845029239766082
Training Fold 2/20
Accuracy for Fold 2: 0.9400584795321637
Training Fold 3/20
Accuracy for Fold 3: 0.9912280701754386
Training Fold 4/20
Accuracy for Fold 4: 0.9912280701754386
Training Fold 5/20
Accuracy for Fold 5: 0.9956140350877193
Training Fold 6/20
Accuracy for Fold 6: 0.9956140350877193
Training Fold 7/20
Accuracy for Fold 7: 0.9956140350877193
Training Fold 8/20
Accuracy for Fold 8: 0.9970760233918129
Training Fold 9/20
Accuracy for Fold 9: 0.9985380116959064
Training Fold 10/20
Accuracy for Fold 10: 0.9970760233918129
Training Fold 11/20
Accuracy for Fold 11: 0.9985380116959064
Training Fold 12/20
Accuracy for Fold 12: 0.9970760233918129
Training Fold 13/20
Accuracy for Fold 13: 0.9970760233918129
Training Fold 14/20
Accuracy for Fold 14: 0.9985380116959064
Training Fold 15/20
Accuracy for Fold 15: 0.9985358711566618
Training Fold 16/20
Accuracy for Fold 16: 0.9970717423133236
Training Fold 17/20
Accuracy for Fold 17: 0.9970717423133236


In [19]:
#create new df
df_low_low = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_low_low.loc['mean'] = df_low_low.mean()
df_low_low

Unnamed: 0,accuracy,f1,recall,precision
0,0.884503,0.406015,0.313953,0.574468
1,0.940058,0.751515,0.72093,0.78481
2,0.991228,0.965517,0.976744,0.954545
3,0.991228,0.965116,0.965116,0.965116
4,0.995614,0.982249,0.965116,1.0
5,0.995614,0.982456,0.965517,1.0
6,0.995614,0.982456,0.965517,1.0
7,0.997076,0.988372,0.977011,1.0
8,0.998538,0.994286,1.0,0.988636
9,0.997076,0.988372,0.977011,1.0


In [20]:
df_low_low.to_excel('df_low_low.xlsx', index=None)

#### 3.1.2 Low on Medium

In [21]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_edulow_median'].tolist()) #Low in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_edumed_median'].tolist()) #Medium in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_edulow_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.8947368421052632
Training Fold 2/20
Accuracy for Fold 2: 0.8786549707602339
Training Fold 3/20
Accuracy for Fold 3: 0.8757309941520468
Training Fold 4/20
Accuracy for Fold 4: 0.868421052631579
Training Fold 5/20
Accuracy for Fold 5: 0.881578947368421
Training Fold 6/20
Accuracy for Fold 6: 0.8991228070175439
Training Fold 7/20
Accuracy for Fold 7: 0.868421052631579
Training Fold 8/20
Accuracy for Fold 8: 0.8757309941520468
Training Fold 9/20
Accuracy for Fold 9: 0.8771929824561403
Training Fold 10/20
Accuracy for Fold 10: 0.8888888888888888
Training Fold 11/20
Accuracy for Fold 11: 0.8932748538011696
Training Fold 12/20
Accuracy for Fold 12: 0.8757309941520468
Training Fold 13/20
Accuracy for Fold 13: 0.8713450292397661
Training Fold 14/20
Accuracy for Fold 14: 0.8771929824561403
Training Fold 15/20
Accuracy for Fold 15: 0.9019033674963397
Training Fold 16/20
Accuracy for Fold 16: 0.8843338213762811
Training Fold 17/20
Accuracy for Fold 17: 0.8

In [22]:
fold_accuracies

[0.8947368421052632,
 0.8786549707602339,
 0.8757309941520468,
 0.868421052631579,
 0.881578947368421,
 0.8991228070175439,
 0.868421052631579,
 0.8757309941520468,
 0.8771929824561403,
 0.8888888888888888,
 0.8932748538011696,
 0.8757309941520468,
 0.8713450292397661,
 0.8771929824561403,
 0.9019033674963397,
 0.8843338213762811,
 0.8740849194729137,
 0.8799414348462665,
 0.890190336749634,
 0.8828696925329429]

In [23]:
#create new df
df_low_med = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })


In [24]:
#Add row with mean
df_low_med.loc['mean'] = df_low_med.mean()

In [25]:
df_low_med

Unnamed: 0,accuracy,f1,recall,precision
0,0.894737,0.555556,0.576923,0.535714
1,0.878655,0.450331,0.53125,0.390805
2,0.875731,0.535519,0.505155,0.569767
3,0.868421,0.444444,0.467532,0.423529
4,0.881579,0.503067,0.525641,0.482353
5,0.899123,0.54902,0.617647,0.494118
6,0.868421,0.488636,0.477778,0.5
7,0.875731,0.47205,0.513514,0.436782
8,0.877193,0.461538,0.521739,0.413793
9,0.888889,0.5,0.575758,0.44186


In [26]:
df_low_med.to_excel('df_low_med.xlsx', index=None)

#### 3.1.3 Low on high

In [27]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_edulow_median'].tolist()) #Low in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_eduhigh_median'].tolist()) #High in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_edulow_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.8903508771929824
Training Fold 2/20
Accuracy for Fold 2: 0.8581871345029239
Training Fold 3/20
Accuracy for Fold 3: 0.8698830409356725
Training Fold 4/20
Accuracy for Fold 4: 0.8567251461988304
Training Fold 5/20
Accuracy for Fold 5: 0.8669590643274854
Training Fold 6/20
Accuracy for Fold 6: 0.8874269005847953
Training Fold 7/20
Accuracy for Fold 7: 0.8771929824561403
Training Fold 8/20
Accuracy for Fold 8: 0.8757309941520468
Training Fold 9/20
Accuracy for Fold 9: 0.8713450292397661
Training Fold 10/20
Accuracy for Fold 10: 0.8859649122807017
Training Fold 11/20
Accuracy for Fold 11: 0.8918128654970761
Training Fold 12/20
Accuracy for Fold 12: 0.8903508771929824
Training Fold 13/20
Accuracy for Fold 13: 0.8757309941520468
Training Fold 14/20
Accuracy for Fold 14: 0.8552631578947368
Training Fold 15/20
Accuracy for Fold 15: 0.9033674963396779
Training Fold 16/20
Accuracy for Fold 16: 0.87701317715959
Training Fold 17/20
Accuracy for Fold 17: 0.

In [28]:
#create new df
df_low_high = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_low_high.loc['mean'] = df_low_high.mean()

In [29]:
df_low_high

Unnamed: 0,accuracy,f1,recall,precision
0,0.890351,0.561404,0.551724,0.571429
1,0.858187,0.404908,0.423077,0.388235
2,0.869883,0.491429,0.477778,0.505882
3,0.856725,0.363636,0.4,0.333333
4,0.866959,0.473988,0.465909,0.482353
5,0.887427,0.538922,0.54878,0.529412
6,0.877193,0.5,0.518519,0.482759
7,0.875731,0.458599,0.514286,0.413793
8,0.871345,0.47619,0.493827,0.45977
9,0.885965,0.535714,0.54878,0.523256


In [30]:
df_low_high.to_excel('df_low_high.xlsx', index=None)

#### 3.1.4 Low on mixed

In [31]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_edulow_median'].tolist()) #Low in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_mixed_median'].tolist()) #Mixed in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_edulow_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.9137426900584795
Training Fold 2/20
Accuracy for Fold 2: 0.8903508771929824
Training Fold 3/20
Accuracy for Fold 3: 0.8976608187134503
Training Fold 4/20
Accuracy for Fold 4: 0.8859649122807017
Training Fold 5/20
Accuracy for Fold 5: 0.8991228070175439
Training Fold 6/20
Accuracy for Fold 6: 0.8888888888888888
Training Fold 7/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 7: 0.9181286549707602
Training Fold 8/20
Accuracy for Fold 8: 0.9078947368421053
Training Fold 9/20
Accuracy for Fold 9: 0.9108187134502924
Training Fold 10/20
Accuracy for Fold 10: 0.8947368421052632
Training Fold 11/20
Accuracy for Fold 11: 0.9122807017543859
Training Fold 12/20
Accuracy for Fold 12: 0.902046783625731
Training Fold 13/20
Accuracy for Fold 13: 0.8903508771929824
Training Fold 14/20
Accuracy for Fold 14: 0.8874269005847953
Training Fold 15/20
Accuracy for Fold 15: 0.9121522693997072
Training Fold 16/20
Accuracy for Fold 16: 0.8828696925329429
Training Fold 17/20
Accuracy for Fold 17: 0.9004392386530015
Training Fold 18/20
Accuracy for Fold 18: 0.8945827232796486
Training Fold 19/20
Accuracy for Fold 19: 0.9209370424597365
Training Fold 20/20
Accuracy for Fold 20: 0.9004392386530015


  _warn_prf(average, modifier, msg_start, len(result))


In [32]:
#create new df
df_low_mix = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_low_mix.loc['mean'] = df_low_mix.mean()

In [33]:
df_low_mix

Unnamed: 0,accuracy,f1,recall,precision
0,0.913743,0.624204,0.662162,0.590361
1,0.890351,0.539877,0.578947,0.505747
2,0.897661,0.588235,0.595238,0.581395
3,0.885965,0.5125,0.546667,0.482353
4,0.899123,0.581818,0.6,0.564706
5,0.888889,0.0,0.0,0.0
6,0.918129,0.674419,0.659091,0.690476
7,0.907895,0.593548,0.676471,0.528736
8,0.910819,0.643275,0.670732,0.617978
9,0.894737,0.560976,0.589744,0.534884


In [34]:
df_low_mix.to_excel('df_low_mix.xlsx', index=None)

## 3.2 Education Medium

#### 3.2.1  Medium on low

In [35]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_edumed_median'].tolist()) #Medium in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_edulow_median'].tolist()) #Low in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_edumed_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.8859649122807017
Training Fold 2/20
Accuracy for Fold 2: 0.8859649122807017
Training Fold 3/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 3: 0.8830409356725146
Training Fold 4/20
Accuracy for Fold 4: 0.8771929824561403
Training Fold 5/20
Accuracy for Fold 5: 0.9005847953216374
Training Fold 6/20
Accuracy for Fold 6: 0.8874269005847953
Training Fold 7/20
Accuracy for Fold 7: 0.8991228070175439
Training Fold 8/20
Accuracy for Fold 8: 0.9400584795321637
Training Fold 9/20
Accuracy for Fold 9: 0.9195906432748538
Training Fold 10/20
Accuracy for Fold 10: 0.9064327485380117
Training Fold 11/20
Accuracy for Fold 11: 0.9342105263157895
Training Fold 12/20
Accuracy for Fold 12: 0.8845029239766082
Training Fold 13/20
Accuracy for Fold 13: 0.8786549707602339
Training Fold 14/20
Accuracy for Fold 14: 0.881578947368421
Training Fold 15/20
Accuracy for Fold 15: 0.9048316251830161
Training Fold 16/20
Accuracy for Fold 16: 0.890190336749634
Training Fold 17/20
Accuracy for Fold 17: 0.8828696925329429
Training Fold 18/20
Accuracy for Fold 18: 0.862371888726208
Training Fold 19/20
Accuracy for Fold 19: 0.862371888726208


In [36]:
#create new df
df_med_low = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_med_low.loc['mean'] = df_med_low.mean()

In [37]:
df_med_low

Unnamed: 0,accuracy,f1,recall,precision
0,0.885965,0.071429,0.037037,1.0
1,0.885965,0.0,0.0,0.0
2,0.883041,0.069767,0.036145,1.0
3,0.877193,0.106383,0.05618,1.0
4,0.900585,0.514286,0.356436,0.923077
5,0.887427,0.114943,0.060976,1.0
6,0.899123,0.420168,0.284091,0.806452
7,0.940058,0.705036,0.597561,0.859649
8,0.919591,0.604317,0.506024,0.75
9,0.906433,0.6,0.6,0.6


In [38]:
df_med_low.to_excel('df_med_low.xlsx', index=None)

#### 3.2.2  Medium on medium

In [39]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_edumed_median'].tolist()) #Medium in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_edumed_median'].tolist()) #Medium in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_edumed_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.8888888888888888
Training Fold 2/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 2: 0.9371345029239766
Training Fold 3/20
Accuracy for Fold 3: 0.8888888888888888
Training Fold 4/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 4: 0.9137426900584795
Training Fold 5/20
Accuracy for Fold 5: 0.9532163742690059
Training Fold 6/20
Accuracy for Fold 6: 0.9269005847953217
Training Fold 7/20
Accuracy for Fold 7: 0.9473684210526315
Training Fold 8/20
Accuracy for Fold 8: 0.9488304093567251
Training Fold 9/20
Accuracy for Fold 9: 0.9692982456140351
Training Fold 10/20
Accuracy for Fold 10: 0.9678362573099415
Training Fold 11/20
Accuracy for Fold 11: 0.97953216374269
Training Fold 12/20
Accuracy for Fold 12: 0.966374269005848
Training Fold 13/20
Accuracy for Fold 13: 0.9766081871345029
Training Fold 14/20
Accuracy for Fold 14: 0.8888888888888888
Training Fold 15/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 15: 0.890190336749634
Training Fold 16/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 16: 0.9677891654465594
Training Fold 17/20
Accuracy for Fold 17: 0.9253294289897511
Training Fold 18/20
Accuracy for Fold 18: 0.9472913616398243
Training Fold 19/20
Accuracy for Fold 19: 0.9502196193265008
Training Fold 20/20
Accuracy for Fold 20: 0.9795021961932651


In [40]:
#create new df
df_med_med = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_med_med.loc['mean'] = df_med_med.mean()

In [41]:
df_med_med

Unnamed: 0,accuracy,f1,recall,precision
0,0.888889,0.0,0.0,0.0
1,0.937135,0.626087,0.473684,0.923077
2,0.888889,0.0,0.0,0.0
3,0.913743,0.438095,0.302632,0.793103
4,0.953216,0.761194,0.671053,0.87931
5,0.926901,0.561404,0.421053,0.842105
6,0.947368,0.7,0.552632,0.954545
7,0.94883,0.715447,0.578947,0.93617
8,0.969298,0.869565,0.921053,0.823529
9,0.967836,0.851351,0.828947,0.875


In [42]:
df_med_med.to_excel('df_med_med.xlsx', index=None)

#### 3.2.3  Medium on high

In [43]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_edumed_median'].tolist()) #Medium in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_eduhigh_median'].tolist()) #High in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_edumed_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.8830409356725146
Training Fold 2/20
Accuracy for Fold 2: 0.8801169590643275
Training Fold 3/20
Accuracy for Fold 3: 0.9122807017543859
Training Fold 4/20
Accuracy for Fold 4: 0.9049707602339181
Training Fold 5/20
Accuracy for Fold 5: 0.9005847953216374
Training Fold 6/20
Accuracy for Fold 6: 0.902046783625731
Training Fold 7/20
Accuracy for Fold 7: 0.8903508771929824
Training Fold 8/20
Accuracy for Fold 8: 0.9035087719298246
Training Fold 9/20
Accuracy for Fold 9: 0.9122807017543859
Training Fold 10/20
Accuracy for Fold 10: 0.9005847953216374
Training Fold 11/20
Accuracy for Fold 11: 0.8801169590643275
Training Fold 12/20
Accuracy for Fold 12: 0.8874269005847953
Training Fold 13/20
Accuracy for Fold 13: 0.8918128654970761
Training Fold 14/20
Accuracy for Fold 14: 0.8669590643274854
Training Fold 15/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 15: 0.8814055636896047
Training Fold 16/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 16: 0.8857979502196194
Training Fold 17/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 17: 0.8828696925329429
Training Fold 18/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 18: 0.890190336749634
Training Fold 19/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 19: 0.8828696925329429
Training Fold 20/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 20: 0.8887262079062958


  _warn_prf(average, modifier, msg_start, len(result))


In [44]:
#create new df
df_med_high = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_med_high.loc['mean'] = df_med_high.mean()

In [45]:
df_med_high

Unnamed: 0,accuracy,f1,recall,precision
0,0.883041,0.452055,0.458333,0.445946
1,0.880117,0.481013,0.457831,0.506667
2,0.912281,0.589041,0.632353,0.551282
3,0.904971,0.569536,0.581081,0.558442
4,0.900585,0.575,0.541176,0.613333
5,0.902047,0.593939,0.544444,0.653333
6,0.890351,0.522293,0.5125,0.532468
7,0.903509,0.554054,0.577465,0.532468
8,0.912281,0.558824,0.542857,0.575758
9,0.900585,0.534247,0.506494,0.565217


In [46]:
df_med_high.to_excel('df_med_high.xlsx', index=None)

#### 3.2.4 Medium on mixed

In [47]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_edumed_median'].tolist()) #Medium in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_mixed_median'].tolist()) #Mixed in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_edumed_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.8859649122807017
Training Fold 2/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 2: 0.8888888888888888
Training Fold 3/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 3: 0.8903508771929824
Training Fold 4/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 4: 0.8903508771929824
Training Fold 5/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 5: 0.8654970760233918
Training Fold 6/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 6: 0.8771929824561403
Training Fold 7/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 7: 0.8859649122807017
Training Fold 8/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 8: 0.8888888888888888
Training Fold 9/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 9: 0.8991228070175439
Training Fold 10/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 10: 0.8918128654970761
Training Fold 11/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 11: 0.8698830409356725
Training Fold 12/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 12: 0.8786549707602339
Training Fold 13/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 13: 0.8859649122807017
Training Fold 14/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 14: 0.8830409356725146
Training Fold 15/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 15: 0.9033674963396779
Training Fold 16/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 16: 0.9004392386530015
Training Fold 17/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 17: 0.8872620790629575
Training Fold 18/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 18: 0.8887262079062958
Training Fold 19/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 19: 0.8989751098096632
Training Fold 20/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 20: 0.8843338213762811


  _warn_prf(average, modifier, msg_start, len(result))


In [48]:
#create new df
df_med_mix = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_med_mix.loc['mean'] = df_med_mix.mean()

In [49]:
df_med_mix

Unnamed: 0,accuracy,f1,recall,precision
0,0.885965,0.0,0.0,0.0
1,0.888889,0.0,0.0,0.0
2,0.890351,0.0,0.0,0.0
3,0.890351,0.0,0.0,0.0
4,0.865497,0.0,0.0,0.0
5,0.877193,0.0,0.0,0.0
6,0.885965,0.0,0.0,0.0
7,0.888889,0.0,0.0,0.0
8,0.899123,0.0,0.0,0.0
9,0.891813,0.0,0.0,0.0


In [50]:
df_med_mix.to_excel('df_med_mix.xlsx', index=None)

### 3.3 Education High

#### 3.3.1 High on low

In [51]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_eduhigh_median'].tolist()) #High in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_edulow_median'].tolist()) #Low in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_eduhigh_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.8728070175438597
Training Fold 2/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 2: 0.8771929824561403
Training Fold 3/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 3: 0.8611111111111112
Training Fold 4/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 4: 0.8786549707602339
Training Fold 5/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 5: 0.8698830409356725
Training Fold 6/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 6: 0.8757309941520468
Training Fold 7/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 7: 0.8771929824561403
Training Fold 8/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 8: 0.881578947368421
Training Fold 9/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 9: 0.8757309941520468
Training Fold 10/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 10: 0.8640350877192983
Training Fold 11/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 11: 0.8801169590643275
Training Fold 12/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 12: 0.881578947368421
Training Fold 13/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 13: 0.8728070175438597
Training Fold 14/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 14: 0.8932748538011696
Training Fold 15/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 15: 0.8667642752562226
Training Fold 16/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 16: 0.8682284040995608
Training Fold 17/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 17: 0.8784773060029283
Training Fold 18/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 18: 0.8682284040995608
Training Fold 19/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 19: 0.8550512445095169
Training Fold 20/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 20: 0.8726207906295754


  _warn_prf(average, modifier, msg_start, len(result))


In [52]:
#create new df
df_high_low = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_high_low.loc['mean'] = df_high_low.mean()

In [53]:
df_high_low

Unnamed: 0,accuracy,f1,recall,precision
0,0.872807,0.0,0.0,0.0
1,0.877193,0.0,0.0,0.0
2,0.861111,0.0,0.0,0.0
3,0.878655,0.0,0.0,0.0
4,0.869883,0.0,0.0,0.0
5,0.875731,0.0,0.0,0.0
6,0.877193,0.0,0.0,0.0
7,0.881579,0.0,0.0,0.0
8,0.875731,0.0,0.0,0.0
9,0.864035,0.0,0.0,0.0


In [54]:
df_high_low.to_excel('df_high_low.xlsx', index=None)

#### 3.3.2 High on medium

In [55]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_eduhigh_median'].tolist()) #High in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_edumed_median'].tolist()) #Medium in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_eduhigh_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.881578947368421
Training Fold 2/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 2: 0.8874269005847953
Training Fold 3/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 3: 0.8947368421052632
Training Fold 4/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 4: 0.8976608187134503
Training Fold 5/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 5: 0.9005847953216374
Training Fold 6/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 6: 0.8801169590643275
Training Fold 7/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 7: 0.8801169590643275
Training Fold 8/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 8: 0.902046783625731
Training Fold 9/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 9: 0.8976608187134503
Training Fold 10/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 10: 0.8742690058479532
Training Fold 11/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 11: 0.8801169590643275
Training Fold 12/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 12: 0.8947368421052632
Training Fold 13/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 13: 0.8888888888888888
Training Fold 14/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 14: 0.8932748538011696
Training Fold 15/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 15: 0.8989751098096632
Training Fold 16/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 16: 0.8857979502196194
Training Fold 17/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 17: 0.8814055636896047
Training Fold 18/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 18: 0.8857979502196194
Training Fold 19/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 19: 0.8945827232796486
Training Fold 20/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 20: 0.8843338213762811


  _warn_prf(average, modifier, msg_start, len(result))


In [56]:
#create new df
df_high_med = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_high_med.loc['mean'] = df_high_med.mean()

In [57]:
df_high_med

Unnamed: 0,accuracy,f1,recall,precision
0,0.881579,0.0,0.0,0.0
1,0.887427,0.0,0.0,0.0
2,0.894737,0.0,0.0,0.0
3,0.897661,0.0,0.0,0.0
4,0.900585,0.0,0.0,0.0
5,0.880117,0.0,0.0,0.0
6,0.880117,0.0,0.0,0.0
7,0.902047,0.0,0.0,0.0
8,0.897661,0.0,0.0,0.0
9,0.874269,0.0,0.0,0.0


In [58]:
df_high_med.to_excel('df_high_med.xlsx', index=None)

#### 3.3.3 High on high

In [59]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_eduhigh_median'].tolist()) #High in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_eduhigh_median'].tolist()) #High in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_eduhigh_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.8845029239766082
Training Fold 2/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 2: 0.8845029239766082
Training Fold 3/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 3: 0.8845029239766082
Training Fold 4/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 4: 0.8845029239766082
Training Fold 5/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 5: 0.8845029239766082
Training Fold 6/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 6: 0.8845029239766082
Training Fold 7/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 7: 0.8845029239766082
Training Fold 8/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 8: 0.8845029239766082
Training Fold 9/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 9: 0.8845029239766082
Training Fold 10/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 10: 0.8845029239766082
Training Fold 11/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 11: 0.8830409356725146
Training Fold 12/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 12: 0.8830409356725146
Training Fold 13/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 13: 0.8830409356725146
Training Fold 14/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 14: 0.8830409356725146
Training Fold 15/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 15: 0.8843338213762811
Training Fold 16/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 16: 0.8843338213762811
Training Fold 17/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 17: 0.8843338213762811
Training Fold 18/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 18: 0.8843338213762811
Training Fold 19/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 19: 0.8843338213762811
Training Fold 20/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 20: 0.8843338213762811


  _warn_prf(average, modifier, msg_start, len(result))


In [60]:
#create new df
df_high_high = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_high_high.loc['mean'] = df_high_high.mean()

In [61]:
df_high_high

Unnamed: 0,accuracy,f1,recall,precision
0,0.884503,0.0,0.0,0.0
1,0.884503,0.0,0.0,0.0
2,0.884503,0.0,0.0,0.0
3,0.884503,0.0,0.0,0.0
4,0.884503,0.0,0.0,0.0
5,0.884503,0.0,0.0,0.0
6,0.884503,0.0,0.0,0.0
7,0.884503,0.0,0.0,0.0
8,0.884503,0.0,0.0,0.0
9,0.884503,0.0,0.0,0.0


In [62]:
df_high_high.to_excel('df_high_high.xlsx', index=None)

#### 3.3.4 High on mixed

In [63]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_eduhigh_median'].tolist()) #High in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_mixed_median'].tolist()) #Mixed in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_eduhigh_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.8757309941520468
Training Fold 2/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 2: 0.8888888888888888
Training Fold 3/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 3: 0.8903508771929824
Training Fold 4/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 4: 0.8874269005847953
Training Fold 5/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 5: 0.9064327485380117
Training Fold 6/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 6: 0.8581871345029239
Training Fold 7/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 7: 0.8625730994152047
Training Fold 8/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 8: 0.8976608187134503
Training Fold 9/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 9: 0.8786549707602339
Training Fold 10/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 10: 0.8845029239766082
Training Fold 11/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 11: 0.8918128654970761
Training Fold 12/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 12: 0.8932748538011696
Training Fold 13/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 13: 0.8859649122807017
Training Fold 14/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 14: 0.9181286549707602
Training Fold 15/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 15: 0.8828696925329429
Training Fold 16/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 16: 0.8960468521229868
Training Fold 17/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 17: 0.8916544655929722
Training Fold 18/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 18: 0.8828696925329429
Training Fold 19/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 19: 0.8916544655929722
Training Fold 20/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 20: 0.8799414348462665


  _warn_prf(average, modifier, msg_start, len(result))


In [64]:
#create new df
df_high_mix = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_high_mix.loc['mean'] = df_high_mix.mean()

In [65]:
df_high_mix

Unnamed: 0,accuracy,f1,recall,precision
0,0.875731,0.0,0.0,0.0
1,0.888889,0.0,0.0,0.0
2,0.890351,0.0,0.0,0.0
3,0.887427,0.0,0.0,0.0
4,0.906433,0.0,0.0,0.0
5,0.858187,0.0,0.0,0.0
6,0.862573,0.0,0.0,0.0
7,0.897661,0.0,0.0,0.0
8,0.878655,0.0,0.0,0.0
9,0.884503,0.0,0.0,0.0


In [66]:
df_high_mix.to_excel('df_high_mix.xlsx', index=None)

### 3.4 Education Mixed

#### 3.4.1 Mixed on low

In [81]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_mixed_median'].tolist()) #High in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_edulow_median'].tolist()) #Mixed in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

train_indices_list = []
test_indices_list = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_mixed_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")
    train_indices_list.append(train_indices)
    test_indices_list.append(val_indices)

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.8859649122807017
Training Fold 2/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 2: 0.8742690058479532
Training Fold 3/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 3: 0.881578947368421
Training Fold 4/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 4: 0.868421052631579
Training Fold 5/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 5: 0.8669590643274854
Training Fold 6/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 6: 0.8567251461988304
Training Fold 7/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 7: 0.8874269005847953
Training Fold 8/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 8: 0.8801169590643275
Training Fold 9/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 9: 0.8698830409356725
Training Fold 10/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 10: 0.8654970760233918
Training Fold 11/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 11: 0.881578947368421
Training Fold 12/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 12: 0.8845029239766082
Training Fold 13/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 13: 0.8625730994152047
Training Fold 14/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 14: 0.8728070175438597
Training Fold 15/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 15: 0.8565153733528551
Training Fold 16/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 16: 0.8828696925329429
Training Fold 17/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 17: 0.87701317715959
Training Fold 18/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 18: 0.8711566617862372
Training Fold 19/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 19: 0.8740849194729137
Training Fold 20/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 20: 0.8711566617862372


  _warn_prf(average, modifier, msg_start, len(result))


In [82]:
#create new df
df_mix_low = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_mix_low.loc['mean'] = df_mix_low.mean()

In [83]:
df_mix_low

Unnamed: 0,accuracy,f1,recall,precision
0,0.885965,0.0,0.0,0.0
1,0.874269,0.0,0.0,0.0
2,0.881579,0.0,0.0,0.0
3,0.868421,0.0,0.0,0.0
4,0.866959,0.0,0.0,0.0
5,0.856725,0.0,0.0,0.0
6,0.887427,0.0,0.0,0.0
7,0.880117,0.0,0.0,0.0
8,0.869883,0.0,0.0,0.0
9,0.865497,0.0,0.0,0.0


In [84]:
df_mix_low.to_excel('df_high_low.xlsx', index=None)

#### 3.4.2 Mixed on medium

In [85]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_mixed_median'].tolist()) #High in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_edumed_median'].tolist()) #Mixed in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_mixed_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.8771929824561403
Training Fold 2/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 2: 0.8976608187134503
Training Fold 3/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 3: 0.9049707602339181
Training Fold 4/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 4: 0.8903508771929824
Training Fold 5/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 5: 0.8757309941520468
Training Fold 6/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 6: 0.8932748538011696
Training Fold 7/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 7: 0.8903508771929824
Training Fold 8/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 8: 0.8786549707602339
Training Fold 9/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 9: 0.8859649122807017
Training Fold 10/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 10: 0.8947368421052632
Training Fold 11/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 11: 0.9005847953216374
Training Fold 12/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 12: 0.8932748538011696
Training Fold 13/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 13: 0.8903508771929824
Training Fold 14/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 14: 0.881578947368421
Training Fold 15/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 15: 0.8814055636896047
Training Fold 16/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 16: 0.8916544655929722
Training Fold 17/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 17: 0.8887262079062958
Training Fold 18/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 18: 0.8828696925329429
Training Fold 19/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 19: 0.8945827232796486
Training Fold 20/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 20: 0.890190336749634


  _warn_prf(average, modifier, msg_start, len(result))


In [86]:
#create new df
df_mix_med = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_mix_med.loc['mean'] = df_mix_med.mean()

In [87]:
df_mix_med

Unnamed: 0,accuracy,f1,recall,precision
0,0.877193,0.0,0.0,0.0
1,0.897661,0.0,0.0,0.0
2,0.904971,0.0,0.0,0.0
3,0.890351,0.0,0.0,0.0
4,0.875731,0.0,0.0,0.0
5,0.893275,0.0,0.0,0.0
6,0.890351,0.0,0.0,0.0
7,0.878655,0.0,0.0,0.0
8,0.885965,0.0,0.0,0.0
9,0.894737,0.0,0.0,0.0


In [88]:
df_mix_med.to_excel('df_mix_med.xlsx', index=None)

#### 3.4.3 Mixed on high

In [89]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_mixed_median'].tolist()) #High in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_eduhigh_median'].tolist()) #Mixed in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_mixed_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.8669590643274854
Training Fold 2/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 2: 0.8830409356725146
Training Fold 3/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 3: 0.8888888888888888
Training Fold 4/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 4: 0.8961988304093568
Training Fold 5/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 5: 0.8771929824561403
Training Fold 6/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 6: 0.868421052631579
Training Fold 7/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 7: 0.8786549707602339
Training Fold 8/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 8: 0.8801169590643275
Training Fold 9/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 9: 0.8991228070175439
Training Fold 10/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 10: 0.8757309941520468
Training Fold 11/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 11: 0.8932748538011696
Training Fold 12/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 12: 0.8845029239766082
Training Fold 13/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 13: 0.8757309941520468
Training Fold 14/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 14: 0.9005847953216374
Training Fold 15/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 15: 0.8945827232796486
Training Fold 16/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 16: 0.8828696925329429
Training Fold 17/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 17: 0.8740849194729137
Training Fold 18/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 18: 0.890190336749634
Training Fold 19/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 19: 0.8799414348462665
Training Fold 20/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 20: 0.8931185944363104


  _warn_prf(average, modifier, msg_start, len(result))


In [90]:
#create new df
df_mix_high = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_mix_high.loc['mean'] = df_mix_high.mean()

In [91]:
df_mix_high

Unnamed: 0,accuracy,f1,recall,precision
0,0.866959,0.0,0.0,0.0
1,0.883041,0.0,0.0,0.0
2,0.888889,0.0,0.0,0.0
3,0.896199,0.0,0.0,0.0
4,0.877193,0.0,0.0,0.0
5,0.868421,0.0,0.0,0.0
6,0.878655,0.0,0.0,0.0
7,0.880117,0.0,0.0,0.0
8,0.899123,0.0,0.0,0.0
9,0.875731,0.0,0.0,0.0


In [92]:
df_mix_high.to_excel('df_mix_high.xlsx', index=None)

#### 3.4.4. Mixed on mixed

In [93]:
# Convert dataframe to dataset
dataset_train = TextDataset(df['c_text'].tolist(), df['unangemessen_mixed_median'].tolist()) #Mixed in train set
dataset_test = TextDataset(df['c_text'].tolist(), df['unangemessen_mixed_median'].tolist()) #Mixed in test set


# Define k-fold cross-validation
k_folds = 20
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize lists to store accuracies for each fold
fold_accuracies = []
fold_f1s = []
fold_recalls = []
fold_precisions = []

# Perform k-fold cross-validation
for fold, (train_indices, val_indices) in enumerate(skf.split(df['c_text'], df['unangemessen_mixed_median'])):
    print(f"Training Fold {fold+1}/{k_folds}")

    # Split dataset into train and validation sets for the current fold
    train_dataset = torch.utils.data.Subset(dataset_train, train_indices)
    val_dataset = torch.utils.data.Subset(dataset_test, val_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = torch.nn.CrossEntropyLoss()
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.train()
    for epoch in range(3):  # Adjust the number of epochs as needed
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    # Evaluation loop
    model.eval()
    val_predictions = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted_labels = torch.max(outputs.logits, dim=1)
            val_predictions.extend(predicted_labels.tolist())
            val_labels.extend(labels.tolist())

    fold_accuracy = accuracy_score(val_labels, val_predictions)
    fold_accuracies.append(fold_accuracy)
    print(f"Accuracy for Fold {fold+1}: {fold_accuracy}")


    fold_f1 = f1_score(val_labels, val_predictions)
    fold_f1s.append(fold_f1)

    fold_recall = recall_score(val_labels, val_predictions)
    fold_recalls.append(fold_recall)

    fold_precision = precision_score(val_labels, val_predictions)
    fold_precisions.append(fold_precision)

Training Fold 1/20
Accuracy for Fold 1: 0.8874269005847953
Training Fold 2/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 2: 0.8874269005847953
Training Fold 3/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 3: 0.8874269005847953
Training Fold 4/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 4: 0.8874269005847953
Training Fold 5/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 5: 0.8874269005847953
Training Fold 6/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 6: 0.8874269005847953
Training Fold 7/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 7: 0.8874269005847953
Training Fold 8/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 8: 0.8874269005847953
Training Fold 9/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 9: 0.8874269005847953
Training Fold 10/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 10: 0.8874269005847953
Training Fold 11/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 11: 0.8874269005847953
Training Fold 12/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 12: 0.8874269005847953
Training Fold 13/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 13: 0.8859649122807017
Training Fold 14/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 14: 0.8859649122807017
Training Fold 15/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 15: 0.8872620790629575
Training Fold 16/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 16: 0.8872620790629575
Training Fold 17/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 17: 0.8872620790629575
Training Fold 18/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 18: 0.8872620790629575
Training Fold 19/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 19: 0.8872620790629575
Training Fold 20/20


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy for Fold 20: 0.8872620790629575


  _warn_prf(average, modifier, msg_start, len(result))


In [94]:
#create new df
df_mix_mix = pd.DataFrame({'accuracy':fold_accuracies,
                                        'f1':fold_f1s,
                                        'recall':fold_recalls,
                                        'precision':fold_precisions
                                        })
#Add row with mean
df_mix_mix.loc['mean'] = df_mix_mix.mean()

In [95]:
df_mix_mix

Unnamed: 0,accuracy,f1,recall,precision
0,0.887427,0.0,0.0,0.0
1,0.887427,0.0,0.0,0.0
2,0.887427,0.0,0.0,0.0
3,0.887427,0.0,0.0,0.0
4,0.887427,0.0,0.0,0.0
5,0.887427,0.0,0.0,0.0
6,0.887427,0.0,0.0,0.0
7,0.887427,0.0,0.0,0.0
8,0.887427,0.0,0.0,0.0
9,0.887427,0.0,0.0,0.0


In [96]:
df_mix_mix.to_excel('df_mix_mix.xlsx', index=None)

# Fine