In [376]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, BertConfig
from torch.utils.data import Dataset, DataLoader
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import AdamW
import torch
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
import numpy as np
from tqdm import tqdm
from sklearn.metrics import recall_score, precision_score, f1_score
from sklearn.metrics import accuracy_score, classification_report

In [377]:
df = pd.read_csv('../datasets/dataset1.csv')
# Dropping topic because it has only one value. It might be usefull if we had more data
df = df.drop(columns=['Old Code Book', 'Memo', 'Chat0CREW1B', 'Chat0CREW1', 'Response Number', 'Message Time', 'CollapsR2DiscussionTypeInterpNothers', 'R2DiscussionTypeInterpNothers'])
df.columns = [''.join([word[0].upper() + word[1:] for word in col.split()]) for col in df.columns]
df.columns = [col.replace(' ', '') for col in df.columns]

In [378]:
print(df.shape)

(609, 25)


In [379]:
df.columns

Index(['Course', 'BookID', 'Topic', 'Bookclub', 'Pseudonym', 'Message',
       'IsAnswer', 'Page', 'R1DiscussionType', 'R2DiscussionType',
       'R1DialogicSpell', 'BinaryR1DialogicSpell', 'R1Uptake',
       'BinaryR1Uptake', 'R2DialogicSpell', 'BinaryR2DialogicSpell',
       'R2Uptake', 'BinaryR2Uptake', 'Pseudonym.1', 'Message.1', 'Bookclub.1',
       'R1Question', 'R2Question', 'R1Pivot', 'R2Pivot'],
      dtype='object')

If R1 has a value and R2 does not should we trust R1? Let us look at some of the examples to see if this is needed

In [380]:
# Identify the columns that start with R1 and R2 and have the same suffix
r1_columns = [col for col in df.columns if col.startswith('R1') or col.startswith('BinaryR1')]
r2_columns = [col for col in df.columns if col.startswith('R2') or col.startswith('BinaryR2')]

# Make sure that the suffixes match
matched_columns = [(r1, r2) for r1 in r1_columns for r2 in r2_columns if r1[2:] == r2[2:] or (r1[9:] == r2[9:] and r1.startswith('Binary') and r2.startswith('Binary'))]
matched_columns
#
## Iterate over these pairs of columns
for r1_col, r2_col in matched_columns:
    # Find rows where the R2 version has NaN and the R1 version has a value
    condition = df[r2_col].isna() & df[r1_col].notna()
    if condition.any():# and "Dialogic" not in r1_col:
        print(f'Column {r2_col} has NaN where {r1_col} has a value')
        print(df.loc[condition, [r1_col, r2_col, 'BinaryR2Uptake']].head())
        print(df.loc[condition, ['Message']].head())

Column R2DialogicSpell has NaN where R1DialogicSpell has a value
    R1DialogicSpell R2DialogicSpell  BinaryR2Uptake
240                             NaN               0
261           Begin             NaN               0
268                             NaN               0
273                             NaN               1
274                             NaN               0
                                               Message
240  she was smiling by the end and the book told m...
261  As for the second question I believe that the ...
268               Hey guys I got through the next page
273                            I know live without him
274       im just back at the screen we were at before
Column BinaryR2DialogicSpell has NaN where BinaryR1DialogicSpell has a value
     BinaryR1DialogicSpell  BinaryR2DialogicSpell  BinaryR2Uptake
315                      1                    NaN               0
316                      1                    NaN               0
                  

It seems to not be needed because all cases it is hard for me to tell which one is correct R1 or R2. Both answers seem valid so I will keep it as is.

In [381]:
df.drop(columns=r1_columns, inplace=True)

Below I remove duplicate columns and check if there are any values that are nan in the one I keep and not the other. In this case there were none

In [382]:
dot1_columns = [col for col in df.columns if col.endswith('.1')]
original_columns = [col.rstrip('.1') for col in dot1_columns if col.rstrip('.1') in df.columns]

# For each pair of columns, if one column has a value and the other does not, keep the value
for orig_col, dot1_col in zip(original_columns, dot1_columns):
    if (df[orig_col].isna() & df[dot1_col].notna()).any():
        print(f'Column {orig_col} has a value where {dot1_col} has NaN')

# Remove the .1 columns
df = df.drop(columns=dot1_columns)

There are 3 discussion type columns. The collapsed one was apparently used for visualisation according to the given paper so it is not needed. This leaves us with R2DiscussionTypeInterpNothers and R2DiscussionType. The first seems to use interpreatation instead of seminar and that is not in the codebook so we will use the first column. I will now look at the unique values of the columns and see their counts

In [383]:
counts = df['R2Uptake'].value_counts(dropna=False)
print(counts[0]/counts.sum())
counts

0.3957307060755337


  print(counts[0]/counts.sum())


R2Uptake
NaN          241
Affirm       140
Elaborate    107
Filler        61
Clarify       50
Disagree      10
Name: count, dtype: int64

We notice that there are not prompt or respond examples that can be found in the codebook. Also most are NaN which makes sense since maybe those are not uptakes. ALso NaNs are 40% of the data so we must achieve accuracy higher than 40 to have results better than just guessing NaN each time

In [384]:
df['R2Question'].value_counts(dropna=False)

R2Question
NaN              525
C-LOT             36
C-HOT             22
O-HOT             11
O-LOT              8
C-LOT, C-LOT       3
O-COT              1
C-HOT, C-HOT       1
O-HOT, C-LOT       1
C-LOT? C-HOT?      1
Name: count, dtype: int64

We see that we have few samples compared to the total samples size that have a Question category. Also we have one sample with 2 labels. Which is likely not enough for our model to learn anything. This can probably be dropped when training and the ones that are C-LOT C-LOT can be converted just to a single C-LOT. Another options is to split the sentence somehow and classify separately and then combine the labels. This seems like a good idea and I might try it. For the C-LOT? C-HOT? it seems the expert is unsure what to write so we will either drop it or trust R1 which chose for that sample C-LOT.

In [385]:
df['R2Pivot'].value_counts(dropna=False)

R2Pivot
NaN                                           562
Seminar to Social/Procedure/UX                 10
Social/Procedure/UX to Seminar                  8
Seminar to Deliberation                         7
Deliberation to Social/Procedure/UX             4
Social/Procedure/UX to Social/Procedure/UX      4
Social/Procedure/UX to Deliberation             3
Deliberation to Seminar                         3
Seminar to Imaginative entry                    2
Imaginative entry to Seminar                    2
Seminar to Imaginative                          1
Imaginative to Seminar                          1
Delibration to Seminar                          1
Deliberationa to Seminar                        1
Name: count, dtype: int64

In [386]:
value_counts = df['R2Pivot'].value_counts(dropna=True)
total = value_counts.sum()
print(total)

47


Adding history here bofore I drop more columns

In [387]:
vectorizer = TfidfVectorizer()

# Fit and transform the data
tfidf_matrix = vectorizer.fit_transform(df['Message'])

# Get the feature names and IDF values
feature_names = vectorizer.get_feature_names_out()
idf_values = vectorizer.idf_

# Create a DataFrame with feature names and IDF values
idf_df = pd.DataFrame({'feature': feature_names, 'idf': idf_values})

# Sort the DataFrame by IDF values and get the top 100 features
idf_df = idf_df.sort_values('idf')
top_features = idf_df.head(50)['feature'].values

# Get the indices of the top features
top_feature_indices = [list(feature_names).index(feature) for feature in top_features]

# Get the TF-IDF values of the top features for each document
tfidf_df = pd.DataFrame(tfidf_matrix[:, top_feature_indices].toarray(), columns=top_features)

# Concatenate the original DataFrame with the TF-IDF DataFrame
df = pd.concat([df.reset_index(drop=True), tfidf_df], axis=1)

In [388]:
# Create new columns for the previous messages
#df['PrevMessage1'] = ''
#df['PrevMessage2'] = ''

# Iterate over the DataFrame
#for i in range(1, len(df)):
    # Check if the previous row has the same 'BookID', 'Bookclub', and 'Course'
    #if (df.loc[i, 'BookID'] == df.loc[i-1, 'BookID'] and
    #    df.loc[i, 'Bookclub'] == df.loc[i-1, 'Bookclub'] and
    #    df.loc[i, 'Course'] == df.loc[i-1, 'Course']):
    #    # If it does, add the previous message to 'PrevMessage1'
    #    df.loc[i, 'PrevMessage1'] = df.loc[i-1, 'Message']
        
        # If there is more than one previous row and it also has the same 'BookID', 'Bookclub', and 'Course'
        #if i > 1 and (df.loc[i, 'BookID'] == df.loc[i-2, 'BookID'] and
        #              df.loc[i, 'Bookclub'] == df.loc[i-2, 'Bookclub'] and
        #              df.loc[i, 'Course'] == df.loc[i-2, 'Course']):
        #    # Add the second previous message to 'PrevMessage2'
        #    df.loc[i, 'PrevMessage2'] = df.loc[i-2, 'Message']

# If the first row has no previous messages, fill 'PrevMessage1' and 'PrevMessage2' with empty strings
#df.loc[0, ['PrevMessage1']] = '' #, 'PrevMessage2']] = ''

For this one we don't have many samples separately but maybe we can make a binary classifier and just take the previous and next category. The classifier will just say pivot yes or no.

In [389]:
df['IsAnswer'].value_counts(dropna=False)

IsAnswer
No     501
NaN    106
         2
Name: count, dtype: int64

Is Answer is either No or NaN. The NaN value does not mean it is an answer. I looked at the fields that were labelled NaN and they can also be a greeting which is not an answer or a statement which is not always an answer. Because of this I believe this column is not useful for training and I will drop it

In [390]:
df.drop(columns=['IsAnswer'], inplace=True)

In [391]:
df['BookID'].value_counts(dropna=False)

BookID
260    421
261    188
Name: count, dtype: int64

In [392]:
df['Bookclub'].value_counts(dropna=False)

Bookclub
Book Club One      275
Book Club Four     192
Book Club Two       94
Book Club Three     30
Book Club Five      18
Name: count, dtype: int64

For BookID, Page and Bookclub I am unsure if they will be usefull in the end. My reasoning is because we want to generalise and what if we add a Bookclub 6 and if the model has never seen it. Or if we add a new book but the model has learned something about the specific books 260 and 261. Because of this I believe it is better to drop these columns. I was considering maybe they could be used for ids of different discussions since only a club or only a book is not enough to determine a discussion and we want to classify sentences in separate probably. If we want context we can try to use history instead of these ids.

In [393]:
df.drop(columns=['BookID', 'Bookclub', 'Page', 'Course', 'Topic'], inplace=True)

In [394]:
# Select all rows where 'R2Pivot' is not NaN
non_nan_rows = df[df['R2Pivot'].notna()]

# Get the indices of the non-NaN rows
indices = non_nan_rows.index

# Print the current 'R2DiscussionType' for these rows and the 'R2DiscussionType' from the previous row in the original DataFrame
res = zip(non_nan_rows['R2DiscussionType'], df.loc[indices - 1, 'R2DiscussionType'] if indices[0] > 0 else None)

for idx, (next, prev) in enumerate(res):
    print(non_nan_rows.iloc[idx]['R2Pivot'])
    print(prev, next)

Social/Procedure/UX to Seminar
Social Seminar
Seminar to Social/Procedure/UX
Seminar Seminar
Social/Procedure/UX to Seminar
Seminar Procedure
Seminar to Deliberation
Seminar Deliberation
Deliberation to Social/Procedure/UX
Deliberation Social
Social/Procedure/UX to Social/Procedure/UX
Social Procedure
Social/Procedure/UX to Seminar
Procedure Seminar
Seminar to Imaginative entry
Seminar Imaginative entry
Imaginative entry to Seminar
Imaginative entry Seminar
Seminar to Imaginative entry
Seminar Imaginative entry
Imaginative entry to Seminar
Imaginative entry Seminar
Seminar to Social/Procedure/UX
Seminar Procedure
Social/Procedure/UX to Social/Procedure/UX
Procedure Social
Social/Procedure/UX to Deliberation
Social Deliberation
Deliberation to Seminar
Deliberation Seminar
Seminar to Social/Procedure/UX
Seminar Procedure
Social/Procedure/UX to Seminar
Procedure Seminar
Seminar to Social/Procedure/UX
Seminar Procedure
Social/Procedure/UX to Seminar
Procedure Seminar
Seminar to Social/Proc

In [364]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cpu


In [395]:
PRETRAINED_MODELS = {
    'bert': 'bert-base-uncased',
    'roberta': 'roberta-base',
    'xlnet': 'xlnet-large-cased',
    'xlm': 'xlm-mlm-en-2048',
    'distilbert': 'distilbert-base-uncased',
    'albert':'albert-base-v2'
}

MODEL_CLASSES = {
    'bert': (BertForSequenceClassification, BertTokenizer, BertConfig),
    #'roberta': (RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig),
    #'xlnet': (XLNetForSequenceClassification, XLNetTokenizer, XLNetConfig),
    #'xlm': (XLMForSequenceClassification, XLMTokenizer, XLMConfig),
    #'distilbert': (DistilBertForSequenceClassification, DistilBertTokenizer, DistilBertConfig),
    #'albert':(AlbertForSequenceClassification,AlbertTokenizer, AlbertConfig)
}

MODEL_TYPE = 'bert'
PRETRAINED_MODEL_NAME = PRETRAINED_MODELS[MODEL_TYPE]

model_class, tokenizer_class, config_class = MODEL_CLASSES[MODEL_TYPE]

TRUNCATION = True
LEARNING_RATE = 1e-5
BATCH_SIZE = 32
EPOCHS = 50
WEIGHT_DECAY = 0.01

In [396]:
r2columns = [col for col in df.columns if 'R2' in col]
X = df.drop(columns=r2columns).fillna('')
y = df['R2Uptake'].fillna('None')

X_train_tmp, X_test, y_train_tmp, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X_train_tmp, y_train_tmp, test_size=0.2, random_state=42, stratify=y_train_tmp)

In [397]:
enc = OneHotEncoder()
enc.fit(y_train.values.reshape(-1, 1))

In [398]:
class R2UptakeDataset(Dataset):
    def __init__(self, X, y, tokenizer, max_length):
        self.X = X.values
        self.tokenizer = tokenizer
        self.max_length = max_length
        # Fit the label binarizer and transform the labels into one-hot encoded format
        self.labels = enc.transform(y.values.reshape(-1, 1)).toarray()

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # Encode the utterance using the provided tokenizer
        encoding = self.tokenizer.encode_plus(
            self.X[idx],
            add_special_tokens=True,
            max_length = self.max_length,
            return_token_type_ids=True,
            padding='max_length',
            return_attention_mask=True,
            truncation=TRUNCATION,
            return_tensors='pt'
        )
        # Convert the list of strings into a one-hot encoded format
        label = self.labels[idx]  # This should now be a binary vector instead of a list of strings

        # Return the encoding and the label
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.float),
            'token_type_ids': encoding['token_type_ids'].flatten(),
        }

In [399]:
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)

In [400]:
num_labels = len(df['R2Uptake'].unique())
num_labels

6

In [401]:
from torch.utils.data import Dataset, DataLoader, Sampler
from sklearn.model_selection import StratifiedShuffleSplit

class StratifiedSampler(Sampler):
    def __init__(self, class_vector, batch_size):
        self.n_splits = int(class_vector.size(0) / batch_size)
        self.class_vector = class_vector

    def gen_sample_array(self):
        s = StratifiedShuffleSplit(n_splits=self.n_splits, test_size=0.5)
        X = torch.randn(self.class_vector.size(0),2).numpy()
        y = self.class_vector.numpy()
        s.get_n_splits(X, y)

        train_index, test_index = next(s.split(X, y))
        return np.hstack([train_index, test_index])

    def __iter__(self):
        return iter(self.gen_sample_array())

    def __len__(self):
        return len(self.class_vector)

In [403]:
train_texts = X_train.apply(lambda x: ('|'.join(x.astype(str))).replace('0.0', ''), axis=1)
train_labels = y_train

val_texts = X_val.apply(lambda x: ('|'.join(x.astype(str))).replace('0.0', ''), axis=1)
val_labels = y_val

test_texts = X_test.apply(lambda x: ('|'.join(x.astype(str))).replace('0.0', ''), axis=1)
test_labels = y_test

longest_train_data = train_texts[train_texts.str.len().idxmax()]
max_length = min(2 ** (len(tokenizer.tokenize(longest_train_data))-1).bit_length(), 512)
print(train_labels.value_counts())

train_data = R2UptakeDataset(train_texts, train_labels, tokenizer, max_length=max_length)
val_data = R2UptakeDataset(val_texts, val_labels, tokenizer, max_length=max_length)
test_data = R2UptakeDataset(test_texts, test_labels, tokenizer, max_length=max_length)

le = LabelEncoder()
train_labels_encoded = le.fit_transform(train_labels)
val_labels_encoded = le.transform(val_labels)

trainSampler = StratifiedSampler(torch.tensor(train_labels_encoded), BATCH_SIZE)
valSampler = StratifiedSampler(torch.tensor(val_labels_encoded), BATCH_SIZE)

train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, sampler=trainSampler)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, sampler=valSampler)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

R2Uptake
None         154
Affirm        90
Elaborate     68
Filler        39
Clarify       32
Disagree       6
Name: count, dtype: int64


In [404]:
max_length

512

In [407]:
train_texts.iloc[3].split('|')

['Cheryl Diaz',
 "We can continue to discuss individually, or if you want to try writing collaboratively at the same time, please suggest a time. I'm available any evening this week starting at 6 p.m. I'll keep tabs on here from now on to see if you want to do that. Thanks, Robyn",
 '4619300045485831',
 '0.2262973847672804',
 '6433665813448955',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '0.16672712179392618',
 '',
 '',
 '',
 '0.17336175701849443',
 '',
 '',
 '0.17698922476304443',
 '',
 '8964028075721846',
 '',
 '',
 '',
 '',
 '',
 '',
 '9427971979086341',
 '',
 '',
 '',
 '',
 '9872370817676032',
 '',
 '',
 '',
 '',
 '',
 '0.10272620462479903',
 '0.1039798308341429',
 '',
 '',
 '0.10737538149340177',
 '',
 '']

In [66]:
train_labels.iloc[3]

'Filler'

In [22]:
#HIDDEN_WEIGHTS = 768
HIDDEN_WEIGHTS = 256
DROPOUT = 0.3
class BERTClass(torch.nn.Module):
    def __init__(self, pretrained_model_name, num_labels):
        super(BERTClass, self).__init__()
        self.num_labels = num_labels
        self.l1 = model_class.from_pretrained(pretrained_model_name, num_labels=self.num_labels)
        self.pre_classifier = torch.nn.Linear(self.num_labels, HIDDEN_WEIGHTS)
        self.dropout = torch.nn.Dropout(DROPOUT)
        self.classifier = torch.nn.Linear(HIDDEN_WEIGHTS, self.num_labels)

    def forward(self, input_ids, attention_mask, token_type_ids):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        hidden_state = output_1[0]
        pooler = hidden_state
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        pooler = self.dropout(pooler)
        output = self.classifier(pooler)
        output = output.view(-1, self.num_labels)  # Reshape the output
        return output

model = BERTClass(PRETRAINED_MODEL_NAME, num_labels)
model = model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [23]:
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)

optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY, correct_bias=False)



In [24]:
def valid(model, valid_dataloader):
    val_targets = []
    val_outputs = []
    
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in tqdm(valid_dataloader):
            input_ids = batch['input_ids'].to(device, dtype=torch.long)
            attention_mask = batch['attention_mask'].to(device, dtype=torch.long)
            token_type_ids = batch['token_type_ids'].to(device, dtype=torch.long)
            labels = batch['labels'].to(device, dtype=torch.float)

            outputs = model(input_ids, attention_mask, token_type_ids)

            loss = loss_fn(outputs, labels)
            val_loss += loss.item()

            val_targets.extend(labels.cpu().detach().numpy().tolist())
            val_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
            

    val_loss /= len(valid_dataloader)
    
    return val_loss, val_targets, val_outputs

In [25]:
def train(model, train_dataloader):
    model.train()
    train_loss = 0
    for batch in tqdm(train_dataloader):
        input_ids = batch['input_ids'].to(device, dtype=torch.long)
        attention_mask = batch['attention_mask'].to(device, dtype=torch.long)
        token_type_ids = batch['token_type_ids'].to(device, dtype=torch.long)
        labels = batch['labels'].to(device, dtype=torch.float)

        model.zero_grad()
        outputs = model(input_ids, attention_mask, token_type_ids)

        loss = loss_fn(outputs, labels)
        train_loss += loss.item()

        loss.backward()
        optimizer.step()

    train_loss /= len(train_dataloader)
    
    return train_loss

In [26]:
def train_model(num_epochs, train_dataloader, valid_dataloader, model, optimizer, best_model_path = "./", patience=1):
    valid_loss_min = np.Inf

    num_not_improved = 0
    for epoch in range(1, num_epochs):
        print()
        print("#################### Epoch {}: Training Start    ####################".format(epoch))
        train_loss = train(model, train_dataloader)
        print('#################### Epoch {}: Training End      ####################'.format(epoch))

        print()
        print("#################### Epoch {}: Validation Start ####################".format(epoch))

        valid_loss, val_targets, val_outputs = valid(model, valid_dataloader)
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(epoch, train_loss, valid_loss))

        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min, valid_loss))

            #checkpoint = {
                    #    'state_dict': model.state_dict(),
                    #    'enc' : enc.get_params()
                    #}

            #save_ckp(checkpoint, best_model_path)
            valid_loss_min = valid_loss
            num_not_improved = 0
        else:
            num_not_improved += 1
            if num_not_improved >= patience:
                print('Not improvement for more than:', num_not_improved)
                break
            
        print("#################### Epoch {}: Validation End   ####################".format(epoch))
        print()

    print("#################### Training finished     ####################")
    return model

In [27]:
print('No saved model found. Need to be train from scratch.')
trained_model = train_model(EPOCHS, train_loader, val_loader, model, optimizer)

No saved model found. Need to be train from scratch.

#################### Epoch 1: Training Start    ####################


100%|██████████| 13/13 [07:41<00:00, 35.48s/it]


#################### Epoch 1: Training End      ####################

#################### Epoch 1: Validation Start ####################


100%|██████████| 4/4 [00:37<00:00,  9.33s/it]


Epoch: 1 	Training Loss: 0.670940 	Validation Loss: 0.634460
Validation loss decreased (inf --> 0.634460).  Saving model ...
#################### Epoch 1: Validation End   ####################


#################### Epoch 2: Training Start    ####################


100%|██████████| 13/13 [07:46<00:00, 35.87s/it]


#################### Epoch 2: Training End      ####################

#################### Epoch 2: Validation Start ####################


100%|██████████| 4/4 [00:35<00:00,  8.94s/it]


Epoch: 2 	Training Loss: 0.606649 	Validation Loss: 0.582038
Validation loss decreased (0.634460 --> 0.582038).  Saving model ...
#################### Epoch 2: Validation End   ####################


#################### Epoch 3: Training Start    ####################


 23%|██▎       | 3/13 [01:56<06:28, 38.80s/it]

In [None]:
test_loss, test_labels , test_predictions_probs = valid(trained_model, test_loader)

100%|██████████| 4/4 [00:45<00:00, 11.41s/it]


In [None]:
test_loss

0.4385611116886139

In [None]:
threshold = 0.5
test_predictions = [[prob > threshold for prob in prob_list] for prob_list in test_predictions_probs ]

In [None]:
test_predictions

[[False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, F

In [None]:
print('Accuracy:', accuracy_score(test_labels, test_predictions))
print('Precision:', precision_score(test_labels, test_predictions, average='weighted'))
print('Recall:', recall_score(test_labels, test_predictions, average='weighted'))
print('F1:', f1_score(test_labels, test_predictions, average='weighted'))

report = classification_report(test_labels, test_predictions, target_names=enc.categories_[0])
print(report)

Accuracy: 0.0
Precision: 0.0
Recall: 0.0
F1: 0.0
              precision    recall  f1-score   support

      Affirm       0.00      0.00      0.00        28
     Clarify       0.00      0.00      0.00        10
    Disagree       0.00      0.00      0.00         2
   Elaborate       0.00      0.00      0.00        22
      Filler       0.00      0.00      0.00        12
        None       0.00      0.00      0.00        48

   micro avg       0.00      0.00      0.00       122
   macro avg       0.00      0.00      0.00       122
weighted avg       0.00      0.00      0.00       122
 samples avg       0.00      0.00      0.00       122



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
