In [1]:
from MajorityBaseline import *
import pandas as pd
import numpy as np
from tqdm import tqdm
import logging
from sklearn.metrics import *
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score

In [8]:
class MajorityBaselineClassifier():
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.class_counter_dict = dict()
        for c in range(self.num_classes):
            self.class_counter_dict.update({c:0})
    
    # u train_x su samo oznake
    def fit(self, train_x):
        for x in train_x:
            prev_value = self.class_counter_dict[x]
            self.class_counter_dict.update({x:prev_value + 1})
        best_c = -1 
        max_amount = -1
        for c in range(self.num_classes):
            if (self.class_counter_dict[c] > max_amount or best_c == -1):
                best_c = c
                max_amount = self.class_counter_dict[c]
        self.best_c = best_c
        
    def evaluate(self, test_x):
        y_pred = [self.best_c for i in range(len(test_x))]
        y_true = test_x
        accuracy = accuracy_score(y_pred, y_true)*100
        f1 = f1_score(y_pred, y_true, zero_division=0)*100
        recall = recall_score(y_pred, y_true, zero_division=0)*100
        return accuracy, f1, recall


In [9]:
train_data = pd.read_csv('dreaddit-train.csv')
test_data = pd.read_csv('dreaddit-test.csv')

train_labels = train_data[["label"]].to_numpy().transpose()[0]
test_labels = test_data[["label"]].to_numpy().transpose()[0]

print(train_labels.shape)
print(test_labels.shape)

(2838,)
(715,)


In [10]:
model = MajorityBaselineClassifier(2)

In [11]:
model.fit(train_labels)

In [12]:
model.best_c

1

In [13]:
accuracy, f1, recall = model.evaluate(test_labels)

In [14]:
print("Accuracy on test data = %0.2f%%" % accuracy)
print("F1 on test data = %0.2f%%" % f1)
print("Recall on test data = %0.2f%%" % recall)

Accuracy on test data = 51.61%
F1 on test data = 68.08%
Recall on test data = 51.61%
