In [1]:
from datasets import Dataset, DatasetDict
import pandas as pd
import os

# Finetune on imbalanced dataset

In [2]:
train = pd.read_csv(os.path.join("data-imbalance", "train-imbalance.csv"))   
val = pd.read_csv(os.path.join("data-imbalance", "dev-imbalance.csv"))

In [3]:
train_dataset = Dataset.from_pandas(train)
val_dataset = Dataset.from_pandas(val)

In [4]:
dataset_dict = DatasetDict({'train': train_dataset, 'validation': val_dataset})

In [5]:
dataset_dict

DatasetDict({
    train: Dataset({
        features: ['tweet', 'label'],
        num_rows: 5868
    })
    validation: Dataset({
        features: ['tweet', 'label'],
        num_rows: 1966
    })
})

In [6]:
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [7]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

  _torch_pytree._register_pytree_node(


In [8]:
def preprocess_function(examples):
    return tokenizer(examples["tweet"], truncation=True)

In [9]:
tokenized_twitter = dataset_dict.map(preprocess_function, batched=True)

Map:   0%|          | 0/5868 [00:00<?, ? examples/s]

Map:   0%|          | 0/1966 [00:00<?, ? examples/s]

In [10]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [11]:
import evaluate

accuracy = evaluate.load("accuracy")

  _torch_pytree._register_pytree_node(


In [12]:
import numpy as np


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [13]:
id2label = {0: "negative", 1: "neutral", 2: "positive"}
label2id = {"negative": 0, "neutral": 1, "positive": 2}

In [14]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=3, id2label=id2label, label2id=label2id)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'pre_classifier.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:

training_args = TrainingArguments(
    output_dir="semantic-bert-imbalanced-dataset",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=20,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=True,
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_twitter["train"],
    eval_dataset=tokenized_twitter["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()
trainer.push_to_hub()

  0%|          | 0/7340 [00:00<?, ?it/s]

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 0.823654294013977, 'eval_accuracy': 0.6017293997965412, 'eval_runtime': 4.9998, 'eval_samples_per_second': 393.215, 'eval_steps_per_second': 24.601, 'epoch': 1.0}
{'loss': 0.7608, 'learning_rate': 1.8637602179836514e-05, 'epoch': 1.36}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 0.8749859929084778, 'eval_accuracy': 0.5834181078331638, 'eval_runtime': 4.103, 'eval_samples_per_second': 479.164, 'eval_steps_per_second': 29.978, 'epoch': 2.0}
{'loss': 0.5139, 'learning_rate': 1.7275204359673027e-05, 'epoch': 2.72}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 0.9829184412956238, 'eval_accuracy': 0.5946083418107834, 'eval_runtime': 4.0434, 'eval_samples_per_second': 486.222, 'eval_steps_per_second': 30.42, 'epoch': 3.0}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 1.2405952215194702, 'eval_accuracy': 0.5869786368260427, 'eval_runtime': 3.9818, 'eval_samples_per_second': 493.749, 'eval_steps_per_second': 30.891, 'epoch': 4.0}
{'loss': 0.3119, 'learning_rate': 1.591280653950954e-05, 'epoch': 4.09}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 1.5168319940567017, 'eval_accuracy': 0.58646998982706, 'eval_runtime': 3.9409, 'eval_samples_per_second': 498.865, 'eval_steps_per_second': 31.211, 'epoch': 5.0}
{'loss': 0.1501, 'learning_rate': 1.455040871934605e-05, 'epoch': 5.45}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 2.0460996627807617, 'eval_accuracy': 0.5813835198372329, 'eval_runtime': 3.957, 'eval_samples_per_second': 496.845, 'eval_steps_per_second': 31.084, 'epoch': 6.0}
{'loss': 0.0981, 'learning_rate': 1.3188010899182562e-05, 'epoch': 6.81}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 2.257493019104004, 'eval_accuracy': 0.5859613428280773, 'eval_runtime': 4.1057, 'eval_samples_per_second': 478.847, 'eval_steps_per_second': 29.958, 'epoch': 7.0}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 2.6288998126983643, 'eval_accuracy': 0.5803662258392676, 'eval_runtime': 4.0422, 'eval_samples_per_second': 486.366, 'eval_steps_per_second': 30.429, 'epoch': 8.0}
{'loss': 0.0647, 'learning_rate': 1.1825613079019073e-05, 'epoch': 8.17}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 2.8078842163085938, 'eval_accuracy': 0.5844354018311292, 'eval_runtime': 4.0148, 'eval_samples_per_second': 489.688, 'eval_steps_per_second': 30.637, 'epoch': 9.0}
{'loss': 0.0436, 'learning_rate': 1.0463215258855586e-05, 'epoch': 9.54}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 2.981097936630249, 'eval_accuracy': 0.5824008138351984, 'eval_runtime': 4.2146, 'eval_samples_per_second': 466.474, 'eval_steps_per_second': 29.184, 'epoch': 10.0}
{'loss': 0.0271, 'learning_rate': 9.1008174386921e-06, 'epoch': 10.9}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 3.0668563842773438, 'eval_accuracy': 0.5874872838250255, 'eval_runtime': 4.0719, 'eval_samples_per_second': 482.817, 'eval_steps_per_second': 30.207, 'epoch': 11.0}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 3.120116710662842, 'eval_accuracy': 0.5834181078331638, 'eval_runtime': 3.9886, 'eval_samples_per_second': 492.901, 'eval_steps_per_second': 30.838, 'epoch': 12.0}
{'loss': 0.0153, 'learning_rate': 7.73841961852861e-06, 'epoch': 12.26}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 3.1980180740356445, 'eval_accuracy': 0.5910478128179044, 'eval_runtime': 3.9748, 'eval_samples_per_second': 494.62, 'eval_steps_per_second': 30.945, 'epoch': 13.0}
{'loss': 0.0189, 'learning_rate': 6.376021798365123e-06, 'epoch': 13.62}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 3.2316672801971436, 'eval_accuracy': 0.5930824008138352, 'eval_runtime': 3.9451, 'eval_samples_per_second': 498.34, 'eval_steps_per_second': 31.178, 'epoch': 14.0}
{'loss': 0.0158, 'learning_rate': 5.013623978201635e-06, 'epoch': 14.99}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 3.2659456729888916, 'eval_accuracy': 0.5885045778229908, 'eval_runtime': 3.9305, 'eval_samples_per_second': 500.191, 'eval_steps_per_second': 31.294, 'epoch': 15.0}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 3.3859939575195312, 'eval_accuracy': 0.5874872838250255, 'eval_runtime': 35.9971, 'eval_samples_per_second': 54.616, 'eval_steps_per_second': 3.417, 'epoch': 16.0}
{'loss': 0.0122, 'learning_rate': 3.6512261580381475e-06, 'epoch': 16.35}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 3.330221176147461, 'eval_accuracy': 0.5956256358087487, 'eval_runtime': 4.0028, 'eval_samples_per_second': 491.157, 'eval_steps_per_second': 30.729, 'epoch': 17.0}
{'loss': 0.0107, 'learning_rate': 2.2888283378746596e-06, 'epoch': 17.71}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 3.3341147899627686, 'eval_accuracy': 0.5946083418107834, 'eval_runtime': 4.0455, 'eval_samples_per_second': 485.969, 'eval_steps_per_second': 30.404, 'epoch': 18.0}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 3.404203414916992, 'eval_accuracy': 0.5930824008138352, 'eval_runtime': 3.9481, 'eval_samples_per_second': 497.961, 'eval_steps_per_second': 31.154, 'epoch': 19.0}
{'loss': 0.0078, 'learning_rate': 9.264305177111717e-07, 'epoch': 19.07}


  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 3.3940141201019287, 'eval_accuracy': 0.5925737538148524, 'eval_runtime': 4.054, 'eval_samples_per_second': 484.954, 'eval_steps_per_second': 30.34, 'epoch': 20.0}
{'train_runtime': 1236.5448, 'train_samples_per_second': 94.91, 'train_steps_per_second': 5.936, 'train_loss': 0.13999554968010178, 'epoch': 20.0}


events.out.tfevents.1704278749.Dimitriss-MacBook-Pro.local.3889.0:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/J1mb0o/semantic-bert-imbalanced-dataset/commit/82679964d3339e08595b5d33dc878eb80d105584', commit_message='End of training', commit_description='', oid='82679964d3339e08595b5d33dc878eb80d105584', pr_url=None, pr_revision=None, pr_num=None)

# Finetuning on balanced dataset

In [16]:
train = pd.read_csv(os.path.join("data-balance", "train-balance.csv"))   
val = pd.read_csv(os.path.join("data-balance", "dev-balance.csv"))

In [17]:
train_dataset = Dataset.from_pandas(train)
val_dataset = Dataset.from_pandas(val)

In [18]:
dataset_dict = DatasetDict({'train': train_dataset, 'validation': val_dataset})

In [19]:
dataset_dict

DatasetDict({
    train: Dataset({
        features: ['tweet', 'label'],
        num_rows: 2550
    })
    validation: Dataset({
        features: ['tweet', 'label'],
        num_rows: 1173
    })
})

In [20]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [21]:
def preprocess_function(examples):
    return tokenizer(examples["tweet"], truncation=True)

In [22]:
tokenized_twitter = dataset_dict.map(preprocess_function, batched=True)

Map:   0%|          | 0/2550 [00:00<?, ? examples/s]

Map:   0%|          | 0/1173 [00:00<?, ? examples/s]

In [23]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [24]:
import evaluate

accuracy = evaluate.load("accuracy")

In [25]:
import numpy as np


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [26]:
id2label = {0: "negative", 1: "neutral", 2: "positive"}
label2id = {"negative": 0, "neutral": 1, "positive": 2}

In [27]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=3, id2label=id2label, label2id=label2id)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'pre_classifier.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [28]:

training_args = TrainingArguments(
    output_dir="semantic-bert-balanced-dataset",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=20,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=True,
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_twitter["train"],
    eval_dataset=tokenized_twitter["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()
trainer.push_to_hub()

  0%|          | 0/3200 [00:00<?, ?it/s]

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 0.8837119340896606, 'eval_accuracy': 0.5745950554134698, 'eval_runtime': 2.5222, 'eval_samples_per_second': 465.069, 'eval_steps_per_second': 29.339, 'epoch': 1.0}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 0.9415038824081421, 'eval_accuracy': 0.5490196078431373, 'eval_runtime': 2.1754, 'eval_samples_per_second': 539.204, 'eval_steps_per_second': 34.016, 'epoch': 2.0}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 1.0333811044692993, 'eval_accuracy': 0.56692242114237, 'eval_runtime': 2.1589, 'eval_samples_per_second': 543.322, 'eval_steps_per_second': 34.276, 'epoch': 3.0}
{'loss': 0.7136, 'learning_rate': 1.6875e-05, 'epoch': 3.12}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 1.1917176246643066, 'eval_accuracy': 0.566069906223359, 'eval_runtime': 2.1578, 'eval_samples_per_second': 543.615, 'eval_steps_per_second': 34.295, 'epoch': 4.0}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 1.3571304082870483, 'eval_accuracy': 0.578005115089514, 'eval_runtime': 2.1496, 'eval_samples_per_second': 545.674, 'eval_steps_per_second': 34.424, 'epoch': 5.0}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 1.6460908651351929, 'eval_accuracy': 0.577152600170503, 'eval_runtime': 2.1459, 'eval_samples_per_second': 546.621, 'eval_steps_per_second': 34.484, 'epoch': 6.0}
{'loss': 0.2277, 'learning_rate': 1.375e-05, 'epoch': 6.25}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 2.1102819442749023, 'eval_accuracy': 0.5532821824381927, 'eval_runtime': 2.1489, 'eval_samples_per_second': 545.854, 'eval_steps_per_second': 34.436, 'epoch': 7.0}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 2.3828601837158203, 'eval_accuracy': 0.5583972719522592, 'eval_runtime': 2.1507, 'eval_samples_per_second': 545.406, 'eval_steps_per_second': 34.408, 'epoch': 8.0}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 2.4821300506591797, 'eval_accuracy': 0.5618073316283035, 'eval_runtime': 2.1484, 'eval_samples_per_second': 545.992, 'eval_steps_per_second': 34.445, 'epoch': 9.0}
{'loss': 0.0617, 'learning_rate': 1.0625e-05, 'epoch': 9.38}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 2.75492262840271, 'eval_accuracy': 0.5370843989769821, 'eval_runtime': 2.1533, 'eval_samples_per_second': 544.74, 'eval_steps_per_second': 34.366, 'epoch': 10.0}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 2.8267455101013184, 'eval_accuracy': 0.5498721227621484, 'eval_runtime': 2.1821, 'eval_samples_per_second': 537.549, 'eval_steps_per_second': 33.912, 'epoch': 11.0}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 2.902785301208496, 'eval_accuracy': 0.5490196078431373, 'eval_runtime': 2.1929, 'eval_samples_per_second': 534.899, 'eval_steps_per_second': 33.745, 'epoch': 12.0}
{'loss': 0.0242, 'learning_rate': 7.500000000000001e-06, 'epoch': 12.5}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 2.984478235244751, 'eval_accuracy': 0.546462063086104, 'eval_runtime': 2.1497, 'eval_samples_per_second': 545.645, 'eval_steps_per_second': 34.423, 'epoch': 13.0}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 3.012620687484741, 'eval_accuracy': 0.5541346973572038, 'eval_runtime': 2.1452, 'eval_samples_per_second': 546.795, 'eval_steps_per_second': 34.495, 'epoch': 14.0}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 3.0790586471557617, 'eval_accuracy': 0.5490196078431373, 'eval_runtime': 2.1559, 'eval_samples_per_second': 544.09, 'eval_steps_per_second': 34.325, 'epoch': 15.0}
{'loss': 0.0086, 'learning_rate': 4.3750000000000005e-06, 'epoch': 15.62}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 3.0979597568511963, 'eval_accuracy': 0.5498721227621484, 'eval_runtime': 2.1772, 'eval_samples_per_second': 538.76, 'eval_steps_per_second': 33.988, 'epoch': 16.0}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 3.156381607055664, 'eval_accuracy': 0.545609548167093, 'eval_runtime': 2.1509, 'eval_samples_per_second': 545.344, 'eval_steps_per_second': 34.404, 'epoch': 17.0}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 3.172257661819458, 'eval_accuracy': 0.5498721227621484, 'eval_runtime': 2.1509, 'eval_samples_per_second': 545.349, 'eval_steps_per_second': 34.404, 'epoch': 18.0}
{'loss': 0.0048, 'learning_rate': 1.25e-06, 'epoch': 18.75}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 3.179137945175171, 'eval_accuracy': 0.5473145780051151, 'eval_runtime': 2.1548, 'eval_samples_per_second': 544.356, 'eval_steps_per_second': 34.341, 'epoch': 19.0}


  0%|          | 0/74 [00:00<?, ?it/s]

{'eval_loss': 3.1861555576324463, 'eval_accuracy': 0.5447570332480819, 'eval_runtime': 2.1511, 'eval_samples_per_second': 545.31, 'eval_steps_per_second': 34.401, 'epoch': 20.0}
{'train_runtime': 566.6488, 'train_samples_per_second': 90.003, 'train_steps_per_second': 5.647, 'train_loss': 0.1629483714699745, 'epoch': 20.0}


events.out.tfevents.1704280023.Dimitriss-MacBook-Pro.local.3889.1:   0%|          | 0.00/12.2k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/J1mb0o/semantic-bert-balanced-dataset/commit/34a05ece7a91c7dc05d35cb6e14bb7e2c323179f', commit_message='End of training', commit_description='', oid='34a05ece7a91c7dc05d35cb6e14bb7e2c323179f', pr_url=None, pr_revision=None, pr_num=None)

# Random Forest

In [83]:
import os
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

In [84]:
train_dataset = pd.read_csv(os.path.join("data-imbalance", "train-imbalance.csv"))

In [85]:
X_train, y_train = train_dataset["tweet"], train_dataset["label"]

In [88]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline
text_clf = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', RandomForestClassifier(n_estimators=200, random_state=42)),
])

In [89]:
text_clf.fit(X_train, y_train)

# Eval random forest

In [103]:
import evaluate
def model_evaluation(predicted_class=None, true_class=None):
    if predicted_class is None or true_class is None:
        raise ValueError("predicted and true must be not None")
    
    accuracy = evaluate.load("accuracy")
    acc = accuracy.compute(predictions=predicted_class, references=true_class, )
    
    precision = evaluate.load("precision")
    prec = precision.compute(predictions=predicted_class, references=true_class, average=None)
    prec_weigted = precision.compute(predictions=predicted_class, references=true_class, average="macro")

    recall = evaluate.load("recall")
    rec = recall.compute(predictions=predicted_class, references=true_class, average=None)
    rec_weigted = recall.compute(predictions=predicted_class, references=true_class, average="macro")

    f1 = evaluate.load("f1")
    calc_f1 = f1.compute(predictions=predicted_class, references=true_class, average=None)
    f1_weighted = f1.compute(predictions=predicted_class, references=true_class, average="macro")
    return acc, prec, prec_weigted, rec, rec_weigted, calc_f1, f1_weighted
    # return acc

## Unbalanced test set

In [104]:
test_set_unb = pd.read_csv(os.path.join("data-imbalance", "test-imbalance.csv"))
X_test_unb, y_test_unb = test_set_unb["tweet"], test_set_unb["label"]

In [105]:
predicted_unb = text_clf.predict(X_test_unb)

In [106]:
unbalanced_eval = model_evaluation(predicted_unb, y_test_unb)

In [111]:
unbalanced_eval

({'accuracy': 0.5215},
 {'precision': array([0.66666667, 0.45493562, 0.53004535])},
 {'precision': 0.550549213486062},
 {'recall': array([0.00615385, 0.15565345, 0.94064386])},
 {'recall': 0.36748372004685215},
 {'f1': array([0.01219512, 0.23194748, 0.67802756])},
 {'f1': 0.3073900539133287})

In [110]:
import pandas as pd

## Balanced

In [107]:
test_set_bal = pd.read_csv(os.path.join("data-balance", "test-balance.csv"))
X_test_bal, y_test_bal = test_set_bal["tweet"], test_set_bal["label"]

In [108]:
balanced_predicted = text_clf.predict(X_test_bal)

In [109]:
balanced_eval = model_evaluation(balanced_predicted, y_test_bal)

## Export results to a dataframe

In [112]:
resutls_df = pd.DataFrame(columns=["Model", 
                      "Accuracy", 
                      "Pre_{Negative}", 
                      "Pre_{Neu}", 
                      "Pre_{Pos}", 
                      "Pre_{Weighted}", 
                      "Rec_{Negative}", 
                      "Rec_{Neu}", 
                      "Rec_{Pos}", 
                      "Rec_{Weighted}", 
                      "F1_{Negative}", 
                      "F1_{Neu}", 
                      "F1_{Pos}", 
                      "F1_{Weighted}"],
                      data=[["Random Forest (unbalanced)", 
                              unbalanced_eval[0]["accuracy"], 
                              unbalanced_eval[1]["precision"][0], 
                              unbalanced_eval[1]["precision"][1],
                              unbalanced_eval[1]["precision"][2],
                              unbalanced_eval[2]["precision"],
                              unbalanced_eval[3]["recall"][0],
                              unbalanced_eval[3]["recall"][1],
                              unbalanced_eval[3]["recall"][2],
                              unbalanced_eval[4]["recall"], 
                              unbalanced_eval[5]["f1"][0],
                              unbalanced_eval[5]["f1"][1],
                              unbalanced_eval[5]["f1"][2],
                              unbalanced_eval[6]["f1"]],
                              ["Random Forest (balanced)",
                              balanced_eval[0]["accuracy"],
                              balanced_eval[1]["precision"][0],
                              balanced_eval[1]["precision"][1],
                              balanced_eval[1]["precision"][2],
                              balanced_eval[2]["precision"],
                              balanced_eval[3]["recall"][0],
                              balanced_eval[3]["recall"][1],
                              balanced_eval[3]["recall"][2],
                              balanced_eval[4]["recall"],
                              balanced_eval[5]["f1"][0],
                              balanced_eval[5]["f1"][1],
                              balanced_eval[5]["f1"][2],
                              balanced_eval[6]["f1"]],
                                                  
                        ]
                      )

In [115]:
resutls_df.round(4)

Unnamed: 0,Model,Accuracy,Pre_{Negative},Pre_{Neu},Pre_{Pos},Pre_{Weighted},Rec_{Negative},Rec_{Neu},Rec_{Pos},Rec_{Weighted},F1_{Negative},F1_{Neu},F1_{Pos},F1_{Weighted}
0,Random Forest (unbalanced),0.5215,0.6667,0.4549,0.53,0.5505,0.0062,0.1557,0.9406,0.3675,0.0122,0.2319,0.678,0.3074
1,Random Forest (balanced),0.3631,0.6667,0.3605,0.3624,0.4632,0.0062,0.1631,0.92,0.3631,0.0122,0.2246,0.52,0.2523


In [114]:
resutls_df.to_latex("random_forest.tex", index=False, float_format="%.4f")