In [1]:
  from google.colab import drive
  # Mount the Google Drive to access files stored there
  drive.mount('/content/drive')

  # Install the latest version of torchtext library quietly without showing output
  !pip install torchtext -qq
  !pip install transformers evaluate wandb datasets accelerate -U -qq ## NEW LINES ##
  basepath = '/content/drive/MyDrive/data/'

Mounted at /content/drive
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m32.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m69.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.6/297.6 kB[0m [31m34.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m96.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
[2K 

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path


# New libraries introduced in this notebook
import evaluate
from datasets import load_dataset, DatasetDict, Dataset, Value, ClassLabel, Features
from transformers import TrainingArguments, Trainer, Seq2SeqTrainer, Seq2SeqTrainingArguments, DataCollatorForSeq2Seq
from transformers import AutoTokenizer, DistilBertTokenizer
from transformers import AutoModelForSequenceClassification,  DistilBertModel, AutoModelForSeq2SeqLM
from transformers import AutoConfig, DistilBertConfig
from transformers import pipeline
from transformers import EvalPrediction
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
import wandb
import torch

In [3]:
base_folder = Path(basepath)
data_folder = base_folder/'datasets'
model_folder = base_folder/'models'
custom_functions = base_folder/'custom-functions'
model_folder.mkdir(exist_ok=True, parents = True)

# Load Data

In [4]:
train_df = pd.read_csv(data_folder/'train_twitter.csv')

test_df = pd.read_csv(data_folder/'test_twitter.csv')

In [5]:
train_df.head()

Unnamed: 0,ID,Tweet,anger,anticipation,disgust,fear,joy,love,optimism,pessimism,sadness,surprise,trust
0,2017-21441,“Worry is a down payment on a problem you may ...,0,1,0,0,0,0,1,0,0,0,1
1,2017-31535,Whatever you decide to do make sure it makes y...,0,0,0,0,1,1,1,0,0,0,0
2,2017-21068,@Max_Kellerman it also helps that the majorit...,1,0,1,0,1,0,1,0,0,0,0
3,2017-31436,Accept the challenges so that you can literall...,0,0,0,0,1,0,1,0,0,0,0
4,2017-22195,My roommate: it's okay that we can't spell bec...,1,0,1,0,0,0,0,0,0,0,0


In [6]:
test_df.head()

Unnamed: 0,ID,Tweet,anger,anticipation,disgust,fear,joy,love,optimism,pessimism,sadness,surprise,trust
0,2018-01559,@Adnan__786__ @AsYouNotWish Dont worry Indian ...,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE
1,2018-03739,"Academy of Sciences, eschews the normally sobe...",NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE
2,2018-00385,I blew that opportunity -__- #mad,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE
3,2018-03001,This time in 2 weeks I will be 30... 😥,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE
4,2018-01988,#Deppression is real. Partners w/ #depressed p...,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE


In [7]:
labels = [label for label in train_df.columns if label not in ["Tweet","ID"]]
id2labels = {idx:label for idx, label in enumerate(labels)}
labels2id = {v:k for k,v in id2labels.items()}
labels2id

{'anger': 0,
 'anticipation': 1,
 'disgust': 2,
 'fear': 3,
 'joy': 4,
 'love': 5,
 'optimism': 6,
 'pessimism': 7,
 'sadness': 8,
 'surprise': 9,
 'trust': 10}

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
train_set, valid_set = train_test_split(train_df, test_size=0.2, random_state=42)

In [10]:
train_text = train_set['Tweet'].tolist()
train_label = train_set.drop(columns=['ID','Tweet']).to_numpy().astype(float)
valid_text = valid_set['Tweet'].tolist()
valid_label = valid_set.drop(columns=['ID','Tweet']).to_numpy().astype(float)

In [11]:
train_text

["Going to get myself a copy of @StephenKing's CUJO for an upcoming project that I can't talk about just yet.  #amwriting",
 "@carysmithwriter @Maria_Savva @RealRockAndRoll We're the least known band in the World, but so glad you asked #muchlove ",
 'Unruly kids at 8am in the morning #nothanks ripping the flower beds up by the roots while their parents watch #shocking',
 "Ok but I just got called a 'White Devil' on the train and I didnt know whether to laugh or be offended",
 '@SXMUrbanView @karenhunter @CousinSyl you are so wrong for this!needed levity after that recording',
 "Even a pencil✏ never #stayed  with me until it's #end ⚫ 😞",
 "@pottermore : I can't find my patronus, the website doesn't work, I can't even see the questions.... #sadness...",
 "Pride and Prejudice is a modern day Keeping up with the Kardashians' - @howsyourdaybin .............I've never been so offended in my life",
 'Wish I could convince the rest of my children to go to their fucking preschool and elementary

In [12]:
train_label

array([[0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 1., ..., 0., 1., 0.],
       ...,
       [1., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [13]:
test_text = test_df['Tweet'].tolist()
test_label = test_df.drop(columns=['ID','Tweet']).to_numpy()
test_label = np.where(test_label == "NONE",0,test_label).astype(float)

In [14]:
test_text

['@Adnan__786__ @AsYouNotWish Dont worry Indian army is on its ways to dispatch all Terrorists to Hell',
 'Academy of Sciences, eschews the normally sober tone of scientific papers and calls the massive loss of wildlife a “biological annihilation',
 'I blew that opportunity -__- #mad',
 'This time in 2 weeks I will be 30... 😥',
 '#Deppression is real. Partners w/ #depressed people truly dont understand the depth in which they affect us. Add in #anxiety &amp;makes it worse',
 '@POLITICOEurope Interesting choice of words... Are you confirming that governments fund #terrorism? Bit of an open door, but still...',
 '@ananavarro CNN should, for sure 😀',
 'Distance yourself once stretched by your friends impose! #serious #loveyou #notseriously',
 'Be happy. Be confident. Be kind.\\n\\n #KissablesLoveSMShopmag\\nAllOutDenimFor KISSMARC',
 'My visit to hospital for care triggered #trauma from accident 20+yrs ago and image of my dead brother in it. Feeling symptoms of #depression',
 "Thanks to D

In [15]:
test_label

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [16]:
(len(train_text), train_label.shape, len(valid_text), valid_label.shape, len(test_text), test_label.shape)

(6179, (6179, 11), 1545, (1545, 11), 3259, (3259, 11))

In [17]:
type(train_text)

list

# Create Huggingface DataSet

In [18]:
trainset = Dataset.from_dict({
    'texts': train_text,
    'labels': train_label
})

validset = Dataset.from_dict({
    'texts': valid_text,
    'labels': valid_label
})

testset = Dataset.from_dict({
    'texts': test_text,
    'labels': test_label
})

In [19]:
trainset

Dataset({
    features: ['texts', 'labels'],
    num_rows: 6179
})

In [20]:
trainset.features

{'texts': Value(dtype='string', id=None),
 'labels': Sequence(feature=Value(dtype='float64', id=None), length=-1, id=None)}

In [21]:
trainset.features['labels']

Sequence(feature=Value(dtype='float64', id=None), length=-1, id=None)

In [22]:
trainset

Dataset({
    features: ['texts', 'labels'],
    num_rows: 6179
})

In [23]:
trainset.features

{'texts': Value(dtype='string', id=None),
 'labels': Sequence(feature=Value(dtype='float64', id=None), length=-1, id=None)}

In [24]:
trainset[4]

{'texts': '@SXMUrbanView @karenhunter @CousinSyl you are so wrong for this!needed levity after that recording',
 'labels': [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}

In [25]:
trainset[4]['texts']

'@SXMUrbanView @karenhunter @CousinSyl you are so wrong for this!needed levity after that recording'

In [26]:
trainset[4]['labels']

[1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

# Load pre-trained Tokenizer

In [27]:
checkpoint = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

# Create function for Tokenizer

In [28]:
max_length = 128

In [29]:
def tokenize_fn(batch):
    return tokenizer(text = batch["texts"], truncation=True, padding="max_length", return_tensors="pt",max_length = 128)

In [30]:
tokenized_traindataset= trainset.map(tokenize_fn, batched=True)
tokenized_validdataset= validset.map(tokenize_fn, batched=True)
tokenized_testdataset= testset.map(tokenize_fn, batched=True)

Map:   0%|          | 0/6179 [00:00<?, ? examples/s]

Map:   0%|          | 0/1545 [00:00<?, ? examples/s]

Map:   0%|          | 0/3259 [00:00<?, ? examples/s]

In [31]:
tokenized_traindataset

Dataset({
    features: ['texts', 'labels', 'input_ids', 'attention_mask'],
    num_rows: 6179
})

In [32]:
tokenized_traindataset = tokenized_traindataset.remove_columns(['texts'])
tokenized_validdataset = tokenized_validdataset.remove_columns(['texts'])
tokenized_testdataset = tokenized_testdataset.remove_columns(['texts'])

In [33]:
tokenized_traindataset.set_format(type='torch')
tokenized_validdataset.set_format(type='torch')
tokenized_testdataset.set_format(type='torch')

In [34]:
tokenized_traindataset[1]

{'labels': tensor([0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0.]),
 'input_ids': tensor([ 3320,    75,  1208, 16331, 12756,  3320,   329,  6286,   834,   134,
             9,   208,   900,  3320,  1649,   138, 23349,  7175, 29807,   101,
            31,    60,     8,   709,   801,  1928,    16,     8,  1150,     6,
            68,    78,  3755,    25,  1380,  1713,    51,  2295,  5850,    15,
             3,     1,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,   

In [35]:
tokenized_traindataset.features

{'labels': Sequence(feature=Value(dtype='float64', id=None), length=-1, id=None),
 'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),
 'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None)}

# Model Training

Download pre-trained model

In [36]:
model = AutoModelForSequenceClassification.from_pretrained(checkpoint,num_labels = 11,
                                              problem_type = "multi_label_classification", id2label = id2labels , label2id = labels2id
                                              )

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google/flan-t5-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Compute Metrics

In [37]:
#sklearn
def metrics(predictions, labels, threshold=0.5):
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1
    y_true = labels
    f1_micro = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    f1_macro = f1_score(y_true=y_true, y_pred=y_pred, average='macro')
    accuracy = accuracy_score(y_true, y_pred)
    metrics = {'f1_micro': f1_micro,'f1_macro': f1_macro,'Accuracy': accuracy}
    return metrics

def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions,tuple) else p.predictions
    result = metrics(predictions=preds,labels=p.label_ids)
    return result

# Train Args


In [38]:
# Define the directory where model checkpoints will be saved
run_name = "HW6/flan-t5-base"
base_folder = Path(basepath)
model_folder = base_folder / "models"/run_name
# Create the directory if it doesn't exist
model_folder.mkdir(exist_ok=True, parents=True)

# Configure training parameters
training_args = TrainingArguments(

    num_train_epochs=5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    weight_decay=0.01,  # Apply L2 regularization to prevent overfitting
    learning_rate=0.00001,
    lr_scheduler_type= 'linear',# Step size for the optimizer during training
    optim='adamw_torch',  # Optimizer,
    remove_unused_columns=False,  # Remove unused columns from the dataset
    output_dir=str(model_folder),# Directory to save model checkpoints
    evaluation_strategy='steps',  # Evaluate model at specified step intervals
    eval_steps=50,  # Perform evaluation every 10 training steps
    save_strategy="steps",  # Save model checkpoint at specified step intervals
    save_steps=50,  # Save a model checkpoint every 10 training steps
    load_best_model_at_end=True,  # Reload the best model at the end of training
    save_total_limit=2,  # Retain only the best and the most recent model checkpoints
    # Use 'accuracy' as the metric to determine the best model
    metric_for_best_model="eval_f1_macro",
    greater_is_better=True,  # A model is 'better' if its accuracy is higher


    # Experiment logging configurations (commented out in this example)
    logging_strategy='steps',
    logging_steps=20,
    report_to='wandb',  # Log metrics and results to Weights & Biases platform
    run_name=run_name,  # Experiment name for Weights & Biases

    fp16=False,
    bf16=False,
    tf32=False,

)

# Initialize Trainer

In [39]:
# initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_traindataset,
    eval_dataset=tokenized_validdataset,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

In [40]:
!wandb login 75a22b5a5c4de4706fb1be6e842e13687283d10c
%env WANDB_PROJECT = nlp_course_spring_2024-HW

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
env: WANDB_PROJECT=nlp_course_spring_2024-HW


# Training and Validation

In [41]:
torch.cuda.empty_cache()

In [42]:
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33msamanojvan[0m ([33mmanojcompany[0m). Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss,F1 Micro,F1 Macro,Accuracy
50,0.6296,0.590896,0.366017,0.151943,0.004531
100,0.547,0.517983,0.204501,0.077705,0.016828
150,0.5044,0.487575,0.013956,0.007714,0.022654
200,0.4904,0.481198,0.00599,0.003442,0.023948
250,0.4794,0.479891,0.002731,0.001596,0.024595
300,0.4737,0.478922,0.000548,0.000325,0.024595
350,0.4711,0.477093,0.001095,0.000646,0.024595
400,0.482,0.474404,0.001096,0.000607,0.023948
450,0.4592,0.472889,0.001643,0.000932,0.024595
500,0.47,0.472281,0.000548,0.000324,0.024595


Step,Training Loss,Validation Loss,F1 Micro,F1 Macro,Accuracy
50,0.6296,0.590896,0.366017,0.151943,0.004531
100,0.547,0.517983,0.204501,0.077705,0.016828
150,0.5044,0.487575,0.013956,0.007714,0.022654
200,0.4904,0.481198,0.00599,0.003442,0.023948
250,0.4794,0.479891,0.002731,0.001596,0.024595
300,0.4737,0.478922,0.000548,0.000325,0.024595
350,0.4711,0.477093,0.001095,0.000646,0.024595
400,0.482,0.474404,0.001096,0.000607,0.023948
450,0.4592,0.472889,0.001643,0.000932,0.024595
500,0.47,0.472281,0.000548,0.000324,0.024595


There were missing keys in the checkpoint model loaded: ['transformer.encoder.embed_tokens.weight', 'transformer.decoder.embed_tokens.weight'].


TrainOutput(global_step=3865, training_loss=0.39828493841869556, metrics={'train_runtime': 2335.4137, 'train_samples_per_second': 13.229, 'train_steps_per_second': 1.655, 'total_flos': 4717652405948160.0, 'train_loss': 0.39828493841869556, 'epoch': 5.0})

In [43]:
eval_results = trainer.evaluate(tokenized_validdataset)

In [44]:
wandb.log({"eval_accuracy": eval_results["eval_Accuracy"], "eval_loss": eval_results["eval_loss"], "eval_f1_micro": eval_results["eval_f1_micro"], "eval_f1_macro": eval_results["eval_f1_macro"]})

Check best points

In [45]:
# After training, let us check the best checkpoint
# We need this for Inference
best_model_checkpoint_step = trainer.state.best_model_checkpoint.split('-')[-1]
print(f"The best model was saved at step {best_model_checkpoint_step}.")

The best model was saved at step 3750.


# Performance on Test set

In [46]:
checkpoint = str(model_folder/f'checkpoint-{best_model_checkpoint_step}')
checkpoint

'/content/drive/MyDrive/data/models/HW6/flan-t5-base/checkpoint-3750'

In [47]:
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

## Training Arguments

In [48]:
training_args = TrainingArguments(
    output_dir=str(model_folder),
    per_device_eval_batch_size=8,
    do_train=False,
    do_eval=True,
    report_to='wandb',
    run_name=run_name
)

Instantiate Trainer

In [49]:
trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=tokenized_testdataset,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

Evaluate using Trainer

In [50]:
test_results= trainer.evaluate()

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [51]:
test_results

{'eval_loss': 0.39368724822998047,
 'eval_f1_micro': 0.0,
 'eval_f1_macro': 0.0,
 'eval_Accuracy': 0.08530223995090519,
 'eval_runtime': 27.5569,
 'eval_samples_per_second': 118.264,
 'eval_steps_per_second': 14.806}

In [52]:
wandb.log({"test_accuracy": test_results["eval_Accuracy"], "test_loss": test_results["eval_loss"], "eval_f1_micro": test_results["eval_f1_micro"], "eval_f1_macro": test_results["eval_f1_macro"]})

In [53]:
test_output = trainer.predict(tokenized_testdataset)


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [54]:
test_preds = test_output.predictions
test_preds = torch.tensor(test_preds[0])
test_labels = np.array(test_output.label_ids)


In [55]:
wandb.finish()

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Accuracy,▁▂▂▂▂▂▂▂▂▃▅▆▇▇▇▇▇▇▇▇▇▇▇████████████████▄
eval/f1_macro,▄▁▁▁▁▁▁▁▁▃▅▇▇▇▇▇▇▇▇▇▇▇▇████████████████▁
eval/f1_micro,▅▁▁▁▁▁▁▁▂▃▆▇▇██████████████████████████▁
eval/loss,█▅▅▅▅▄▄▄▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,▆█▇█▇▇█▇▇▇▇▇▇▇▆▇▅▆▇▇█▇█▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▁
eval/steps_per_second,▆█▇█▇▇█▇▇▇▇▇▇▇▆▇▅▆▇▇█▇█▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▁
eval_accuracy,▁
eval_f1_macro,█▁
eval_f1_micro,█▁

0,1
eval/Accuracy,0.0853
eval/f1_macro,0.0
eval/f1_micro,0.0
eval/loss,0.39369
eval/runtime,27.5569
eval/samples_per_second,118.264
eval/steps_per_second,14.806
eval_accuracy,0.19159
eval_f1_macro,0.0
eval_f1_micro,0.0


In [56]:
test_output

PredictionOutput(predictions=(array([[ 0.79194075, -2.3183415 ,  0.60500836, ..., -0.5822684 ,
        -3.513043  , -4.409068  ],
       [ 0.88613594, -2.3772352 ,  0.8157621 , ..., -0.41360116,
        -4.1619635 , -4.344741  ],
       [ 0.15404564, -2.796532  ,  0.3780688 , ...,  0.776625  ,
        -3.2200775 , -4.4652667 ],
       ...,
       [ 2.4341097 , -2.784812  ,  2.404312  , ..., -0.72216177,
        -3.7629147 , -4.3830624 ],
       [-3.6523936 , -1.7881359 , -3.75497   , ..., -3.1516817 ,
        -2.8084593 , -2.4587057 ],
       [-3.1786523 , -1.2241937 , -3.1846263 , ..., -2.8429413 ,
        -3.129721  , -2.4383929 ]], dtype=float32), array([[[-8.27625841e-02,  1.55977607e-01, -1.07212998e-01, ...,
          1.24324612e-01,  9.20354500e-02, -5.63262366e-02],
        [-2.74613965e-02,  1.24689236e-01, -2.55814463e-01, ...,
          9.15213972e-02,  1.22662395e-01, -2.02016801e-01],
        [-7.65478089e-02,  6.68384433e-02, -1.55148208e-01, ...,
          6.55510370e-03

In [57]:
test_output.predictions

(array([[ 0.79194075, -2.3183415 ,  0.60500836, ..., -0.5822684 ,
         -3.513043  , -4.409068  ],
        [ 0.88613594, -2.3772352 ,  0.8157621 , ..., -0.41360116,
         -4.1619635 , -4.344741  ],
        [ 0.15404564, -2.796532  ,  0.3780688 , ...,  0.776625  ,
         -3.2200775 , -4.4652667 ],
        ...,
        [ 2.4341097 , -2.784812  ,  2.404312  , ..., -0.72216177,
         -3.7629147 , -4.3830624 ],
        [-3.6523936 , -1.7881359 , -3.75497   , ..., -3.1516817 ,
         -2.8084593 , -2.4587057 ],
        [-3.1786523 , -1.2241937 , -3.1846263 , ..., -2.8429413 ,
         -3.129721  , -2.4383929 ]], dtype=float32),
 array([[[-8.27625841e-02,  1.55977607e-01, -1.07212998e-01, ...,
           1.24324612e-01,  9.20354500e-02, -5.63262366e-02],
         [-2.74613965e-02,  1.24689236e-01, -2.55814463e-01, ...,
           9.15213972e-02,  1.22662395e-01, -2.02016801e-01],
         [-7.65478089e-02,  6.68384433e-02, -1.55148208e-01, ...,
           6.55510370e-03,  2.584085

In [58]:
preds = test_output.predictions

In [59]:
preds = torch.tensor(preds[0])

predictions to probabilty

In [60]:
def conversion(predictions, threshold=0.25):
    probs = torch.sigmoid(torch.Tensor(predictions))
    return np.where(probs >= threshold, 1, 0)

In [61]:
test_pred_output = conversion(preds)

# Model Inference

In [62]:
checkpoint = str(model_folder/f'checkpoint-{best_model_checkpoint_step}')
custom_pipeline = pipeline(
    task="text-classification",
    model=checkpoint,
    tokenizer=checkpoint,
    device=0)

In [63]:
map_labels = []
for predictions in test_pred_output:
    map_row = [label for label, pred in zip(labels, predictions) if pred == 1]
    map_labels.append(map_row)

In [64]:
print(map_labels[0:5])

[['anger', 'disgust', 'sadness'], ['anger', 'disgust', 'sadness'], ['anger', 'disgust', 'sadness'], ['joy', 'sadness'], ['anger', 'disgust', 'fear', 'pessimism', 'sadness']]


In [65]:
test_df[['anger', 'anticipation', 'disgust', 'fear', 'joy', 'love',
       'optimism', 'pessimism', 'sadness', 'surprise', 'trust']] = test_pred_output

In [66]:
test_df.head()

Unnamed: 0,ID,Tweet,anger,anticipation,disgust,fear,joy,love,optimism,pessimism,sadness,surprise,trust
0,2018-01559,@Adnan__786__ @AsYouNotWish Dont worry Indian ...,1,0,1,0,0,0,0,0,1,0,0
1,2018-03739,"Academy of Sciences, eschews the normally sobe...",1,0,1,0,0,0,0,0,1,0,0
2,2018-00385,I blew that opportunity -__- #mad,1,0,1,0,0,0,0,0,1,0,0
3,2018-03001,This time in 2 weeks I will be 30... 😥,0,0,0,0,1,0,0,0,1,0,0
4,2018-01988,#Deppression is real. Partners w/ #depressed p...,1,0,1,1,0,0,0,1,1,0,0


In [67]:
test_df.drop(["Tweet"],axis =1, inplace = True)

In [68]:
test_df.to_csv(data_folder/"flan-test.csv")