# Import necessary packages

In [None]:
! pip install transformers
! pip install datasets
! pip install evaluate
! pip install emoji==0.6.0

Collecting datasets
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pyarrow-hotfix, dill, multiprocess, datasets
Successfully installed datasets-2.15.0 dill-0.3.7 multiprocess-0.70.15 pyarrow-hotfix-0.6
Collecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━

In [None]:
import numpy as np
import pandas as pd
import chardet
import csv
import emoji
import torch
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
from typing import Optional, Union
from torch.utils.data import DataLoader
from transformers import DataCollatorWithPadding
import evaluate
import argparse
import os
import sys
import random
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, load_metric
from transformers import RobertaForSequenceClassification, RobertaTokenizer
import torch.nn as nn
from dataclasses import dataclass
from tqdm.auto import tqdm
from torch.optim import AdamW
from transformers import get_scheduler
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
import random

SEED = 595

def set_seed(seed):
    """Seed Python's, NumPy's and PyTorch's random number generators with `seed`.

    When CUDA is available, the generators of all CUDA devices are seeded as well.
    """
    # Host-side generators first, in a fixed order.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
set_seed(SEED)

In [None]:
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Fill in the Google Drive path where you uploaded the assignment
# Example: If you create a EECS595 folder and put all the files under HW1 folder, then 'EECS595/HW1'

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = #
#           TODO: Change this to your project path            #
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = #

GOOGLE_DRIVE_PATH_AFTER_MYDRIVE = "Colab_Notebooks/FinalProject"

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = #

In [None]:
# Build an absolute path from the mount point used by drive.mount("/content/drive").
# The previous relative path ("drive/MyDrive/...") only resolved while the working
# directory was /content, so this cell failed when re-run after the os.chdir below.
GOOGLE_DRIVE_PATH = os.path.join("/content", "drive", "MyDrive", GOOGLE_DRIVE_PATH_AFTER_MYDRIVE)
# Avoid stacking duplicate sys.path entries when the cell is executed more than once.
if GOOGLE_DRIVE_PATH not in sys.path:
    sys.path.append(GOOGLE_DRIVE_PATH)

print(os.listdir(GOOGLE_DRIVE_PATH))

['stancedataset.zip', 'train.csv', 'test.csv', 'train.gsheet', 'sb (1).gsheet', 'sb.gsheet', 'sb.csv', 'hc_bertweet_num_epoch10_lr_3e-5_batch8', 'Untitled0.ipynb', 'Bertweetbs8lr3e-5ne10', 'Untitled1.ipynb', 'finalmodel2.ipynb', 'taskA.csv', 'integral', 'finalmodel3.ipynb', 'integral_best', 'Untitled2.ipynb', 'finalmodel_bertweet.ipynb']


In [None]:
os.chdir(GOOGLE_DRIVE_PATH)

# Load data

In [None]:
def load_dataframe(file_path):
  """Read a stance CSV file into a DataFrame and add an integer `label` column.

  The file's encoding is detected with chardet. The first row is treated as the
  header and discarded; from every following row the first five fields are stored
  as `tweet`, `target`, `stance`, `opinion` and `sentiment`. Blank lines are skipped.

  Args:
    file_path: path of the CSV file to read.

  Returns:
    DataFrame with the columns tweet, target, stance, opinion, sentiment and label,
    where label is 0 for FAVOR, 1 for AGAINST and 2 for NONE (NaN for any other
    stance value, because the mapping has no entry for it).

  Raises:
    IndexError: if a non-blank data row has fewer than five fields.
  """
  columns = ['tweet', 'target', 'stance', 'opinion', 'sentiment']
  stance2label = {'FAVOR': 0, 'AGAINST': 1, 'NONE': 2}

  # Detect the encoding from the raw bytes before decoding the file as text.
  with open(file_path, 'rb') as f:
    file_encoding = chardet.detect(f.read())['encoding']

  # newline='' is what the csv module asks for, so that line breaks inside quoted
  # fields are handled by the reader itself.
  with open(file_path, 'r', encoding=file_encoding, newline='') as file:
    reader = csv.reader(file)
    next(reader, None)  # skip the header row; an empty file simply yields no rows
    rows = []
    for row in reader:
      if not row:
        continue  # csv.reader returns [] for a blank line
      if len(row) < len(columns):
        raise IndexError(
            f"{file_path}: record ending at line {reader.line_num} has "
            f"{len(row)} fields, expected at least {len(columns)}")
      rows.append(row[:len(columns)])

  df = pd.DataFrame(rows, columns=columns)
  df['label'] = df['stance'].map(stance2label)
  return df

In [None]:
train = load_dataframe('train.csv')
test = load_dataframe('test.csv')

In [None]:
train.head()

Unnamed: 0,tweet,target,stance,opinion,sentiment,label
0,"@tedcruz And, #HandOverTheServer she wiped cle...",Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg,1
1,Hillary is our best choice if we truly want to...,Hillary Clinton,FAVOR,1. The tweet explicitly expresses opinion abo...,pos,0
2,@TheView I think our country is ready for a fe...,Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg,1
3,I just gave an unhealthy amount of my hard-ear...,Hillary Clinton,AGAINST,1. The tweet explicitly expresses opinion abo...,neg,1
4,@PortiaABoulger Thank you for adding me to you...,Hillary Clinton,NONE,3. The tweet is not explicitly expressing opi...,pos,2


In [None]:
train.shape

(2914, 6)

In [None]:
test.head()

Unnamed: 0,tweet,target,stance,opinion,sentiment,label
0,He who exalts himself shall be humbled; a...,Atheism,AGAINST,1. The tweet explicitly expresses opinion abo...,pos,1
1,RT @prayerbullets: I remove Nehushtan -previou...,Atheism,AGAINST,1. The tweet explicitly expresses opinion abo...,other,1
2,@Brainman365 @heidtjj @BenjaminLives I have so...,Atheism,AGAINST,1. The tweet explicitly expresses opinion abo...,pos,1
3,#God is utterly powerless without Human interv...,Atheism,AGAINST,1. The tweet explicitly expresses opinion abo...,neg,1
4,@David_Cameron Miracles of #Multiculturalism...,Atheism,AGAINST,2. The tweet does NOT expresses opinion about ...,neg,1


In [None]:
test.shape

(1956, 6)

In [None]:
train.target.unique()

array(['Hillary Clinton', 'Legalization of Abortion', 'Atheism',
       'Climate Change is a Real Concern', 'Feminist Movement'],
      dtype=object)

In [None]:
test.target.unique()

array(['Atheism', 'Climate Change is a Real Concern', 'Feminist Movement',
       'Hillary Clinton', 'Legalization of Abortion', 'Donald Trump'],
      dtype=object)

# Divide targets and get datasets

In [None]:
#training
def _rows_for_target(frame, target_name):
  """Return the rows of `frame` whose `target` equals `target_name`, re-indexed from 0."""
  return frame[frame['target'] == target_name].reset_index(drop=True)

# Training subsets, one per target.
Hillary = _rows_for_target(train, 'Hillary Clinton')
Abortion = _rows_for_target(train, 'Legalization of Abortion')
Atheism = _rows_for_target(train, 'Atheism')
Climate = _rows_for_target(train, 'Climate Change is a Real Concern')
Feminist = _rows_for_target(train, 'Feminist Movement')

# Test subsets, one per target ('Donald Trump' is selected from the test data only).
Hillary_test = _rows_for_target(test, 'Hillary Clinton')
Abortion_test = _rows_for_target(test, 'Legalization of Abortion')
Atheism_test = _rows_for_target(test, 'Atheism')
Climate_test = _rows_for_target(test, 'Climate Change is a Real Concern')
Feminist_test = _rows_for_target(test, 'Feminist Movement')
Trump_test = _rows_for_target(test, 'Donald Trump')

# Data set analysis

In [None]:
def analyse_dataset(df):
  """Print the size of `df` and the distribution of its stance and opinion values.

  Prints, in order: the number of rows, the count of each stance value, the share
  of each stance value among all rows, and the count of each opinion value.

  Args:
    df: DataFrame with `stance` and `opinion` columns.
  """
  n_rows = df.shape[0]
  stance_counts = df['stance'].value_counts()
  print(f"Number of data points: {n_rows}")
  print(stance_counts)
  # Relative frequencies; this table used to be printed twice by mistake.
  print(stance_counts / n_rows)
  print(df['opinion'].value_counts())

In [None]:
def class_weight(df):
  """Compute inverse-frequency class weights from the `label` column of `df`.

  The most frequent class gets weight 1.0 and every other class gets
  (largest class count) / (its own count). The per-class counts are printed.

  Args:
    df: DataFrame whose `label` column holds non-negative integer class ids.

  Returns:
    Float tensor with one weight per class id, from 0 up to the largest id in `df`.
  """
  # Go through NumPy: torch.tensor() applied to a Series looks elements up by
  # index label, which fails when the index does not run 0..n-1 (e.g. a
  # filtered frame that was not re-indexed).
  labels = torch.tensor(df['label'].to_numpy())
  class_counts = torch.bincount(labels)
  print(class_counts)
  return class_counts.max() / class_counts.float()

In [None]:
class_weight(Hillary)

tensor([118, 393, 178])


tensor([3.3305, 1.0000, 2.2079])

In [None]:
# Report the stance/opinion distribution of the full training set, then of each target subset.
for title, frame in (
    ('All training dataset:', train),
    ('Hillary:', Hillary),
    ('Abortion:', Abortion),
    ('Atheism:', Atheism),
    ('Climate:', Climate),
    ('Feminist:', Feminist),
):
  print(title)
  analyse_dataset(frame)

All training dataset:
Number of data points: 2914
AGAINST    1395
NONE        766
FAVOR       753
Name: stance, dtype: int64
AGAINST    0.478723
NONE       0.262869
FAVOR      0.258408
Name: stance, dtype: float64
AGAINST    0.478723
NONE       0.262869
FAVOR      0.258408
Name: stance, dtype: float64
1.  The tweet explicitly expresses opinion about the target, a part of the target, or an aspect of the target.                   1716
2. The tweet does NOT expresses opinion about the target but it HAS opinion about something or someone other than the target.    1024
3.  The tweet is not explicitly expressing opinion. (For example, the tweet is simply giving information.)                        174
Name: opinion, dtype: int64
Hillary:
Number of data points: 689
AGAINST    393
NONE       178
FAVOR      118
Name: stance, dtype: int64
AGAINST    0.570392
NONE       0.258345
FAVOR      0.171263
Name: stance, dtype: float64
AGAINST    0.570392
NONE       0.258345
FAVOR      0.171263
Name: stan

All of the datasets are imbalanced, and Climate is the most imbalanced subset. We may consider using a resampling technique to improve the performance of our model.

# Training:

In [None]:
from datasets import Dataset
import pandas as pd
def get_dataloader(df,tokenizer,batch_size,train=True):
  """Tokenize the tweets in `df` and wrap them in a torch DataLoader.

  Args:
    df: DataFrame with `tweet` (text) and `label` columns.
    tokenizer: tokenizer applied to the tweets with truncation enabled.
    batch_size: number of examples per batch.
    train: when equal to True the dataset is shuffled with SEED and the loader
      shuffles as well; otherwise the original row order is kept.

  Returns:
    DataLoader that collates batches with DataCollatorWithPadding.
  """
  wanted_columns = ("input_ids", "attention_mask", "label")

  def encode(examples):
    return tokenizer(examples["tweet"], truncation=True)

  # Only the text and its label are carried into the dataset.
  encoded = Dataset.from_pandas(df[['tweet','label']]).map(encode, batched=True)

  # Drop every column the model does not consume (e.g. the raw tweet text).
  surplus_columns = [name for name in encoded.features.keys() if name not in wanted_columns]
  for name in surplus_columns:
    encoded = encoded.remove_columns(name)
  encoded.set_format("torch")

  shuffle_batches = (train == True)
  if shuffle_batches:
    encoded = encoded.shuffle(seed=SEED)

  return DataLoader(
      encoded,
      shuffle=shuffle_batches,
      batch_size=batch_size,
      collate_fn=DataCollatorWithPadding(tokenizer=tokenizer),
      num_workers=2,
      # Each worker process seeds NumPy with its own worker id.
      worker_init_fn=lambda worker_id: np.random.seed(worker_id),
  )

In [None]:
def finetune(model, train_dataloader, num_epochs, learning_rate, class_weight=None):
    """Fine-tune `model` on `train_dataloader` with an optionally class-weighted loss.

    Uses AdamW and a linear learning-rate schedule without warm-up that spans
    num_epochs * len(train_dataloader) optimisation steps. Every batch is moved
    to the module-level `device` before the forward pass.

    Args:
        model: model called as model(**batch); its output must expose `logits`.
        train_dataloader: sized iterable of dict batches of tensors that include
            a `labels` entry.
        num_epochs: number of passes over `train_dataloader`.
        learning_rate: learning rate handed to AdamW.
        class_weight: optional per-class weight tensor for nn.CrossEntropyLoss;
            None gives an unweighted loss.

    Returns:
        The same `model` object, updated in place.
    """
    num_training_steps = num_epochs * len(train_dataloader)

    optimizer = AdamW(model.parameters(), lr=learning_rate)
    # The loss is computed here from the logits rather than taken from
    # outputs.loss, so that `class_weight` is applied.
    criterion = nn.CrossEntropyLoss(weight=class_weight)
    lr_scheduler = get_scheduler(
        name="linear",
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps)
    progress_bar = tqdm(range(num_training_steps))

    for _ in range(num_epochs):

        model.train()
        for batch in train_dataloader:
            batch = {key: value.to(device) for key, value in batch.items()}
            outputs = model(**batch)
            loss = criterion(outputs.logits, batch['labels'])
            loss.backward()

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(1)
    progress_bar.close()
    return model

In [None]:
from sklearn.metrics import f1_score
def testing(model,test_dataloader,target):
  """Run `model` over `test_dataloader` and compute per-class F1 scores.

  The model is switched to eval mode and left in that mode.

  Args:
    model: model called as model(input_ids, attention_mask=...); its output
      must expose `logits`.
    test_dataloader: iterable of dict batches with `input_ids`,
      `attention_mask` and `labels` tensors.
    target: row label used as the index of the returned F1 table.

  Returns:
    (all_predictions, all_labels, f1_df): the predicted and true class ids in
    dataloader order, and a one-row DataFrame with the F1 of classes 0, 1, 2 in
    the columns Favor, Against, None plus `Average`, the mean of Favor and Against.
  """
  model.eval()
  all_predictions = []
  all_labels = []
  with torch.no_grad():
    for batch in test_dataloader:
      # Only the model inputs need to live on the device.
      input_ids = batch["input_ids"].to(device)
      attention_mask = batch["attention_mask"].to(device)

      outputs = model(input_ids, attention_mask=attention_mask)
      predictions = torch.argmax(outputs.logits, dim=1)

      all_predictions.extend(predictions.cpu().numpy())
      # Labels are only compared on the host, so they are not sent to the device.
      all_labels.extend(batch["labels"].cpu().numpy())
  # labels=[0,1,2] fixes the order and number of scores regardless of which classes occur.
  f1 = f1_score(all_labels, all_predictions, labels=[0,1,2],average=None).reshape(1,-1)
  f1_df = pd.DataFrame(f1,columns=['Favor','Against','None'],index=[target])
  f1_df['Average']=(f1_df['Favor']+f1_df['Against'])/2
  return all_predictions, all_labels, f1_df


In [None]:
import re
def decode_tokenizer(tokenizer,lst):
  """Decode every token-id sequence in `lst` to text without special-token markers.

  The literal markers <s>, </s> and <pad> are removed from each decoded string,
  runs of spaces are collapsed to one space, and the result is stripped.

  Returns:
    List of cleaned strings, in the order of `lst`.
  """
  marker_pattern = re.compile("|".join(re.escape(marker) for marker in ("<s>", "</s>", "<pad>")))
  space_run_pattern = re.compile(' +')

  def clean(token_ids):
    text = marker_pattern.sub("", tokenizer.decode(token_ids))
    return space_run_pattern.sub(' ', text).strip()

  return [clean(token_ids) for token_ids in lst]

In [None]:
def generate_dataloaders(batch_size,tokenizer):
  """Build the train/test dataloaders for every target and for the pooled data.

  Returns:
    Dict with a (train_loader, test_loader) tuple under 'Hillary', 'Abortion',
    'Atheism', 'Climate', 'Feminist' and 'All', and a single test loader under
    'Trump'. The 'All' test loader excludes the 'Donald Trump' rows.
  """
  def build(frame, is_train=True):
    return get_dataloader(frame,tokenizer,batch_size=batch_size,train=is_train)

  # (key, training frame, test frame) in the order the loaders are created.
  per_target = [
      ('Hillary', Hillary, Hillary_test),
      ('Abortion', Abortion, Abortion_test),
      ('Atheism', Atheism, Atheism_test),
      ('Climate', Climate, Climate_test),
      ('Feminist', Feminist, Feminist_test),
  ]

  data_loaders = {}
  for key, train_frame, test_frame in per_target:
    data_loaders[key] = (build(train_frame), build(test_frame, is_train=False))

  # This target has test data only, so it maps to a bare loader instead of a tuple.
  data_loaders['Trump'] = build(Trump_test, is_train=False)

  all_train_dataloader = build(train)
  test_without_trump = test[test['target']!='Donald Trump'].reset_index(drop=True)
  data_loaders['All'] = (all_train_dataloader, build(test_without_trump, is_train=False))
  return data_loaders

In [None]:
def train_bertweet(dataloaders,num_epochs,learning_rate,divide=True):
  """Fine-tune BERTweet for stance detection and evaluate it on the five training targets.

  Args:
    dataloaders: dict as returned by generate_dataloaders; dataloaders[name] is a
      (train_loader, test_loader) pair for every target name and for 'All'.
    num_epochs: number of fine-tuning epochs for every model.
    learning_rate: AdamW learning rate for every model.
    divide: if truthy, fine-tune one model per target on that target's training
      subset; otherwise fine-tune a single model on dataloaders['All'][0].

  Returns:
    (models, score, results) where
      models  is a dict target -> model when `divide` is truthy, else the single model;
      score   is a one-element Series (index 'Overall') with the mean of the Favor
              and Against F1 computed over the five test sets pooled together;
      results maps every target, plus 'Overall', to [predictions, labels, F1 table].
  """
  checkpoint = "vinai/bertweet-base"
  stance_labels = [0, 1, 2]  # FAVOR, AGAINST, NONE
  # Training frame per target; the insertion order fixes the order of training,
  # of evaluation and of the pooled predictions.
  train_frames = {'Hillary': Hillary, 'Abortion': Abortion, 'Atheism': Atheism,
                  'Climate': Climate, 'Feminist': Feminist}

  def new_model():
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=3)
    model.to(device)
    return model

  if not divide:
    # Integral approach: one model trained on all targets at once.
    models = finetune(new_model(), dataloaders['All'][0], num_epochs, learning_rate,
                      class_weight(train).to(device))
    model_for = {name: models for name in train_frames}
  else:
    # Divided approach: create every model first, then fine-tune them one by one
    # (the same order of operations as before, so seeded runs stay comparable).
    models = {name: new_model() for name in train_frames}
    for name, frame in train_frames.items():
      models[name] = finetune(models[name], dataloaders[name][0], num_epochs, learning_rate,
                              class_weight(frame).to(device))
    model_for = models

  # Evaluate everything first, then print the per-target tables together.
  results = {}
  for name in train_frames:
    results[name] = list(testing(model_for[name], dataloaders[name][1], name))
  for name in train_frames:
    print(results[name][2])

  all_label = pd.DataFrame(
      np.concatenate([results[name][1] for name in train_frames], axis=0), columns=['label'])
  all_prediction = pd.DataFrame(
      np.concatenate([results[name][0] for name in train_frames], axis=0), columns=['prediction'])
  # Passing labels explicitly (as `testing` does) always yields three scores in a
  # fixed order, even if a class occurs in neither the labels nor the predictions.
  f1 = f1_score(all_label['label'], all_prediction['prediction'],
                labels=stance_labels, average=None).reshape(1,-1)
  f1_df = pd.DataFrame(f1,columns=['Favor','Against','None'],index=['Overall'])
  score = (f1_df['Favor']+f1_df['Against'])/2
  f1_df['Average']=score
  print(f1_df)
  print(score)
  results['Overall']=[all_prediction,all_label,f1_df]

  return models,score,results

In [None]:
def get_opinion_result(results):
  """Break the pooled F1 scores down by the opinion annotation of each test tweet.

  The five per-target test frames are concatenated in the order Hillary, Abortion,
  Atheism, Climate, Feminist and paired with results['Overall'][0], which must
  hold the predictions in that same order. Rows whose opinion text starts with
  '1' form the "Opinion" group and rows starting with '2' the "not_opinion"
  group; all other rows are ignored.

  Args:
    results: dict as returned by train_bertweet or final_result.

  Returns:
    (f1_opinion_df, f1_not_opinion_df): one-row F1 tables with the columns
    Favor, Against, None and Average (mean of Favor and Against). Both tables
    are also printed.
  """
  stance_labels = [0, 1, 2]  # FAVOR, AGAINST, NONE

  def f1_table(subset, row_name):
    # Explicit labels keep three score columns even if a class is absent from
    # both the true and the predicted labels of this subset.
    f1 = f1_score(subset['label'], subset['prediction'],
                  labels=stance_labels, average=None).reshape(1,-1)
    table = pd.DataFrame(f1, columns=['Favor','Against','None'], index=[row_name])
    table['Average'] = (table['Favor']+table['Against'])/2
    return table

  taskA=pd.concat([Hillary_test,Abortion_test,Atheism_test,Climate_test,Feminist_test],axis=0).reset_index(drop=True)
  # The first character of the opinion text is its category number.
  taskA['opinion_key']=taskA['opinion'].apply(lambda x: x[0])
  taskA['prediction']=results['Overall'][0]

  f1_opinion_df = f1_table(taskA[taskA['opinion_key']=='1'].reset_index(drop=True), 'Opinion')
  f1_not_opinion_df = f1_table(taskA[taskA['opinion_key']=='2'].reset_index(drop=True), 'not_opinion')
  print(f1_opinion_df)
  print(f1_not_opinion_df)
  return f1_opinion_df,f1_not_opinion_df

In [None]:
def final_result(models,dataloaders,divided=True):
  """Evaluate already-trained model(s) on the five per-target test sets.

  Args:
    models: dict target -> model when `divided` is truthy, otherwise a single
      model that is used for every target.
    dataloaders: dict as returned by generate_dataloaders; dataloaders[name][1]
      is the test loader of each target.
    divided: selects how `models` is interpreted (see above).

  Returns:
    (score, results): `score` is a one-element Series (index 'all') with the mean
    of the Favor and Against F1 over the five test sets pooled together;
    `results` maps every target, plus 'Overall', to [predictions, labels, F1 table].
  """
  targets = ['Hillary', 'Abortion', 'Atheism', 'Climate', 'Feminist']
  stance_labels = [0, 1, 2]  # FAVOR, AGAINST, NONE

  # Evaluate everything first, then print the per-target tables together.
  results = {}
  for name in targets:
    model = models[name] if divided else models
    results[name] = list(testing(model, dataloaders[name][1], name))
  for name in targets:
    print(results[name][2])

  all_label = pd.DataFrame(
      np.concatenate([results[name][1] for name in targets], axis=0), columns=['label'])
  all_prediction = pd.DataFrame(
      np.concatenate([results[name][0] for name in targets], axis=0), columns=['prediction'])
  # Passing labels explicitly (as `testing` does) always yields three scores in a
  # fixed order, even if a class occurs in neither the labels nor the predictions.
  f1 = f1_score(all_label['label'], all_prediction['prediction'],
                labels=stance_labels, average=None).reshape(1,-1)
  f1_df = pd.DataFrame(f1,columns=['Favor','Against','None'],index=['all'])
  score = (f1_df['Favor']+f1_df['Against'])/2
  f1_df['Average']=score
  print(f1_df)
  results['Overall']=[all_prediction,all_label,f1_df]
  return score, results

In [None]:
def taskBtest(model,dataloaders):
  """Evaluate `model` on the Donald Trump test set and break F1 down by opinion type.

  Prints the overall F1 table, then the tables for tweets whose opinion text
  starts with '1' ("Opinion") and with '2' ("not_opinion").

  Side effect: adds the columns `opinion_key` and `prediction` to the
  module-level Trump_test frame in place.

  Args:
    model: trained model passed to `testing`.
    dataloaders: dict as returned by generate_dataloaders; dataloaders['Trump']
      is the test loader for this target.

  Returns:
    (trump_prediction, trump_true_label, trump_f1) exactly as returned by `testing`.
  """
  stance_labels = [0, 1, 2]  # FAVOR, AGAINST, NONE

  def f1_table(subset, row_name):
    # Explicit labels keep three score columns even if a class is absent from
    # both the true and the predicted labels of this subset.
    f1 = f1_score(subset['label'], subset['prediction'],
                  labels=stance_labels, average=None).reshape(1,-1)
    table = pd.DataFrame(f1, columns=['Favor','Against','None'], index=[row_name])
    table['Average'] = (table['Favor']+table['Against'])/2
    return table

  trump_prediction,trump_true_label,trump_f1=testing(model,dataloaders['Trump'],'Trump')
  # The first character of the opinion text is its category number.
  Trump_test['opinion_key']=Trump_test['opinion'].apply(lambda x: x[0])
  print(trump_f1)
  Trump_test['prediction']=trump_prediction

  f1_opinion_df = f1_table(Trump_test[Trump_test['opinion_key']=='1'].reset_index(drop=True), 'Opinion')
  f1_not_opinion_df = f1_table(Trump_test[Trump_test['opinion_key']=='2'].reset_index(drop=True), 'not_opinion')
  print(f1_opinion_df)
  print(f1_not_opinion_df)
  return trump_prediction,trump_true_label,trump_f1

# First approach: integral model

In [None]:
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")

config.json:   0%|          | 0.00/558 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/843k [00:00<?, ?B/s]

bpe.codes:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.91M [00:00<?, ?B/s]

In [None]:
dataloaders_bs8 = generate_dataloaders(batch_size=8,tokenizer=tokenizer)

Map:   0%|          | 0/689 [00:00<?, ? examples/s]

Map:   0%|          | 0/295 [00:00<?, ? examples/s]

Map:   0%|          | 0/653 [00:00<?, ? examples/s]

Map:   0%|          | 0/280 [00:00<?, ? examples/s]

Map:   0%|          | 0/513 [00:00<?, ? examples/s]

Map:   0%|          | 0/220 [00:00<?, ? examples/s]

Map:   0%|          | 0/395 [00:00<?, ? examples/s]

Map:   0%|          | 0/169 [00:00<?, ? examples/s]

Map:   0%|          | 0/664 [00:00<?, ? examples/s]

Map:   0%|          | 0/285 [00:00<?, ? examples/s]

Map:   0%|          | 0/707 [00:00<?, ? examples/s]

Map:   0%|          | 0/2914 [00:00<?, ? examples/s]

Map:   0%|          | 0/1249 [00:00<?, ? examples/s]

In [None]:
models,score,results=train_bertweet(dataloaders_bs8,10,3e-5,divide=False)

pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tensor([ 753, 1395,  766])


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

  0%|          | 0/3650 [00:00<?, ?it/s]

            Favor  Against      None  Average
Hillary  0.604651  0.82153  0.781457  0.71309
             Favor   Against  None   Average
Abortion  0.521739  0.753623  0.62  0.637681
            Favor   Against      None   Average
Atheism  0.492754  0.825503  0.547945  0.659128
           Favor   Against      None   Average
Climate  0.87747  0.133333  0.628571  0.505402
             Favor   Against      None   Average
Feminist  0.536232  0.711246  0.582524  0.623739
            Favor   Against      None   Average
Overall  0.668684  0.770149  0.651911  0.719417
Overall    0.719417
dtype: float64


In [None]:
models1,score1,results1=train_bertweet(dataloaders_bs8,15,3e-5,divide=False)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/5475 [00:00<?, ?it/s]

            Favor   Against      None   Average
Hillary  0.606742  0.789189  0.641221  0.697965
             Favor   Against     None   Average
Abortion  0.521739  0.740525  0.54902  0.631132
            Favor   Against      None   Average
Atheism  0.454545  0.848684  0.542857  0.651615
            Favor  Against      None   Average
Climate  0.874494      0.2  0.619718  0.537247
             Favor   Against      None  Average
Feminist  0.552632  0.734568  0.553191   0.6436
            Favor   Against     None   Average
Overall  0.663677  0.768553  0.58547  0.716115
Overall    0.716115
dtype: float64


In [None]:
models2,score2,results2=train_bertweet(dataloaders_bs8,10,3e-5,divide=False)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/3650 [00:00<?, ?it/s]

           Favor   Against      None   Average
Hillary  0.73913  0.818444  0.728477  0.778787
             Favor   Against      None  Average
Abortion  0.556522  0.758017  0.588235  0.65727
            Favor   Against      None   Average
Atheism  0.484848  0.828283  0.493506  0.656566
            Favor  Against      None   Average
Climate  0.870968     0.25  0.675676  0.560484
             Favor   Against      None   Average
Feminist  0.546667  0.695925  0.554455  0.621296
            Favor   Against      None   Average
Overall  0.688525  0.768533  0.621782  0.728529
Overall    0.728529
dtype: float64


In [None]:
#torch.save(models2,'integral_best')

# divided model

In [None]:
models3,score3,results3=train_bertweet(dataloaders_bs8,10,3e-5,divide=True)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You sho

tensor([118, 393, 178])


  0%|          | 0/870 [00:00<?, ?it/s]

tensor([121, 355, 177])


  0%|          | 0/820 [00:00<?, ?it/s]

tensor([ 92, 304, 117])


  0%|          | 0/650 [00:00<?, ?it/s]

tensor([212,  15, 168])


  0%|          | 0/500 [00:00<?, ?it/s]

tensor([210, 328, 126])


  0%|          | 0/830 [00:00<?, ?it/s]

           Favor  Against      None  Average
Hillary  0.60241  0.80315  0.619048  0.70278
             Favor   Against      None   Average
Abortion  0.556522  0.732733  0.571429  0.644627
            Favor   Against      None   Average
Atheism  0.578947  0.847458  0.608696  0.713202
         Favor   Against     None   Average
Climate    0.9  0.142857  0.71875  0.521429
             Favor   Against      None   Average
Feminist  0.539877  0.736196  0.592593  0.638037
            Favor   Against      None   Average
Overall  0.688666  0.772424  0.615044  0.730545
Overall    0.730545
dtype: float64


In [None]:
models4,score4,results4=train_bertweet(dataloaders_bs8,15,3e-5,divide=True)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You sho

tensor([118, 393, 178])


  0%|          | 0/1305 [00:00<?, ?it/s]

tensor([121, 355, 177])


  0%|          | 0/1230 [00:00<?, ?it/s]

tensor([ 92, 304, 117])


  0%|          | 0/975 [00:00<?, ?it/s]

tensor([212,  15, 168])


  0%|          | 0/750 [00:00<?, ?it/s]

tensor([210, 328, 126])


  0%|          | 0/1245 [00:00<?, ?it/s]

            Favor   Against   None  Average
Hillary  0.634146  0.793734  0.576  0.71394
             Favor   Against      None  Average
Abortion  0.525547  0.702532  0.560748  0.61404
            Favor   Against      None  Average
Atheism  0.583333  0.860927  0.666667  0.72213
            Favor   Against      None   Average
Climate  0.897638  0.153846  0.760563  0.525742
             Favor   Against      None   Average
Feminist  0.538012  0.675325  0.637363  0.606668
            Favor   Against      None   Average
Overall  0.678771  0.753404  0.626087  0.716087
Overall    0.716087
dtype: float64


## best divided model

In [None]:
# Each checkpoint is used below as a ready-made model object, i.e. it is a whole
# pickled model rather than a state_dict. Such files need weights_only=False:
# torch >= 2.6 defaults to weights_only=True and refuses to unpickle them.
# NOTE(review): unpickling can execute arbitrary code - only load checkpoints
# that you created yourself.
# map_location lets a checkpoint saved on a GPU load onto the current `device`.
DIVIDED_CHECKPOINT_DIR = 'Bertweetbs8lr3e-5ne10'
models_divided = {
    name: torch.load(f'{DIVIDED_CHECKPOINT_DIR}/{name}', map_location=device, weights_only=False)
    for name in ('Hillary', 'Abortion', 'Atheism', 'Climate', 'Feminist')
}
# Keep the individual names available for interactive use.
hc_bertweet = models_divided['Hillary']
abortion_bertweet = models_divided['Abortion']
atheism_bertweet = models_divided['Atheism']
climate_bertweet = models_divided['Climate']
feminist_bertweet = models_divided['Feminist']

In [None]:
score_divided,result_divided = final_result(models_divided,dataloaders_bs8)

            Favor   Against      None   Average
Hillary  0.697674  0.816438  0.661871  0.757056
             Favor   Against      None   Average
Abortion  0.607143  0.746988  0.586207  0.677065
            Favor   Against      None   Average
Atheism  0.632911  0.852459  0.607143  0.742685
            Favor   Against      None   Average
Climate  0.907692  0.166667  0.757576  0.537179
           Favor   Against      None   Average
Feminist  0.4875  0.706949  0.506329  0.597224
        Favor   Against      None   Average
all  0.705882  0.774721  0.622807  0.740302


In [None]:
result_divided.keys()

dict_keys(['Hillary', 'Abortion', 'Atheism', 'Climate', 'Feminist', 'Overall'])

In [None]:
# NOTE(review): mid-notebook import -- consider moving it to the import cell at the top.
from sklearn.metrics import classification_report
# classification_report takes (y_true, y_pred), so element [1] is used as the
# reference labels and element [0] as the predictions.
# presumably [0] = predictions and [1] = gold labels -- confirm against final_result.
# The per-class f1 values in the report line up with the Favor / Against / None
# columns of the Hillary table, so labels 0 / 1 / 2 appear to map to those classes.
print(classification_report(result_divided['Hillary'][1],result_divided['Hillary'][0]))

              precision    recall  f1-score   support

           0       0.73      0.67      0.70        45
           1       0.77      0.87      0.82       172
           2       0.75      0.59      0.66        78

    accuracy                           0.76       295
   macro avg       0.75      0.71      0.73       295
weighted avg       0.76      0.76      0.76       295



In [None]:
# NOTE(review): this repeats the final_result(models_divided, dataloaders_bs8)
# call from the earlier cell and produces the same tables; it looks like a
# redundant re-run -- confirm whether it is still needed.
score_divided,result_divided = final_result(models_divided,dataloaders_bs8)

            Favor   Against      None   Average
Hillary  0.697674  0.816438  0.661871  0.757056
             Favor   Against      None   Average
Abortion  0.607143  0.746988  0.586207  0.677065
            Favor   Against      None   Average
Atheism  0.632911  0.852459  0.607143  0.742685
            Favor   Against      None   Average
Climate  0.907692  0.166667  0.757576  0.537179
           Favor   Against      None   Average
Feminist  0.4875  0.706949  0.506329  0.597224
        Favor   Against      None   Average
all  0.705882  0.774721  0.622807  0.740302


In [None]:
# Display the overall score returned by final_result for the per-target models.
score_divided

all    0.740302
dtype: float64

In [None]:
# Break the per-target-model results down with get_opinion_result; its return
# value is unpacked into two objects, and the output shows an 'Opinion' row and
# a 'not_opinion' row.
f1_opinion_df,f1_not_opinion_df = get_opinion_result(result_divided)

            Favor   Against  None   Average
Opinion  0.770492  0.834504   0.0  0.802498
                Favor   Against      None   Average
not_opinion  0.268293  0.615854  0.672316  0.442073


# Best integral model (one model for all targets)

In [None]:
# Load the single "integral" checkpoint that is evaluated on every target.
# map_location=device remaps the stored tensors onto the device selected in the
# import cell, so a checkpoint saved on a GPU can also be loaded on a CPU-only
# machine.
# NOTE(review): torch.load unpickles arbitrary Python objects -- only load
# checkpoint files from a trusted source.
all_bertweet = torch.load('integral_best', map_location=device)

In [None]:
# Evaluate the single integral model on the same dataloaders used for the
# per-target models.
# presumably the trailing False tells final_result that it received one shared
# model rather than a dict of per-target models -- confirm against its signature.
score_integral,result_integral = final_result(all_bertweet,dataloaders_bs8,False)

           Favor   Against      None   Average
Hillary  0.73913  0.818444  0.728477  0.778787
             Favor   Against      None  Average
Abortion  0.556522  0.758017  0.588235  0.65727
            Favor   Against      None   Average
Atheism  0.484848  0.828283  0.493506  0.656566
            Favor  Against      None   Average
Climate  0.870968     0.25  0.675676  0.560484
             Favor   Against      None   Average
Feminist  0.546667  0.695925  0.554455  0.621296
        Favor   Against      None   Average
all  0.688525  0.768533  0.621782  0.728529


In [None]:
# Same get_opinion_result breakdown as for the per-target models, applied to the
# integral model's results; stored under *_df1 names so the earlier frames are
# not overwritten.
f1_opinion_df1,f1_not_opinion_df1 = get_opinion_result(result_integral)

            Favor   Against  None  Average
Opinion  0.739496  0.820305   0.0   0.7799
               Favor   Against   None   Average
not_opinion  0.30137  0.626582  0.704  0.463976


# Task B (Trump target)

In [None]:
# Run the Task B evaluation with the integral model; the output shows a 'Trump'
# row followed by 'Opinion' and 'not_opinion' rows.
integral_taskB=taskBtest(all_bertweet,dataloaders_bs8)

          Favor  Against      None   Average
Trump  0.109091    0.372  0.485981  0.240545
            Favor   Against      None   Average
Opinion  0.103226  0.428571  0.026432  0.265899
             Favor  Against      None   Average
not_opinion    0.2  0.30837  0.665263  0.254185


In [None]:
# NOTE(review): this repeats the get_opinion_result(result_divided) call made in
# the divided-model section and re-assigns the same two names from the same
# input; it does not use any Task B result -- confirm whether it belongs here.
f1_opinion_df,f1_not_opinion_df = get_opinion_result(result_divided)

            Favor   Against  None   Average
Opinion  0.770492  0.834504   0.0  0.802498
                Favor   Against      None   Average
not_opinion  0.268293  0.615854  0.672316  0.442073


In [None]:
# Run the Task B evaluation once per per-target model, printing the target name
# first so each block of tables in the output can be attributed to its model.
for key, divided_model in models_divided.items():
    print(key)
    taskBtest(divided_model, dataloaders_bs8)

Hillary
         Favor   Against      None   Average
Trump  0.34555  0.553797  0.544839  0.449674
            Favor   Against      None   Average
Opinion  0.364641  0.635569  0.048387  0.500105
             Favor   Against      None   Average
not_opinion    0.0  0.459364  0.649165  0.229682
Abortion
       Favor   Against      None   Average
Trump    0.0  0.096591  0.490649  0.048295
         Favor   Against      None   Average
Opinion    0.0  0.065217  0.018927  0.032609
             Favor   Against      None   Average
not_opinion    0.0  0.133333  0.724584  0.066667
Atheism
       Favor   Against      None   Average
Trump    0.0  0.090652  0.491545  0.045326
         Favor   Against      None   Average
Opinion    0.0  0.081218  0.019737  0.040609
             Favor   Against      None   Average
not_opinion    0.0  0.104575  0.722846  0.052288
Climate
       Favor  Against      None  Average
Trump    0.0      0.0  0.532359      0.0
         Favor  Against      None  Average
Opinion   