# LABELLING - ACTIVE LEARNING

In [1]:
%pip install transformers

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import pandas as pd
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


## 1. Load the data

In [3]:
# Read the CSV of selected rows into a DataFrame; the path is relative to the
# notebook's working directory.
selected_data = pd.read_csv(
    filepath_or_buffer='../Data/selected_data.csv',
)

## 2. Helper Functions

In [None]:
def label_data(data, model, round, n_low_confidence=100, device=0,
               output_dir='../Data/Labelling'):
  '''
  Function that labels the data with the provided model
  and saves the labeled data to a csv file. Additionally,
  it saves the rows with the lowest confidence scores
  (100 by default) to a second csv file.

  Note: `data` is modified in place (the columns "roberta_label" and
  "roberta_score" are added or overwritten) and the same object is returned.

  Params:
  data - dataframe with the "text" column to be labeled
  model - the model to be used for sentiment analysis
          (passed straight to transformers.pipeline)
  round - active learning round, used in the output file names
          (the name shadows the builtin round() inside this function;
          kept so existing callers keep working)
  n_low_confidence - number of lowest-score rows written to the
                     low-confidence file
  device - device argument forwarded to transformers.pipeline
  output_dir - folder the two csv files are written to; created if missing

  Returns:
  The input dataframe with the two new columns.
  '''
  # Local import keeps the helper self-contained; the notebook's import cell
  # does not import os.
  import os

  # Create the output folder up front so a missing directory fails (or is
  # fixed) before the expensive inference pass rather than after it.
  os.makedirs(output_dir, exist_ok=True)

  # Extract the text column as a plain list for the pipeline
  reviews = data["text"].tolist()

  # Initialize the sentiment analysis pipeline
  sentiment_pipeline = pipeline("text-classification",
                                model=model,
                                device=device)

  # Calculate the sentiment of each of the reviews; inputs are padded and
  # truncated to at most 512 tokens
  results = sentiment_pipeline(reviews,
                               padding=True,
                               truncation=True,
                               max_length=512)

  # Add the predicted label and its score to the DataFrame (in place)
  data["roberta_label"] = [res["label"] for res in results]
  data["roberta_score"] = [res["score"] for res in results]

  # Save the labeled data to a csv file
  data.to_csv(f'{output_dir}/roberta_labeled_data_round{round}.csv', index=False)

  # Save the rows with the lowest confidence scores to a new CSV file
  data_low_confidence = data.nsmallest(n_low_confidence, 'roberta_score')
  data_low_confidence.to_csv(f'{output_dir}/roberta_low_confidence_round{round}.csv', index=False)

  return data

In [None]:
def finetune(train_data, model):
  '''
  Placeholder for the fine-tuning step; it is not implemented yet.

  Params:
  train_data - currently unused
  model - currently unused

  Returns None without doing any work.
  '''
  # TODO: implement fine-tuning
  return None