----------------------------
#### Stanford Sentiment Treebank 2 (SST-2) dataset
---------------------------

In [1]:
from datasets import load_dataset

In [2]:
# Load the SST-2 dataset
dataset = load_dataset("glue", "sst2", cache_dir= r'D:\AI-DATASETS\07-Hugging-Face-Data')

In [3]:
# Access splits
train_data      = dataset['train']
validation_data = dataset['validation']
test_data       = dataset['test']

In [5]:
# Get the number of samples in each split
num_train_samples      = len(dataset['train'])
num_validation_samples = len(dataset['validation'])
num_test_samples       = len(dataset['test'])

# Display the counts
print(f"Number of training samples: {num_train_samples}")
print(f"Number of validation samples: {num_validation_samples}")
print(f"Number of test samples: {num_test_samples}")

Number of training samples: 67349
Number of validation samples: 872
Number of test samples: 1821


In [6]:
dataset['train']

Dataset({
    features: ['sentence', 'label', 'idx'],
    num_rows: 67349
})

- Label **1**: Positive Sentiment
    - This indicates that the sentence expresses a positive or favorable sentiment about the subject.

- Label **0**: Negative Sentiment
    - This indicates that the sentence expresses a negative or unfavorable sentiment about the subject.

In [7]:
import pandas as pd

In [11]:
# Set display options for Pandas
pd.set_option('display.max_colwidth', None)  # No truncation of column content
pd.set_option('display.width', None)  # No truncation of DataFrame display width

In [12]:
# Convert each split into Pandas DataFrame
train_df = pd.DataFrame(dataset['train'])
valid_df = pd.DataFrame(dataset['validation'])
test_df  = pd.DataFrame(dataset['test'])

In [13]:
train_df.sample(5)

Unnamed: 0,sentence,label,idx
3327,should come with the warning `` for serious film buffs only ! '',1,3327
53927,critics be damned .,1,53927
46787,of charm,1,46787
24122,a reworking of die hard and cliffhanger but it 's nowhere near as exciting as either .,0,24122
48619,", every shot enhances the excellent performances",1,48619


#### Hands-on Activity
- Goal: Classify sentences using a pre-trained transformer model fine-tuned on SST-2.

In [14]:
from transformers import pipeline

**textattack/bert-base-uncased-SST-2**

The model being used is `textattack/bert-base-uncased-SST-2`, which is a BERT-based model fine-tuned on the SST-2 dataset for sentiment analysis. The model is designed to output either a positive or negative sentiment classification.


In [15]:
# Sentiment analysis pipeline
classifier = pipeline("sentiment-analysis", model="textattack/bert-base-uncased-SST-2")

config.json:   0%|          | 0.00/477 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [16]:
# Test sentences
sentences = ["The movie was fantastic!", "I hated every moment of it."]
results   = classifier(sentences)

In [17]:
print(results)

[{'label': 'LABEL_1', 'score': 0.9996718168258667}, {'label': 'LABEL_0', 'score': 0.9973737001419067}]


In [18]:
valid_df.sample(5)

Unnamed: 0,sentence,label,idx
222,"a psychological thriller with a genuinely spooky premise and an above-average cast , actor bill paxton 's directing debut is a creepy slice of gothic rural americana .",1,222
99,why make a documentary about these marginal historical figures ?,0,99
846,"an absurdist comedy about alienation , separation and loss .",0,846
67,"a fast , funny , highly enjoyable movie .",1,67
797,"it 's not original , and , robbed of the element of surprise , it does n't have any huge laughs in its story of irresponsible cops who love to play pranks .",0,797


**Use valid_df** - to test the model

In [19]:
texts       = valid_df['sentence'].tolist()  # Extract text from 'sentence' column
true_labels = valid_df['label'].tolist()  # Extract true labels

In [20]:
# Predict sentiment for the validation dataset
predictions = classifier(texts)

In [21]:
predictions[:10]

[{'label': 'LABEL_1', 'score': 0.9997661709785461},
 {'label': 'LABEL_0', 'score': 0.9856706261634827},
 {'label': 'LABEL_1', 'score': 0.9995738863945007},
 {'label': 'LABEL_1', 'score': 0.996027946472168},
 {'label': 'LABEL_0', 'score': 0.9979519248008728},
 {'label': 'LABEL_1', 'score': 0.9996365308761597},
 {'label': 'LABEL_0', 'score': 0.9960685968399048},
 {'label': 'LABEL_0', 'score': 0.9940721392631531},
 {'label': 'LABEL_1', 'score': 0.9996395111083984},
 {'label': 'LABEL_0', 'score': 0.9970386028289795}]

In [22]:
# Convert predictions to numerical labels (0 for negative, 1 for positive)
predicted_labels = [1 if pred['label'] == 'LABEL_1' else 0 for pred in predictions]

In [23]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [24]:
# Evaluate the model's performance
accuracy  = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels)
recall    = recall_score(true_labels, predicted_labels)
f1        = f1_score(true_labels, predicted_labels)

# Display the evaluation scores
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1-Score: {f1 * 100:.2f}%")
confusion_matrix(true_labels, predicted_labels)

Accuracy: 92.43%
Precision: 91.81%
Recall: 93.47%
F1-Score: 92.63%


array([[391,  37],
       [ 29, 415]], dtype=int64)

**some other BERT models**

    "distilbert-base-uncased-finetuned-sst-2-english",
    "roberta-large-mnli",
    "albert-base-v2"

In [25]:
model_name = 'distilbert-base-uncased-finetuned-sst-2-english'         # works
#model_name = 'textattack/bert-large-uncased-SST-2'
#model_name = 'textattack/albert-base-v2-SST-2'     #- private repo
#model_name = 'roberta-large-SST-2'    #- private repo
#model_name =  'google/electra-base-discriminator'   #- poor results
#model_name =  'xlm-roberta-base-SST-2' #- a private repo

In [26]:
token = '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'

In [27]:
# Load the model pipeline
classifier = pipeline("sentiment-analysis", 
                      model     = model_name,
                      token     = token
                      #cache_dir = r'D:\AI-DATASETS\07-Hugging-Face-Data'
                     )

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [28]:
# Make predictions on the validation data
predictions = classifier(valid_df['sentence'].to_list())

In [29]:
predictions[:5]

[{'label': 'POSITIVE', 'score': 0.9998838901519775},
 {'label': 'NEGATIVE', 'score': 0.998969554901123},
 {'label': 'POSITIVE', 'score': 0.9996399879455566},
 {'label': 'POSITIVE', 'score': 0.9996993541717529},
 {'label': 'NEGATIVE', 'score': 0.9996892213821411}]

In [30]:
# Convert predictions to binary labels (0 for negative, 1 for positive)
predicted_labels = [1 if pred['label'] == 'LABEL_1' or pred['label'] == 'POSITIVE' else 0 for pred in predictions]
true_labels = valid_df['label'].tolist()

In [31]:
# Evaluate the model's performance
accuracy  = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels)
recall    = recall_score(true_labels, predicted_labels)
f1        = f1_score(true_labels, predicted_labels)

# Display the evaluation scores
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1-Score: {f1 * 100:.2f}%")
confusion_matrix(true_labels, predicted_labels)

Accuracy: 91.06%
Precision: 89.78%
Recall: 93.02%
F1-Score: 91.37%


array([[381,  47],
       [ 31, 413]], dtype=int64)

#### Try a Simple Logistic regression

In [32]:
# Convert splits to Pandas DataFrames
train_data = dataset['train'].to_pandas()
valid_data = dataset['validation'].to_pandas()
test_data  = dataset['test'].to_pandas()

# Rename columns for clarity
train_data.columns = ['sentence', 'label', 'idx']
valid_data.columns = ['sentence', 'label', 'idx']
test_data.columns  = ['sentence', 'label', 'idx']

In [33]:
train_data.sample(5)

Unnamed: 0,sentence,label,idx
67089,"by the time the plot grinds itself out in increasingly incoherent fashion , you might be wishing for a watch that makes time go faster rather than the other way around .",0,67089
45514,astonishingly skillful and moving ...,1,45514
45221,"it 's a long way from orwell 's dark , intelligent warning cry ( 1984 ) to the empty stud knockabout of equilibrium , and what once was conviction is now affectation .",0,45221
14061,able to appreciate the wonderful cinematography and naturalistic acting,1,14061
24247,on this screenplay,1,24247


In [34]:
train_data.label.value_counts()

label
1    37569
0    29780
Name: count, dtype: int64

In [35]:
valid_data.sample(5)

Unnamed: 0,sentence,label,idx
113,this movie is maddening .,0,113
837,will amuse and provoke adventurous adults in specialty venues .,1,837
473,"thanks to haynes ' absolute control of the film 's mood , and buoyed by three terrific performances , far from heaven actually pulls off this stylistic juggling act .",1,473
326,a beguiling splash of pastel colors and prankish comedy from disney .,1,326
336,"having had the good sense to cast actors who are , generally speaking , adored by the movie-going public , khouri then gets terrific performances from them all .",1,336


In [36]:
valid_data.label.value_counts()

label
1    444
0    428
Name: count, dtype: int64

In [37]:
test_data.sample(5)

Unnamed: 0,sentence,label,idx
1308,"an incendiary , deeply thought-provoking look at one of the most peculiar ( and peculiarly venomous ) bigotries in our increasingly frightening theocracy",-1,1308
574,"if this is an example of the type of project that robert redford 's lab is willing to lend its imprimatur to , then perhaps it 's time to rethink independent films .",-1,574
1786,"technically , the film is about as interesting as an insurance commercial .",-1,1786
39,"verbinski substitutes atmosphere for action , tedium for thrills .",-1,39
382,"the film makes a tragic error by going on for too long , trying to mirror every subsequent event in chinese history : war , revolution , communism , etc. .",-1,382


In [38]:
test_data.label.value_counts()

label
-1    1821
Name: count, dtype: int64

In [39]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [40]:
# Initialize TF-IDF Vectorizer
vectorizer = TfidfVectorizer(
                             #max_features = 5000, 
                             stop_words   = 'english'
                            )

In [41]:
# Transform the sentences
X_train = vectorizer.fit_transform(train_data['sentence'])
X_valid = vectorizer.transform(valid_data['sentence'])
X_test  = vectorizer.transform(test_data['sentence'])

In [42]:
# Target labels
y_train = train_data['label']
y_valid = valid_data['label']
y_test  = test_data['label']

In [43]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [44]:
# Initialize and train the model
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

In [45]:
# Predict on validation and test sets
y_valid_pred = model.predict(X_valid)
y_test_pred  = model.predict(X_test)

In [46]:
# Calculate accuracy
valid_accuracy = accuracy_score(y_valid, y_valid_pred)
test_accuracy  = accuracy_score(y_test, y_test_pred)

print(f"Validation Accuracy: {valid_accuracy * 100:.2f}%")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Validation Accuracy: 80.39%
Test Accuracy: 0.00%
