Import the required libraries

In [48]:
import pandas as pd
import numpy as np
import tensorflow as tf
from scipy import stats
from transformers import BertTokenizer, TFBertForSequenceClassification
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

Read and pre-process the training data

In [5]:
data = pd.read_csv('C:/Users/puvia/Darshan K M - PA2312052010003 - NLP - CT3/train2.csv',encoding="ISO-8859-1")

In [6]:
data.head()

Unnamed: 0,textID,text,selected_text,sentiment,Time of Tweet,Age of User,Country,Population -2020,Land Area (Km²),Density (P/Km²)
0,cb774db0d1,"I`d have responded, if I were going","I`d have responded, if I were going",neutral,morning,0-20,Afghanistan,38928346,652860.0,60
1,549e992a42,Sooo SAD I will miss you here in San Diego!!!,Sooo SAD,negative,noon,21-30,Albania,2877797,27400.0,105
2,088c60f138,my boss is bullying me...,bullying me,negative,night,31-45,Algeria,43851044,2381740.0,18
3,9642c003ef,what interview! leave me alone,leave me alone,negative,morning,46-60,Andorra,77265,470.0,164
4,358bd9e861,"Sons of ****, why couldn`t they put them on t...","Sons of ****,",negative,noon,60-70,Angola,32866272,1246700.0,26


In [7]:
data['text'] = data['text'].str.replace(r'http\S+|www.\S+','', regex=True)
data['text'] = data['text'].str.replace(r'@\w+','', regex=True)

In [8]:
label_mapping = {'positive': 2, 'neutral' : 0, 'negative': 1}
data['sentiment'] = data['sentiment'].map(label_mapping)

In [9]:
data.head()

Unnamed: 0,textID,text,selected_text,sentiment,Time of Tweet,Age of User,Country,Population -2020,Land Area (Km²),Density (P/Km²)
0,cb774db0d1,"I`d have responded, if I were going","I`d have responded, if I were going",0,morning,0-20,Afghanistan,38928346,652860.0,60
1,549e992a42,Sooo SAD I will miss you here in San Diego!!!,Sooo SAD,1,noon,21-30,Albania,2877797,27400.0,105
2,088c60f138,my boss is bullying me...,bullying me,1,night,31-45,Algeria,43851044,2381740.0,18
3,9642c003ef,what interview! leave me alone,leave me alone,1,morning,46-60,Andorra,77265,470.0,164
4,358bd9e861,"Sons of ****, why couldn`t they put them on t...","Sons of ****,",1,noon,60-70,Angola,32866272,1246700.0,26


Split the data, initialize the BERT-Tokenizer and tokenize the text data.

In [10]:
X_train, X_test, y_train, y_test = train_test_split(data['selected_text'], data['sentiment'], test_size=0.2, random_state=42)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

train_encodings = tokenizer(list(map(str, X_train)), truncation=True, padding=True, max_length=128)
test_encodings = tokenizer(list(map(str, X_test)), truncation=True, padding=True, max_length=128)

In [11]:
train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), y_train)).batch(16)
test_dataset = tf.data.Dataset.from_tensor_slices((dict(test_encodings), y_test)).batch(16)

Initialize the model, loss calculation and optimizer and compile the model

In [12]:
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=2e-5)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])




All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.





Mention the number of epochs, validation data and fit the model to the training dataset

In [13]:
history = model.fit(train_dataset, epochs=3, validation_data=test_dataset)

Epoch 1/3


Epoch 2/3
Epoch 3/3


In [14]:
y_pred = model.predict(test_dataset).logits
y_pred = np.argmax(y_pred, axis=1)



In [15]:
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=label_mapping.keys()))
print("Accuracy:", accuracy_score(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

    positive       0.86      0.83      0.84      2549
     neutral       0.88      0.86      0.87      1765
    negative       0.84      0.91      0.87      1889

    accuracy                           0.86      6203
   macro avg       0.86      0.86      0.86      6203
weighted avg       0.86      0.86      0.86      6203

Accuracy: 0.8599064968563598


**Inference:**

The F1-scores for all classes are close (ranging from 0.84 to 0.87), indicating that the model performs consistently well across positive, neutral, and negative sentiments.

The model excels in identifying negative sentiments (high recall of 0.91).
Strong overall performance in precision, recall, and F1-scores across all classes.
Improvement Areas:

The positive class has slightly lower recall (0.83), meaning some actual positive samples are being misclassified.
Further optimization may enhance precision and recall for the positive and neutral classes.


**----------------------------------------------------------------------------------SENTIMENT ANALYSIS----------------------------------------------------------------------------------**

Funciton to Pre-process the input data and map the predictions.

In [42]:
def predict_sentiment(text):
          
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True, max_length=128)
    
    # Pass inputs through the model and get only logits
    outputs = model(inputs).logits
    
    # Get the prediction by finding the index of the max logit
    prediction = tf.argmax(outputs, axis=1).numpy()
    
    # Map the prediction to the sentiment label
    sentiments = [k for pred in prediction for k, v in label_mapping.items() if v == pred]
    return sentiments if len(sentiments) > 1 else sentiments[0]
    

Load the data scraped from Amazon

In [49]:
# Load the reviews data from a CSV file
df = pd.read_csv(r"C:\Users\puvia\Darshan K M - PA2312052010003 - NLP - CT3\amazon_reviews.csv")

Run the Model

In [50]:
# Convert all reviews to strings before processing
reviews = df['Review'].astype(str).tolist()  # Convert to list of strings

# Example: Get predictions for all reviews
predicted_sentiments = [predict_sentiment(review) for review in reviews]

# Add predictions back to the DataFrame
df['Sentiment'] = predicted_sentiments

df.to_csv('updated_reviews.csv', index=False)

In [51]:
# Perform A/B testing based on sentiment
df['Group'] = np.where(df.index % 2 == 0, 'A', 'B')  # Randomly split users into A and B

# Function to calculate engagement metrics
def calculate_engagement(df):
    engagement_metrics = {
        'Average_Rating': df['Rating'].mean(),
        'Review_Count': len(df),
        'Positive_Reviews': (df['Sentiment'] == 'positive').sum(),
        'Negative_Reviews': (df['Sentiment'] == 'negative').sum(),
        'Neutral_Reviews': (df['Sentiment'] == 'neutral').sum()
    }
    return engagement_metrics

# Calculate engagement metrics for both groups
group_a_metrics = calculate_engagement(df[df['Group'] == 'A'])
group_b_metrics = calculate_engagement(df[df['Group'] == 'B'])

# Print engagement metrics
print("Group A Engagement Metrics:", group_a_metrics)
print("Group B Engagement Metrics:", group_b_metrics)

# Perform statistical tests (e.g., T-test) to compare engagement metrics


# Compare average ratings between Group A and Group B
t_stat, p_value = stats.ttest_ind(df[df['Group'] == 'A']['Rating'], df[df['Group'] == 'B']['Rating'])

print(f"T-statistic: {t_stat}, P-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference between the engagement of Group A and Group B.")
else:
    print("There is no significant difference between the engagement of Group A and Group B.")


Group A Engagement Metrics: {'Average_Rating': 3.98, 'Review_Count': 50, 'Positive_Reviews': 31, 'Negative_Reviews': 2, 'Neutral_Reviews': 17}
Group B Engagement Metrics: {'Average_Rating': 4.0, 'Review_Count': 50, 'Positive_Reviews': 24, 'Negative_Reviews': 3, 'Neutral_Reviews': 23}
T-statistic: -0.37470140930053286, P-value: 0.7086921656430278
There is no significant difference between the engagement of Group A and Group B.
