# 6.1 Rule-Based Approaches

**Install the required Python packages and libraries.**


In [None]:
!pip install textblob
!pip install afinn
!pip install tensorflow
!pip install keras
!pip install --upgrade transformers huggingface_hub
!pip install torch

## 6.1.2 Implementing Rule-Based Sentiment Analysis

In [None]:
from textblob import TextBlob

# Example review to analyse.
text = "I love this product! It works wonderfully and the quality is excellent."

# Wrap the text in a TextBlob and read its `sentiment` attribute; the
# `polarity` and `subjectivity` fields of that result are reported below.
blob = TextBlob(text)
sentiment = blob.sentiment

print("Sentiment Analysis:")
print(f"Polarity: {sentiment.polarity}, Subjectivity: {sentiment.subjectivity}")

## 6.1.3 Creating Custom Rule-Based Sentiment Analyzers

In [None]:
from afinn import Afinn

# Build the AFINN scorer with its default settings.
afinn = Afinn()

# Example sentence to analyse.
text = "I hate the traffic in this city. It makes commuting a nightmare."

# Numeric score returned by AFINN for the whole text.
sentiment_score = afinn.score(text)

# Map the sign of the score to a label: above zero is positive, below zero is
# negative, and anything else is treated as neutral.
sentiment = (
    "Positive" if sentiment_score > 0
    else "Negative" if sentiment_score < 0
    else "Neutral"
)

print("Sentiment Analysis:")
print(f"Text: {text}")
print(f"Sentiment Score: {sentiment_score}")
print(f"Sentiment: {sentiment}")

# 6.2 Machine Learning Approaches

## 6.2.2 Feature Extraction

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Four short reviews used as a toy corpus.
corpus = [
    "I love this product! It's amazing.",
    "This is the worst service I have ever experienced.",
    "I am very happy with my purchase.",
    "I am disappointed with the quality of this item.",
]

# Learn the vocabulary and IDF weights from the corpus and encode it in one
# step; `X` is the resulting document-term matrix.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus)

# Densify only for display (the corpus is tiny).
print("TF-IDF Feature Matrix:")
print(X.toarray())

## 6.2.3 Model Training

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Sample text corpus and labels
corpus = [
    "I love this product! It's amazing.",
    "This is the worst service I have ever experienced.",
    "I am very happy with my purchase.",
    "I am disappointed with the quality of this item."
]
labels = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Split the raw text BEFORE vectorizing so the held-out document cannot
# influence the vocabulary or IDF weights learned from the training data
# (fitting the vectorizer on the full corpus leaks test information).
corpus_train, corpus_test, y_train, y_test = train_test_split(
    corpus, labels, test_size=0.25, random_state=42
)

# Fit TF-IDF on the training text only, then reuse it to encode the test text
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(corpus_train)
X_test = vectorizer.transform(corpus_test)

# Initialize and train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict the sentiment of the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
# With 4 documents and test_size=0.25 the test set holds a single sample, so
# some per-class metrics have a zero denominator; zero_division=0 reports them
# as 0 instead of emitting an undefined-metric warning.
report = classification_report(y_test, y_pred, zero_division=0)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)

## 6.2.4 Evaluating Machine Learning Models

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Relies on `model`, `X_test` and `y_test` from the model-training cell above.
# Predict the sentiment of the test set
y_pred = model.predict(X_test)

# Calculate evaluation metrics.
# The test split used above is tiny, so precision/recall/F1 for the positive
# class can have a zero denominator (no predicted or no true positives).
# zero_division=0 makes that case return 0.0 explicitly instead of warning.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# 6.3 Deep Learning Approaches

## 6.3.2 Convolutional Neural Networks (CNNs)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, GlobalMaxPooling1D, Embedding, Input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

# Seed Python, NumPy and TensorFlow so weight initialisation and training are
# reproducible across "Restart & Run All".
tf.keras.utils.set_random_seed(42)

# Shared sizes: the tokenizer cap must match the embedding's input_dim, and the
# padded length must match the model's input shape.
VOCAB_SIZE = 5000
MAX_LEN = 10

# Sample text corpus and labels
corpus = [
    "I love this product! It's amazing.",
    "This is the worst service I have ever experienced.",
    "I am very happy with my purchase.",
    "I am disappointed with the quality of this item."
]
labels = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Tokenize and pad the text data
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(corpus)
X = tokenizer.texts_to_sequences(corpus)
X = pad_sequences(X, maxlen=MAX_LEN)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.25, random_state=42)

# Keras expects array-like targets, so convert the label lists once here.
y_train = np.array(y_train)
y_test = np.array(y_test)

# Define the CNN model.
# The sequence length is declared with an explicit Input layer; the Embedding
# `input_length` argument is deprecated in Keras 3.
model = Sequential([
    Input(shape=(MAX_LEN,), dtype="int32"),
    Embedding(input_dim=VOCAB_SIZE, output_dim=50),
    Conv1D(filters=128, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(10, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, verbose=1, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
new_text = ["The product is excellent and I love it."]
new_text_seq = tokenizer.texts_to_sequences(new_text)
new_text_padded = pad_sequences(new_text_seq, maxlen=MAX_LEN)
prediction = model.predict(new_text_padded)
# The sigmoid output is read as the positive-class probability (label 1).
print("Prediction:", "Positive" if prediction[0][0] > 0.5 else "Negative")

## 6.3.3 Recurrent Neural Networks (RNNs) and Long Short-Term Memory Networks (LSTMs)


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, Input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

# Seed Python, NumPy and TensorFlow so weight initialisation and training are
# reproducible across "Restart & Run All".
tf.keras.utils.set_random_seed(42)

# Shared sizes: the tokenizer cap must match the embedding's input_dim, and the
# padded length must match the model's input shape.
VOCAB_SIZE = 5000
MAX_LEN = 10

# Sample text corpus and labels
corpus = [
    "I love this product! It's amazing.",
    "This is the worst service I have ever experienced.",
    "I am very happy with my purchase.",
    "I am disappointed with the quality of this item."
]
labels = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Tokenize and pad the text data
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(corpus)
X = tokenizer.texts_to_sequences(corpus)
X = pad_sequences(X, maxlen=MAX_LEN)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.25, random_state=42)

# Keras expects array-like targets, so convert the label lists once here.
y_train = np.array(y_train)
y_test = np.array(y_test)

# Define the LSTM model.
# The sequence length is declared with an explicit Input layer; the Embedding
# `input_length` argument is deprecated in Keras 3.
model = Sequential([
    Input(shape=(MAX_LEN,), dtype="int32"),
    Embedding(input_dim=VOCAB_SIZE, output_dim=50),
    LSTM(100),
    Dense(1, activation='sigmoid'),
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, verbose=1, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
new_text = ["The product is excellent and I love it."]
new_text_seq = tokenizer.texts_to_sequences(new_text)
new_text_padded = pad_sequences(new_text_seq, maxlen=MAX_LEN)
prediction = model.predict(new_text_padded)
# The sigmoid output is read as the positive-class probability (label 1).
print("Prediction:", "Positive" if prediction[0][0] > 0.5 else "Negative")

## 6.3.4 Transformer-Based Models

In [None]:
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer
from sklearn.model_selection import train_test_split

# NOTE: this cell uses only BERT's WordPiece tokenizer. The classifier below is
# a small embedding + pooling network trained from scratch, not a pretrained
# transformer encoder.

# Seed Python, NumPy and TensorFlow so weight initialisation and training are
# reproducible across "Restart & Run All".
tf.keras.utils.set_random_seed(42)

# Sample text corpus and labels
corpus = [
    "I love this product! It's amazing.",
    "This is the worst service I have ever experienced.",
    "I am very happy with my purchase.",
    "I am disappointed with the quality of this item."
]
labels = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize and encode the text data
max_length = 10
X = tokenizer(corpus, padding='max_length', truncation=True, max_length=max_length, return_tensors='np')
input_ids = X['input_ids']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(input_ids, labels, test_size=0.25, random_state=42)

# Keras expects array-like targets, so convert the label lists once here.
y_train = np.array(y_train)
y_test = np.array(y_test)

# Build a simple Keras model
vocab_size = tokenizer.vocab_size

model = tf.keras.Sequential([
    # Declare the sequence length here; the Embedding `input_length` argument
    # is deprecated in Keras 3.
    tf.keras.Input(shape=(max_length,), dtype="int32"),
    # Mask padding positions so they are excluded from the average below.
    # Keras masking treats id 0 as padding, so enable it only when the
    # tokenizer's pad id really is 0.
    tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=32,
        mask_zero=(tokenizer.pad_token_id == 0),
    ),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=2, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
new_text = ["The product is excellent and I love it."]
new_text_enc = tokenizer(new_text, padding='max_length', truncation=True, max_length=max_length, return_tensors='np')
prediction = model.predict(new_text_enc['input_ids'])
# Take the argmax over the class axis of the first (only) row rather than over
# the flattened batch, so this stays correct if more texts are added.
print("Prediction:", "Positive" if np.argmax(prediction[0]) == 1 else "Negative")
