In [55]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import classification_report

In [56]:
# Example labeled data, written as (sentence, entity label) pairs so the
# pairing between the two parallel lists below is explicit.
labeled_examples = [
    ("Apple is a tech company.", "Company"),
    ("I live in New York.", "Location"),
    ("The cat chased the mouse.", "Animal"),
]
sentences = [text for text, _ in labeled_examples]
entity_labels = [label for _, label in labeled_examples]

In [57]:
# Hold out 20% of the labeled sentences for testing; the fixed random_state
# makes the shuffle (and therefore the split) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    sentences,
    entity_labels,
    test_size=0.2,
    random_state=42,
)

In [58]:
# Naive Bayes on bag-of-words counts
vectorizer = CountVectorizer()  # or TfidfVectorizer()
# The vocabulary is learned from the training sentences only; the test
# sentences are then projected onto that same vocabulary.
X_train_nb = vectorizer.fit_transform(X_train)
X_test_nb = vectorizer.transform(X_test)

# fit() returns the estimator itself, so construction and training can be chained.
nb_classifier = MultinomialNB().fit(X_train_nb, y_train)
y_pred_nb = nb_classifier.predict(X_test_nb)

In [59]:
print("Naive Bayes Classification Report:")
# zero_division=0 reports undefined precision/recall (classes with no true or
# no predicted samples) as 0 -- the same numbers the default produces -- but
# without emitting a warning for every affected metric.
print(classification_report(y_test, y_pred_nb, zero_division=0))

Naive Bayes Classification Report:
              precision    recall  f1-score   support

      Animal       0.00      0.00      0.00       0.0
     Company       0.00      0.00      0.00       1.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [60]:
# Support Vector Machines on TF-IDF features
vectorizer = TfidfVectorizer()
# Term statistics come from the training sentences only; the test sentences
# are transformed with those fitted statistics.
X_train_svm = vectorizer.fit_transform(X_train)
X_test_svm = vectorizer.transform(X_test)

# fit() returns the estimator itself, so construction and training can be chained.
svm_classifier = SVC().fit(X_train_svm, y_train)
y_pred_svm = svm_classifier.predict(X_test_svm)

In [61]:

print("SVM Classification Report:")
# zero_division=0 reports undefined precision/recall (classes with no true or
# no predicted samples) as 0 -- the same numbers the default produces -- but
# without emitting a warning for every affected metric.
print(classification_report(y_test, y_pred_svm, zero_division=0))

SVM Classification Report:
              precision    recall  f1-score   support

      Animal       0.00      0.00      0.00       0.0
     Company       0.00      0.00      0.00       1.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [62]:
class RNNClassifier(nn.Module):
    """Sequence classifier: token embedding -> single-layer RNN -> linear layer.

    Args:
        input_size: Number of distinct token ids (rows in the embedding table).
        hidden_size: Width of both the token embeddings and the RNN hidden state.
        num_classes: Number of logits produced per input sequence.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits for a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids with shape (batch, seq_len).

        Returns:
            Tensor of shape (batch, num_classes) computed from the RNN output
            at the last time step of each sequence.
        """
        embedded = self.embedding(x)
        # Build the initial hidden state on the same device and with the same
        # dtype as the embeddings, so the model keeps working after
        # .to(device) / .double() instead of mixing in a CPU float32 tensor.
        h0 = torch.zeros(
            1,
            x.size(0),
            self.hidden_size,
            device=embedded.device,
            dtype=embedded.dtype,
        )
        out, _ = self.rnn(embedded, h0)
        # Only the final time step feeds the classifier head.
        return self.fc(out[:, -1, :])


In [63]:
# Convert words to integer IDs
# The vocabulary is sorted before numbering: iterating a bare set of strings
# follows hash order, which changes between interpreter runs, so unsorted IDs
# would not be reproducible after a kernel restart.
word_to_id = {word: idx for idx, word in enumerate(sorted(set(' '.join(sentences).split())))}
# torch.tensor needs rectangular input, so every sentence must split into the
# same number of whitespace-separated tokens.
X_train_rnn = torch.tensor([[word_to_id[word] for word in sentence.split()] for sentence in X_train])
X_test_rnn = torch.tensor([[word_to_id[word] for word in sentence.split()] for sentence in X_test])

In [64]:
# Encode every label as its position in entity_labels; that position is the
# class index the network is trained to predict.
y_train_rnn = torch.tensor(list(map(entity_labels.index, y_train)))
y_test_rnn = torch.tensor(list(map(entity_labels.index, y_test)))

In [65]:
# Seed torch before the model is built so the randomly initialised weights
# (and therefore the training result) are the same on every run. 42 matches
# the random_state used for the train/test split.
torch.manual_seed(42)

input_size = len(word_to_id)  # one embedding row per vocabulary word
hidden_size = 16
num_classes = len(entity_labels)  # one logit per entity label

rnn_classifier = RNNClassifier(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(rnn_classifier.parameters(), lr=0.001)

In [66]:
num_epochs = 30

# Full-batch training: each epoch is one optimizer step over all training sentences.
rnn_classifier.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    train_logits = rnn_classifier(X_train_rnn)
    loss = criterion(train_logits, y_train_rnn)
    loss.backward()
    optimizer.step()

# Score the held-out sentences in eval mode with gradient tracking disabled.
rnn_classifier.eval()
with torch.no_grad():
    outputs = rnn_classifier(X_test_rnn)
    # Predicted class = index of the largest logit in each row.
    y_pred_rnn = torch.max(outputs, 1).indices

In [67]:
from sklearn.metrics import classification_report, confusion_matrix
# Convert integer predictions back to entity labels
# (.tolist() yields plain Python ints, so the list is indexed with ints rather than 0-dim tensors)
y_pred_rnn_labels = [entity_labels[idx] for idx in y_pred_rnn.tolist()]

# Create a sorted list of unique entity labels
sorted_entity_labels = sorted(entity_labels)

# Print RNN Classification Report
print("RNN Classification Report:")
# zero_division=0 reports undefined precision/recall (classes with no true or
# no predicted samples) as 0 -- the same numbers the default produces -- but
# without emitting a warning for every affected metric.
print(classification_report(y_test, y_pred_rnn_labels, labels=sorted_entity_labels, zero_division=0))


RNN Classification Report:
              precision    recall  f1-score   support

      Animal       0.00      0.00      0.00         0
     Company       1.00      1.00      1.00         1
    Location       0.00      0.00      0.00         0

   micro avg       1.00      1.00      1.00         1
   macro avg       0.33      0.33      0.33         1
weighted avg       1.00      1.00      1.00         1



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [68]:
# Predict an entity label for every example sentence with the RNN trained above.
#
# The trained `rnn_classifier` and the `word_to_id` vocabulary it was trained
# with are reused as they are. Constructing a new RNNClassifier here would give
# randomly initialised weights, so its predictions would say nothing about the
# training. `sentences` and `entity_labels` are the lists from the example-data
# cell; the label order must not change, because class indices come from it.
X_test_rnn = torch.tensor([[word_to_id[word] for word in sentence.split()] for sentence in sentences])

rnn_classifier.eval()

# Testing the model
with torch.no_grad():
    outputs = rnn_classifier(X_test_rnn)
    _, y_pred_rnn = torch.max(outputs, 1)

# Convert integer predictions back to entity labels
y_pred_rnn_labels = [entity_labels[idx] for idx in y_pred_rnn]

# Print predicted entity labels
for sentence, label in zip(sentences, y_pred_rnn_labels):
    print(f"Sentence: {sentence}")
    print(f"Predicted Entity Label: {label}")
    print()

Sentence: Apple is a tech company.
Predicted Entity Label: Animal

Sentence: I live in New York.
Predicted Entity Label: Animal

Sentence: The cat chased the mouse.
Predicted Entity Label: Animal



In [73]:
# Accuracy of the RNN over all example sentences.
#
# The `rnn_classifier` and `word_to_id` already in the kernel are reused: a
# newly constructed RNNClassifier has random weights, so scoring it would not
# measure the trained model.
# NOTE(review): assumes no earlier cell has replaced `rnn_classifier` after training.
# The sentences scored here include the training sentences, so this is not a
# held-out score.
X_test_rnn = torch.tensor([[word_to_id[word] for word in sentence.split()] for sentence in sentences])
# sentences[i] is labelled entity_labels[i], so the true class index of
# sentence i is the position of that label in entity_labels.
y_true = [entity_labels.index(label) for label in entity_labels]

rnn_classifier.eval()

# Testing the model and calculating accuracy
with torch.no_grad():
    outputs = rnn_classifier(X_test_rnn)
    _, y_pred_rnn = torch.max(outputs, 1)

correct_predictions = sum(
    pred_label == true_label
    for pred_label, true_label in zip(y_pred_rnn.tolist(), y_true)
)
total_examples = len(sentences)

accuracy = correct_predictions / total_examples
print(f"Accuracy: {accuracy:}")

Accuracy: 0.3333333333333333
