In [1]:
import spacy
from scipy.spatial.distance import cosine

# Download the spacy model if it's not already downloaded
!python -m spacy download en_core_web_md

Collecting en-core-web-md==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl (42.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 MB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-md
Successfully installed en-core-web-md-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [2]:
import spacy
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load SpaCy model
nlp = spacy.load("en_core_web_md")

# Example dataset
data = [
    ("I love this product!", "positive"),
    ("This is the best purchase I have made.", "positive"),
    ("Absolutely fantastic experience.", "positive"),
    ("I hate this item.", "negative"),
    ("Terrible quality and waste of money.", "negative"),
    ("I am very disappointed with the service.", "negative"),
    ("It is okay, not great but not bad either.", "neutral"),
    ("The product is average, nothing special.", "neutral"),
    ("Meh, it's just fine.", "neutral"),
]

# Preprocessing: Convert text to word embeddings
def get_sentence_embedding(sentence):
    """
    Generate a sentence embedding by averaging the word vectors.
    """
    doc = nlp(sentence)
    return doc.vector

# Prepare data
sentences = [item[0] for item in data]
labels = [item[1] for item in data]

# Generate embeddings for each sentence
embeddings = np.array([get_sentence_embedding(sentence) for sentence in sentences])

# Encode labels
label_mapping = {"positive": 1, "negative": 0, "neutral": 2}
encoded_labels = np.array([label_mapping[label] for label in labels])

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(embeddings, encoded_labels, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
classifier = RandomForestClassifier(random_state=42)
classifier.fit(X_train, y_train)

# Make predictions
y_pred = classifier.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=label_mapping.keys()))


Accuracy: 0.0
Classification Report:
               precision    recall  f1-score   support

    positive       0.00      0.00      0.00       0.0
    negative       0.00      0.00      0.00       1.0
     neutral       0.00      0.00      0.00       1.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [4]:
# Function to predict sentiment of a single sentence
def predict_sentiment(sentence, model, label_mapping):
    """
    Predict the sentiment of a given sentence using the trained model.
    """
    # Generate embedding for the input sentence
    embedding = get_sentence_embedding(sentence).reshape(1, -1)  # Reshape for model input
    # Predict the label
    label_index = model.predict(embedding)[0]
    # Reverse mapping from index to sentiment
    reverse_mapping = {v: k for k, v in label_mapping.items()}
    return reverse_mapping[label_index]

# Example input sentence
example_sentence = "The service was absolutely great, I loved it!"

# Predict sentiment
predicted_sentiment = predict_sentiment(example_sentence, classifier, label_mapping)
print(f"Predicted Sentiment for '{example_sentence}': {predicted_sentiment}")


Predicted Sentiment for 'The service was absolutely great, I loved it!': neutral
