In [None]:
!pip install nltk spacy

Name: spacy
Version: 3.7.5
Summary: Industrial-strength Natural Language Processing (NLP) in Python
Home-page: https://spacy.io
Author: Explosion
Author-email: contact@explosion.ai
License: MIT
Location: /usr/local/lib/python3.10/dist-packages
Requires: catalogue, cymem, jinja2, langcodes, murmurhash, numpy, packaging, preshed, pydantic, requests, setuptools, spacy-legacy, spacy-loggers, srsly, thinc, tqdm, typer, wasabi, weasel
Required-by: en-core-web-sm, fastai


In [None]:
import nltk
import spacy

from nltk.tokenize import word_tokenize  # tokenization: breaks text down into individual words
from nltk.corpus import stopwords        # stopwords: common words (is, and, the)
from spacy.lang.en import English

# Download the NLTK resources used in this notebook.
nltk.download('punkt')      # tokenizer model used for splitting text into sentences (sentence tokenization)
nltk.download('punkt_tab')  # newer NLTK releases (3.9+) load the Punkt data from this package instead of 'punkt'
nltk.download('stopwords')  # stopword lists backing nltk.corpus.stopwords

# Create a blank English spaCy pipeline. This is not a trained model such as
# en_core_web_sm: it only provides the language's rule-based tokenizer.
nlp = English()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
# NLTK: split a sample sentence into word-level tokens.
text = "NLP is fascinating field of Artifical Intelligence"
tokens = word_tokenize(text)

# Show the resulting list of tokens.
print("Tokens using NLTK: ", tokens)

Tokens using NLTK:  ['NLP', 'is', 'fascinating', 'field', 'of', 'Artifical', 'Intelligence']


In [None]:
# spaCy: run the pipeline on `text` and collect the text of every token in the Doc.
doc = nlp(text)
spacy_tokens = [tok.text for tok in doc]

print("Tokens using SpaCy: ", spacy_tokens)

Tokens using SpaCy:  ['NLP', 'is', 'fascinating', 'field', 'of', 'Artifical', 'Intelligence']


In [None]:
from sklearn.feature_extraction.text import CountVectorizer
# CountVectorizer: converts a collection of text documents into a matrix of token
# counts, i.e. it tokenizes the text and counts how often each word appears.
from sklearn.naive_bayes import MultinomialNB
# MultinomialNB: naive Bayes classifier for multinomially distributed data
# (frequencies or counts); often used for text classification.
from sklearn.pipeline import make_pipeline
# make_pipeline: builds a Pipeline that chains several processing steps into a single object.

# Toy training set: one sentiment label per sentence.
texts = [
    "I love programming", "Programming is great", "I really enjoy solving problems",
    "I hate bugs", "Bugs are frustrating", "I dislike errors", "I cannot stand issues",
    "Coding is so much fun", "I find programming to be amazing", "I dislike debugging",
    "I adore my teacher"
    ]
# NOTE(review): the last sentence ("I adore my teacher") is labelled "Neutral" and is the
# only example of that class - confirm this label is intentional.
labels = ["Positive", "Positive", "Positive", "Negative", "Negative",
          "Negative", "Negative", "Positive", "Positive", "Negative", "Neutral"
          ]
assert len(texts) == len(labels), "every training sentence needs exactly one label"

# Pipeline for the classification: bag-of-words counts -> multinomial naive Bayes.
model = make_pipeline(CountVectorizer(), MultinomialNB())
model.fit(texts, labels)

# User input
user_input = input("Enter a sentence for sentiment analysis: ")

# If the input is blank or contains no word seen during training, its count vector is
# all zeros and the classifier would answer from the class priors alone, which says
# nothing about the sentence. Detect that case instead of printing a misleading label.
input_counts = model.named_steps["countvectorizer"].transform([user_input])
if input_counts.sum() == 0:
    print("The input contains no words known to the model; no sentiment predicted.")
else:
    # Prediction from the user input (an array holding one label).
    prediction = model.predict([user_input])
    print("Predicted sentiment: ", prediction)

Enter a sentence for sentiment analysis: i adore my teacher
Predicted sentiment:  ['Neutral']
