# What's NLP?

In [None]:
import nltk
from nltk.tokenize import word_tokenize

# word_tokenize relies on NLTK's Punkt tokenizer data. Newer NLTK releases look
# for it under 'punkt_tab' while older ones use 'punkt', so request both to keep
# this cell working regardless of the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')

# Sample text
text = "Natural Language Processing (NLP) enables machines to understand human language."

# Split the sentence into word and punctuation tokens
tokens = word_tokenize(text)

# Display the tokens
print(tokens)

In [None]:
import nltk

In [None]:
# Punkt tokenizer data for word_tokenize: older NLTK releases use 'punkt',
# newer ones look for 'punkt_tab', so fetch both.
nltk.download('punkt')
nltk.download('punkt_tab')

In [None]:
from nltk.tokenize import word_tokenize

In [None]:
# Sample sentence used by the tokenization steps that follow
text = (
    "Natural Language Processing (NLP) enables machines "
    "to understand human language."
)

In [None]:
# Tokenize the sample sentence with NLTK's word_tokenize
tokens = word_tokenize(text)

In [None]:
# Show the resulting list of tokens
print(tokens)

In [None]:
# Expected token list for the sample sentence: the parentheses and the final
# period appear as separate tokens.
['Natural', 'Language', 'Processing', '(', 'NLP', ')', 'enables', 'machines', 'to', 'understand', 'human', 'language', '.']

# Significance and Application of NLP

In [None]:
from translate import Translator

# Create a translator object whose target language code is "es"
translator = Translator(to_lang="es")

# Translate an English phrase and print the translated string
translation = translator.translate("How are you?")
print(translation)  # Output: ¿Cómo estás?

In [None]:
from translate import Translator

In [None]:
# Translator configured with "es" as the target language code
translator = Translator(to_lang="es")

In [None]:
# Translate the phrase with the translator created above
translation = translator.translate("How are you?")

In [None]:
# Display the translated phrase
print(translation)  # Output: ¿Cómo estás?

In [None]:
from textblob import TextBlob

# Sample text to score
text = "I love this product! It's amazing."

# Wrap the text in a TextBlob object, which exposes the analysis attributes
blob = TextBlob(text)

# Read the sentiment attribute (polarity and subjectivity) and print it
sentiment = blob.sentiment
print(sentiment)  # Output: Sentiment(polarity=0.65, subjectivity=0.6)

In [None]:
import nltk
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer

# sumy's English Tokenizer is backed by NLTK's Punkt data. Download it here so
# the cell does not depend on an earlier cell having fetched it; newer NLTK
# releases need 'punkt_tab', older ones 'punkt'.
nltk.download('punkt')
nltk.download('punkt_tab')

# Sample text
text = """
Natural Language Processing (NLP) is a fascinating field at the intersection of computer science, artificial intelligence, and linguistics. It enables machines to understand, interpret, and generate human language, opening up a world of possibilities for applications ranging from chatbots and translation services to sentiment analysis and beyond.
"""

# Create a parser that splits the string into a document using the English tokenizer
parser = PlaintextParser.from_string(text, Tokenizer("english"))

# Create a summarizer (Latent Semantic Analysis)
summarizer = LsaSummarizer()

# Generate the summary
summary = summarizer(parser.document, 2)  # Summarize to 2 sentences
for sentence in summary:
    print(sentence)

Real-World Example: E-commerce Review Analysis

In [None]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Sample reviews
reviews = [
    "This product is fantastic! It exceeded my expectations.",
    "Not worth the price. I'm disappointed with the quality.",
    "Good value for money. Will buy again.",
]

# Download the VADER lexicon before constructing the analyzer that uses it
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

# Score each review and print it together with its polarity scores
for review in reviews:
    sentiment = sia.polarity_scores(review)
    # A single-backslash "\n" is a real line break; the previous "\\n" printed
    # the two characters backslash + n instead of starting a new line.
    print(f"Review: {review}\nSentiment: {sentiment}\n")


# Overview of Python for NLP

In [None]:
import nltk
from nltk.tokenize import word_tokenize

# Punkt tokenizer data for word_tokenize: older NLTK releases use 'punkt',
# newer ones look for 'punkt_tab', so fetch both.
nltk.download('punkt')
nltk.download('punkt_tab')

# Tokenize a short sentence and show the tokens
text = "Natural Language Processing with Python is fun!"
tokens = word_tokenize(text)
print(tokens)

In [None]:
import spacy

# Load the SpaCy pipeline named "en_core_web_sm" (the model must already be installed)
nlp = spacy.load("en_core_web_sm")

# Run the pipeline over the sample sentence to obtain a processed document
text = "Apple is looking at buying U.K. startup for $1 billion."
doc = nlp(text)

# Print each named entity's text alongside its label
for ent in doc.ents:
    print(ent.text, ent.label_)

In [None]:
from gensim.models import Word2Vec

# Sample corpus: each sentence is already split into a list of lowercase words
sentences = [
    ["natural", "language", "processing"],
    ["python", "is", "a", "powerful", "language"],
    ["text", "processing", "with", "gensim"],
]

# Train a Word2Vec model on the corpus; min_count=1 is needed here because
# most words occur only once in this tiny sample
model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)

# Look up the learned vector for the word 'language' and print it
vector = model.wv['language']
print(vector)

In [None]:
from gensim.models import Word2Vec

In [None]:
# Training corpus: three sentences, each given as a list of word tokens
sentences = [
    ["natural", "language", "processing"],
    ["python", "is", "a", "powerful", "language"],
    ["text", "processing", "with", "gensim"],
]

In [None]:
# Train Word2Vec on the tokenized sentences defined in the previous cell
model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)

In [None]:
# Fetch the trained vector for 'language' from the model's word vectors and print it
vector = model.wv['language']
print(vector)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# Four short training texts and their class labels, in the same order
texts = [
    "I love this product",
    "This is the worst experience",
    "Absolutely fantastic!",
    "Not good at all",
]
labels = [1, 0, 1, 0]

# Learn the vocabulary and build the document-term count matrix in one call
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(texts)

# fit() returns the estimator itself, so construction and training can be chained
classifier = MultinomialNB().fit(X, labels)

# Encode the unseen text with the already-fitted vectorizer, then predict its label
new_text = ["I hate this"]
X_new = vectorizer.transform(new_text)
prediction = classifier.predict(X_new)
print(prediction)

Setting Up Your Python Environment for NLP

In [None]:
python --version

In [None]:
python -m venv nlp_env

In [None]:
# Windows only. This is a terminal command, not Python: run it in a terminal
# before launching Jupyter. Running it through "!" would activate the
# environment only inside a throwaway subshell, not in the running kernel.
#
#     nlp_env\Scripts\activate

In [None]:
# macOS / Linux only. This is a terminal command, not Python: run it in a
# terminal before launching Jupyter. Running it through "!" would activate the
# environment only inside a throwaway subshell, not in the running kernel.
#
#     source nlp_env/bin/activate

In [None]:
pip install nltk spacy gensim scikit-learn

In [None]:
import nltk
from nltk.tokenize import word_tokenize
import spacy
from gensim.models import Word2Vec
from sklearn.feature_extraction.text import CountVectorizer

# Verify NLTK
# Punkt tokenizer data for word_tokenize: older NLTK releases use 'punkt',
# newer ones look for 'punkt_tab', so fetch both.
nltk.download('punkt')
nltk.download('punkt_tab')
text = "Natural Language Processing with Python is fun!"
tokens = word_tokenize(text)
print("NLTK Tokens:", tokens)

# Verify SpaCy (the en_core_web_sm model must be installed)
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)
print("SpaCy Tokens:", [token.text for token in doc])

# Verify gensim by training a tiny Word2Vec model and listing its vocabulary
sentences = [["natural", "language", "processing"], ["python", "is", "fun"]]
model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)
print("Word2Vec Vocabulary:", list(model.wv.index_to_key))

# Verify scikit-learn by fitting a CountVectorizer on the sample sentence
vectorizer = CountVectorizer()
X = vectorizer.fit_transform([text])
print("CountVectorizer Feature Names:", vectorizer.get_feature_names_out())

In [None]:
python test_nlp.py

In [None]:
# Expected output of test_nlp.py (kept as comments: this is program output,
# not Python code, and would raise a SyntaxError if executed):
#
# NLTK Tokens: ['Natural', 'Language', 'Processing', 'with', 'Python', 'is', 'fun', '!']
# SpaCy Tokens: ['Natural', 'Language', 'Processing', 'with', 'Python', 'is', 'fun', '!']
# Word2Vec Vocabulary: ['natural', 'language', 'processing', 'python', 'is', 'fun']
# CountVectorizer Feature Names: ['fun', 'is', 'language', 'natural', 'processing', 'python', 'with']

Example: End-to-End NLP Pipeline

In [None]:
import nltk
import spacy
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from nltk.corpus import stopwords
# Fetch NLTK's stop-word lists, read further down via stopwords.words('english')
nltk.download('stopwords')

# Sample data: four short reviews and their class labels, in the same order
texts = [
    "I love this product! It's amazing.",
    "This is the worst experience I've ever had.",
    "Absolutely fantastic! Highly recommend.",
    "Not good at all. Very disappointing."
]
labels = [1, 0, 1, 0]

# Load the SpaCy pipeline used by the custom tokenizer below
nlp = spacy.load("en_core_web_sm")

# Custom tokenizer using SpaCy
def spacy_tokenizer(sentence):
    """Return the text of every token the SpaCy pipeline ``nlp`` finds in ``sentence``."""
    token_texts = []
    for token in nlp(sentence):
        token_texts.append(token.text)
    return token_texts

# English stop words from NLTK, as a set
stop_words = set(stopwords.words('english'))

# Define the pipeline: bag-of-words features followed by a Naive Bayes classifier
pipeline = Pipeline([
    ('vectorizer', CountVectorizer(
        tokenizer=spacy_tokenizer,
        # A custom tokenizer replaces the default regex; passing None makes that
        # explicit and avoids scikit-learn's "token_pattern will not be used" warning.
        token_pattern=None,
        stop_words=list(stop_words),
    )),
    ('classifier', MultinomialNB())
])

# Train the model
pipeline.fit(texts, labels)

# Predict the label for a new text
new_text = ["I hate this product"]
prediction = pipeline.predict(new_text)
print(prediction)

In [None]:
import nltk
import spacy
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from nltk.corpus import stopwords
# Fetch NLTK's stop-word lists, read later via stopwords.words('english')
nltk.download('stopwords')

In [10]:
# Training data: four short reviews and their class labels, in the same order
texts = [
    "I love this product! It's amazing.",
    "This is the worst experience I've ever had.",
    "Absolutely fantastic! Highly recommend.",
    "Not good at all. Very disappointing."
]
labels = [1, 0, 1, 0]

In [11]:
# Load the SpaCy pipeline named "en_core_web_sm" (the model must already be installed)
nlp = spacy.load("en_core_web_sm")

In [12]:
def spacy_tokenizer(sentence):
    """Return the text of every token the SpaCy pipeline ``nlp`` finds in ``sentence``."""
    token_texts = []
    for token in nlp(sentence):
        token_texts.append(token.text)
    return token_texts

In [13]:
# English stop words from NLTK, collected into a set
stop_words = set(stopwords.words('english'))

In [17]:
# Bag-of-words features (SpaCy tokenization, NLTK stop words) feeding a Naive Bayes classifier
pipeline = Pipeline([
    ('vectorizer', CountVectorizer(
        tokenizer=spacy_tokenizer,
        # A custom tokenizer replaces the default regex; passing None makes that
        # explicit and avoids scikit-learn's "token_pattern will not be used" warning.
        token_pattern=None,
        stop_words=list(stop_words),
    )),
    ('classifier', MultinomialNB())
])

In [None]:
# Train the whole pipeline (vectorizer, then classifier) on the labelled texts
pipeline.fit(texts, labels)

In [None]:
# Predict the label of an unseen text with the trained pipeline and print it
new_text = ["I hate this product"]
prediction = pipeline.predict(new_text)
print(prediction)

# Chapter-1 Assignment

Exercise 1: Tokenization with NLTK

In [None]:
import nltk
from nltk.tokenize import word_tokenize

# Punkt tokenizer data for word_tokenize: older NLTK releases use 'punkt',
# newer ones look for 'punkt_tab', so fetch both.
nltk.download('punkt')
nltk.download('punkt_tab')

# Tokenize the sentence and show the tokens
text = "Natural Language Processing enables computers to understand human language."
tokens = word_tokenize(text)
print(tokens)

**Explain the code snippet above in detail.**

___

**Type Your Response Below:**


Exercise 2: Named Entity Recognition with SpaCy

In [None]:
import spacy

# Load the SpaCy pipeline named "en_core_web_sm" (the model must already be installed)
nlp = spacy.load("en_core_web_sm")

# Run the pipeline over the sample sentence to obtain a processed document
text = "Google was founded by Larry Page and Sergey Brin while they were Ph.D. students at Stanford University."
doc = nlp(text)

# Print each named entity's text alongside its label
for ent in doc.ents:
    print(ent.text, ent.label_)

**Explain the code snippet above in detail.**

___

**Type Your Response Below:**


Exercise 3: Sentiment Analysis with TextBlob

In [None]:
from textblob import TextBlob

# Wrap the sample sentence in a TextBlob, read its sentiment attribute and print it
text = "I am extremely happy with the service provided."
blob = TextBlob(text)
sentiment = blob.sentiment
print(sentiment)

**Explain the code snippet above in detail.**

___

**Type Your Response Below:**


Exercise 4: Text Summarization with sumy

In [None]:
!pip install sumy

In [None]:
import nltk
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer

# sumy's English Tokenizer is backed by NLTK's Punkt data; newer NLTK releases
# need 'punkt_tab', older ones 'punkt', so fetch both.
nltk.download('punkt')
nltk.download('punkt_tab')

text = """
Natural Language Processing (NLP) is a fascinating field at the intersection of computer science, artificial intelligence, and linguistics. It enables machines to understand, interpret, and generate human language, opening up a world of possibilities for applications ranging from chatbots and translation services to sentiment analysis and beyond. The evolution of NLP has been driven by significant advances in machine learning and deep learning, which have enabled more sophisticated and accurate models for language understanding. This book aims to bring these cutting-edge techniques to you in an accessible and practical way, regardless of your current level of expertise.
"""


# Parse the text into a document, then ask the LSA summarizer for 2 sentences
parser = PlaintextParser.from_string(text, Tokenizer("english"))
summarizer = LsaSummarizer()
summary = summarizer(parser.document, 2)
for sentence in summary:
    print(sentence)

**Explain the code snippet above in detail.**

___

**Type Your Response Below:**


Exercise 5: Text Classification with scikit-learn

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# Four short training texts and their class labels, in the same order
texts = [
    "I love this product",
    "This is the worst experience",
    "Absolutely fantastic!",
    "Not good at all",
]
labels = [1, 0, 1, 0]

# Learn the vocabulary and build the document-term count matrix in one call
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(texts)

# fit() returns the estimator itself, so construction and training can be chained
classifier = MultinomialNB().fit(X, labels)

# Encode the unseen text with the already-fitted vectorizer, then predict its label
new_text = ["This experience was fantastic"]
X_new = vectorizer.transform(new_text)
prediction = classifier.predict(X_new)
print(prediction)

**Explain the code snippet above in detail.**

___

**Type Your Response Below:**
