In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')


# Load dataset
# The rest of this cell reads the 'review' and 'sentiment' columns of this frame.
# NOTE(review): a later cell loads "Reviews.csv" (capital R). If both cells are
# meant to read the same file, the differing case only resolves on a
# case-insensitive filesystem — confirm the real filename.
data = pd.read_csv('reviews.csv')


# Text preprocessing function
def preprocess(text):
    """Return a lower-cased copy of a review.

    Parameters
    ----------
    text : str or missing value
        Raw review text. Empty CSV cells are read by pandas as missing values
        (float NaN), which have no ``.lower()`` method.

    Returns
    -------
    str
        ``text`` lower-cased; ``""`` when ``text`` is missing (None/NaN);
        any other non-string value is converted with ``str()`` first.
    """
    if isinstance(text, str):
        return text.lower()
    # Missing review: return empty text instead of raising AttributeError.
    if pd.isna(text):
        return ""
    return str(text).lower()


# Apply preprocessing
data['clean_text'] = data['review'].apply(preprocess)
y = data['sentiment']


# Split dataset
# The cleaned text is split *before* feature extraction so that the TF-IDF
# vocabulary and IDF weights are learned from the training reviews only.
# Fitting the vectorizer on the full dataset would leak test-set statistics
# into training and bias the reported scores.
text_train, text_test, y_train, y_test = train_test_split(
    data['clean_text'], y, test_size=0.2, random_state=42
)


# Feature extraction
vectorizer = TfidfVectorizer(stop_words='english')
X_train = vectorizer.fit_transform(text_train)  # learn vocabulary/IDF on train only
X_test = vectorizer.transform(text_test)        # reuse the fitted vocabulary


# Train model
model = LogisticRegression()
model.fit(X_train, y_train)


# Predictions
y_pred = model.predict(X_test)


# Evaluate
# In scikit-learn's confusion matrix, rows are true classes and columns are
# predicted classes.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:", confusion_matrix(y_test, y_pred))

Accuracy: 0.6666666666666666
Confusion Matrix: [[0 1]
 [0 2]]


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sceta\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [6]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd


# Fetch the NLTK packages this cell uses: 'punkt' and 'stopwords'
nltk.download('punkt')
nltk.download('stopwords')


# Sample dataset: six short reviews
text = [
    "I love this product, it is amazing!",
    "This is the worst thing I ever bought",
    "Absolutely fantastic experience",
    "Totally disappointing and horrible",
    "I enjoyed using it very much",
    "I hate it, very bad quality",
]
# One label per entry of `text`, in the same order: 1 = Positive, 0 = Negative
labels = [1, 0, 1, 0, 1, 0]


# Shared resources read by the preprocessing function defined below
stemmer = PorterStemmer()
stop_words = set(stopwords.words('english'))  # set gives fast membership tests


def preprocess(sentence):
    """Tokenize, filter and stem a sentence.

    The sentence is lower-cased and tokenized with ``word_tokenize``. Tokens
    that are in ``stop_words`` or are not purely alphabetic are dropped, and
    the remaining tokens are stemmed with ``stemmer``.

    Note: this redefines the ``preprocess`` function from the earlier cell.

    Returns the stemmed tokens joined by single spaces.
    """
    stems = []
    for token in word_tokenize(sentence.lower()):
        if token in stop_words or not token.isalpha():
            continue
        stems.append(stemmer.stem(token))
    return " ".join(stems)


processed_text = [preprocess(t) for t in text]


y = labels


# Train-test split
# The preprocessed sentences are split *before* vectorization so the TF-IDF
# vocabulary and IDF weights come from the training sentences only. Fitting
# the vectorizer on every sentence would leak test-set statistics into
# training.
text_train, text_test, y_train, y_test = train_test_split(
    processed_text, y, test_size=0.3, random_state=42
)


# TF-IDF Vectorization
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(text_train)  # learn vocabulary/IDF on train only
X_test = vectorizer.transform(text_test)        # words unseen in training are ignored


# Model Training
model = MultinomialNB()
model.fit(X_train, y_train)


# Prediction
y_pred = model.predict(X_test)


# Accuracy
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.5


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sceta\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sceta\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [8]:
import spacy

# Load the English model `en_core_web_sm`
nlp = spacy.load("en_core_web_sm")

# Run the pipeline once; every section below reads from this same `doc`
doc = nlp("Apple is looking at buying U.K. startup for $1 billion.")

# ----- Tokenization -----
# One token text per line
print("Tokens:")
for token in doc:
    print(token.text)

# ----- Part of Speech Tagging -----
# Each line: token text followed by its part-of-speech tag
print("Part of Speech Tags:")
for token in doc:
    print(f"{token.text} -- {token.pos_}")

# ----- Named Entity Recognition -----
# Each line: entity text followed by its entity label
print("Named Entities:")
for ent in doc.ents:
    print(f"{ent.text} | {ent.label_}")


Tokens:
Apple
is
looking
at
buying
U.K.
startup
for
$
1
billion
.
Part of Speech Tags:
Apple -- PROPN
is -- AUX
looking -- VERB
at -- ADP
buying -- VERB
U.K. -- PROPN
startup -- VERB
for -- ADP
$ -- SYM
1 -- NUM
billion -- NUM
. -- PUNCT
Named Entities:
Apple | ORG
U.K. | GPE
$1 billion | MONEY


In [9]:
# Step 1 (prerequisite): NLTK must be installed, e.g. with the command:
#   pip install nltk

import nltk
from nltk.tokenize import word_tokenize

# Step 2: Download the 'punkt' package
nltk.download('punkt')

# Step 3: Sample text
text = "Natural Language Processing with NLTK is fun!"

# Tokenization: split the sample into word and punctuation tokens
words = word_tokenize(text)
print("Tokenized Words:", words)


Tokenized Words: ['Natural', 'Language', 'Processing', 'with', 'NLTK', 'is', 'fun', '!']


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sceta\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [12]:
import pandas as pd
from nltk.tokenize import word_tokenize

# Step 1: Load dataset
# NOTE(review): an earlier cell reads 'reviews.csv' (lowercase). If this is the
# same file, the case mismatch only resolves on a case-insensitive filesystem
# — confirm the real filename.
data = pd.read_csv("Reviews.csv")

# Step 2: Define sentiment lexicon
# Words counted towards a positive score
positive_words = [
    "excellent", "happy", "great", "best", "fantastic",
    "good", "satisfied", "recommend", "perfectly",
]

# Words counted towards a negative score
negative_words = [
    "terrible", "useless", "disappointed", "worst",
    "not", "poor", "bad", "waste",
]

# Step 3: Sentiment analysis function
def analyze_sentiment(text):
    """Classify `text` by counting lexicon hits.

    The text is lower-cased and tokenized with ``word_tokenize``. Each token
    found in ``positive_words`` adds one to the score; otherwise, each token
    found in ``negative_words`` subtracts one. Tokens in neither list are
    ignored.

    Returns "positive" when positive hits outnumber negative hits,
    "negative" in the opposite case, and "neutral" on a tie (including
    texts with no hits at all).
    """
    score = 0  # positive hits minus negative hits
    for token in word_tokenize(text.lower()):
        if token in positive_words:
            score += 1
        elif token in negative_words:
            score -= 1

    if score > 0:
        return "positive"
    if score < 0:
        return "negative"
    return "neutral"

# Step 4: Apply sentiment analysis
# Score every review and store the result as a new column on `data`
predicted_sentiment = data["review"].apply(analyze_sentiment)
data["Predicted_Sentiment"] = predicted_sentiment

# Step 5: Display results
# Show the dataset's `sentiment` column next to the lexicon-based prediction
result_columns = ["review", "sentiment", "Predicted_Sentiment"]
print(data[result_columns])

                                          review sentiment Predicted_Sentiment
0   The product is excellent and works perfectly  positive            positive
1             I am very happy with this purchase  positive            positive
2                Great quality and fast delivery  positive            positive
3           This is the best product I have used  positive            positive
4                Absolutely fantastic experience  positive            positive
5            The product is terrible and useless  negative            negative
6        I am very disappointed with the quality  negative            negative
7                       Worst purchase ever made  negative            negative
8                     Not worth the money at all  negative            negative
9         The item stopped working after one day  negative             neutral
10                          Good value for money  positive            positive
11         Really satisfied with the performance  po