In [5]:
# Importing all necessary libraries
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [6]:
sample_reviews = [
    ("This shampoo really smoothens split ends with great results.", "positive"),
    ("What a difference this shampoo makes ‚Äì it hydrates well even after a long day.", "positive"),
    ("I hate how this shampoo causes irritation so I threw it away.", "negative"),
    ("This amazing shampoo keeps my hair bouncy and it exceeded my expectations.", "positive"),
    ("Worst shampoo ever - it makes my scalp itchy and I'm throwing it away.", "negative"),
    ("Hands down, it improves elasticity just as I wanted.", "positive"),
    ("This product has caused dandruff and it‚Äôs getting worse.", "negative"),
    ("My experience with this shampoo nourishes my scalp while keeping it healthy.", "positive"),
    ("Avoid this product, it ruins the texture with repeated disappointment.", "negative"),
    ("Honestly, this product makes styling easier and it smells heavenly.", "positive"),
    ("From the first use, it caused flaking and I regret trying it.", "negative"),
    ("No doubt this shampoo strengthens my roots and it lasts all day.", "positive"),
    ("Ever since I used this shampoo, it made my hair greasy and my scalp damaged.", "negative"),
    ("This shampoo is a game-changer because it adds volume with consistent performance.", "positive"),
    ("Unfortunately, it smells unpleasant and leaves an awful film.", "negative"),
    ("It works wonders to soothe scalp and now I swear by it.", "positive"),
    ("The formula tangles my hair more, and I‚Äôll never buy again.", "negative"),
    ("I genuinely enjoy how it revives dull hair and makes my day better.", "positive"),
    ("This awful shampoo makes my hair stiff, and it made things worse.", "negative"),
    ("Highly recommend this as it enhances shine and my hair looks amazing.", "positive"),
    ("This shampoo just dries my ends and no one should buy this.", "negative"),
    ("Been using it for months and it keeps hair manageable and I love the results.", "positive"),
    ("What a letdown ‚Äì it causes static with no signs of recovery.", "negative"),
    ("This shampoo transformed my hair and locks in moisture so I'm very satisfied.", "positive"),
    ("Regret trying this because it feels like detergent and left my hair lifeless.", "negative"),
    ("This product made me realize it makes hair feel light without weighing it down.", "positive"),
    ("I'm disappointed it leaves buildup and it feels like a bad choice.", "negative"),
    ("Ever since I started using this shampoo, it eliminates dryness which is fantastic.", "positive"),
    ("What bothers me most is it doesn‚Äôt rinse out properly, leaving a sticky residue.", "negative"),
    ("This shampoo is absolutely amazing because it adds strength without any residue.", "positive"),
    ("The most noticeable effect is it clumps my hair and it didn‚Äôt help at all.", "negative"),
    ("What I like most is it rejuvenates my strands, making haircare so simple.", "positive"),
    ("I found that it strips natural oils, and I‚Äôm completely dissatisfied.", "negative"),
    ("From the first wash, it improves overall health and friends noticed too.", "positive"),
    ("It fails to deliver and leaves hair lifeless, which is very frustrating.", "negative"),
    ("My favorite thing about this shampoo is it protects color with visible improvement.", "positive"),
    ("Really disappointed that it makes hair dry and it didn‚Äôt meet expectations.", "negative"),
    ("I can't believe how well it revives dull hair and it smells heavenly.", "positive"),
    ("This product never soothes scalp, and my hair is worse off.", "negative"),
    ("This shampoo really calms my scalp and it‚Äôs worth every penny.", "positive"),
    ("I never thought a shampoo could weigh down my hair and flatten volume.", "negative"),
    ("Can‚Äôt believe how this shampoo brings back life to hair, and I feel more confident.", "positive"),
    ("It always causes irritation and makes detangling harder.", "negative"),
    ("I love how this shampoo makes detangling easier and the compliments haven‚Äôt stopped.", "positive"),
    ("This shampoo completely ruined my routine ‚Äì feels too harsh and heavy.", "negative"),
    ("This amazing shampoo enhances shine and adds a silky touch.", "positive"),
    ("I'm shocked how bad this shampoo feels cheap and dries my ends.", "negative"),
    ("After using it for a while, it controls my curls and protects color.", "positive"),
    ("It's frustrating that it increases hair fall and leaves flakes.", "negative"),
    ("No doubt this shampoo adds natural shine while keeping it healthy.", "positive"),
    ("Wouldn‚Äôt recommend it as it makes my scalp burn, and it‚Äôs a waste of money.", "negative"),
    ("Honestly, this product keeps my hair fresh and I love the results.", "positive"),
    ("My issue with this shampoo is it causes flaking and adds build-up fast.", "negative"),
    ("This shampoo is a game-changer because it locks in moisture and makes my day better.", "positive"),
    ("This awful shampoo ruined my scalp and I regret buying it.", "negative"),
    ("I'm thrilled it consistently nourishes my scalp and it smells divine.", "positive"),
    ("Regret trying this because it makes hair brittle and affects my scalp badly.", "negative"),
    ("This shampoo made things better ‚Äì hydrates well and feels smooth.", "positive"),
    ("I‚Äôll never buy this again ‚Äì it made my hair feel waxy.", "negative"),
    ("Love the bounce it adds ‚Äì smells great and works wonders.", "positive"),
    ("Couldn‚Äôt handle how greasy it left my roots.", "negative"),
    ("This shampoo defines my curls beautifully and smells luxurious.", "positive"),
    ("Left my scalp flaky and itchy, total disappointment.", "negative"),
    ("Smells divine and makes my hair feel light and fresh.", "positive"),
    ("Caused severe dryness and tangling every time I used it.", "negative"),
    ("Great for colored hair ‚Äì keeps shine and softness locked in.", "positive"),
    ("Leaves a film I can‚Äôt wash out ‚Äì horrible experience.", "negative"),
    ("Makes hair look fuller and keeps frizz under control.", "positive"),
    ("Disappointed ‚Äì not suitable for oily scalps at all.", "negative"),
    ("Softens ends and gives a clean feeling all day long.", "positive"),
    ("Way too thick and sticky ‚Äì didn‚Äôt like the texture.", "negative"),
    ("Perfect for daily use ‚Äì gentle and very effective.", "positive"),
    ("Worsened my dandruff issue over time ‚Äì not good.", "negative"),
    ("Feels like a salon treatment every time I use it.", "positive"),
    ("Foams too much and makes rinsing take forever.", "negative"),
    ("Best shampoo I‚Äôve ever used ‚Äì silky smooth results.", "positive"),
    ("Weird smell and doesn‚Äôt clean well at all.", "negative"),
    ("Love the cooling sensation on my scalp.", "positive"),
    ("My hair looked dull after just a few washes.", "negative"),
    ("I actually look forward to using this shampoo now.", "positive"),
    ("Leaves behind flakes and weird residue.", "negative"),
    ("Super nourishing and perfect for winter hair care.", "positive"),
    ("Made my hair look greasy the next day.", "negative"),
    ("It revived my dry hair like magic!", "positive"),
    ("Strong chemical smell ruined it for me.", "negative"),
    ("This product changed my entire routine for the better.", "positive"),
    ("Couldn‚Äôt style my hair after using this.", "negative"),
    ("Gives a fresh, clean finish without stripping oils.", "positive"),
    ("Hair felt like straw after one use.", "negative"),
    ("Luxurious texture and perfect for damaged hair.", "positive"),
    ("Didn't clean thoroughly ‚Äì still felt dirty.", "negative"),
    ("My frizz is gone, and I love how it feels.", "positive"),
    ("Not worth the hype ‚Äì average at best.", "negative"),
    ("I always get compliments on my hair now!", "positive"),
    ("Leaves an oily layer on my hairline.", "negative"),
    ("The best thing I‚Äôve done for my hair health.", "positive"),
    ("Too harsh ‚Äì made my scalp sensitive.", "negative"),
    ("My curls are more defined and shiny.", "positive"),
    ("I couldn‚Äôt get past the odd scent.", "negative"),
    ("So refreshing ‚Äì my hair feels alive again.", "positive"),
    ("I had to wash twice ‚Äì still didn‚Äôt feel clean.", "negative"),
    ("Adds body and bounce ‚Äì love the results!", "positive"),
    ("Left behind a greasy feel even after rinsing.", "negative"),
    ("Feels luxurious ‚Äì better than expensive salon brands.", "positive"),
]


df_test = pd.DataFrame(sample_reviews, columns=["Review", "Sentiment"])
df_test.head()

Unnamed: 0,Review,Sentiment
0,This shampoo really smoothens split ends with ...,positive
1,What a difference this shampoo makes ‚Äì it hydr...,positive
2,I hate how this shampoo causes irritation so I...,negative
3,This amazing shampoo keeps my hair bouncy and ...,positive
4,Worst shampoo ever - it makes my scalp itchy a...,negative


In [7]:
df = pd.read_csv("/content/50000_unique_shampoo_reviews_sentiment.csv")
df.head()

Unnamed: 0,Review,Sentiment
0,This shampoo unfortunately feels too harsh wit...,negative
1,This product has makes styling easier even aft...,positive
2,It's annoying that it weighs my hair down with...,negative
3,This product made me realize it keeps my hair ...,positive
4,Worst shampoo ever - it doesn't clean well and...,negative


In [8]:
print(df.info)
print(df.isnull().sum().sum())
print(df.duplicated().sum())

<bound method DataFrame.info of                                                   Review Sentiment
0      This shampoo unfortunately feels too harsh wit...  negative
1      This product has makes styling easier even aft...  positive
2      It's annoying that it weighs my hair down with...  negative
3      This product made me realize it keeps my hair ...  positive
4      Worst shampoo ever - it doesn't clean well and...  negative
...                                                  ...       ...
49995  This product has has a chemical smell and I‚Äôm ...  negative
49996  This shampoo made things worse affects my scal...  negative
49997  After using it for a while, it strengthens my ...  positive
49998  Highly recommend this as it protects color and...  positive
49999  Worst experience ever, it weighs my hair down ...  negative

[50000 rows x 2 columns]>
0
0


In [9]:
from nltk.corpus import stopwords
from nltk.tokenize import TreebankWordTokenizer

In [10]:
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [11]:
tokenizer = TreebankWordTokenizer()
stop_words = set(stopwords.words('english'))

In [12]:
def clean_and_tokenize(text):
    text = text.lower()  # lowercase
    text = re.sub(r'[^a-z\s]', '', text)  # remove non-alphabetic characters
    tokens = tokenizer.tokenize(text)  # tokenize using Treebank tokenizer
    filtered_tokens = [word for word in tokens if word not in stop_words]  # remove stopwords
    return ' '.join(filtered_tokens)  # join cleaned tokens back into string


In [13]:
df['cleaned_review'] = df['Review'].apply(clean_and_tokenize)
df.head()

Unnamed: 0,Review,Sentiment,cleaned_review
0,This shampoo unfortunately feels too harsh wit...,negative,shampoo unfortunately feels harsh improvement
1,This product has makes styling easier even aft...,positive,product makes styling easier even long day
2,It's annoying that it weighs my hair down with...,negative,annoying weighs hair without results
3,This product made me realize it keeps my hair ...,positive,product made realize keeps hair fresh without ...
4,Worst shampoo ever - it doesn't clean well and...,negative,worst shampoo ever doesnt clean well disappoin...


In [14]:
X = df['cleaned_review']
y = df['Sentiment']

# Split into train (80%) and test (20%) sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert text data into numerical format using Bag of Words
vectorizer = CountVectorizer()

# Fit and transform training data
X_train_vec = vectorizer.fit_transform(X_train)

# Transform test data
X_test_vec = vectorizer.transform(X_test)

In [15]:
# Initialize and train Naive Bayes classifier
model = MultinomialNB()
model.fit(X_train_vec, y_train)

In [16]:
# Predict on test data
y_pred = model.predict(X_test_vec)

# Print evaluation metrics
print("‚úÖ Accuracy:", accuracy_score(y_test, y_pred))
print("\nüìä Classification Report:\n", classification_report(y_test, y_pred))

‚úÖ Accuracy: 1.0

üìä Classification Report:
               precision    recall  f1-score   support

    negative       1.00      1.00      1.00      4932
    positive       1.00      1.00      1.00      5068

    accuracy                           1.00     10000
   macro avg       1.00      1.00      1.00     10000
weighted avg       1.00      1.00      1.00     10000



In [17]:
# Install spell checker (only needs to be done once in Colab)
!pip install pyspellchecker

Collecting pyspellchecker
  Downloading pyspellchecker-0.8.2-py3-none-any.whl.metadata (9.4 kB)
Downloading pyspellchecker-0.8.2-py3-none-any.whl (7.1 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m7.1/7.1 MB[0m [31m48.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyspellchecker
Successfully installed pyspellchecker-0.8.2


In [18]:
from spellchecker import SpellChecker
spell = SpellChecker()

def detect_and_correct_spelling(text):
    # Split the input into individual words
    words = text.split()

    # Identify words that are misspelled
    misspelled = spell.unknown(words)

    # Correct each misspelled word; leave correct ones unchanged
    # corrected_words = [
    #     spell.correction(word) if word in misspelled else word
    #     for word in words
    # ]

    corrected_words = [
        spell.correction(word) if (word in misspelled and spell.correction(word) is not None) else word
        for word in words
    ]

    # cleaned_text = clean_and_tokenize(corrected_text)

    # Join the corrected words back into a full sentence
    corrected_text = ' '.join(corrected_words)
    X_input_vec = vectorizer.transform([corrected_text])
    y_input_pred = model.predict(X_input_vec)
    y_input_pred
    return list(misspelled), corrected_text, y_input_pred

In [19]:
detect_and_correct_spelling("It feels luxrios with gret result.")

(['result.', 'luxrios', 'gret'],
 'It feels luxurious with get results',
 array(['positive'], dtype='<U8'))

In [24]:
input_neww = input("Put your review here:")
detect_and_correct_spelling(input_neww)

Put your review here:this shampoo is too expemmsive


(['expemmsive'],
 'this shampoo is too expensive',
 array(['negative'], dtype='<U8'))