In [1]:
!pip install pandas nltk spacy scikit-learn tensorflow textblob

Collecting spacy
  Using cached spacy-3.7.2-cp311-cp311-win_amd64.whl.metadata (26 kB)
Collecting tensorflow
  Using cached tensorflow-2.15.0-cp311-cp311-win_amd64.whl.metadata (3.6 kB)
Collecting tensorflow-intel==2.15.0 (from tensorflow)
  Using cached tensorflow_intel-2.15.0-cp311-cp311-win_amd64.whl.metadata (5.1 kB)
Using cached spacy-3.7.2-cp311-cp311-win_amd64.whl (12.1 MB)
Using cached tensorflow-2.15.0-cp311-cp311-win_amd64.whl (2.1 kB)
Using cached tensorflow_intel-2.15.0-cp311-cp311-win_amd64.whl (300.9 MB)
Installing collected packages: spacy, tensorflow-intel, tensorflow
Successfully installed spacy-3.7.2 tensorflow-2.15.0 tensorflow-intel-2.15.0


In [2]:
import pandas as pd

# Load your dataset
df = pd.read_csv('sample_reviews.csv')

# Display the first few rows to check everything is loaded correctly
print(df.head())


   Customer ID     Location  Product ID  \
0            1      Atlanta         101   
1            2     New York         102   
2            3      Chicago         103   
3            4  Los Angeles         104   
4            5      Seattle         105   

                                              Review  Review Stars  
0        Love this product! It works wonders for me.             5  
1    Not happy with the quality. Broke after a week.             1  
2  It's okay, but I expected better based on the ...             3  
3    Amazing quality and excellent customer service!             5  
4           Terrible, would not recommend to anyone.             1  


In [3]:
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import nltk
nltk.download('stopwords')
nltk.download('wordnet')

# Function to clean and preprocess text
def preprocess_text(text):
    # Convert text to lowercase
    text = text.lower()
    # Remove numbers, special characters, and punctuation
    text = re.sub(r'[^a-z\s]', '', text)
    # Tokenize text
    words = text.split()
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_words]
    # Lemmatize words
    lemmatizer = WordNetLemmatizer()
    words = [lemmatizer.lemmatize(word) for word in words]
    # Re-join words to form the cleaned text
    cleaned_text = ' '.join(words)
    return cleaned_text

# Apply the preprocessing function to the Review column
df['Cleaned_Review'] = df['Review'].apply(preprocess_text)

# Check the cleaned reviews
print(df[['Review', 'Cleaned_Review']].head())


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\deepg\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\deepg\AppData\Roaming\nltk_data...


                                              Review  \
0        Love this product! It works wonders for me.   
1    Not happy with the quality. Broke after a week.   
2  It's okay, but I expected better based on the ...   
3    Amazing quality and excellent customer service!   
4           Terrible, would not recommend to anyone.   

                               Cleaned_Review  
0                    love product work wonder  
1                    happy quality broke week  
2            okay expected better based price  
3  amazing quality excellent customer service  
4             terrible would recommend anyone  


In [4]:
from textblob import TextBlob

# Function to get sentiment
def get_sentiment(text):
    # Get sentiment score
    sentiment = TextBlob(text).sentiment.polarity
    # Classify sentiment
    if sentiment > 0:
        return 'Positive'
    elif sentiment == 0:
        return 'Neutral'
    else:
        return 'Negative'

# Apply the function to the cleaned reviews
df['Sentiment'] = df['Cleaned_Review'].apply(get_sentiment)

# Check the sentiments
print(df[['Review', 'Sentiment']].head())


                                              Review Sentiment
0        Love this product! It works wonders for me.  Positive
1    Not happy with the quality. Broke after a week.  Positive
2  It's okay, but I expected better based on the ...  Positive
3    Amazing quality and excellent customer service!  Positive
4           Terrible, would not recommend to anyone.  Negative


In [5]:
print(df[['Review', 'Sentiment']].head())


                                              Review Sentiment
0        Love this product! It works wonders for me.  Positive
1    Not happy with the quality. Broke after a week.  Positive
2  It's okay, but I expected better based on the ...  Positive
3    Amazing quality and excellent customer service!  Positive
4           Terrible, would not recommend to anyone.  Negative
