### Sentiment Classification on user input 

#### Importing the required libraries

In [1]:
import re
import nltk
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.base import BaseEstimator, TransformerMixin

# Fetch the NLTK data used below: the WordNet corpus (for WordNetLemmatizer)
# and the stop-word lists (for stopwords.words). Packages that are already
# present are reported as up-to-date rather than downloaded again.
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\dines\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\dines\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

### Defining a class to clean the raw review text

In [2]:
class TextPreprocessor(BaseEstimator, TransformerMixin):
    """Clean raw review text into a space-separated string of lemmatized tokens.

    Cleaning steps, in order: lowercase, replace ``<br>`` tags with a space,
    delete every character that is not an ASCII letter, digit or whitespace,
    collapse runs of spaces, delete digits, drop English stop words, and
    lemmatize each remaining token.

    ``fit`` and ``transform`` are provided so the class honours the
    transformer interface it inherits from ``TransformerMixin``; the cleaning
    itself is stateless, so ``fit`` learns nothing.
    """

    def __init__(self):
        self.lemmatizer = WordNetLemmatizer()
        # A set gives constant-time membership checks in _remove_stopwords.
        self.stop_words = set(stopwords.words('english'))

    def fit(self, X, y=None):
        """Do nothing and return ``self``; present for transformer compatibility."""
        return self

    def transform(self, X):
        """Clean every text in ``X``.

        Args:
            X: Iterable of raw text strings.

        Returns:
            list[str]: The cleaned texts, in the same order as ``X``.
        """
        return [self._preprocess_text(text) for text in X]

    def _preprocess_text(self, text):
        """Run the full cleaning sequence on one string and return the result."""
        text = self._convert_to_lowercase(text)
        # Tags must go before special characters are stripped; otherwise
        # "<br />" would lose its brackets and leave a stray "br" token.
        text = self._remove_br_tags(text)
        text = self._remove_special_characters(text)
        text = self._remove_extra_whitespace(text)
        text = self._remove_numbers(text)
        # The two token-based steps below split on any whitespace and re-join
        # with single spaces, so gaps left by digit removal are normalized.
        text = self._remove_stopwords(text)
        text = self._lemmatize_text(text)
        return text

    def _convert_to_lowercase(self, text):
        """Return ``text`` lowercased."""
        return text.lower()

    def _remove_br_tags(self, text):
        """Replace each run of one or more ``<br>`` / ``<br/>`` / ``<br />`` tags with a space."""
        # Previously only two consecutive tags matched, so a lone tag survived
        # and was later reduced to the junk token "br".
        pattern = r'(?:<br\s*/?>)+'
        return re.sub(pattern, ' ', text)

    def _remove_special_characters(self, text):
        """Delete every character that is not an ASCII letter, digit or whitespace.

        Characters are deleted, not replaced by a space, so "don't" becomes
        "dont" and words separated only by punctuation are joined.
        """
        # "A-Z", not "A-z": the latter range also spans [ \ ] ^ _ and `,
        # which would then be kept instead of removed.
        pattern = r'[^a-zA-Z0-9\s]'
        return re.sub(pattern, '', text)

    def _remove_extra_whitespace(self, text):
        """Collapse runs of spaces to one space and strip both ends."""
        return re.sub(' +', ' ', text).strip()

    def _remove_numbers(self, text):
        """Delete every run of digits."""
        pattern = r'\d+'
        return re.sub(pattern, '', text)

    def _remove_stopwords(self, text):
        """Drop whitespace-separated tokens that appear in ``self.stop_words``."""
        tokens = text.split()
        filtered_tokens = [word for word in tokens if word not in self.stop_words]
        return ' '.join(filtered_tokens)

    def _lemmatize_text(self, text):
        """Lemmatize each whitespace-separated token and re-join with single spaces."""
        tokens = text.split()
        # lemmatize() is called without a part-of-speech argument.
        lemmatized_tokens = [self.lemmatizer.lemmatize(token) for token in tokens]
        return ' '.join(lemmatized_tokens)
    
    

### Taking input from the user

In [14]:
# Read one review from standard input; input() already returns a str.
text = input()

# Run the review through the cleaning steps defined in TextPreprocessor.
text_pro = TextPreprocessor()
cln_text = text_pro._preprocess_text(text)

# Bare last expression so the notebook displays the cleaned text.
cln_text

"I recently watched a movie called 'The Last Laugh'. I have to say, it was absolutely terrible. The acting was wooden, the plot was nonsensical, and I found myself checking my watch every few minutes, hoping it would end soon. Overall, I would not recommend this movie to anyone."


'recently watched movie called last laugh say absolutely terrible acting wooden plot nonsensical found checking watch every minute hoping would end soon overall would recommend movie anyone'

### Loading the serialized model and vectors

In [4]:
import pickle

# Locations of the pickled artifacts, relative to the working directory.
MODEL_PATH = "imdb_sentiment_logistic.pkl"
VECTORS_PATH = "vectors_trained.pkl"

# SECURITY: pickle.load can execute arbitrary code while deserializing.
# Only load these files if they come from a source you trust.
with open(MODEL_PATH, "rb") as model_file:
    model = pickle.load(model_file)

with open(VECTORS_PATH, "rb") as vectors_file:
    vectors_loaded = pickle.load(vectors_file)

In [15]:
# Turn the cleaned review into features; it is wrapped in a list so that a
# single document is passed as a one-element batch.
features = vectors_loaded.transform([cln_text])

# Take the prediction for that single document.
predicted_label = model.predict(features)[0]

# NOTE(review): .capitalize() assumes the model's labels are strings — confirm.
result = predicted_label.capitalize()

In [16]:
# Show the capitalized label predicted for the entered review.
print(result)

Negative
