In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import numpy as np
from joblib import load
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import nltk

# Download NLTK resources (if not already downloaded)
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

# Load the dataset
data = pd.read_csv("booking_reviews copy.csv")

# Preprocess the text data
def preprocess_text(text):
    # Convert text to lowercase
    text = text.lower()
    
    # Remove punctuation and special characters
    text = re.sub(r"[^a-zA-Z0-9]", " ", text)
    
    # Tokenize text
    words = word_tokenize(text)
    
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_words]
    
    # Lemmatize words
    lemmatizer = WordNetLemmatizer()
    words = [lemmatizer.lemmatize(word) for word in words]
    
    # Join words back into text
    processed_text = " ".join(words)
    
    return processed_text

# Initialize the TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer()

# Fit and transform the preprocessed text reviews
tfidf_features = tfidf_vectorizer.fit_transform(data['review_text'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(tfidf_features, data['sentiment'], test_size=0.2, random_state=42)

# Load the logistic regression model
logistic_model = load('logistic_regression_model.joblib')

# Main loop for continuous interaction
while True:
    # Prompt the user for input
    user_input = input("Enter your review (type 'exit' to quit): ")
    
    # Check if the user wants to exit
    if user_input.lower() == 'exit':
        print("Exiting...")
        break
    
    # Preprocess the input
    processed_input = preprocess_text(user_input)
    
    # Transform the preprocessed input using the TF-IDF vectorizer
    tfidf_input = tfidf_vectorizer.transform([processed_input])
    
    # Predict sentiment
    prediction = logistic_model.predict(tfidf_input)
    
    # Display the sentiment prediction
    print("Predicted sentiment:", prediction[0])
# Assuming 'data' is your DataFrame containing the reviews

# Preprocess the review text
data['processed_review'] = data['review'].apply(preprocess)

# Vectorize the preprocessed review text
tfidf_features = tfidf_vectorizer.transform(data['processed_review'])

# Predict sentiment labels using your trained model
predicted_sentiments = logistic_model.predict(tfidf_features)

# Add the predicted sentiment labels as a new column in the DataFrame
data['predicted_sentiment'] = predicted_sentiments


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ReNew\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ReNew\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ReNew\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


KeyError: 'sentiment'