<a href="https://colab.research.google.com/github/abhichiku18/Machine-Learning-Projects/blob/main/Product_Review_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Importing libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load data (replace with your dataset)
data = pd.read_csv('reviews.csv')

# Check if the column exists, and rename if necessary
if 'ReviewText' not in data.columns:
    if 'Review' in data.columns:
        data = data.rename(columns={'Review': 'ReviewText'})  # Renaming 'Review' to 'ReviewText'
    elif 'text' in data.columns:
        data = data.rename(columns={'text': 'ReviewText'})
    elif 'review' in data.columns:
        data = data.rename(columns={'review': 'ReviewText'})
    else:
        # If the column doesn't exist, raise an error
        raise KeyError(f"'ReviewText' column not found. Available columns: {data.columns.tolist()}")

# Data Preprocessing
# Convert all text to lowercase
data['ReviewText'] = data['ReviewText'].str.lower()

# Vectorization using TF-IDF
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
X = tfidf.fit_transform(data['ReviewText'])

# Label Encoding (Sentiment: Positive=1, Negative=0, Neutral=2)
y = data['Sentiment'].map({'Positive': 1, 'Negative': 0, 'Neutral': 2})

# Ensure no null values are present after mapping
if y.isnull().any():
    raise ValueError("Some sentiment values could not be mapped. Check the Sentiment column for unexpected values.")

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model: Logistic Regression
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 1.0
Confusion Matrix:
 [[30  0  0]
 [ 0 24  0]
 [ 0  0 24]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        30
           1       1.00      1.00      1.00        24
           2       1.00      1.00      1.00        24

    accuracy                           1.00        78
   macro avg       1.00      1.00      1.00        78
weighted avg       1.00      1.00      1.00        78



In [6]:
# Function to Generate Detailed Explanation for Predictions
def explain_prediction(review_text, predicted_sentiment, confidence=None):
    # Map the sentiment to a descriptive phrase
    sentiment_map = {1: "Positive", 0: "Negative", 2: "Neutral"}
    explanation = f"""
    **Review Text:**
    "{review_text}"

    **Predicted Sentiment:**
    {sentiment_map.get(predicted_sentiment, "Unknown Sentiment")}
    """
    # Add confidence score if available
    if confidence is not None:
        explanation += f"""
    **Confidence Score:**
    {confidence * 100:.2f}%
    """

    # Add reasoning
    if predicted_sentiment == 1:
        reasoning = "The review contains words with strong positive connotations indicating satisfaction."
    elif predicted_sentiment == 0:
        reasoning = "The review includes negative terms suggesting dissatisfaction or disappointment."
    elif predicted_sentiment == 2:
        reasoning = "The review uses neutral language, indicating a lack of strong opinion."
    else:
        reasoning = "Reasoning could not be determined for this sentiment."

    explanation += f"""
    **Reasoning:**
    {reasoning}
    """
    return explanation.strip()

# Generating Explanations for Test Set Predictions
print("\n--- Detailed Explanations for Test Predictions ---\n")
for review, sentiment in zip(X_test[:5], y_pred[:5]):  # Display explanations for first 5 reviews
    # Decode the TF-IDF transformed review (optional, just for demonstration)
    original_review = data.iloc[review.indices[0]]['ReviewText']  # Replace with appropriate index mapping

    # Get confidence score (if your model supports predict_proba)
    confidence_score = max(model.predict_proba(review)[0]) if hasattr(model, 'predict_proba') else None

    # Print explanation
    print(explain_prediction(original_review, sentiment, confidence_score))
    print("\n" + "-"*50 + "\n")


--- Detailed Explanations for Test Predictions ---

**Review Text:**  
    "the product was decent, but it didn't meet all of my expectations."
    
    **Predicted Sentiment:**  
    Negative
    
    **Confidence Score:**  
    35.59%
    
    **Reasoning:**  
    The review includes negative terms suggesting dissatisfaction or disappointment.

--------------------------------------------------

**Review Text:**  
    "the product was decent, but it didn't meet all of my expectations."
    
    **Predicted Sentiment:**  
    Positive
    
    **Confidence Score:**  
    91.05%
    
    **Reasoning:**  
    The review contains words with strong positive connotations indicating satisfaction.

--------------------------------------------------

**Review Text:**  
    "the product was decent, but it didn't meet all of my expectations."
    
    **Predicted Sentiment:**  
    Positive
    
    **Confidence Score:**  
    59.29%
    
    **Reasoning:**  
    The review contains words with