In [2]:
# ============================================================================
# E-COMMERCE AUTOML APPLICATION FOR CUSTOMER FEEDBACK ANALYSIS
# ============================================================================
# This comprehensive application combines AutoML, GenAI, and user-friendly UI
# for analyzing e-commerce customer reviews and ratings automatically
# ============================================================================

# CELL 1: Import Essential Libraries and Setup
# ============================================================================
"""
Cell 1: Core Library Imports and Environment Setup
--------------------------------------------------
This cell imports all necessary libraries for our AutoML application.
We organize imports by functionality for better maintainability.
"""

# Core Data Processing Libraries
import pandas as pd
import numpy as np
import sqlite3
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Streamlit for Web Application
import streamlit as st
from streamlit_option_menu import option_menu
import streamlit.components.v1 as components

# Machine Learning Libraries
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Text Processing Libraries
import nltk
import spacy
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import re
import string
from collections import Counter

# Visualization Libraries
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from wordcloud import WordCloud

# AutoML Libraries
try:
    from pycaret.classification import *
    from pycaret.regression import *
    PYCARET_AVAILABLE = True
except ImportError:
    PYCARET_AVAILABLE = False
    st.warning("PyCaret not available. Using basic ML models.")

# GenAI Integration Libraries
try:
    from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False
    st.warning("Transformers library not available. Using basic text analysis.")

# Utility Libraries
import io
import base64
from datetime import datetime
import json
import pickle
import time

print("‚úÖ All libraries imported successfully!")
print(f"üìä PyCaret Available: {PYCARET_AVAILABLE}")
print(f"ü§ñ Transformers Available: {TRANSFORMERS_AVAILABLE}")

2025-09-01 00:09:50.378 
  command:

    streamlit run C:\Users\sukha\AppData\Roaming\Python\Python313\site-packages\ipykernel_launcher.py [ARGUMENTS]


‚úÖ All libraries imported successfully!
üìä PyCaret Available: False
ü§ñ Transformers Available: True


In [3]:
# CELL 2: Download Required NLTK Data and Setup NLP Components
# ============================================================================
"""
Cell 2: NLP Resources Setup
---------------------------
Downloads necessary NLTK data and initializes NLP components.
This ensures all text processing capabilities are available.
"""

# Download required NLTK data
nltk_downloads = [
    'punkt', 'stopwords', 'vader_lexicon', 'wordnet', 
    'omw-1.4', 'averaged_perceptron_tagger'
]

for resource in nltk_downloads:
    try:
        nltk.download(resource, quiet=True)
        print(f"‚úÖ Downloaded {resource}")
    except Exception as e:
        print(f"‚ùå Failed to download {resource}: {e}")

# Initialize NLP components
try:
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize
    from nltk.stem import WordNetLemmatizer
    
    # Initialize components
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    analyzer = SentimentIntensityAnalyzer()
    
    print("‚úÖ NLTK components initialized successfully!")
    
except Exception as e:
    print(f"‚ùå Error initializing NLTK components: {e}")
    # Fallback to basic processing
    stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'])
    lemmatizer = None
    analyzer = None

# Initialize spaCy (optional, with fallback)
try:
    # Try to load spaCy model
    nlp = spacy.load("en_core_web_sm")
    print("‚úÖ spaCy model loaded successfully!")
    SPACY_AVAILABLE = True
except OSError:
    print("‚ö†Ô∏è spaCy model not found. Using NLTK only.")
    nlp = None
    SPACY_AVAILABLE = False

# Initialize Hugging Face models (optional, with fallback)
if TRANSFORMERS_AVAILABLE:
    try:
        # Initialize sentiment analysis pipeline
        sentiment_pipeline = pipeline("sentiment-analysis", 
                                     model="cardiffnlp/twitter-roberta-base-sentiment-latest",
                                     return_all_scores=True)
        print("‚úÖ Hugging Face sentiment model loaded!")
        HF_SENTIMENT_AVAILABLE = True
    except Exception as e:
        print(f"‚ö†Ô∏è Could not load Hugging Face model: {e}")
        sentiment_pipeline = None
        HF_SENTIMENT_AVAILABLE = False
else:
    sentiment_pipeline = None
    HF_SENTIMENT_AVAILABLE = False

print(f"\nüìä NLP Setup Summary:")
print(f"   - NLTK: ‚úÖ")
print(f"   - spaCy: {'‚úÖ' if SPACY_AVAILABLE else '‚ùå'}")
print(f"   - Hugging Face: {'‚úÖ' if HF_SENTIMENT_AVAILABLE else '‚ùå'}")


‚úÖ Downloaded punkt
‚úÖ Downloaded stopwords
‚úÖ Downloaded vader_lexicon
‚úÖ Downloaded wordnet
‚úÖ Downloaded omw-1.4
‚úÖ Downloaded averaged_perceptron_tagger
‚úÖ NLTK components initialized successfully!
‚ö†Ô∏è spaCy model not found. Using NLTK only.


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


‚úÖ Hugging Face sentiment model loaded!

üìä NLP Setup Summary:
   - NLTK: ‚úÖ
   - spaCy: ‚ùå
   - Hugging Face: ‚úÖ


In [4]:
# CELL 3: Configuration and Constants Setup
# ============================================================================
"""
Cell 3: Application Configuration and Constants
----------------------------------------------
Defines all configuration parameters, constants, and global settings
for the AutoML application.
"""

# Application Configuration
APP_CONFIG = {
    "app_name": "E-commerce AutoML Analyzer",
    "version": "1.0.0",
    "description": "Automated Machine Learning for Customer Feedback Analysis",
    "max_file_size": 200,  # MB
    "supported_formats": [".csv", ".xlsx", ".json"],
    "default_test_size": 0.2,
    "random_state": 42
}

# Streamlit Page Configuration
st.set_page_config(
    page_title=APP_CONFIG["app_name"],
    page_icon="üõí",
    layout="wide",
    initial_sidebar_state="expanded"
)

# CSS Styling for Better UI
st.markdown("""
<style>
    /* Main container styling */
    .main {
        padding-top: 2rem;
    }
    
    /* Custom header styling */
    .custom-header {
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        padding: 1rem;
        border-radius: 10px;
        color: white;
        text-align: center;
        margin-bottom: 2rem;
    }
    
    /* Metric cards styling */
    .metric-card {
        background: white;
        padding: 1rem;
        border-radius: 10px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        border-left: 4px solid #667eea;
    }
    
    /* Success/Error message styling */
    .success-msg {
        background-color: #d4edda;
        border-color: #c3e6cb;
        color: #155724;
        padding: 1rem;
        border-radius: 5px;
        border-left: 4px solid #28a745;
    }
    
    .error-msg {
        background-color: #f8d7da;
        border-color: #f5c6cb;
        color: #721c24;
        padding: 1rem;
        border-radius: 5px;
        border-left: 4px solid #dc3545;
    }
    
    /* Sidebar styling */
    .css-1d391kg {
        background-color: #f8f9fa;
    }
    
    /* Button styling */
    .stButton > button {
        width: 100%;
        border-radius: 20px;
        border: none;
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        color: white;
    }
    
    .stButton > button:hover {
        background: linear-gradient(90deg, #764ba2 0%, #667eea 100%);
    }
</style>
""", unsafe_allow_html=True)

# Model Configuration
ML_CONFIG = {
    "classification_models": {
        "Logistic Regression": LogisticRegression(random_state=APP_CONFIG["random_state"]),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=APP_CONFIG["random_state"]),
        "Naive Bayes": MultinomialNB(),
        "Decision Tree": DecisionTreeClassifier(random_state=APP_CONFIG["random_state"]),
        "SVM": SVC(random_state=APP_CONFIG["random_state"])
    },
    "regression_models": {
        "Random Forest": RandomForestRegressor(n_estimators=100, random_state=APP_CONFIG["random_state"]),
        "Decision Tree": DecisionTreeRegressor(random_state=APP_CONFIG["random_state"])
    }
}

# Text Processing Configuration
TEXT_CONFIG = {
    "max_features": 5000,
    "min_df": 2,
    "max_df": 0.95,
    "ngram_range": (1, 2),
    "stop_words": list(stop_words) if stop_words else None,
    "min_word_length": 2,
    "max_word_length": 50
}

# Visualization Configuration
VIZ_CONFIG = {
    "color_palette": ["#667eea", "#764ba2", "#f093fb", "#f5576c", "#4facfe", "#00f2fe"],
    "figure_size": (12, 8),
    "dpi": 300,
    "style": "whitegrid"
}

# GenAI Configuration
GENAI_CONFIG = {
    "max_text_length": 512,
    "confidence_threshold": 0.7,
    "batch_size": 16
}

# Database Configuration
DB_CONFIG = {
    "db_path": "data/automl_results.db",
    "tables": {
        "datasets": "datasets",
        "models": "trained_models",
        "results": "model_results"
    }
}

# Ensure data directory exists
Path("data").mkdir(exist_ok=True)
Path("models").mkdir(exist_ok=True)
Path("reports").mkdir(exist_ok=True)

print("‚úÖ Configuration setup completed!")
print(f"üìä App: {APP_CONFIG['app_name']} v{APP_CONFIG['version']}")
print(f"üé® Theme colors: {VIZ_CONFIG['color_palette'][:3]}")



NameError: name 'DecisionTreeRegressor' is not defined