In [None]:
!pip install streamlit flask scikit-learn ibm-watson tensorflow nltk wordcloud seaborn matplotlib datasets pyngrok



In [None]:
!pip install --upgrade datasets fsspec huggingface_hub
import streamlit as st
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from nltk.corpus import stopwords
import nltk
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.neural_network import MLPClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib
from datasets import load_dataset
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson.natural_language_understanding_v1 import Features, SentimentOptions, KeywordsOptions, EntitiesOptions

# --- Setup ---
import os
from getpass import getpass

# Fetch the NLTK English stop-word list that TextCleaner loads in its constructor.
nltk.download('stopwords')

# IBM Watson NLU Setup
# Credentials must never be stored in the notebook. They are read from the environment
# (IBM_NLU_APIKEY / IBM_NLU_URL) and only prompted for when the variables are not set.
# NOTE(review): the API key that used to be hardcoded here should be treated as leaked
# and rotated in the IBM Cloud console.
apikey = os.environ.get('IBM_NLU_APIKEY') or getpass('IBM Watson NLU API key: ')
url = os.environ.get('IBM_NLU_URL') or input('IBM Watson NLU service URL: ')

# Publish the values in this process's environment so that child processes started
# from the kernel inherit them instead of needing their own copy.
os.environ.setdefault('IBM_NLU_APIKEY', apikey)
os.environ.setdefault('IBM_NLU_URL', url)

authenticator = IAMAuthenticator(apikey)
nlu = NaturalLanguageUnderstandingV1(version='2021-08-01', authenticator=authenticator)
nlu.set_service_url(url)

# Text Cleaner
class TextCleaner:
    """Strip punctuation, lower-case text and drop English stop words.

    The stop-word list is read from NLTK once, when the instance is created.
    """

    def __init__(self):
        # A set gives constant-time membership checks in clean_text().
        self.stop_words = set(stopwords.words('english'))

    def clean_text(self, text):
        """Return a cleaned, single-space-separated version of ``text``.

        Every character that is not an ASCII letter, a digit or whitespace is
        deleted (not replaced by a space), the result is lower-cased, and any
        whitespace-separated token found in ``self.stop_words`` is removed.
        """
        alnum_only = re.sub(r"[^a-zA-Z0-9\s]", "", text)
        kept_tokens = []
        for token in alnum_only.lower().split():
            if token in self.stop_words:
                continue
            kept_tokens.append(token)
        return " ".join(kept_tokens)

cleaner = TextCleaner()

# Data Load & Preprocess
# First 1000 rows of the amazon_polarity training split; 'content' holds the review
# text and 'label' the target the classifiers are trained on.
dataset = load_dataset("amazon_polarity", split='train[:1000]')
df = pd.DataFrame({'review': dataset['content'], 'label': dataset['label']})
df['review_clean'] = df['review'].apply(cleaner.clean_text)

vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(df['review_clean'])
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


def _score_predictions(y_true, y_pred):
    """Return accuracy, precision, recall and F1 for one set of predictions.

    The keys are the labels shown in the sidebar, so every model is reported
    with exactly the same four metrics.
    """
    return {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred),
        'Recall': recall_score(y_true, y_pred),
        'F1 Score': f1_score(y_true, y_pred)
    }


# Models
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    # Seeded like the random forest so the reported scores are repeatable.
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Deep Learning': None  # Placeholder: the Keras network is trained separately below
}

# Deep Learning Model
# Small fully connected network with one sigmoid output, thresholded at 0.5.
# NOTE(review): the network's weight initialisation is not seeded, so its scores
# can differ slightly from run to run.
deep_model = Sequential([
    Dense(64, activation='relu', input_dim=X_train.shape[1]),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])
deep_model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
deep_model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=0)
y_pred_deep = (deep_model.predict(X_test) > 0.5).astype(int).flatten()

# Training classical models
model_scores = {}
for name, model in models.items():
    if model is None:
        # The placeholder entry has no scikit-learn estimator to fit.
        continue
    model.fit(X_train, y_train)
    model_scores[name] = _score_predictions(y_test, model.predict(X_test))
    # Persist each fitted estimator as e.g. "Logistic_Regression.joblib".
    joblib.dump(model, f"{name.replace(' ', '_')}.joblib")

# Add Deep Learning scores
model_scores['Deep Learning'] = _score_predictions(y_test, y_pred_deep)

# Streamlit UI: page heading followed by the free-text box the review is typed into.
st.title("Cognitive Customer Insights")
review = st.text_area(label="Enter Product Review:", height=150)

def analyze_customer_review(review_text):
    """Run Watson NLU on one review and return its sentiment, keywords and entity types.

    Sentiment, keywords and entities are requested together in a single
    ``nlu.analyze`` call, so each review costs one network round-trip
    instead of three.

    Args:
        review_text: The text to analyse.

    Returns:
        A ``(sentiment, keywords, intent)`` tuple:
        - ``sentiment``: the document-level sentiment label from the response.
        - ``keywords``: list of the ``text`` of each returned keyword.
        - ``intent``: list of the ``type`` of each returned entity, or
          ``['Unknown']`` when the response contains no entities.
    """
    requested_features = Features(
        sentiment=SentimentOptions(),
        keywords=KeywordsOptions(),
        entities=EntitiesOptions(),
    )
    response = nlu.analyze(text=review_text, features=requested_features).get_result()

    sentiment = response['sentiment']['document']['label']
    keywords = [kw['text'] for kw in response.get('keywords', [])]
    # The "intent" shown in the UI is approximated by the detected entity types.
    intent = [entity['type'] for entity in response.get('entities', [])] or ['Unknown']
    return sentiment, keywords, intent

# Per-session history of everything analysed so far. The four lists are kept in
# lock-step: index i of each list belongs to the i-th analysed review.
if 'sentiment_results' not in st.session_state:
    st.session_state.sentiment_results = []
    st.session_state.keyword_results = []
    st.session_state.intent_results = []
    st.session_state.reviews = []

if st.button("Analyze"):
    if not review.strip():
        # Nothing to analyse: avoid sending an empty document to the NLU service.
        st.warning("Please enter a review before analyzing.")
    else:
        with st.spinner("Analyzing..."):
            # Send the review exactly as typed. Running it through cleaner.clean_text()
            # first would strip punctuation and stop words such as "not", which can
            # invert the sentiment the service reports.
            sentiment, keywords, intent = analyze_customer_review(review)
            st.session_state.reviews.append(review)
            st.session_state.sentiment_results.append(sentiment)
            st.session_state.keyword_results.append(keywords)
            st.session_state.intent_results.append(intent)

        st.header("Analysis Results")
        tab1, tab2, tab3, tab4 = st.tabs(["Sentiment", "Keywords", "Intent", "Review History"])

        with tab1:
            st.subheader("Sentiment")
            st.markdown(f"**Sentiment:** {sentiment}")
            # Pie chart of the sentiment labels seen so far in this session.
            counts = pd.Series(st.session_state.sentiment_results).value_counts()
            fig, ax = plt.subplots()
            ax.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90, colors=sns.color_palette('pastel'))
            st.pyplot(fig)
            plt.close(fig)  # release the figure so repeated analyses do not accumulate them

        with tab2:
            st.subheader("Keywords")
            if keywords:
                st.write(", ".join(keywords))
                wordcloud = WordCloud(width=800, height=400).generate(" ".join(keywords))
                fig_wc, ax_wc = plt.subplots()
                ax_wc.imshow(wordcloud, interpolation='bilinear')
                ax_wc.axis("off")
                st.pyplot(fig_wc)
                plt.close(fig_wc)
            else:
                # WordCloud.generate() raises when it is given no words at all.
                st.info("No keywords were detected for this review.")

        with tab3:
            st.subheader("Intents")
            st.write(", ".join(intent))

        with tab4:
            for i, text in enumerate(st.session_state.reviews):
                st.markdown(f"**Review {i+1}:** {text}")
                st.markdown(f"* Sentiment: {st.session_state.sentiment_results[i]}")
                st.markdown(f"* Keywords: {', '.join(st.session_state.keyword_results[i])}")
                st.markdown(f"* Intent: {', '.join(st.session_state.intent_results[i])}")
                st.write("---")

# Sidebar - Evaluation
# One sub-section per model, each metric printed with two decimals.
sidebar = st.sidebar
sidebar.header("📊 Model Scores")
for model_name, scores in model_scores.items():
    sidebar.subheader(model_name)
    for metric, val in scores.items():
        sidebar.write("{}: {:.2f}".format(metric, val))

# Sidebar - Keyword Trends
# Flatten the per-review keyword lists and show the ten most frequent entries.
sidebar.subheader("🔑 Top Keywords")
all_keywords = []
for kws in st.session_state.keyword_results:
    all_keywords.extend(kws)
top_keywords = pd.Series(all_keywords).value_counts().head(10)
sidebar.write(top_keywords)

# Sidebar - EDA
# Histogram of the character length of every raw review in the training frame.
sidebar.subheader("📈 Review Lengths")
df['length'] = df['review'].map(len)
fig_len, ax_len = plt.subplots()
sns.histplot(df['length'], bins=50, ax=ax_len)
ax_len.set_title("Review Length Distribution")
sidebar.pyplot(fig_len)


Collecting fsspec
  Downloading fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step




DeltaGenerator(_root_container=1, _parent=DeltaGenerator())

In [None]:
!pip install pyngrok --quiet
!ngrok config add-authtoken 2wqR4SuYXL9hdSuM8erxVRpqfge_5h41rjqkbcm3oMtei2cS3
from pyngrok import ngrok
# Create a tunnel to the Streamlit app
public_url = ngrok.connect(8501)
print("Public URL:", public_url)

# Save the Streamlit app to a file (app.py).
# The file is written as UTF-8 explicitly because the source contains emoji.
# Compared with running the same code inline, the generated app:
#   * reads the Watson credentials from IBM_NLU_APIKEY / IBM_NLU_URL instead of embedding them,
#   * trains the models once and caches the result (Streamlit re-executes the script on
#     every interaction),
#   * sends the review to Watson NLU as typed, in a single request,
#   * guards against empty input / empty keyword lists and closes its figures.
with open('app.py', 'w', encoding='utf-8') as f:
  f.write('''
import os
import streamlit as st
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from nltk.corpus import stopwords
import nltk
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.neural_network import MLPClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib
from datasets import load_dataset
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson.natural_language_understanding_v1 import Features, SentimentOptions, KeywordsOptions, EntitiesOptions

nltk.download('stopwords')

# Custom text cleaner
class TextCleaner:
    """Strip punctuation, lower-case text and drop English stop words."""

    def __init__(self):
        self.stop_words = set(stopwords.words('english'))

    def clean_text(self, text):
        """Return the cleaned, single-space-separated version of the text."""
        text = re.sub(r"[^a-zA-Z0-9\\s]", "", text)
        text = text.lower()
        return " ".join([word for word in text.split() if word not in self.stop_words])

cleaner = TextCleaner()

# IBM Watson NLU setup
# Credentials come from the environment so that no secret is written into app.py.
apikey = os.environ.get('IBM_NLU_APIKEY')
url = os.environ.get('IBM_NLU_URL')
if not apikey or not url:
    st.error("Set the IBM_NLU_APIKEY and IBM_NLU_URL environment variables before starting the app.")
    st.stop()
authenticator = IAMAuthenticator(apikey)
nlu = NaturalLanguageUnderstandingV1(version='2021-08-01', authenticator=authenticator)
nlu.set_service_url(url)


def score_predictions(y_true, y_pred):
    """Return accuracy, precision, recall and F1 for one set of predictions."""
    return {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred),
        'Recall': recall_score(y_true, y_pred),
        'F1 Score': f1_score(y_true, y_pred)
    }


@st.cache_data
def train_and_evaluate():
    """Load the data, train every model once and return (df, model_scores).

    Streamlit re-runs this script on each widget interaction; caching keeps the
    dataset download and all model training from being repeated every time.
    """
    # Data Load & Preprocess
    dataset = load_dataset("amazon_polarity", split='train[:1000]')
    df = pd.DataFrame({'review': dataset['content'], 'label': dataset['label']})
    df['review_clean'] = df['review'].apply(cleaner.clean_text)
    df['length'] = df['review'].apply(len)

    vectorizer = TfidfVectorizer(stop_words='english')
    X = vectorizer.fit_transform(df['review_clean'])
    y = df['label']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Models
    models = {
        'Logistic Regression': LogisticRegression(max_iter=1000),
        'Decision Tree': DecisionTreeClassifier(random_state=42),
        'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    }

    # Deep Learning Model
    deep_model = Sequential([
        Dense(64, activation='relu', input_dim=X_train.shape[1]),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    deep_model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    deep_model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=0)
    y_pred_deep = (deep_model.predict(X_test) > 0.5).astype(int).flatten()

    # Training classical models
    model_scores = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        model_scores[name] = score_predictions(y_test, model.predict(X_test))
        joblib.dump(model, f"{name.replace(' ', '_')}.joblib")

    # Add Deep Learning scores
    model_scores['Deep Learning'] = score_predictions(y_test, y_pred_deep)
    return df, model_scores


df, model_scores = train_and_evaluate()

st.title("Cognitive Customer Insights")
review = st.text_area("Enter Product Review:", height=150)

def analyze_customer_review(review_text):
    """Return (sentiment label, keyword texts, entity types) for one review.

    All three features are requested in a single Watson NLU call.
    """
    response = nlu.analyze(
        text=review_text,
        features=Features(
            sentiment=SentimentOptions(),
            keywords=KeywordsOptions(),
            entities=EntitiesOptions(),
        ),
    ).get_result()
    sentiment = response['sentiment']['document']['label']
    keywords = [kw['text'] for kw in response.get('keywords', [])]
    intent = [entity['type'] for entity in response.get('entities', [])] or ['Unknown']
    return sentiment, keywords, intent

# Per-session history; index i of each list belongs to the i-th analysed review.
if 'sentiment_results' not in st.session_state:
    st.session_state.sentiment_results = []
    st.session_state.keyword_results = []
    st.session_state.intent_results = []
    st.session_state.reviews = []

if st.button("Analyze"):
    if not review.strip():
        st.warning("Please enter a review before analyzing.")
    else:
        with st.spinner("Analyzing..."):
            # The raw review is sent: stop-word removal would drop words such as "not"
            # and can invert the reported sentiment.
            sentiment, keywords, intent = analyze_customer_review(review)
            st.session_state.reviews.append(review)
            st.session_state.sentiment_results.append(sentiment)
            st.session_state.keyword_results.append(keywords)
            st.session_state.intent_results.append(intent)

        st.header("Analysis Results")
        tab1, tab2, tab3, tab4 = st.tabs(["Sentiment", "Keywords", "Intent", "Review History"])

        with tab1:
            st.subheader("Sentiment")
            st.markdown(f"**Sentiment:** {sentiment}")
            counts = pd.Series(st.session_state.sentiment_results).value_counts()
            fig, ax = plt.subplots()
            ax.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90, colors=sns.color_palette('pastel'))
            st.pyplot(fig)
            plt.close(fig)

        with tab2:
            st.subheader("Keywords")
            if keywords:
                st.write(", ".join(keywords))
                wordcloud = WordCloud(width=800, height=400).generate(" ".join(keywords))
                fig_wc, ax_wc = plt.subplots()
                ax_wc.imshow(wordcloud, interpolation='bilinear')
                ax_wc.axis("off")
                st.pyplot(fig_wc)
                plt.close(fig_wc)
            else:
                # WordCloud.generate() raises when it is given no words at all.
                st.info("No keywords were detected for this review.")

        with tab3:
            st.subheader("Intents")
            st.write(", ".join(intent))

        with tab4:
            for i, text in enumerate(st.session_state.reviews):
                st.markdown(f"**Review {i+1}:** {text}")
                st.markdown(f"* Sentiment: {st.session_state.sentiment_results[i]}")
                st.markdown(f"* Keywords: {', '.join(st.session_state.keyword_results[i])}")
                st.markdown(f"* Intent: {', '.join(st.session_state.intent_results[i])}")
                st.write("---")

# Sidebar - Evaluation
st.sidebar.header("📊 Model Scores")
for model_name, scores in model_scores.items():
    st.sidebar.subheader(model_name)
    for metric, val in scores.items():
        st.sidebar.write(f"{metric}: {val:.2f}")

# Sidebar - Keyword Trends
st.sidebar.subheader("🔑 Top Keywords")
all_keywords = [kw for kws in st.session_state.keyword_results for kw in kws]
st.sidebar.write(pd.Series(all_keywords).value_counts().head(10))

# Sidebar - EDA
st.sidebar.subheader("📈 Review Lengths")
fig_len, ax_len = plt.subplots()
sns.histplot(df['length'], bins=50, ax=ax_len)
ax_len.set_title("Review Length Distribution")
st.sidebar.pyplot(fig_len)
plt.close(fig_len)
''')
# Public URL of the app.
# The tunnel to port 8501 was already opened earlier in this cell (`public_url`), after
# the authtoken had been configured, so it is reused here rather than opening a duplicate
# tunnel or setting the token a second time. `print` is used because this cell runs in
# the notebook kernel, not inside a Streamlit session.
print("Public URL:", public_url)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml




Public URL: NgrokTunnel: "https://0bec-34-139-86-43.ngrok-free.app" -> "http://localhost:8501"


In [None]:
# Run the Streamlit app
!streamlit run app.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.139.86.43:8501[0m
[0m
2025-05-16 08:41:01.945590: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747384861.988197   22790 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747384862.001750   22790 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is 