In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply matplotlib's 'ggplot' style sheet to the figures drawn after this point.
plt.style.use('ggplot')

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# A suite of libraries and programs for symbolic and statistical natural language processing for English
# It supports classification, tokenization, stemming, tagging, parsing, and semantic reasoning functionalities

import nltk
import shutil

# Start from a clean slate: remove any NLTK data left over from earlier runs.
# ignore_errors=True makes this a no-op when the directory does not exist.
nltk_data_path = "/root/nltk_data"
shutil.rmtree(nltk_data_path, ignore_errors=True)

# Fetch the tokenizer and tagger packages again.
for _package in ('punkt', 'averaged_perceptron_tagger'):
    nltk.download(_package)

# The lexicon download stays a bare trailing expression so the cell still
# displays its return value.
nltk.download('vader_lexicon')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
# Load the reviews CSV from the mounted Drive; lines that cannot be parsed are skipped.
reviews_csv = '/content/drive/MyDrive/data/Amazon_reviews.csv'
df = pd.read_csv(reviews_csv, on_bad_lines='skip')
print(df.shape)

# Keep only the first 500 rows for the rest of the notebook.
df = df.iloc[:500]
print(df.shape)


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/data/Amazon_reviews.csv'

In [None]:
# Preview the first rows of the loaded reviews.
df.head()

In [None]:
# Quick EDA - how many reviews fall under each star rating, ordered by rating.
score_counts = df['Score'].value_counts().sort_index()

print("\nCount of Reviews by Stars:")
print(score_counts)

# Same counts as a bar chart.
ax = score_counts.plot(
    kind='bar',
    title='Count of Reviews by Stars',
    figsize=(10, 5),
)
ax.set_xlabel('Review Stars')
plt.show()

In [None]:
# Basic NLTK: take the review text stored under index label 50 as a running example.
example = df.loc[50, 'Text']
print(example)

In [None]:
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm

# Create the sentiment analyzer once; the following cells reuse this `sia` instance.
sia = SentimentIntensityAnalyzer()

In [None]:
# Quick check of the analyzer on a short hand-written sentence.
sia.polarity_scores('I hate FPT')

In [None]:
# Score the text currently held in `example`.
sia.polarity_scores(example)

In [None]:
# Run the polarity score (VADER) on the entire dataset.
# `res` maps each review Id to the dict returned by sia.polarity_scores().
# Only the two columns that are needed are iterated, rather than building a
# full row Series per record with iterrows().
res = {}
for myid, text in tqdm(zip(df['Id'], df['Text']), total=len(df)):
    res[myid] = sia.polarity_scores(text)

# One row per review Id, one column per score key.
vaders = pd.DataFrame.from_dict(res, orient='index')
vaders = vaders.rename_axis('Id').reset_index()

# Attach the original review columns. The join key is spelled out because,
# without `on=`, merge() joins on every column name the two frames share; a
# column in `df` that happened to be named like a score key would then
# silently become part of the key.
vaders = vaders.merge(df, how='left', on='Id')

In [None]:
# `vaders` now holds the sentiment scores merged with the review columns from `df`;
# preview the first rows.
vaders.head()

In [None]:
# Plot VADER results: compound score against star rating.
ax = sns.barplot(x='Score', y='compound', data=vaders)
ax.set_title('Compound Score by Amazon Star Review')
plt.show()

# Mean of each sentiment column per star rating.
mean_by_star = vaders.groupby('Score')[['compound', 'pos', 'neu', 'neg']].mean()
print("\nAverage Sentiment Scores by Review Star:")
print(mean_by_star)

In [None]:
# Detailed sentiment plots: one panel per score column, side by side.
fig, axs = plt.subplots(1, 3, figsize=(12, 3))
panels = (('pos', 'Positive'), ('neu', 'Neutral'), ('neg', 'Negative'))
for panel_ax, (column, heading) in zip(axs, panels):
    sns.barplot(data=vaders, x='Score', y=column, ax=panel_ax)
    panel_ax.set_title(heading)
plt.tight_layout()
plt.show()

# The same three columns as numbers: mean per star rating.
component_means = vaders.groupby('Score')[['pos', 'neu', 'neg']].mean()
print("\nSentiment Scores by Review Star:")
print(component_means)

In [None]:
# Example Sentiment Score: analyze one hand-written sentence and show the result.
example = 'I am so happy!'
print(f"\nExample Sentence: {example}")
example_scores = sia.polarity_scores(example)
print(f"Sentiment Analysis: {example_scores}")

In [None]:
!pip install flask nltk pyngrok

from flask import Flask, render_template, request
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from pyngrok import ngrok
import threading

from pyngrok import ngrok
ngrok.kill()  # This terminates all active tunnels
# Set your ngrok auth token
ngrok.set_auth_token("2nkl6K2j1sBKduiqbI8wK12OZ7b_5kMfodVD2ugziLMNL1q66")
public_url = ngrok.connect(5000)
print(" * ngrok tunnel available at:", public_url)

# Initialize Flask app
app = Flask(__name__, static_folder="static")

# Download VADER lexicon and initialize the sentiment analyzer
nltk.download('vader_lexicon', quiet=True)
sia = SentimentIntensityAnalyzer()

# Favicon route to avoid 404 errors
@app.route('/favicon.ico')
def favicon():
    return '', 204

# Home route for sentiment analysis
@app.route('/', methods=['GET', 'POST'])
def home():
    sentiment_result = None
    input_text = ""

    if request.method == 'POST':
        # Check if the request is JSON (for real-time analysis via JS)
        if request.is_json:
            data = request.get_json()
            input_text = data.get('text_input', '').strip()
            if input_text:
                if len(input_text) > 500:
                    return jsonify({"error": "Văn bản quá dài! Hãy nhập ít hơn 500 ký tự."})
                else:
                    sentiment_scores = sia.polarity_scores(input_text)
                    result = {
                        'text': input_text,
                        'positive': round(sentiment_scores['pos'] * 100, 2),
                        'neutral': round(sentiment_scores['neu'] * 100, 2),
                        'negative': round(sentiment_scores['neg'] * 100, 2),
                        'compound': round(sentiment_scores['compound'], 4)
                    }
                    return jsonify(result)
            else:
                return jsonify({"error": "No text input provided."})
        else:
            # For non-JSON POST requests (fallback to normal form submission)
            input_text = request.form.get('text_input', '').strip()
            if input_text:
                if len(input_text) > 500:
                    sentiment_result = {"error": "Văn bản quá dài! Hãy nhập ít hơn 500 ký tự."}
                else:
                    sentiment_scores = sia.polarity_scores(input_text)
                    sentiment_result = {
                        'text': input_text,
                        'positive': round(sentiment_scores['pos'] * 100, 2),
                        'neutral': round(sentiment_scores['neu'] * 100, 2),
                        'negative': round(sentiment_scores['neg'] * 100, 2),
                        'compound': round(sentiment_scores['compound'], 4)
                    }

    return render_template('index.html', result=sentiment_result, input_text=input_text)

# Run Flask in a separate thread
def run_flask():
    app.run(host="0.0.0.0", port=8000, debug=False)

threading.Thread(target=run_flask, daemon=True).start()

# Kill any existing ngrok tunnels (if any) and open a new one on port 8000
ngrok.kill()
ngrok.kill()
public_url = ngrok.connect(8000).public_url
print(f"🌍 Truy cập Flask tại: {public_url}")

 * ngrok tunnel available at: NgrokTunnel: "https://0db2-34-148-36-173.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


Address already in use
Port 8000 is in use by another program. Either identify and stop that program, or start the server with a different port.


🌍 Truy cập Flask tại: https://c4c4-34-148-36-173.ngrok-free.app
