In [1]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.23.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [17]:
import joblib
import pandas as pd
import gradio as gr
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.stem import RSLPStemmer

# Fetch the NLTK stop-word corpus; preprocess_text reads it through
# stopwords.words('english').
nltk.download('stopwords')

import re
import unicodedata
import spacy

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [6]:
# Colab-only helper for uploading files from the local machine to the runtime.
from google.colab import files


# Upload the pickled artifacts. Both 'review_classifier_model.pkl' and
# 'vectorizer.pkl' are loaded by joblib in the next step, so both files must
# end up in the working directory (run this cell once per file if needed).
uploaded = files.upload()


Saving vectorizer.pkl to vectorizer.pkl


In [14]:
# Load the trained classifier and its fitted vectorizer from the working
# directory. Downstream code calls model.predict_proba(...) and
# vectorizer.transform(...), so the loaded objects must provide those methods.
# NOTE(security): joblib.load unpickles the file and can execute arbitrary
# code -- only load artifacts that come from a trusted source.
model = joblib.load('review_classifier_model.pkl')
vectorizer = joblib.load('vectorizer.pkl')

In [38]:
# Preprocessing

# Lazily-built caches: the stop-word set and the spaCy pipeline are expensive
# to create, so they are built on first use and reused by every later call
# instead of being rebuilt for each review.
_STOP_WORDS_CACHE = None
_NLP_CACHE = None


def _get_stop_words():
  """Return the English NLTK stop-word set, building it on first use."""
  global _STOP_WORDS_CACHE
  if _STOP_WORDS_CACHE is None:
    _STOP_WORDS_CACHE = frozenset(stopwords.words('english'))
  return _STOP_WORDS_CACHE


def _get_nlp():
  """Return the spaCy 'en_core_web_sm' pipeline, loading it on first use."""
  global _NLP_CACHE
  if _NLP_CACHE is None:
    _NLP_CACHE = spacy.load('en_core_web_sm')
  return _NLP_CACHE


def preprocess_text(text):
  """Normalize a raw review into the cleaned string fed to the vectorizer.

  Steps, in order:
    1. Strip accents (NFKD decomposition, then drop non-ASCII characters).
    2. Lowercase.
    3. Remove every character that is neither a word character nor
       whitespace (underscores are word characters, so they are kept).
    4. Drop English NLTK stop words.
    5. Replace each remaining token with its spaCy lemma.

  Args:
    text: The raw review text. ``None`` or an empty string yields ``''``.

  Returns:
    The cleaned text as a single space-separated string.
  """
  # Nothing to clean; also avoids a TypeError from unicodedata on None.
  if not text:
    return ''

  text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8', 'ignore')
  text = text.lower()

  # Punctuation removal; the pattern is equivalent to [^a-zA-Z0-9_\s] here
  # because the text is already ASCII-only.
  text = re.sub(r'[^\w\s]', '', text)

  stop_words = _get_stop_words()
  text = ' '.join(word for word in text.split() if word not in stop_words)

  # Lemmatizing
  doc = _get_nlp()(text)
  return ' '.join(token.lemma_ for token in doc)


# Classification function
def classify_sentiment(review_text):
    """Score a review and return its class probabilities as a label dict.

    The text is cleaned with ``preprocess_text``, turned into features by the
    module-level ``vectorizer`` and scored with ``model.predict_proba``. The
    first (only) row of the result is mapped so that index 1 is reported as
    'Positive' and index 0 as 'Negative'.

    Args:
        review_text: The raw review typed by the user.

    Returns:
        A dict ``{'Positive': float, 'Negative': float}``.
    """
    cleaned = preprocess_text(review_text)
    features = vectorizer.transform([cleaned])
    probabilities = model.predict_proba(features)[0]

    # (label, column index in the predict_proba row)
    label_columns = (('Positive', 1), ('Negative', 0))
    return {label: float(probabilities[column]) for label, column in label_columns}

# Gradio interface
iface = gr.Interface(
    fn=classify_sentiment,
    inputs=gr.Textbox(label="Type your review", lines=3),
    # "label" renders the {class: probability} dict returned by classify_sentiment.
    outputs="label",
    title="Sentiment Analysis on a product/food/movie review",
    description="Type a review in english, and the model will classify as positive or negative.",
    examples=[
        ["This movie was amazing! The acting was superb."],
        ["Terrible taste and bad presentation. Would not recommend."],
        ["It was okay, nothing special."]
    ],
    # No `theme` argument on purpose: "compact" is not a built-in Gradio theme
    # name, so Gradio tried to fetch it from the Hugging Face Hub, the lookup
    # failed, and the default theme was used anyway.
)

# share=True exposes the app through a temporary public *.gradio.live URL.
iface.launch(share=True)


Sorry, we can't find the page you are looking for.


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ca0e14a88498f31d44.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Teste fora do Gradio

In [37]:
# Manual check of the same pipeline classify_sentiment uses, without the UI:
# preprocess -> vectorize -> predict_proba.
test_text = "One of the most horrible movies i watched"
processed = preprocess_text(test_text)
vectorized = vectorizer.transform([processed])
# Prints the raw probability row; classify_sentiment reports index 0 as
# 'Negative' and index 1 as 'Positive'.
print(model.predict_proba(vectorized))

[[0.66536327 0.33463673]]
