In [1]:
!pip3 install -q transformers gradio
!pip install huggingface_hub transformers datasets gradio

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.4/15.4 MB[0m [31m63.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.1/311.1 kB[0m [31m30.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m105.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m90.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.9/92.9 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.7/302.7 kB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.0/75.0 kB[0m [31m9.8 MB/s

In [2]:
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig
from transformers import RobertaTokenizer, RobertaConfig, RobertaForSequenceClassification
import numpy as np
from scipy.special import softmax

In [3]:
# Identifier of the fine-tuned checkpoint passed to every from_pretrained call.
# (Plain string: the original f-prefix had no placeholders to interpolate.)
model_path = "KABANDA18/FineTuning-Roberta-base_Model"

# Load the tokenizer, the configuration and the classification model
# from that single checkpoint so the three always agree.
tokenizer = RobertaTokenizer.from_pretrained(model_path)
config = RobertaConfig.from_pretrained(model_path)
model = RobertaForSequenceClassification.from_pretrained(model_path)

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/999k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/958 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/888 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

In [4]:
from transformers import pipeline
# Build the text-classification pipeline from the checkpoint id already held in
# `model_path` rather than repeating the literal (and without a placeholder-less f-string).
classifier = pipeline("text-classification", model=model_path)

In [5]:
# Preprocess text (username and link placeholders)
def preprocess(text):
    """Replace user mentions and links in ``text`` with fixed placeholders.

    The text is split on single spaces. A token that starts with ``@`` and has
    more than one character becomes ``@user``; a token that starts with
    ``http`` becomes ``http``; every other token is kept unchanged. Tokens are
    re-joined with single spaces, so the original spacing is preserved.

    Args:
        text: The raw string to normalise.

    Returns:
        The normalised string.
    """
    def _placeholder(token):
        # A lone "@" is not a mention, hence the length check.
        if len(token) > 1 and token.startswith('@'):
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    return " ".join(_placeholder(token) for token in text.split(" "))

In [6]:
# Sample tweet, normalised with preprocess() before it is tokenised
text = preprocess("This covid came with its own agenda")

In [7]:
# Run the PyTorch model on the tokenised text
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
# logits holds one row per input sequence; row 0 is the only one here.
# Detach from the autograd graph before converting to NumPy, then normalise.
scores = softmax(output.logits[0].detach().numpy())

In [8]:
# Inspect what the tokenizer produced: the token ids and the attention mask
encoded_input

{'input_ids': tensor([[    0,   713, 47268,   808,   376,    19,    63,   308,  4026,     2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [9]:
# Inspect the raw model output object (its logits are not yet normalised)
output

SequenceClassifierOutput(loss=None, logits=tensor([[-4.5025,  3.6547, -3.9030]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [10]:
# Raw logits of the first (and only) sequence, before softmax is applied

output[0][0].detach().numpy()

array([-4.502453 ,  3.6546822, -3.9029648], dtype=float32)

In [11]:
# Display the scores after softmax has turned the logits into probabilities

scores

array([2.8645087e-04, 9.9919182e-01, 5.2168046e-04], dtype=float32)

In [12]:
# Map class index 0/1/2 to a readable sentiment name.
# NOTE(review): the index order is taken as given here - confirm it matches the fine-tuning labels.
config.id2label = dict(enumerate(('NEGATIVE', 'NEUTRAL', 'POSITIVE')))

In [13]:
# Report every class with its probability, most likely first
ranking = np.argsort(scores)[::-1]
print(f"Classified text:{text}")
for position, class_id in enumerate(ranking, start=1):
    label = config.id2label[class_id]
    probability = scores[class_id]
    print(f"{position}) {label} {np.round(float(probability), 4)}")

Classified text:This covid came with its own agenda
1) NEUTRAL 0.9992
2) POSITIVE 0.0005
3) NEGATIVE 0.0003


# **GRADIO APP**

In [14]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import gradio as gr
import torch

# Load the tokenizer and model that back the demo from a single checkpoint id.
# RobertaTokenizer / RobertaForSequenceClassification come from the notebook's
# import cell, not from the imports at the top of this cell.
APP_CHECKPOINT = "KABANDA18/FineTuning-Roberta-base_Model"
tokenizer = RobertaTokenizer.from_pretrained(APP_CHECKPOINT)
model = RobertaForSequenceClassification.from_pretrained(APP_CHECKPOINT)

def sentiment_analysis(text):
    """Return the model's sentiment probabilities for a single tweet.

    The text first goes through ``preprocess`` (user mentions and links are
    replaced by placeholders), the same normalisation the notebook applies
    before its own inference, so the demo and the notebook score the same
    kind of input.

    Args:
        text: Raw tweet text typed into the demo. ``None`` is treated as an
            empty string.

    Returns:
        dict: ``{"Negative": p, "Neutral": p, "Positive": p}`` where each
        ``p`` is the softmax probability of that class.
    """
    # Normalise mentions/links exactly like the notebook's inference path does
    cleaned = preprocess(text or "")

    # Tokenize the input text
    inputs = tokenizer(cleaned, return_tensors="pt", truncation=True, padding=True)

    # Inference only: no autograd bookkeeping needed
    with torch.no_grad():
        output = model(**inputs)

    # logits has one row per input sequence; take row 0 explicitly instead of
    # squeezing, so the result is always a flat list of class probabilities
    scores = torch.nn.functional.softmax(output.logits, dim=-1)[0].tolist()

    # Label order follows class indices 0/1/2, mirroring config.id2label
    # as set earlier in the notebook
    labels = ["Negative", "Neutral", "Positive"]

    return dict(zip(labels, scores))

# sentiment_analysis returns a {label: probability} mapping, which is the input
# format gr.Label renders as ranked confidence bars; a plain "text" output
# would only show the dictionary as a string.
demo = gr.Interface(
    fn=sentiment_analysis,
    inputs=gr.Textbox(placeholder="Write/Type your tweet here"),
    outputs=gr.Label(num_top_classes=3),
    examples=[
        ["Covid Vaccine are Health"],
        ["There's a global pandemic ongoing called Covid"],
        ["Covid is dangerous"],
        ["Covid is affecting Businesses badly"],
        ["This so-called Covid is not going to block our shine. Come to The beach this weekend! It's going to be lit"],
    ],
    title="Covid Tweets Sentiment Analysis App",
    description="This Application is the interface to Our Sentiment Analysis Model fine-tuned from a Roberta-base model.",
)

# Start the app; when run in Colab, Gradio also creates a temporary public share link.
demo.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://8d66df171315347f51.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


