<a href="https://colab.research.google.com/github/ashishlamsal/sentiment-analysis/blob/main/notebooks/inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab runtime; the checkpoint loaded later in this
# notebook is read from a path under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install datasets evaluate transformers[sentencepiece]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.8.0-py3-none-any.whl (452 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m452.9/452.9 KB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 KB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers[sentencepiece]
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.8/5.8 MB[0m [31m57.0 MB/s[0m eta [36m0:00:00[0m
Collecting responses<0.19
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting huggingface-hub<1.0.0,>=0.2.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.4/182.4 KB[0m [31m20.1 MB/s

In [None]:
# load pretrained model
import torch
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification

In [None]:
# Class index -> sentiment label for the three output classes.
# NOTE(review): "postive" is a misspelling of "positive". It is kept verbatim here
# because it is the label string this notebook returns and displays; confirm that
# nothing downstream matches on it before renaming.
id2label = {
    0: "negative",
    1: "neutral",
    2: "postive",
}

# Derive the inverse mapping so the two dictionaries can never drift apart.
label2id = {label: index for index, label in id2label.items()}

saved_checkpoint = r'/content/drive/MyDrive/fusemachines/sentiment-analysis/output/4/'

config = AutoConfig.from_pretrained(saved_checkpoint, label2id=label2id, id2label=id2label)
tokenizer = AutoTokenizer.from_pretrained(saved_checkpoint)

# Hand the customised config to the model so `model.config` carries the same label
# mapping as `config`; the number of labels then follows from `id2label`, so a
# separate `num_labels` argument is no longer needed.
model = AutoModelForSequenceClassification.from_pretrained(saved_checkpoint, config=config)

In [None]:
# Display the loaded configuration to check that the label mapping was applied.
config

BertConfig {
  "_name_or_path": "/content/drive/MyDrive/fusemachines/sentiment-analysis/output/4/",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": 0.5,
  "embedding_size": 768,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "negative",
    "1": "neutral",
    "2": "postive"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "negative": 0,
    "neutral": 1,
    "postive": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.25.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 197285
}

In [None]:
# Two Nepali example sentences, scored together as one padded batch.
sequences = ['अमेरिकामा कोभिड बाट एकै दिन','कोरोना भाइरस भारत माघ गतेदेखि कोभिड विरुद्ध राष्ट्रव्यापी खोप अभियान']

tokens = tokenizer(sequences, padding=True, truncation=True, return_tensors="pt")

# Inference only: disable gradient tracking so no autograd graph is built for the
# forward pass (the logits then carry no `grad_fn`).
with torch.no_grad():
    output = model(**tokens)
output

SequenceClassifierOutput(loss=None, logits=tensor([[ 0.7702,  0.6726, -1.2691],
        [-1.9282, -1.1573,  2.6867]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [None]:
import torch

# Normalise the raw logits into per-row probabilities over the last dimension.
predictions = torch.softmax(output.logits, dim=-1)
print(predictions)

tensor([[0.4909, 0.4452, 0.0639],
        [0.0096, 0.0208, 0.9696]], grad_fn=<SoftmaxBackward0>)


In [None]:
import torch

def get_prediction(sentence, tokenize=tokenizer, model=model):
    """Predict the sentiment label for a sentence (or a batch of sentences).

    Args:
        sentence: A single string, or a list of strings to classify as one batch.
        tokenize: Tokenizer callable used to encode the input. Defaults to the
            notebook-level ``tokenizer``. (The parameter name is kept as-is so
            existing keyword callers keep working.)
        model: Classification model to run. Defaults to the notebook-level
            ``model``. It is used on whatever device it already lives on.

    Returns:
        For a single string, the label looked up in ``config.id2label``.
        For a list of strings, a list with one label per sentence.
        ``'no prediction'`` is used for any predicted index that has no entry
        in ``config.id2label``.
    """
    # tokenize the input with the tokenizer that was passed in (not the notebook global)
    tokens = tokenize(sentence, padding=True, truncation=True, return_tensors='pt')

    # move the inputs to the model's device instead of forcing the caller's model onto the CPU
    device = next(model.parameters()).device
    tokens = {name: tensor.to(device) for name, tensor in tokens.items()}

    # raw class scores (logits), one row per input sentence; they are not probabilities
    # because no softmax is applied, but the argmax is the same either way
    with torch.no_grad():
        logits = model(**tokens)[0].cpu().numpy()

    # pick the best class per row; an argmax over the flattened array would return
    # a meaningless index as soon as more than one sentence is passed
    top_indices = logits.argmax(axis=-1).tolist()
    labels = [config.id2label.get(index, 'no prediction') for index in top_indices]

    return labels[0] if isinstance(sentence, str) else labels

In [None]:
# Rough English gloss: "Coronavirus: India, nationwide vaccination campaign against COVID starting in the month of Magh."
get_prediction('कोरोना भाइरस भारत माघ गतेदेखि कोभिड विरुद्ध राष्ट्रव्यापी खोप अभियान')

'postive'

In [None]:
# Rough English gloss: "New COVID record in America: more than a thousand deaths in a single day."
get_prediction('नेपालको संचार अमेरिकामा कोभिड को नयाँ रेकर्ड एकै दिन हजारभन्दा बढीको मृत्यु')

'negative'

In [None]:
# Rough English gloss: "As more people return home from India, the number of infected at the border points has also started to rise."
get_prediction('भारतबाट घर फर्किने क्रम बढेसंँगै नाकामा संक्रमितको संख्यापनि बढ्न थालेको छ ।')

'negative'

In [None]:
# Rough English gloss: "Janak has recovered from COVID."
get_prediction('जनक कोभिड बाट निको भएछन ')

'postive'

In [None]:
# Rough English gloss: "My name is poor COVID." (a deliberately odd sentence)
get_prediction('मेरो नाम बिचरा कोभिड हो')

'neutral'

In [None]:
# Rough English gloss: "In Syangja-2, Congress's Dhanraj Gurung wins by more than five and a half thousand votes."
get_prediction('स्याङ्जा-२ मा कांग्रेसका धनराज गुरूङ साढे ५ हजारभन्दा बढी मतले विजयी')

'postive'

In [None]:
# Rough English gloss: "That person has not died." (a negated sentence)
get_prediction('त्यो मान्छे मरेको छैन ')

'postive'

In [None]:
# Rough English gloss: "Jamboree in Chitwan after 26 years."
get_prediction('२६ वर्षपछि चितवनमा ज्याम्बोरी')

'negative'

In [None]:
# Rough English gloss: "As more people return home from India, the number of infected at the border points has also started to fall."
# Same sentence as the earlier border-points example, with "rise" swapped for "fall".
get_prediction(" भारतबाट घर फर्किने क्रम बढेसंँगै नाकामा संक्रमितको संख्यापनि घट्न थालेको छ")

'negative'