# **Implementation of a Deep Neural Network Using a Softmax Classifier for the Yoruba Language ('yo')**

In [1]:
# Mount Google Drive at /content/drive; the evaluation cells read their
# TSV files from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Installing and importing the required libraries

In [2]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 4.2 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 58.4 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 37.8 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.24.0


In [3]:
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
import numpy as np
from scipy.special import softmax
import pandas as pd
from sklearn.metrics import classification_report
import os

The model used is `Davlan/naija-twitter-sentiment-afriberta-large`.

In [4]:
# Checkpoint identifier; the tokenizer and the classifier below are both
# loaded from it.
MODEL = "Davlan/naija-twitter-sentiment-afriberta-large" 
# Tokenizer used by predict_by_ntsal to encode the input text.
tokenizer = AutoTokenizer.from_pretrained(MODEL)

# Sequence-classification model; predict_by_ntsal turns its first output
# into per-label scores.
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

Downloading:   0%|          | 0.00/429 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.55M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.10k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/503M [00:00<?, ?B/s]

Evaluation

In [9]:
def predict_by_ntsal(text, max_length=512):
  """Predict the sentiment label of a single text.

  Uses the notebook-level ``tokenizer`` and ``model`` objects.

  Args:
    text: The text to classify.
    max_length: Maximum number of tokens passed to the model; longer inputs
      are truncated to this length. The default of 512 is assumed to fit the
      checkpoint's input limit -- TODO confirm against the model config.

  Returns:
    The label with the highest softmax score: "positive", "neutral" or
    "negative".
  """
  # Local import: the notebook's import cell does not import torch.
  import torch

  # NOTE(review): hard-coded index-to-label mapping; presumably mirrors the
  # checkpoint's label order -- confirm against model.config.id2label.
  id2label = {0:"positive", 1:"neutral", 2:"negative"}

  # Truncate explicitly so an unusually long text cannot exceed max_length.
  encoded_input = tokenizer(
      text, return_tensors='pt', truncation=True, max_length=max_length
  )
  # Inference only: skip autograd bookkeeping during the forward pass.
  with torch.no_grad():
    output = model(**encoded_input)

  # output[0] holds the scores for the batch; [0] selects the single input.
  scores = softmax(output[0][0].detach().numpy())
  # Only the top-scoring class is needed, so argmax replaces a full sort.
  return id2label[int(np.argmax(scores))]

In [6]:
def evaluate(file_path):
  """Classify every row of a tab-separated file and report the metrics.

  Args:
    file_path: Path to a tab-separated file with ``text`` and ``label``
      columns.

  Returns:
    The text report produced by ``classification_report``, comparing the
    ``label`` column with the labels returned by ``predict_by_ntsal``.
  """
  frame = pd.read_csv(file_path, sep='\t')
  gold_labels = list(frame['label'])
  # One model call per row, in file order.
  predicted_labels = [predict_by_ntsal(sample) for sample in frame['text']]
  return classification_report(gold_labels, predicted_labels)

In [13]:
%cd /content/drive/MyDrive
PROJECT_DIR = '/content/drive/MyDrive/afrisent-semeval-2023'
TASK = 'SubtaskA'
LANGUAGE_CODE = 'yo'
file_path = os.path.join(PROJECT_DIR, TASK, 'train', 'splitted-train-dev-test', LANGUAGE_CODE, 'test.tsv')
print('Test Split')
print(evaluate(file_path))

/content/drive/MyDrive
Test Split
              precision    recall  f1-score   support

    negative       0.93      0.90      0.92       354
     neutral       0.88      0.95      0.91       644
    positive       0.95      0.89      0.92       707

    accuracy                           0.92      1705
   macro avg       0.92      0.92      0.92      1705
weighted avg       0.92      0.92      0.92      1705



In [11]:
%cd /content/drive/MyDrive
PROJECT_DIR = '/content/drive/MyDrive/afrisent-semeval-2023'
TASK = 'SubtaskA'
LANGUAGE_CODE = 'yo'
file_path = os.path.join(PROJECT_DIR, TASK, 'train', 'splitted-train-dev-test', LANGUAGE_CODE, 'train.tsv')
print('Train Split')
print(evaluate(file_path))

/content/drive/MyDrive
Train Split
              precision    recall  f1-score   support

    negative       0.92      0.88      0.90      1304
     neutral       0.88      0.96      0.92      2150
    positive       0.96      0.91      0.93      2511

    accuracy                           0.92      5965
   macro avg       0.92      0.91      0.92      5965
weighted avg       0.92      0.92      0.92      5965



In [12]:
# Evaluate on the dev split ('dev.tsv') for language code 'yo'.
PROJECT_DIR = '/content/drive/MyDrive/afrisent-semeval-2023'
TASK, LANGUAGE_CODE = 'SubtaskA', 'yo'
file_path = os.path.join(
    PROJECT_DIR, TASK, 'train', 'splitted-train-dev-test', LANGUAGE_CODE, 'dev.tsv'
)
print('Dev Split')
print(evaluate(file_path))

Dev Split
              precision    recall  f1-score   support

    negative       0.94      0.91      0.92       214
     neutral       0.89      0.95      0.92       314
    positive       0.96      0.92      0.94       324

    accuracy                           0.93       852
   macro avg       0.93      0.92      0.93       852
weighted avg       0.93      0.93      0.93       852

