# Python Code for Sentiment Analysis with HausaSentiLex Model

# Step 1: Install libraries
This installs the Transformers and colorama libraries (with their dependencies) and imports the modules used in the rest of the notebook.

In [1]:
# Install libraries
!pip3 install transformers
import csv
import pandas as pd
!pip3 install colorama
from colorama import Fore

Collecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m39.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m45.5 MB/s[0m eta [36m0:00:00[0m
Col

# Step 2: Create a pipeline object for text classification

Load the HausaSentiLex model (`mangaphd/HausaSentiLex`) from the Hugging Face Hub.


In [2]:
# Build the classifier through the high-level `pipeline` helper.
from transformers import pipeline

# NOTE: the name `pridiction` (sic) is referenced by later cells, so it is kept as is.
pridiction = pipeline(
    task="text-classification",
    model="mangaphd/HausaSentiLex",
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/648 [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/434M [00:00<?, ?B/s]

Some layers from the model checkpoint at mangaphd/HausaSentiLex were not used when initializing TFBertForSequenceClassification: ['dropout_113']
- This IS expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at mangaphd/HausaSentiLex.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


Downloading (…)okenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/669k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

# Step 3: Sentiment Analysis
This function performs sentiment analysis, the process of identifying the emotional tone of a piece of text. It takes a text as input and returns a numeric polarity: 1 for positive, -1 for negative, and 0 for neutral (including predictions whose confidence score is below 0.6).

In [3]:
# Text Sentiment Analysis
def sentiment_analysis(text, threshold=0.6, classifier=None):
  """Return the polarity of `text` as -1, 0 or 1.

  Args:
    text: Input handed unchanged to the classifier.
    threshold: Minimum score a 'LABEL_0' / 'LABEL_1' prediction needs in
      order to count. Defaults to 0.6, the value that used to be hard-coded.
    classifier: Optional callable used instead of the notebook-level
      `pridiction` pipeline. It must return a sequence whose first item
      has 'label' and 'score' keys.

  Returns:
    -1 when the first prediction is 'LABEL_0' with score >= threshold,
    1 when it is 'LABEL_1' with score >= threshold, and 0 otherwise
    (low score or any other label).
  """
  if classifier is None:
    # Resolved at call time so the function can be defined before the pipeline.
    classifier = pridiction

  # Only the first prediction is used; read it directly instead of
  # building a DataFrame just to index two fields.
  top = classifier(text)[0]
  senti = top['label']
  score = top['score']

  if score < threshold:
    return 0
  if senti == 'LABEL_0':
    return -1
  if senti == 'LABEL_1':
    return 1
  return 0

# Step 4: Test Model with single sentence
You can write any text to test the model

In [4]:
# Example input for a one-off check of the classifier.
sentence = "Aikinka Yana kyau baba uban Abba"
# Left as a bare expression on the last line so the notebook displays the returned polarity.
sentiment_analysis(sentence)

1

# Step 5: Main (test Model with bulk tweets)

In [5]:
# Start by loading the tweets, then classify and print each one.
# - encoding='utf-8-sig' drops a leading byte-order mark so it does not end up
#   glued to the first tweet.
# - newline='' is the mode the csv module expects for files it reads.
with open('hausa_tweets.csv', 'r', encoding='utf-8-sig', newline='') as test_tweets_file:
  reader = csv.reader(test_tweets_file)
  # One [tweet, polarity] pair per input row. Column names are NOT stored here;
  # they are supplied when the DataFrame is built from this list.
  data = []
  for row in reader:
    if not row:
      # Blank line in the CSV: nothing to classify.
      continue
    tweet = row[0]
    MyPolarity = sentiment_analysis(tweet)
    data.append([tweet, MyPolarity])
    # Colour-code the output and reset afterwards so the colour does not
    # bleed into the following (possibly neutral) lines.
    if MyPolarity == 1:
      print(Fore.GREEN + f"{tweet}" + Fore.RESET)
    elif MyPolarity == -1:
      print(Fore.RED + f"{tweet}" + Fore.RESET)
    else:
      print(tweet)

[32m﻿‍️‍️‍️‍️ an haramta shanta ne a lokacin covid ďin ne
[32m‍️ ku dai yan chana baza ku bar mutane su zauna lafia ba
[31m^^happy bday sir allah ya kara tsawon kwana
[32m✓establishment of frsc nde ndlea nalda ✓abujakano express way ✓creation of states ✓abuja as fct ✓establishment of copyright law etc general allah ya kara lafiya da nisan kwana aameen
[31ma a ka buge late comer kenan lallai angaisheka first in the history late comer yazo ya kasa komai u deserve a medal for this
[31ma a mutuniyar fa yau ansa hijab gashi har kinyi kyau dole sema munje india gun
[31ma bincikesu dakyau wlh duk wanda aka kama da almundahna da dukiyar mu a kulle
[31ma dai yi mu gani idan tusaa zata hura wuta
[31ma daidai wannan lokacin sai dai rokon allah saboda gwamnati bashine a gabanta baallah ya zaunadda kasanmu lafiya
[31ma dandaqe yan iska muggai wanda basu ganin annabi da izinin allah shegun banza yan kutumar uba yan cikin wutar saqar marasa mutunci its very painful to even think about it wl

# Step 6: Save annotated tweets in CSV file

In [6]:
# Persist the annotated tweets: wrap the collected rows in a two-column
# DataFrame and write it out as CSV in the working directory.
annotatedtweets = pd.DataFrame(data=data, columns=['tweets', 'polarity'])
annotatedtweets.to_csv(path_or_buf='myannotatedtweets.csv')
