In [1]:
from transformers import pipeline


In [2]:
from huggingface_hub import HfApi, HfFolder

In [3]:
# Authenticate with a token read from the environment; never hardcode it.
# NOTE(review): a literal access token was previously committed on this line.
# Treat it as leaked and revoke it in the Hugging Face account settings.
import os

_hf_token = os.environ.get("HF_TOKEN")
if _hf_token:
    HfFolder.save_token(_hf_token)
# Without HF_TOKEN the notebook continues unauthenticated, which is still
# sufficient for public repositories.

In [4]:
# Create the Hub API client that the download cells below call
api = HfApi()

In [5]:
# Hub repository to pull from and the local folder that receives the files
model_id = "Testys/cnn_yor_ner"
local_dir = "./my_model"

# Fetch the serialized weights file (a single file, not the whole repository);
# the call evaluates to the local path of the downloaded file
api.hf_hub_download(
    repo_id=model_id,
    filename="pytorch_model.bin",
    local_dir=local_dir,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


pytorch_model.bin:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

'my_model/pytorch_model.bin'

In [6]:
# Hub repository to pull from and the local folder that receives the files
model_id = "Testys/cnn_yor_ner"
local_dir = "./my_model"

# Fetch the model's config.json (a single file, not the whole repository);
# the call evaluates to the local path of the downloaded file
api.hf_hub_download(
    repo_id=model_id,
    filename="config.json",
    local_dir=local_dir,
)

config.json:   0%|          | 0.00/146 [00:00<?, ?B/s]

'my_model/config.json'

In [7]:
from transformers import AutoTokenizer, AutoConfig

In [8]:
# Load the tokenizer published in the same Hub repository as the model weights
tokenizer = AutoTokenizer.from_pretrained("Testys/cnn_yor_ner")

tokenizer_config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModelForTokenClassification

class CNNForNER(nn.Module):
    """Token classifier that stacks a small 1-D CNN on a pretrained tagger's logits.

    The wrapped token-classification transformer produces per-token logits.
    Two convolutions along the sequence axis followed by a linear layer turn
    them into ``num_classes`` scores for every token.
    """

    def __init__(self, pretrained_model_name, num_classes, max_length=128):
        """Build the transformer backbone and the convolutional head.

        Args:
            pretrained_model_name: Identifier handed to
                ``AutoModelForTokenClassification.from_pretrained``.
            num_classes: Size of the final per-token output.
            max_length: Stored on the instance; ``forward`` does not read it.
        """
        super().__init__()
        self.max_length = max_length
        self.transformer = AutoModelForTokenClassification.from_pretrained(pretrained_model_name)

        # The backbone's label count becomes the channel dimension of the CNN input.
        backbone_labels = self.transformer.num_labels

        self.conv1 = nn.Conv1d(backbone_labels, 256, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(256, 128, kernel_size=3, padding=1)
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(128, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return per-token scores of shape (batch_size, sequence_length, num_classes)."""
        backbone_out = self.transformer(input_ids=input_ids, attention_mask=attention_mask)

        # Conv1d convolves over the last axis, so move the label axis into the
        # channel position: (batch, labels, sequence).
        features = backbone_out.logits.permute(0, 2, 1)
        features = F.relu(self.conv1(features))
        features = F.relu(self.conv2(features))
        features = self.dropout(features)

        # Back to (batch, sequence, channels) for the per-token linear layer.
        return self.fc(features.permute(0, 2, 1))

In [10]:
import json

# Parse the downloaded config.json into a plain dictionary
with open("./my_model/config.json", "r") as config_file:
    config_data = json.load(config_file)

# Last expression of the cell: shows which pretrained backbone the config names
config_data["pretrained_model_name"]

'masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0'

In [11]:
# Display the full parsed configuration dictionary
config_data

{'model_type': 'CNNForYorubaNER',
 'num_classes': 9,
 'max_length': 128,
 'pretrained_model_name': 'masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0'}

In [None]:
# Instantiate the CNN tagger from the values stored in config.json
model = CNNForNER(
    pretrained_model_name=config_data["pretrained_model_name"],
    num_classes=config_data["num_classes"],
)

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

In [None]:
# Load the fine-tuned weights. map_location pins every tensor to the CPU so a
# checkpoint that was saved on a GPU also opens on a machine without one.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
model.load_state_dict(
    torch.load('./my_model/pytorch_model.bin', map_location=torch.device('cpu'))
)
model.eval()  # Set the model to evaluation mode

CNNForNER(
  (transformer): XLMRobertaForTokenClassification(
    (roberta): XLMRobertaModel(
      (embeddings): XLMRobertaEmbeddings(
        (word_embeddings): Embedding(250002, 1024, padding_idx=1)
        (position_embeddings): Embedding(514, 1024, padding_idx=1)
        (token_type_embeddings): Embedding(1, 1024)
        (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): XLMRobertaEncoder(
        (layer): ModuleList(
          (0-23): 24 x XLMRobertaLayer(
            (attention): XLMRobertaAttention(
              (self): XLMRobertaSelfAttention(
                (query): Linear(in_features=1024, out_features=1024, bias=True)
                (key): Linear(in_features=1024, out_features=1024, bias=True)
                (value): Linear(in_features=1024, out_features=1024, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): XLMRobertaSel

In [None]:
# Example sentence to tag
sentence = "Ibo ni Ilu Ogun wa ni orile ede Nigeria"
# return_tensors="pt" yields PyTorch tensors that can be unpacked straight into the model call
tokenized_input = tokenizer(sentence, return_tensors="pt")


In [None]:
# Forward pass without gradient tracking, then pick the top class per token
model.eval()
with torch.no_grad():
    outputs = model(**tokenized_input)
    # Highest-scoring class along the last axis; [0] selects the only sentence in the batch
    predictions = outputs.argmax(dim=-1)[0]

    print(predictions)

tensor([8, 8, 5, 8, 3, 8, 3, 5, 8, 8, 8, 8, 8, 8, 3, 3])


In [12]:
# prompt: Can you write a script to download the necessary document from the huggingface model repo. Make it such that the model is downloaded just once and not repeatedly. This is to carry out inferencing using streamlit.

from transformers import pipeline
from huggingface_hub import HfApi, HfFolder
from transformers import AutoTokenizer, AutoConfig
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModelForTokenClassification
import json
import os

class CNNForNER(nn.Module):
    """Token classifier that stacks a small 1-D CNN on a pretrained tagger's logits.

    The wrapped token-classification transformer produces per-token logits.
    Two convolutions along the sequence axis followed by a linear layer turn
    them into ``num_classes`` scores for every token.
    """

    def __init__(self, pretrained_model_name, num_classes, max_length=128):
        """Build the transformer backbone and the convolutional head.

        Args:
            pretrained_model_name: Identifier handed to
                ``AutoModelForTokenClassification.from_pretrained``.
            num_classes: Size of the final per-token output.
            max_length: Stored on the instance; ``forward`` does not read it.
        """
        super().__init__()
        self.max_length = max_length
        self.transformer = AutoModelForTokenClassification.from_pretrained(pretrained_model_name)

        # The backbone's label count becomes the channel dimension of the CNN input.
        backbone_labels = self.transformer.num_labels

        self.conv1 = nn.Conv1d(backbone_labels, 256, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(256, 128, kernel_size=3, padding=1)
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(128, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return per-token scores of shape (batch_size, sequence_length, num_classes)."""
        backbone_out = self.transformer(input_ids=input_ids, attention_mask=attention_mask)

        # Conv1d convolves over the last axis, so move the label axis into the
        # channel position: (batch, labels, sequence).
        features = backbone_out.logits.permute(0, 2, 1)
        features = F.relu(self.conv1(features))
        features = F.relu(self.conv2(features))
        features = self.dropout(features)

        # Back to (batch, sequence, channels) for the per-token linear layer.
        return self.fc(features.permute(0, 2, 1))


In [19]:
# Authenticate with a token read from the environment; never hardcode it.
# NOTE(review): a literal access token was previously committed on this line.
# Treat it as leaked and revoke it in the Hugging Face account settings.
import os

_hf_token = os.environ.get("HF_TOKEN")
if _hf_token:
    HfFolder.save_token(_hf_token)
# Without HF_TOKEN the notebook continues unauthenticated, which is still
# sufficient for public repositories.

# Create an API client
api = HfApi()
# Specify the model details
model_id = "Testys/cnn_sent_yor"
local_dir = "./sent_model"

In [20]:
# Make sure the target folder exists, then fetch only the files that are
# missing. Each file is checked on its own, independently of whether the
# folder already existed, so an interrupted or partial earlier download is
# completed instead of being silently skipped.
os.makedirs(local_dir, exist_ok=True)

for required_file in ("sent_pytorch_model.bin", "config.json", "modelling_cnn_sent.py"):
    if not os.path.exists(os.path.join(local_dir, required_file)):
        api.hf_hub_download(repo_id=model_id, filename=required_file, local_dir=local_dir)

# The tokenizer lives in the same Hub repository as the model files
tokenizer = AutoTokenizer.from_pretrained(model_id)


In [21]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel

# NOTE(review): this class is defined twice in this cell; the later definition
# replaces this one at runtime.
class SentimentCNNModel(nn.Module):
    """Sentence classifier: transformer encoder, multi-kernel 1-D CNN, max-pool.

    The encoder's last hidden states are convolved along the sequence axis
    with one Conv1d per kernel size. Each feature map is max-pooled over the
    sequence, the pooled vectors are concatenated, and a linear layer produces
    ``num_classes`` scores per sentence.
    """

    def __init__(self, transformer_model_name, num_classes, cnn_out_channels=100, cnn_kernel_sizes=(3, 5, 7)):
        """Build the encoder and the convolutional head.

        Args:
            transformer_model_name: Identifier handed to ``AutoModel.from_pretrained``.
            num_classes: Number of output scores per sentence.
            cnn_out_channels: Output channels of every convolution.
            cnn_kernel_sizes: Kernel widths, one convolution per entry. The
                default is an immutable tuple so it cannot be shared and
                mutated across instances; lists are still accepted.
        """
        super().__init__()
        # Load pre-trained transformer model
        self.transformer = AutoModel.from_pretrained(transformer_model_name)

        # CNN layers with multiple kernel sizes
        self.convs = nn.ModuleList([
            nn.Conv1d(in_channels=self.transformer.config.hidden_size,
                      out_channels=cnn_out_channels,
                      kernel_size=k)
            for k in cnn_kernel_sizes
        ])

        # Dropout layer
        self.dropout = nn.Dropout(0.5)

        # Fully connected layer
        self.fc = nn.Linear(len(cnn_kernel_sizes) * cnn_out_channels, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return sentence-level scores of shape (batch_size, num_classes)."""
        # Get hidden states from the transformer model
        transformer_outputs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = transformer_outputs.last_hidden_state  # Shape: (batch_size, seq_len, hidden_size)

        # Transpose for CNN input: (batch_size, hidden_size, seq_len)
        hidden_states = hidden_states.transpose(1, 2)

        # Apply convolution and pooling
        pooled_outputs = []
        for conv in self.convs:
            conv_input = hidden_states
            # The convolutions are unpadded, so an input shorter than the
            # kernel would raise. Right-pad with zeros up to the kernel width;
            # inputs that are already long enough are left untouched.
            shortfall = conv.kernel_size[0] - conv_input.size(2)
            if shortfall > 0:
                conv_input = F.pad(conv_input, (0, shortfall))
            feature_map = torch.relu(conv(conv_input))
            pooled_outputs.append(torch.max(feature_map, dim=2)[0])

        # Concatenate pooled outputs and apply dropout
        cat_output = torch.cat(pooled_outputs, dim=1)
        cat_output = self.dropout(cat_output)

        # Final classification
        logits = self.fc(cat_output)

        return logits

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel

class SentimentCNNModel(nn.Module):
    """Sentence classifier: transformer encoder, multi-kernel 1-D CNN, max-pool.

    The encoder's last hidden states are convolved along the sequence axis
    with one Conv1d per kernel size. Each feature map is max-pooled over the
    sequence, the pooled vectors are concatenated, and a linear layer produces
    ``num_classes`` scores per sentence.
    """

    def __init__(self, transformer_model_name, num_classes, cnn_out_channels=100, cnn_kernel_sizes=(3, 5, 7)):
        """Build the encoder and the convolutional head.

        Args:
            transformer_model_name: Identifier handed to ``AutoModel.from_pretrained``.
            num_classes: Number of output scores per sentence.
            cnn_out_channels: Output channels of every convolution.
            cnn_kernel_sizes: Kernel widths, one convolution per entry. The
                default is an immutable tuple so it cannot be shared and
                mutated across instances; lists are still accepted.
        """
        super().__init__()
        # Load pre-trained transformer model
        self.transformer = AutoModel.from_pretrained(transformer_model_name)

        # CNN layers with multiple kernel sizes
        self.convs = nn.ModuleList([
            nn.Conv1d(in_channels=self.transformer.config.hidden_size,
                      out_channels=cnn_out_channels,
                      kernel_size=k)
            for k in cnn_kernel_sizes
        ])

        # Dropout layer
        self.dropout = nn.Dropout(0.5)

        # Fully connected layer
        self.fc = nn.Linear(len(cnn_kernel_sizes) * cnn_out_channels, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return sentence-level scores of shape (batch_size, num_classes)."""
        # Get hidden states from the transformer model
        transformer_outputs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = transformer_outputs.last_hidden_state  # Shape: (batch_size, seq_len, hidden_size)

        # Transpose for CNN input: (batch_size, hidden_size, seq_len)
        hidden_states = hidden_states.transpose(1, 2)

        # Apply convolution and pooling
        pooled_outputs = []
        for conv in self.convs:
            conv_input = hidden_states
            # The convolutions are unpadded, so an input shorter than the
            # kernel would raise. Right-pad with zeros up to the kernel width;
            # inputs that are already long enough are left untouched.
            shortfall = conv.kernel_size[0] - conv_input.size(2)
            if shortfall > 0:
                conv_input = F.pad(conv_input, (0, shortfall))
            feature_map = torch.relu(conv(conv_input))
            pooled_outputs.append(torch.max(feature_map, dim=2)[0])

        # Concatenate pooled outputs and apply dropout
        cat_output = torch.cat(pooled_outputs, dim=1)
        cat_output = self.dropout(cat_output)

        # Final classification
        logits = self.fc(cat_output)

        return logits


In [26]:
# Read the sentiment model's configuration from the downloaded repository files
with open("./sent_model/config.json", "r") as file:
    config_data = json.load(file)  # Parse JSON string into a dictionary

# Rebuild the architecture from the config, then load the fine-tuned weights
# onto the CPU.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
model = SentimentCNNModel(
    transformer_model_name=config_data["pretrained_model_name"],
    num_classes=config_data["num_classes"],
)
model.load_state_dict(torch.load('./sent_model/sent_pytorch_model.bin', map_location=torch.device('cpu')))
model.eval()  # Set the model to evaluation mode



Some weights of XLMRobertaModel were not initialized from the model checkpoint at Davlan/naija-twitter-sentiment-afriberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


SentimentCNNModel(
  (transformer): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(70006, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0-9): 10 x XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
    

In [28]:
# Example sentence to classify
sentence = "Mo ni Ife Iroyin yi "

# Tokenize into PyTorch tensors that can be unpacked into the model call
inputs = tokenizer(sentence, return_tensors="pt")

# Forward pass without gradient tracking
model.eval()
with torch.no_grad():
    outputs = model(**inputs)
    # Highest-scoring class along dim 1; [0] selects the only sentence in the batch
    predicted_class = outputs.argmax(dim=1)[0]

In [30]:
def decode_predictions(predictions, id2label):
    """
    Decode model's numerical predictions into descriptive labels using the id2label mapping.

    Args:
    predictions (torch.Tensor | list | tuple | int): Predicted label indices.
        Anything exposing ``.tolist()`` (tensors, arrays) is converted first;
        a 0-dim tensor therefore becomes a single index.
    id2label (dict): A dictionary mapping label indices back to their string
        representations. Integer keys are tried first; string keys (as
        produced when the mapping is loaded from JSON) are used as a fallback.

    Returns:
    A single label when ``predictions`` is a scalar, otherwise a list of
    labels with the same (possibly nested) structure as ``predictions``.

    Raises:
    KeyError: If an index has no entry in ``id2label``.
    """
    # Tensors and arrays expose .tolist(); plain lists, tuples and ints do not.
    if hasattr(predictions, "tolist"):
        predictions = predictions.tolist()

    def _lookup(index):
        try:
            return id2label[index]
        except KeyError:
            # JSON object keys are always strings, so retry with the text form.
            text_key = str(index)
            if text_key in id2label:
                return id2label[text_key]
            raise

    def _decode(item):
        # A dict cannot be indexed with a list, so map element by element and
        # recurse to support batched (nested) predictions.
        if isinstance(item, (list, tuple)):
            return [_decode(sub_item) for sub_item in item]
        return _lookup(item)

    return _decode(predictions)