<a href="https://colab.research.google.com/github/chineidu/NLP-Tutorial/blob/main/notebook/06_Transformers/07c_seq_classif_wf_polars.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install rich
!pip install polars[pyarrow]
!pip install transformers[torch]
!pip install torch datasets evaluate
!pip install seqeval

Collecting transformers[torch]
  Downloading transformers-4.35.0-py3-none-any.whl (7.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m61.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers[torch])
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers[torch])
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m68.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers[torch])
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m70.6 MB/s

# Sentence Classification

In [2]:
# Built-in library
import re
import json
import logging
from typing import Any, Dict, List, Optional, Union
import logging
import warnings

# Standard imports
import numpy as np
import pandas as pd
import polars as pl
from rich import print
import torch

# Visualization
import matplotlib.pyplot as plt


# Pandas settings
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 600

warnings.filterwarnings("ignore")

# Black code formatter (Optional)
# %load_ext lab_black

# auto reload imports
# %load_ext autoreload
# %autoreload 2

### Prepare Data

```text
Create:
- Training data
- Validation data
- Test data

```

In [3]:
from google.colab import drive


drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
fp: str = "/content/drive/MyDrive/My doc/Deep Learning/Data/training_data_2.parquet"
N: int = 45_000

df_original: pl.DataFrame = pl.read_parquet(source=fp).rename({"tags": "label"})
df_original = df_original.sample(n=N, shuffle=True, seed=123)

df_original.head()

customer_id,nuban,date,description,amount,type,label
str,i64,str,str,f64,str,str
"""39396""",1,"""2021-11-16""","""MOBILE BANKING…",3100.0,"""Debit""","""spend.mobileSp…"
"""38727""",1,"""2022-04-12""","""POS/WEB PMT JE…",3220.0,"""Debit""","""spend.shopping…"
"""40065""",1,"""2022-08-27""","""Paystack/antam…",6200.0,"""Credit""","""behavioural.lo…"
"""40826""",1,"""2022-06-20""","""Amt includes C…",2621.5,"""Debit""","""spend.ussdTran…"
"""40187""",1,"""2022-03-19""","""POS/WEB PMT T …",10000.0,"""Debit""","""spend.shopping…"


In [11]:
# Drop columns
df_original = df_original.drop(["customer_id", "nuban"])

In [12]:
list(df_original["label"].unique())

['spend.atmSpend',
 'spend.spendOnTransfers',
 'behavioural.loanRepayments',
 'spend.posSpend',
 'income.salaryEarner',
 'income.gigWorker',
 'spend.shopping',
 'spend.gambling',
 'spend.webSpend',
 'spend.ussdTransactions',
 'spend.bills',
 'spend.cashWithdrawal',
 'spend.bankCharges',
 'spend.mobileSpend',
 'behavioural.loanAmount',
 'spend.airtime']

In [13]:
# Rename the labels
labels: dict[str, Any] = list(df_original["label"].unique())
id2label: dict[str, Any] = {idx: key for idx, key in enumerate(labels)}
label2id: dict[str, Any] = {key: idx for idx, key in id2label.items()}

print(f"label2id: {label2id}")

print(f"id2label: {id2label}")

In [14]:
# Create a copy!
df: pd.DataFrame = df_original.clone()

In [18]:
# Map the labels
df = df.with_columns(pl.col("label").map_dict(label2id))

df.head()

date,description,amount,type,label
str,str,f64,str,i64
"""2021-11-16""","""MOBILE BANKING…",3100.0,"""Debit""",10
"""2022-04-12""","""POS/WEB PMT JE…",3220.0,"""Debit""",14
"""2022-08-27""","""Paystack/antam…",6200.0,"""Credit""",13
"""2022-06-20""","""Amt includes C…",2621.5,"""Debit""",2
"""2022-03-19""","""POS/WEB PMT T …",10000.0,"""Debit""",14


### Split Data

```text
Split the data into:
- Training data
- Validation data
- Test data
```

In [19]:
from sklearn.model_selection import train_test_split
from datasets import Dataset, DatasetDict


RANDOM_STATE: int = 123
TEST_SIZE: float = 0.08

X_t, X_test = train_test_split(df, test_size=TEST_SIZE, random_state=RANDOM_STATE)

X_t.shape, X_test.shape

((41400, 5), (3600, 5))

In [20]:
X_train, X_validation = train_test_split(
    X_t, test_size=TEST_SIZE, random_state=RANDOM_STATE
)
X_train.shape, X_validation.shape, X_test.shape

((38088, 5), (3312, 5), (3600, 5))

In [23]:
# Create dataset objects
# Convert to Pandas since there's no Dataset.from_polars() method yet
train_dataset: DatasetDict = Dataset.from_pandas(df=X_train.to_pandas())
validation_dataset: DatasetDict = Dataset.from_pandas(df=X_validation.to_pandas())
test_dataset: DatasetDict = Dataset.from_pandas(df=X_test.to_pandas())

train_dataset

Dataset({
    features: ['date', 'description', 'amount', 'type', 'label'],
    num_rows: 38088
})

In [24]:
# Create a dataset dict object
dataset: DatasetDict = DatasetDict(
    {"train": train_dataset, "validation": validation_dataset, "test": test_dataset}
)

dataset

DatasetDict({
    train: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label'],
        num_rows: 38088
    })
    validation: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label'],
        num_rows: 3312
    })
    test: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label'],
        num_rows: 3600
    })
})

In [25]:
print(dataset.get("train")[0])

print(dataset.get("validation")[0])

print(dataset.get("test")[0])

In [26]:
def lower_case_slower(example: dict[str, Any]) -> dict[str, Any]:
    """Convert the text to lowercase. It's a MUCH SLOWER version."""
    return {"description": example.get("description").lower()}


# For a faster implementation, use a list comprehension
def lower_case(example: dict[str, Any]) -> dict[str, Any]:
    """Convert the text to lowercase."""
    return {"description": [val.lower() for val in example.get("description")]}


def add_description_length(example: dict[str, Any]) -> dict[str, Any]:
    """This is used to add the length of the description to the dataset."""
    return {"description_length": [len(val) for val in example.get("description")]}

In [27]:
# Much slower
# dataset_1 = dataset.map(lower_case_slower)
# dataset_1

In [28]:
# Much faster!
dataset_1 = dataset.map(lower_case, batched=True)
dataset_1

Map:   0%|          | 0/38088 [00:00<?, ? examples/s]

Map:   0%|          | 0/3312 [00:00<?, ? examples/s]

Map:   0%|          | 0/3600 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label'],
        num_rows: 38088
    })
    validation: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label'],
        num_rows: 3312
    })
    test: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label'],
        num_rows: 3600
    })
})

In [29]:
dataset_1 = dataset_1.map(add_description_length, batched=True)
dataset_1

Map:   0%|          | 0/38088 [00:00<?, ? examples/s]

Map:   0%|          | 0/3312 [00:00<?, ? examples/s]

Map:   0%|          | 0/3600 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label', 'description_length'],
        num_rows: 38088
    })
    validation: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label', 'description_length'],
        num_rows: 3312
    })
    test: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label', 'description_length'],
        num_rows: 3600
    })
})

In [30]:
print(dataset_1.get("train")[0])

print(dataset_1.get("validation")[0])

print(dataset_1.get("test")[0])

In [31]:
# Drop descriptions that are less than THRESHOLD
THRESHOLD: int = 12
dataset_1 = dataset_1.filter(lambda x: x.get("description_length") >= THRESHOLD)
dataset_1

Filter:   0%|          | 0/38088 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3312 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3600 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label', 'description_length'],
        num_rows: 37659
    })
    validation: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label', 'description_length'],
        num_rows: 3281
    })
    test: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label', 'description_length'],
        num_rows: 3575
    })
})

In [32]:
print(dataset_1.get("validation")[0])

In [33]:
def concat_data(example: dict[str, Any]) -> dict[str, Any]:
    """This is used to create a new column by combining columns in the dataset."""

    result: dict[str, Any] = {
        "body": [
            f"{date} \n {description} \n {amount} \n {type_}"
            for (date, type_, amount, description) in zip(
                example.get("date"),
                example.get("description"),
                example.get("amount"),
                example.get("type"),


            )
        ]
    }

    return result

In [34]:
dataset_1 = dataset_1.map(concat_data, batched=True)
dataset_1

Map:   0%|          | 0/37659 [00:00<?, ? examples/s]

Map:   0%|          | 0/3281 [00:00<?, ? examples/s]

Map:   0%|          | 0/3575 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label', 'description_length', 'body'],
        num_rows: 37659
    })
    validation: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label', 'description_length', 'body'],
        num_rows: 3281
    })
    test: Dataset({
        features: ['date', 'description', 'amount', 'type', 'label', 'description_length', 'body'],
        num_rows: 3575
    })
})

In [35]:
print(dataset_1.get("train")[0])

### Tokenize Data

In [36]:
from transformers import AutoTokenizer


MODEL_CHECKPOINT: str = "bert-base-cased"
tokenizer: AutoTokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)


def tokenize_function(examples: dict[str, Any]) -> dict[str, Any]:
    """This is used to tokenize the text. It returns a dict containing the
    input_ids, token_type_ids and attention_mask."""
    return tokenizer(examples.get("body"), truncation=True)

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

In [37]:
FEATURES: list[str] = [
    "date",
    "description",
    "amount",
    "type",
    "description_length",
    "body",
]

tokenized_dataset: DatasetDict = dataset_1.map(
    tokenize_function,
    batched=True,
    remove_columns=FEATURES,
)
tokenized_dataset

Map:   0%|          | 0/37659 [00:00<?, ? examples/s]

Map:   0%|          | 0/3281 [00:00<?, ? examples/s]

Map:   0%|          | 0/3575 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 37659
    })
    validation: Dataset({
        features: ['label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 3281
    })
    test: Dataset({
        features: ['label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 3575
    })
})

In [38]:
print(tokenized_dataset.get("train")[0])

In [39]:
# Number of unique labels
N_LABELS: int = len(tokenized_dataset.get("train").unique("label"))

N_LABELS

16

In [40]:
from transformers import DataCollatorWithPadding


data_collator: DataCollatorWithPadding = DataCollatorWithPadding(tokenizer=tokenizer)

In [41]:
# Apply the data collator
batch: torch.Tensor = data_collator([tokenized_dataset["train"][i] for i in range(2)])

print(batch)

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [42]:
# The 2nd idx has been padded with 0s
for row in batch.get("input_ids"):
    print(row)

In [43]:
from sklearn.metrics import recall_score, accuracy_score
from datasets import load_metric


metric = load_metric("seqeval")


def compute_metrics(eval_preds: tuple) -> dict[str, Any]:
    """This is used to calculate the evaluation metrics."""
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    accuracy = np.mean(predictions == labels)

    return {
        "accuracy": accuracy
    }

Downloading builder script:   0%|          | 0.00/2.47k [00:00<?, ?B/s]

In [44]:
labels: list[int] = tokenized_dataset.get("train").unique("label")
predictions: list[str] = labels.copy()

# Simulate prediction
predictions[0] = 2
predictions[2] = 3
predictions[4] = 6
predictions[5] = 0
print(metric.compute(predictions=[predictions], references=[labels]))

### Define Model

In [45]:
from transformers import AutoModelForSequenceClassification


# Now we can just pass them to the AutoModelForTokenClassification.from_pretrained() method,
# and they will be set in the model’s configuration and then properly saved and uploaded to the Hub:
model: AutoModelForSequenceClassification = AutoModelForSequenceClassification.from_pretrained(
    MODEL_CHECKPOINT, num_labels=N_LABELS, id2label=id2label, label2id=label2id
)

Downloading model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [46]:
from huggingface_hub import notebook_login


# Login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [47]:
from transformers import TrainingArguments


OUTPUT_DIR: str = "bert-finetuned-sequence-classification"
STRATEGY: str = "epoch"
LEARNING_RATE: float = 2e-5
NUM_EPOCHS: int = 3
WEIGHT_DECAY: float = 0.01


args: TrainingArguments = TrainingArguments(
    OUTPUT_DIR,
    evaluation_strategy=STRATEGY,
    save_strategy=STRATEGY,
    learning_rate=LEARNING_RATE,
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size= 16,
    weight_decay=WEIGHT_DECAY,
    push_to_hub=True,
)

In [48]:
from transformers import Trainer


# Train the model!
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_dataset.get("train"),
    eval_dataset=tokenized_dataset.get("validation"),
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4913,0.475989,0.809205
2,0.4004,0.418169,0.802499
3,0.3576,0.396024,0.826882


TrainOutput(global_step=7062, training_loss=0.49188894862258986, metrics={'train_runtime': 1296.528, 'train_samples_per_second': 87.138, 'train_steps_per_second': 5.447, 'total_flos': 3049683179721024.0, 'train_loss': 0.49188894862258986, 'epoch': 3.0})

In [49]:
trainer.push_to_hub(commit_message="Training complete")

'https://huggingface.co/chineidu/bert-finetuned-sequence-classification/tree/main/'

In [50]:
from transformers import pipeline


TASK: str = "text-classification"
MODEL_CHECKPOINT: str = "chineidu/bert-finetuned-sequence-classification"
sequence_classifier: pipeline = pipeline(task=TASK, model=MODEL_CHECKPOINT)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.71k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/669k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [52]:
text: str = "2023-10-10 TRF/Loan/FRM JOHN DOE STEVE TO (IBADAN) FRANK LAMPS INTEREST FREE MONATAN - 301	57553.75 Debit"
print(sequence_classifier(text))

In [53]:
text: str = "2023-02-25 feb/sal 125000.00	Credit"
print(sequence_classifier(text))

In [56]:
text: str ="2020-01-05 POS/WEB PMT BETWAY NG/1853277904 PSTK LANG	20000.00 Credit"
print(sequence_classifier(text))

In [60]:
df_original.to_pandas().head(10)

Unnamed: 0,date,description,amount,type,label
0,2021-11-16,MOBILE BANKING,3100.0,Debit,spend.mobileSpend
1,2022-04-12,POS/WEB PMT JENDOL SUPERSTORES LA NG,3220.0,Debit,spend.shopping
2,2022-08-27,Paystack/antamvayLoan Me,6200.0,Credit,behavioural.loanAmount
3,2022-06-20,Amt includes COMM & VAT/USSD/OMOTAYO DUMOYE ALANI,2621.5,Debit,spend.ussdTransactions
4,2022-03-19,POS/WEB PMT T OVERCOMER STORES 002043 2TEPBNG6 NG,10000.0,Debit,spend.shopping
5,2022-03-27,"ATM WDL @10352022 ODYSSEY PLAZA, ATM 2 ONIRU, LEKKI. NG REF:606104/000708805530",10000.0,Debit,spend.atmSpend
6,2022-07-01,CSH DEP 4 ISAAC @UMUAHIA BRANCH,110000.0,Credit,behavioural.loanAmount
7,2021-10-02,POS/WEB PURCHASE ROU,20000.0,Debit,spend.webSpend
8,2022-05-27,TRF/null/FRM DANIEL CHINWEUBA NDIMELE TO MBAMA THANKGOD N,100000.0,Credit,income.gigWorker
9,2022-03-14,Onyinyechukwu Iheoma Mbaegbulam/Transfer from to MBAEGBULAM I.OYINYE,78500.0,Credit,income.salaryEarner


In [63]:
print(id2label)

In [68]:
X_test = X_test.to_pandas()
X_test["label"] = X_test["label"].map(id2label)

X_test.head()

Unnamed: 0,date,description,amount,type,label
0,2022-04-28,BILLS/ BET9JA/2546527,10107.5,Debit,spend.gambling
1,2022-04-06,WONDERS SHAANUFUN/NIP TFR FROM WONDERS SHAANUFUN,1000000.0,Credit,income.gigWorker
2,2022-04-18,OKash/Transfer from OKash to EMMANUEL OLUFEMI OMIDIJI,42000.0,Credit,behavioural.loanAmount
3,2022-06-12,AIRTIME TOPUP FOR +2347038557928,50.0,Debit,spend.airtime
4,2022-08-22,TRF/null/FRM ABAYOMI EMMANUEL OLOWOOKERE TO BABATUNDE OLUWABUNMI,33750.0,Credit,income.salaryEarner


In [75]:
sample_df: pd.DataFrame = (
     X_test.loc[X_test["label"]
    .isin(["income.salaryEarner", "income.gigWorker", "behavioural.loanAmount"])]
    .sample(n=50, random_state=13)[["description", "label"]]
  )


sample_df.head()

Unnamed: 0,description,label
1729,OLUWATOSIN JOSHUA AKINFOLARIN/MOBILE/UNION Transfer from OLUWATOSIN JOSHUA AKINFOLARIN - NA,income.gigWorker
3143,NIP SYSTEM,income.gigWorker
652,"OWEN QUINCY EKPEOWEN, QUINCY EKPE/via GTWORLD",income.gigWorker
1492,INAGBOR BEATRICE OMOZE/MOB/ARUNA JAFARU/UTO/11406626551/Loan,behavioural.loanAmount
1728,CSH DEP 287 ABAAJI JOYCE @IKORODU 2 BRANCH,behavioural.loanAmount


In [None]:
text_samples: list[str] = (
                            X_test.loc[X_test["label"]
                            .isin(["income.salaryEarner", "income.gigWorker", "behavioural.loanAmount"])]
                            .sample(n=30, random_state=13)["description"]
                            .to_list()
                            )
text_samples

In [73]:
(
     X_test.loc[X_test["label"]
    .isin(["income.salaryEarner", "income.gigWorker", "behavioural.loanAmount"])]
    .sample(n=30, random_state=13)[["description", "label"]]
)

Unnamed: 0,description,label
1729,OLUWATOSIN JOSHUA AKINFOLARIN/MOBILE/UNION Transfer from OLUWATOSIN JOSHUA AKINFOLARIN - NA,income.gigWorker
3143,NIP SYSTEM,income.gigWorker
652,"OWEN QUINCY EKPEOWEN, QUINCY EKPE/via GTWORLD",income.gigWorker
1492,INAGBOR BEATRICE OMOZE/MOB/ARUNA JAFARU/UTO/11406626551/Loan,behavioural.loanAmount
1728,CSH DEP 287 ABAAJI JOYCE @IKORODU 2 BRANCH,behavioural.loanAmount
922,AZEEZ OLAREWAJU SAHEED/TRF FRM AZEEZ OLAREWAJU SAHEED,income.salaryEarner
2807,LIFE CARE CLINIC & MATERNITY HOME/Agent shola,income.salaryEarner
281,"CHQ DEP 000000018363 BO GROOMING PEOPLE YABA BRANCH, ONDO IFO FELIX ESTHER @ADEYEMI COLLEGE OF EDUCATION",behavioural.loanAmount
2953,ONYEMA IHUOMA ADLYNN/770 TRANSFER FROM ONYEMA IHUOMA ADLYNN **0,income.salaryEarner
2569,TRF/null/FRM ALI SAUKI PHONE ACCESSORIES ENTERPRISES TO MUSA WAKILI OSENI,income.salaryEarner


In [72]:
print(sequence_classifier(text_samples))

In [None]:
texts: list[str] = ["TRF/Loan payment by Oladosu Taiwo Mariam/FRM OLADOSU MARIAM TO MARIAM TAIWO OLADOSU- 305	96453.750000	D",
                    "Amt includes COMM & VAT/USSD/FATIMAH ABIOLA ABDULWAHEED	10021.500000	D",
                    "POS/WEB PMT BRANCH INT/1828853631 PSTK LANG	200000.000000	D",
                    "TRF/Tr/FRM ADEBAYO ADENEKAN TO OLUWASEGUN M CLEMENT - 032	2510.750000	D",
                    "ATM WDL @10441796 KARU BRANCH ABUJA, NIGERING REF:741746/235312741746	40000.000000	D",
                    ]
print(sequence_classifier(texts))