## Data Preparation For Sentence Classification

In [1]:
# Built-in library
import json
import logging
import re
import warnings
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

# Standard imports
import numpy as np
import pandas as pd
from rich import print
import torch

# Visualization
import matplotlib.pyplot as plt


# Pandas display settings: render up to 1,000 rows / 1,000 columns and up to
# 600 characters per cell before truncating.
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 600

# NOTE(review): this silences every warning for the rest of the session,
# including pandas' chained-assignment warnings — consider narrowing the filter.
warnings.filterwarnings("ignore")

# Black code formatter (Optional)
# NOTE(review): presumably needs the `lab_black` extension to be installed,
# otherwise this line would fail — confirm.
%load_ext lab_black

# Auto-reload imported modules before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
def set_up_logger(delim: str = "::") -> Any:
    """Configure basic INFO-level logging and hand back this module's logger.

    Each record is laid out as: [LEVEL] <delim> timestamp <delim> message.
    """
    record_layout = f"[%(levelname)s] {delim} %(asctime)s {delim} %(message)s"
    logging.basicConfig(format=record_layout, level=logging.INFO)
    return logging.getLogger(__name__)


# Notebook-wide logger shared by the helper functions below.
logger = set_up_logger()


def load_data(*, filename: str, sep: str = ",") -> pd.DataFrame:
    """Load a tabular file into a DataFrame and log its shape.

    NB: Supported formats are 'csv' and 'parquet'. The reader is chosen from
    the file extension, compared case-insensitively (so 'data.CSV' is read as
    CSV). Any other extension is handed to `pd.read_parquet`.

    Params:
    -------
        filename (str): The filepath.
        sep (str, default=","): The separator used for CSV files
            (e.g. a comma or a tab). Ignored for parquet files.

    Returns:
    --------
        data (pd.DataFrame): The loaded dataframe.
    """
    # Only the text after the last dot decides which reader is used.
    extension: str = str(filename).rsplit(".", maxsplit=1)[-1].lower()
    if extension == "csv":
        data = pd.read_csv(filename, sep=sep)
    else:
        data = pd.read_parquet(filename)

    # Fetch the logger by name instead of relying on a global being defined
    # first; `getLogger(__name__)` returns the same object `set_up_logger` does.
    logging.getLogger(__name__).info(f"Shape of data: {data.shape}\n")
    return data

### Prepare Data

```text
Create:
- Training data
- Validation data
- Test data

Labels
------
salary
gambling
loan
airtime
ussdTransactions
flightRisk
savingsAndInvestments (listed here, but no cell below extracts it)
spendOnTransfers
entertainment
spend
  - posSpend
  - atmSpend
  - mobileSpend
  - webSpend
bills
```

In [3]:
fp: str = "../../data/trans_TAGS_24.parquet"
N: int = 500_000

# Draw a reproducible random subset of N rows and renumber them from 0.
df: pd.DataFrame = load_data(filename=fp)
df = df.sample(n=N, random_state=123)
df = df.reset_index(drop=True)

df.head(n=5)

[INFO] :: 2023-10-27 23:41:44,243 :: Shape of data: (2355234, 7)



Unnamed: 0,customer_id,nuban,date,description,amount,type,tags
0,55844,1,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]"
1,56822,1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]"
2,56251,1,2022-01-14,FGN ELECTRONIC MONEY TRANSFER LEVY,100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.spendOnTransfers, transactionpattern.transactionLessThan10000, transactionpattern.mostFrequentBalanceRange]"
3,55465,1,2022-02-19,Quantum USSD,20026.880859,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.ussdTransactions, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]"
4,56980,1,2022-05-28,POS/WEB PMT BETWAY NG/1853277904 PSTK LANG,10000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.gambling, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentTransactionRange, transactionpattern.mostFrequentBalanceRange]"


In [4]:
def extract_tags(data: pd.DataFrame, pattern: str) -> pd.Series:
    """Pull the first regex match out of each row's `tags` value.

    Params:
    -------
        data (pd.DataFrame): Frame with a `tags` column. Each value is cast to
            a string before matching; `data` itself is not modified.
        pattern (str): Regular expression with at least one capture group.
            Matching is case-insensitive and only the first group is returned.

    Returns:
    --------
        result (pd.Series): One value per row, cast to `str` and indexed like
            `data`. Rows without a match come out as "nan", which is the
            sentinel `extract_label` filters on.
    """
    # `astype` already returns a new Series, so no defensive copy of the
    # whole frame is needed.
    matches: pd.DataFrame = (
        data["tags"].astype("str").str.extract(pat=pattern, flags=re.I)
    )
    first_group: np.ndarray = matches.iloc[:, 0].to_numpy()

    # Re-attach the caller's index so `df[col] = extract_tags(df, ...)` lines
    # up row-for-row even when `df` does not carry a 0..n-1 index.
    result: pd.Series = pd.Series(data=first_group, index=data.index).astype(str)
    return result

In [5]:
# Work on a copy so the sampled frame `df` stays untouched.
df_1: pd.DataFrame = df.copy()

# Capture "salary" plus up to 10 following word characters (e.g. "salaryEarner").
KEYWORD: str = "salary"
PATTERN: str = r"(salary\w{0,10})"
df_1[KEYWORD + "_label"] = extract_tags(pattern=PATTERN, data=df_1)

df_1.head(n=5)

Unnamed: 0,customer_id,nuban,date,description,amount,type,tags,salary_label
0,55844,1,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",
1,56822,1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",
2,56251,1,2022-01-14,FGN ELECTRONIC MONEY TRANSFER LEVY,100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.spendOnTransfers, transactionpattern.transactionLessThan10000, transactionpattern.mostFrequentBalanceRange]",
3,55465,1,2022-02-19,Quantum USSD,20026.880859,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.ussdTransactions, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",
4,56980,1,2022-05-28,POS/WEB PMT BETWAY NG/1853277904 PSTK LANG,10000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.gambling, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentTransactionRange, transactionpattern.mostFrequentBalanceRange]",


In [6]:
# Capture "loan" plus up to 10 following word characters
# (e.g. "loanRepayments", "loanAmount").
KEYWORD: str = "loan"
PATTERN: str = r"(loan\w{0,10})"
df_1[KEYWORD + "_label"] = extract_tags(pattern=PATTERN, data=df_1)

df_1.head(n=2)

Unnamed: 0,customer_id,nuban,date,description,amount,type,tags,salary_label,loan_label
0,55844,1,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,
1,56822,1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,


In [7]:
# Capture "gambling" plus up to 5 following word characters.
KEYWORD: str = "gambling"
PATTERN: str = r"(gambling\w{0,5})"
df_1[KEYWORD + "_label"] = extract_tags(pattern=PATTERN, data=df_1)

df_1.head(n=2)

Unnamed: 0,customer_id,nuban,date,description,amount,type,tags,salary_label,loan_label,gambling_label
0,55844,1,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,
1,56822,1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,


In [8]:
# Capture "airtime" with up to 5 word characters on either side.
KEYWORD: str = "airtime"
PATTERN: str = r"(\w{0,5}airtime\w{0,5})"
df_1[KEYWORD + "_label"] = extract_tags(pattern=PATTERN, data=df_1)

df_1.head(n=2)

Unnamed: 0,customer_id,nuban,date,description,amount,type,tags,salary_label,loan_label,gambling_label,airtime_label
0,55844,1,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,
1,56822,1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,


In [9]:
# Only the bare token "ussd" is captured, so the stored label is "ussd"
# even though the column is named after the full tag.
KEYWORD: str = "ussdTransactions"
PATTERN: str = r"(ussd)"
df_1[KEYWORD + "_label"] = extract_tags(pattern=PATTERN, data=df_1)

df_1.head(n=2)

Unnamed: 0,customer_id,nuban,date,description,amount,type,tags,salary_label,loan_label,gambling_label,airtime_label,ussdTransactions_label
0,55844,1,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,
1,56822,1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,


In [10]:
# Capture "flightRisk" with up to 10 word characters on either side.
KEYWORD: str = "flightRisk"
PATTERN: str = r"(\w{0,10}flightRisk\w{0,10})"
df_1[KEYWORD + "_label"] = extract_tags(pattern=PATTERN, data=df_1)

df_1.head(n=2)

Unnamed: 0,customer_id,nuban,date,description,amount,type,tags,salary_label,loan_label,gambling_label,airtime_label,ussdTransactions_label,flightRisk_label
0,55844,1,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,
1,56822,1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,


In [11]:
# Capture "spendOnTransfers" with up to 10 word characters on either side.
KEYWORD: str = "spendOnTransfers"
PATTERN: str = r"(\w{0,10}spendOnTransfers\w{0,10})"
df_1[KEYWORD + "_label"] = extract_tags(pattern=PATTERN, data=df_1)

df_1.head(n=2)

Unnamed: 0,customer_id,nuban,date,description,amount,type,tags,salary_label,loan_label,gambling_label,airtime_label,ussdTransactions_label,flightRisk_label,spendOnTransfers_label
0,55844,1,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,,
1,56822,1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,,


In [12]:
# Capture "entertainment" plus up to 10 following word characters.
KEYWORD: str = "entertainment"
PATTERN: str = r"(entertainment\w{0,10})"
df_1[KEYWORD + "_label"] = extract_tags(pattern=PATTERN, data=df_1)

df_1.head(n=2)

Unnamed: 0,customer_id,nuban,date,description,amount,type,tags,salary_label,loan_label,gambling_label,airtime_label,ussdTransactions_label,flightRisk_label,spendOnTransfers_label,entertainment_label
0,55844,1,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,,,
1,56822,1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,,,


In [13]:
# Capture up to 40 word characters followed by "spend".
# NOTE(review): `\w` cannot cross the "." separator, so a tag such as
# "spend.posSpend" already matches at its leading "spend" and the label comes
# out as plain "spend" (see the rows displayed below). If the sub-category
# (posSpend, webSpend, ...) was the intent, this pattern needs revisiting.
KEYWORD: str = "spend"
PATTERN: str = r"(\w{0,40}spend)"
df_1[KEYWORD + "_label"] = extract_tags(pattern=PATTERN, data=df_1)

df_1.head(n=2)

Unnamed: 0,customer_id,nuban,date,description,amount,type,tags,salary_label,loan_label,gambling_label,airtime_label,ussdTransactions_label,flightRisk_label,spendOnTransfers_label,entertainment_label,spend_label
0,55844,1,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,,,,spend
1,56822,1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,,,,spend


In [14]:
# Capture "bills" with up to 10 word characters on either side.
KEYWORD: str = "bills"
PATTERN: str = r"(\w{0,10}bills\w{0,10})"
df_1[KEYWORD + "_label"] = extract_tags(pattern=PATTERN, data=df_1)

df_1.head(n=2)

Unnamed: 0,customer_id,nuban,date,description,amount,type,tags,salary_label,loan_label,gambling_label,airtime_label,ussdTransactions_label,flightRisk_label,spendOnTransfers_label,entertainment_label,spend_label,bills_label
0,55844,1,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,,,,spend,
1,56822,1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,,,,spend,


In [15]:
def extract_label(tags: list[str]) -> list[str]:
    """Drop the "nan" placeholders from a row's labels.

    Returns the remaining labels in their original order, or ["other"] when
    nothing is left.
    """
    kept: list[str] = list(filter(lambda tag: tag != "nan", tags))
    return kept if kept else ["other"]

In [16]:
# Pick the per-keyword columns by their "_label" suffix instead of by position:
# `columns[-10:]` silently selects the wrong columns when this cell is re-run
# (once `list_labels` exists) or when a keyword cell is added or removed.
cols: list[str] = [col for col in df_1.columns if str(col).endswith("_label")]

# One list per row, in column order. Building it from the underlying array
# avoids a slow row-by-row `apply`; the explicit index keeps rows aligned.
df_1["list_labels"] = pd.Series(df_1[cols].to_numpy().tolist(), index=df_1.index)

In [17]:
# Continue on an independent copy so `df_1` keeps the per-keyword columns.
df_2: pd.DataFrame = df_1.copy(deep=True)

df_2.head(n=5)

Unnamed: 0,customer_id,nuban,date,description,amount,type,tags,salary_label,loan_label,gambling_label,airtime_label,ussdTransactions_label,flightRisk_label,spendOnTransfers_label,entertainment_label,spend_label,bills_label,list_labels
0,55844,1,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,,,,spend,,"[nan, nan, nan, nan, nan, nan, nan, nan, spend, nan]"
1,56822,1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,,,,spend,,"[nan, nan, nan, nan, nan, nan, nan, nan, spend, nan]"
2,56251,1,2022-01-14,FGN ELECTRONIC MONEY TRANSFER LEVY,100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.spendOnTransfers, transactionpattern.transactionLessThan10000, transactionpattern.mostFrequentBalanceRange]",,,,,,,spendOnTransfers,,spend,,"[nan, nan, nan, nan, nan, nan, spendOnTransfers, nan, spend, nan]"
3,55465,1,2022-02-19,Quantum USSD,20026.880859,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.ussdTransactions, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,ussd,,,,spend,,"[nan, nan, nan, nan, ussd, nan, nan, nan, spend, nan]"
4,56980,1,2022-05-28,POS/WEB PMT BETWAY NG/1853277904 PSTK LANG,10000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.gambling, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentTransactionRange, transactionpattern.mostFrequentBalanceRange]",,,gambling,,,,,,spend,,"[nan, nan, gambling, nan, nan, nan, nan, nan, spend, nan]"


In [18]:
# Strip the "nan" placeholders from every row's label list
# (rows left with no label become ["other"]).
df_2["cleaned_labels"] = df_2["list_labels"].map(extract_label)

df_2.head(n=2)

Unnamed: 0,customer_id,nuban,date,description,amount,type,tags,salary_label,loan_label,gambling_label,airtime_label,ussdTransactions_label,flightRisk_label,spendOnTransfers_label,entertainment_label,spend_label,bills_label,list_labels,cleaned_labels
0,55844,1,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,,,,spend,,"[nan, nan, nan, nan, nan, nan, nan, nan, spend, nan]",[spend]
1,56822,1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,"[balance, behavioural.accountSweep, behavioural.inflowOutflowRate, debit, spend.posSpend, spend.webSpend, transactionpattern.transactionBetween10000And100000, transactionpattern.mostFrequentBalanceRange]",,,,,,,,,spend,,"[nan, nan, nan, nan, nan, nan, nan, nan, spend, nan]",[spend]


In [19]:
# Keep only the columns needed downstream. `.copy()` makes `df_2` an
# independent frame, so the later `df_2["cleaned_labels"] = ...` assignments
# write to it directly rather than to a slice of the wider frame (which is
# what triggers pandas' SettingWithCopyWarning).
IMP_COLS: list[str] = ["date", "description", "amount", "type", "cleaned_labels"]
df_2 = df_2[IMP_COLS].copy()

df_2.head()

Unnamed: 0,date,description,amount,type,cleaned_labels
0,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,[spend]
1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,[spend]
2,2022-01-14,FGN ELECTRONIC MONEY TRANSFER LEVY,100.0,D,"[spendOnTransfers, spend]"
3,2022-02-19,Quantum USSD,20026.880859,D,"[ussd, spend]"
4,2022-05-28,POS/WEB PMT BETWAY NG/1853277904 PSTK LANG,10000.0,D,"[gambling, spend]"


In [20]:
def clean_salary(tags: list[str], type: str) -> list[str]:
    """Remove the "salaryEarner" tag from debit ("D") transactions.

    Any other transaction type gets its tags back unchanged. A debit row left
    with no tags is labelled ["other"].
    """
    # Guard clause: only debit rows are filtered.
    if type != "D":
        return tags

    remaining: list[str] = [tag for tag in tags if tag != "salaryEarner"]
    return remaining if remaining else ["other"]

In [21]:
# Pair each row's labels with its transaction type and clean them together.
df_2["cleaned_labels"] = pd.Series(
    [
        clean_salary(tags=row_tags, type=row_type)
        for row_tags, row_type in zip(df_2["cleaned_labels"], df_2["type"])
    ],
    index=df_2.index,
)

In [22]:
# Collapse each list of labels to its first entry, i.e. the earliest keyword
# column that matched. Values that are no longer lists are left untouched so
# that re-running this cell does not cut the label down to its first character
# (which is what a second `.str[0]` would do).
df_2["cleaned_labels"] = df_2["cleaned_labels"].map(
    lambda labels: labels[0] if isinstance(labels, (list, tuple)) else labels
)

df_2.head()

Unnamed: 0,date,description,amount,type,cleaned_labels
0,2022-05-29,POS/WEB PMT T MODERNCOMMUNICATI 000104 2070849Y NG,20000.0,D,spend
1,2022-03-19,POS/WEB PMT NULL LA NG,13100.0,D,spend
2,2022-01-14,FGN ELECTRONIC MONEY TRANSFER LEVY,100.0,D,spendOnTransfers
3,2022-02-19,Quantum USSD,20026.880859,D,ussd
4,2022-05-28,POS/WEB PMT BETWAY NG/1853277904 PSTK LANG,10000.0,D,gambling


In [23]:
# Fold any label containing "pos" or "web" into a single "posOrWebSpend" class.
# NOTE(review): none of the labels shown in this notebook's outputs contain
# either substring, so this step looks like a no-op at present — confirm.
df_2["cleaned_labels"] = df_2["cleaned_labels"].map(
    lambda label: "posOrWebSpend" if ("pos" in label or "web" in label) else label
)

In [24]:
# Eyeball a fixed random sample of the labelled rows.
df_2.sample(random_state=8, n=20)

Unnamed: 0,date,description,amount,type,cleaned_labels
428765,2022-12-17,Amt includes COMM & VAT/USSD/ANDRIYA YUSUF D,500.0,C,other
354568,2022-02-20,TRF//FRM ABASS KOLAPO TO ADISA ABASS AKOREDE- 011,15026.879883,D,spendOnTransfers
353541,2022-09-17,AGU CHIKELUBA NEWMAN/MOB/CHIJIOKE GODWIN/UTO/12625657631/Eedc,20000.0,C,other
129905,2022-09-07,T933984 2TAB11TS LA LANG,160000.0,D,other
83894,2022-06-09,TRF/Food/FRM QUEEN OKAKA TO OHUE MARY IKAEHOTA- 058,1510.75,D,spendOnTransfers
166168,2022-08-06,TRF//FRM PRINCE UDE OGBONNAYA TO A.K.AUSCA INVESTMENT COY.LTD- 011,2010.75,D,spendOnTransfers
156584,2022-06-28,TRF//FRM ANYIKWA IFEANYI TO FAVOUR ADINDU VIVIAN - 033,2010.75,D,spendOnTransfers
115509,2022-06-09,FGN ELECTRONIC MONEY TRANSFER LEVY,50.0,D,spendOnTransfers
86146,2022-09-12,TRF//FRM ADEDEJI CHRISTOPHER TO ABOSEDE OLUWATOSIN DAHUNSI- 076,2010.75,D,spendOnTransfers
491511,2022-05-14,TRF/Loan/FRM HAMZAT ABIODUN MUBAR TO (IBADAN) AL-AMANAH INTEREST FREE MONATAN - 301,57553.75,D,loanRepayments


In [25]:
# Share of rows per final label.
df_2.loc[:, "cleaned_labels"].value_counts(normalize=True)

other               0.410990
spendOnTransfers    0.222586
spend               0.144992
airtime             0.110922
ussd                0.067908
loanRepayments      0.013126
salaryEarner        0.010738
gambling            0.010120
loanAmount          0.005898
flightRisk          0.002578
entertainment       0.000142
Name: cleaned_labels, dtype: float64

In [26]:
# Persist the labelled transactions. The output folder is created first so the
# cell also succeeds on a fresh checkout where `./my_data` does not exist yet.
OUTPUT_FP: Path = Path("./my_data/trans_data.parquet")
OUTPUT_FP.parent.mkdir(parents=True, exist_ok=True)
df_2.to_parquet(OUTPUT_FP, index=False)