In [2]:
import numpy as np
import snorkel
from snorkel.labeling import labeling_function
from snorkel.labeling import LabelingFunction
from snorkel.labeling import PandasLFApplier
from snorkel.labeling import LFAnalysis
import re
from snorkel.labeling.model import MajorityLabelVoter
import json
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [22]:
%load_ext autoreload
%autoreload 2
import label_improve as li

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
# Loading the data
dataset_name = "banking77"


def _load_json(path):
    """Parse the JSON file at `path`, closing the file handle once it is read."""
    with open(path, "r") as handle:
        return json.load(handle)


# label.json is loaded as a mapping; label_to_idx is its inverse
# (values of idx_to_label become keys, keys become values).
idx_to_label = _load_json(f"../weak_datasets/{dataset_name}/label.json")
label_to_idx = {label: idx for idx, label in idx_to_label.items()}

# Each split is parsed from JSON and converted to a DataFrame by li.data_to_df.
valid_df = li.data_to_df(_load_json(f"../weak_datasets/{dataset_name}/valid.json"))
train_df = li.data_to_df(_load_json(f"../weak_datasets/{dataset_name}/train.json"))
test_df = li.data_to_df(_load_json(f"../weak_datasets/{dataset_name}/test.json"))

# Sample a dev set to help seed ideas for LFs
# (fixed random_state so the same 250 rows are drawn on every run)
dev_df = train_df.sample(250, random_state=123)

In [24]:
# Get rid of the "weak_labels" column.
# errors="ignore" keeps this cell idempotent: re-running it after the column
# has already been dropped is a no-op instead of raising KeyError.
dev_df = dev_df.drop(columns=["weak_labels"], errors="ignore")
dev_df.head()


Unnamed: 0,text,label
1603,How to unblock my PIN?,49
3159,I need to figure out what these charges are on...,16
2213,"I got some cash at an ATM earlier, but now app...",75
498,I think I was charged extra on my payment on l...,17
1038,Can I auto top if I'm low on funds?,4


In [25]:
# Save the dev sample to csv in the current working directory.
# index=False omits the DataFrame index from the file.
dev_df.to_csv("./dev.csv", index=False)

In [26]:
# Keyword lists used to seed labeling functions: each key is an intent label
# name and each value is a list of lowercase keywords / short phrases.
#
# A later cell looks every key up in `label_to_idx` (built from label.json),
# so a key that is missing there raises KeyError. The unusual spellings
# "Refund_not_showing_up" (capital R) and "reverted_card_payment?" (trailing
# "?") are presumably the dataset's own label names -- verify against
# label.json before normalizing them.
#
# NOTE(review): many keywords (e.g. "card", "transfer", "top up") appear under
# several intents, so the same keyword is associated with more than one label.
keywords = {
    "activate_my_card": ["activate", "card", "activation"],
    "age_limit": ["age", "limit", "restriction", "requirement"],
    "apple_pay_or_google_pay": ["apple pay", "google pay", "mobile payment", "wallet", "pay"],
    "atm_support": ["atm", "support", "cash withdrawal", "machine"],
    "automatic_top_up": ["automatic", "top up", "auto", "funds", "recharge"],
    "balance_not_updated_after_bank_transfer": ["balance", "updated", "bank transfer", "not", "pending"],
    "balance_not_updated_after_cheque_or_cash_deposit": ["balance", "updated", "cheque", "cash deposit", "pending"],
    "beneficiary_not_allowed": ["beneficiary", "not allowed", "transfer", "restriction"],
    "cancel_transfer": ["cancel", "transfer", "stop", "abort", "revoke"],
    "card_about_to_expire": ["card", "expire", "expiry", "about", "soon"],
    "card_acceptance": ["card", "acceptance", "accepted", "merchant", "declined"],
    "card_arrival": ["card", "arrival", "arrived", "delivered", "shipping"],
    "card_delivery_estimate": ["card", "delivery", "estimate", "arrival", "time"],
    "card_linking": ["card", "linking", "connect", "associate"],
    "card_not_working": ["card", "not working", "malfunction", "issue", "problem"],
    "card_payment_fee_charged": ["card", "payment", "fee", "charged", "cost"],
    "card_payment_not_recognised": ["card", "payment", "not recognised", "unknown", "unauthorized"],
    "card_payment_wrong_exchange_rate": ["card", "payment", "wrong exchange rate", "incorrect", "rate"],
    "card_swallowed": ["card", "swallowed", "atm", "machine", "lost"],
    "cash_withdrawal_charge": ["cash", "withdrawal", "charge", "fee", "cost"],
    "cash_withdrawal_not_recognised": ["cash", "withdrawal", "not recognised", "unknown", "unauthorized"],
    "change_pin": ["change", "pin", "modify", "update", "password"],
    "compromised_card": ["compromised", "card", "fraud", "stolen", "security"],
    "contactless_not_working": ["contactless", "not working", "issue", "problem", "payment"],
    "country_support": ["country", "support", "available", "region", "location"],
    "declined_card_payment": ["declined", "card payment", "rejected", "failed"],
    "declined_cash_withdrawal": ["declined", "cash withdrawal", "rejected", "failed"],
    "declined_transfer": ["declined", "transfer", "rejected", "failed"],
    "direct_debit_payment_not_recognised": ["direct debit", "payment", "not recognised", "unknown", "unauthorized"],
    "disposable_card_limits": ["disposable", "card", "limits", "restriction"],
    "edit_personal_details": ["edit", "personal details", "update", "modify", "information"],
    "exchange_charge": ["exchange", "charge", "fee", "cost"],
    "exchange_rate": ["exchange", "rate", "currency", "conversion"],
    "exchange_via_app": ["exchange", "app", "mobile", "application", "convert"],
    "extra_charge_on_statement": ["extra", "charge", "statement", "fee", "unexpected"],
    "failed_transfer": ["failed", "transfer", "error", "unsuccessful", "issue"],
    "fiat_currency_support": ["fiat", "currency", "support", "available", "type"],
    "get_disposable_virtual_card": ["get", "disposable", "virtual card", "obtain"],
    "get_physical_card": ["get", "physical card", "obtain", "order"],
    "getting_spare_card": ["getting", "spare card", "extra", "additional"],
    "getting_virtual_card": ["getting", "virtual card", "obtain", "order"],
    "lost_or_stolen_card": ["lost", "stolen", "card", "missing", "fraud"],
    "lost_or_stolen_phone": ["lost", "stolen", "phone", "mobile", "missing"],
    "order_physical_card": ["order", "physical card", "request", "purchase"],
    "passcode_forgotten": ["passcode", "forgotten", "reset", "password"],
    "pending_card_payment": ["pending", "card payment", "processing", "waiting"],
    "pending_cash_withdrawal": ["pending", "cash withdrawal", "processing", "waiting"],
    "pending_top_up": ["pending", "top up", "processing", "waiting"],
    "pending_transfer": ["pending", "transfer", "processing", "waiting"],
    "pin_blocked": ["pin", "blocked", "locked", "disabled"],
    "receiving_money": ["receiving", "money", "funds", "incoming"],
    "Refund_not_showing_up": ["refund", "not showing up", "missing", "absent"],
    "request_refund": ["request", "refund", "ask", "demand"],
    "reverted_card_payment?": ["reverted", "card payment", "reversed", "cancelled"],
    "supported_cards_and_currencies": ["supported", "cards", "currencies", "available", "types"],
    "terminate_account": ["terminate", "account", "close", "delete"],
    "top_up_by_bank_transfer_charge": ["top up", "bank transfer", "charge", "fee"],
    "top_up_by_card_charge": ["top up", "card", "charge", "fee"],
    "top_up_by_cash_or_cheque": ["top up", "cash", "cheque", "deposit"],
    "top_up_failed": ["top up", "failed", "error", "unsuccessful"],
    "top_up_limits": ["top up", "limits", "restriction"],
    "top_up_reverted": ["top up", "reverted", "reversed", "cancelled"],
    "topping_up_by_card": ["topping up", "card", "funds", "recharge"],
    "transaction_charged_twice": ["transaction", "charged twice", "duplicate", "double"],
    "transfer_fee_charged": ["transfer", "fee", "charged", "cost"],
    "transfer_into_account": ["transfer", "into account", "deposit", "funds"],
    "transfer_not_received_by_recipient": ["transfer", "not received", "recipient", "missing"],
    "transfer_timing": ["transfer", "timing", "delay", "time"],
    "unable_to_verify_identity": ["unable", "verify identity", "problem", "issue"],
    "verify_my_identity": ["verify", "my identity", "confirm", "authenticate"],
    "verify_source_of_funds": ["verify", "source of funds", "confirm", "origin"],
    "verify_top_up": ["verify", "top up", "confirm", "authenticate"],
    "virtual_card_not_working": ["virtual card", "not working", "issue", "problem"],
    "visa_or_mastercard": ["visa", "mastercard", "card", "type"],
    "why_verify_identity": ["why", "verify identity", "reason", "purpose"],
    "wrong_amount_of_cash_received": ["wrong amount", "cash received", "incorrect", "error"],
    "wrong_exchange_rate_for_cash_withdrawal": ["wrong exchange rate", "cash withdrawal", "incorrect", "rate"]
}

In [27]:
# Baseline: evaluate the weak labels that ship with the test split.
# NOTE(review): 77 is presumably the number of banking77 intent classes --
# confirm against the helper's signature in label_improve.
li.analysis_LFs_with_weak_labels(test_df, 77)

Test Coverage: 0.5756493506493506
acuracy for the not abstains
0.8147651006711409
acuracy for all
0.39415584415584415


In [28]:
# Re-key the keyword lists by integer label index: each label name is looked
# up in label_to_idx and the result is cast to int.
keywords_by_idx = dict(
    (int(label_to_idx[label_name]), label_keywords)
    for label_name, label_keywords in keywords.items()
)

# Turn the keyword table into labeling functions via the project helper.
lfs = li.keywords_to_LFs(keywords_by_idx)

# Evaluate them on the test split; the returned summary table is the cell output.
# NOTE(review): 77 is presumably the number of intent classes -- confirm.
li.analysis_LFs(lfs, test_df, 77)

  1%|          | 36/3080 [00:00<00:08, 356.09it/s]

100%|██████████| 3080/3080 [00:03<00:00, 814.00it/s]


Test Coverage: 0.9769480519480519
acuracy for the not abstains
0.6246081504702194
acuracy for all
0.25876623376623376


Unnamed: 0,j,Polarity,Coverage,Overlaps,Conflicts,Correct,Incorrect,Emp. Acc.,Conflict Ratio
lf_activate,0,[0],0.010714,0.010714,0.010714,24,9,0.727273,1.000000
lf_card,1,[0],0.325974,0.325974,0.325974,39,965,0.038845,1.000000
lf_activation,2,[0],0.000974,0.000974,0.000974,3,0,1.000000,1.000000
lf_age,3,[1],0.010714,0.008766,0.008442,12,21,0.363636,0.787879
lf_limit,4,[1],0.015260,0.010390,0.010065,1,46,0.021277,0.659574
...,...,...,...,...,...,...,...,...,...
lf_error_3,330,[75],0.003571,0.003571,0.003571,0,11,0.000000,1.000000
lf_wrong exchange rate_2,331,[76],0.001948,0.001948,0.001948,3,3,0.500000,1.000000
lf_cash withdrawal_4,332,[76],0.015584,0.015584,0.015584,6,42,0.125000,1.000000
lf_incorrect_3,333,[76],0.003896,0.003896,0.003896,0,12,0.000000,1.000000
