In [1]:
import polars as pl
import polars_istr as istr

# IBAN

In [2]:
# First str does not have a valid country code. So not an iban. Second and third are valid.
# Last one has invalid checksum
df = pl.DataFrame({
    "iban": ["AA110011123Z5678", "DE44500105175407324931", "AD1200012030200359100100", "MR0000020001010000123456754"]
})
df.head()

iban
str
"""AA110011123Z5678"""
"""DE44500105175407324931"""
"""AD1200012030200359100100"""
"""MR0000020001010000123456754"""


In [3]:
df.select(
    istr.iban_country_code("iban").alias("country_code"),
    istr.iban_check("iban").alias("reason"),
    istr.iban_is_valid("iban").alias("is_valid"),
    istr.iban_bban("iban").alias("bban"),
    istr.iban_bank_id("iban").alias("bank_id"),
    istr.iban_branch_id("iban").alias("branch_id"),
) 

country_code,reason,is_valid,bban,bank_id,branch_id
str,str,bool,str,str,str
,"""Invalid country code""",False,,,
"""DE""","""ok""",True,"""500105175407324931""","""50010517""",
"""AD""","""ok""",True,"""00012030200359100100""","""0001""","""2030"""
,"""Invalid checksum""",False,,,


In [4]:
df.select(
    istr.iban_extract_all("iban").alias("ib")
).unnest("ib")

country_code,check_digits,bban,bank_id,branch_id
str,str,str,str,str
,,,,
"""DE""","""44""","""500105175407324931""","""50010517""",
"""AD""","""12""","""00012030200359100100""","""0001""","""2030"""
,,,,


# ISIN

In [5]:
df = pl.DataFrame(
    {
        "isin": [
            "US0378331005", # AAPL
            "US0378331008", # AAPL w/ bad check digit
            "US037833100", # AAPL w/o check digit
            "CA00206RGB20", # Canadian
            "XS1550212416", # Other
            None,
        ]
    }
)
df.head()

isin
str
"""US0378331005"""
"""US0378331008"""
"""US037833100"""
"""CA00206RGB20"""
"""XS1550212416"""


In [6]:
df.select(
    istr.isin_country_code("isin").alias("country_code"),
    istr.isin_check_digit("isin").alias("check_digit"),
    istr.isin_security_id("isin").alias("security_id"),
    istr.isin_is_valid("isin").alias("is_valid"),
)

country_code,check_digit,security_id,is_valid
str,str,str,bool
"""US""","""5""","""037833100""",True
,,,False
,,,False
"""CA""","""0""","""00206RGB2""",True
"""XS""","""6""","""155021241""",True
,,,False


# URL

In [7]:
df = pl.DataFrame(
    {
        "url": [
            "https://example.com/data.csv#row=4",
            "google.com", 
            "ww.google.com", 
            "abc123@email.com", 
            "https://127.0.0.1/", 
            "https://test.com/",
            "file:///tmp/foo",
            "https://example.com/products?page=2&sort=desc",
            None,
        ]
    }
)

In [8]:
df.select(
    istr.url_host("url").alias("host"),
    istr.url_domain("url").alias("domain"),
    istr.url_fragment("url").alias("fragment"),
    istr.url_path("url").alias("path"),
    istr.url_query("url").alias("query"),
    istr.url_check("url").alias("check"),
    istr.url_is_valid("url").alias("is_valid"),
    istr.url_is_special("url").alias("is_special"),
)

host,domain,fragment,path,query,check,is_valid,is_special
str,str,str,str,str,str,bool,bool
"""example.com""","""example.com""","""row=4""","""/data.csv""",,"""ok""",True,True
,,,,,"""relative URL without a base""",False,
,,,,,"""relative URL without a base""",False,
,,,,,"""relative URL without a base""",False,
"""127.0.0.1""",,,"""/""",,"""ok""",True,True
"""test.com""","""test.com""",,"""/""",,"""ok""",True,True
,,,"""/tmp/foo""",,"""ok""",True,True
"""example.com""","""example.com""",,"""/products""","""page=2&sort=desc""","""ok""",True,True
,,,,,,,


# CUSIP

In [9]:
df = pl.DataFrame({
    "cusip": [
        "303075105",  # regular cusip (FactSet - Common Stock)
        "30307510",  # regular cusip ex. check digit
        "G0052B105",  # regular CINS (Abingdon Capital PLC - Shares)
        "HELLOWORLD",  # Invalid
    ]
})

In [10]:
df.select(
        istr.cusip_issue_num("cusip").alias("issue_num"),
        istr.cusip_issuer_num("cusip").alias("issuer_num"),
        istr.cusip_check_digit("cusip").alias("check_digit"),
        istr.cusip_country_code("cusip").alias("country_code"),
        istr.cusip_payload("cusip").alias("payload"),
        istr.cusip_is_private_issue("cusip").alias("is_private_issue"),
        istr.cusip_has_private_issuer("cusip").alias("has_private_issuer"),
        istr.cusip_is_private_use("cusip").alias("is_private_use"),
        istr.cusip_is_cins("cusip").alias("is_cins"),
        istr.cusip_is_cins_base("cusip").alias("is_cins_base"),
        istr.cusip_is_cins_extended("cusip").alias("is_cins_extended"),
    )

issue_num,issuer_num,check_digit,country_code,payload,is_private_issue,has_private_issuer,is_private_use,is_cins,is_cins_base,is_cins_extended
str,str,str,str,str,bool,bool,bool,bool,bool,bool
"""10""","""303075""","""5""",,"""30307510""",False,False,False,False,,
,,,,,,,,,,
"""10""","""0052B""","""5""","""G""","""G0052B10""",False,False,False,True,True,False
,,,,,,,,,,
