In [8]:
import pandas as pd
from faker import Faker
import re

faker = Faker()

addresses = pd.Series(faker.address() for _ in range(100_000))

In [12]:
(
    addresses
    .map( lambda x: re.search(r"\w{2} \d{5}", x).group() )
    .str.split(" ", expand=True)
    .rename(columns={0: "state", 1: "zip"})
)

Unnamed: 0,state,zip
0,RI,27892
1,UT,66374
2,ME,90070
3,NE,75659
4,MS,14189
...,...,...
99995,NY,31602
99996,AS,42843
99997,PR,24591
99998,MD,68949


In [13]:
## better way:
# ?P is a named group
addresses.str.extract(r"(?P<state>\w{2}) (?P<zip>\d{5})")

Unnamed: 0,state,zip
0,RI,27892
1,UT,66374
2,ME,90070
3,NE,75659
4,MS,14189
...,...,...
99995,NY,31602
99996,AS,42843
99997,PR,24591
99998,MD,68949


In [53]:
data = {
    "office_serial_number": [
        "US101-001",
        "UK201-006",
        "CA301-003",
        "AU401-004",
        "UK202-005",
        "IN302-006",
        "IR102-007",
        "AU402-006",
        "SL303-009",
        "UK203-010",
        "FR403-011",
        "US103-012",
    ]
}

df = pd.DataFrame(data)


filt = df['office_serial_number'].str.contains("^(UK|IN|AU)\d{3}-006")
df[filt]

# The above works but to suppress the warning:
# UserWarning: This pattern is interpreted as a regular expression, and has match groups. 

# 1) # use str.match() instead of str.contains()

# 2) re.escape() escape all special characters
# df['office_serial_number'].str.contains(re.escape("^(UK|IN|AU)\d{3}-006"))

# 3) change (X|Y|Z) from a capture group to match group (?:X|Y|Z)
# df['office_serial_number'].str.contains("^(?:=UK|IN|AU)\d{3}-006")

0     False
1     False
2     False
3     False
4     False
5      True
6     False
7      True
8     False
9     False
10    False
11    False
Name: office_serial_number, dtype: bool