# Chapter 6. Working with Strings

In [1]:
%load_ext lab_black
%load_ext sql
%sql postgresql://sql-cookbook:sql-cookbook@0.0.0.0:5432/sql-cookbook

## 6.1 Walking a String

In [2]:
%%sql
-- Emit every character of the name KING on its own row. Each regex match
-- is a one-element array, so the first element is the matched character.
select hit.chars[1] as c
from emp as e
cross join lateral regexp_matches(e.ename, '.', 'g') as hit (chars)
where e.ename = 'KING';

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
4 rows affected.


c
K
I
N
G


In [3]:
%%sql
-- Split the name KING into one row per character. The empty lookahead is
-- a zero-width pattern, so the split happens between adjacent characters.
select piece.ch as c
from emp as e
cross join lateral regexp_split_to_table(e.ename, '(?=)') as piece (ch)
where e.ename = 'KING';

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
4 rows affected.


c
K
I
N
G


## 6.2 Embedding Quotes Within String Literals

In [4]:
%%sql
-- A single quote inside a string literal is written by doubling it.
-- The last literal is a string holding exactly one quote character.
select t.qmarks
from (
    values
        ('g''day mate'),
        ('beavers'' teeth'),
        ('''')
) as t (qmarks);

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
3 rows affected.


qmarks
g'day mate
beavers' teeth
'


## 6.3 Counting the Occurrences of a Character in a String

In [5]:
%%sql
-- Count the commas by deleting every character that is not a comma
-- and measuring the length of what remains.
with src (txt) as (
    values ('10,CLARK,MANAGER')
)
select length(regexp_replace(src.txt, '[^,]', '', 'g')) as cnt
from src;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
1 rows affected.


cnt
2


In [6]:
%%sql
-- Count the commas by counting regex matches. With the g flag the
-- function yields one row per comma. The cast keeps the int result type.
select count(*)::int as cnt
from regexp_matches('10,CLARK,MANAGER', ',', 'g') as hit;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
1 rows affected.


cnt
2


## 6.4 Removing Unwanted Characters from a String

In [7]:
%%sql
-- Remove the vowels (and Y, case-insensitively) from each name and the
-- zero digits from each salary.
select ename,
       regexp_replace(ename, '(?i)[aeiouy]', '', 'g')  as stripped1,
       sal,
       -- A salary consisting only of zeros leaves an empty string, which
       -- cannot be cast to int. nullif maps that case to NULL instead of
       -- failing the whole query.
       nullif(replace(sal::varchar, '0', ''), '')::int as stripped2
from emp;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
14 rows affected.


ename,stripped1,sal,stripped2
SMITH,SMTH,800,8
ALLEN,LLN,1600,16
WARD,WRD,1250,125
JONES,JNS,2975,2975
MARTIN,MRTN,1250,125
BLAKE,BLK,2850,285
CLARK,CLRK,2450,245
SCOTT,SCTT,3000,3
KING,KNG,5000,5
TURNER,TRNR,1500,15


## 6.5 Separating Numeric and Character Data

In [8]:
%%sql
-- Glue each name to its salary, then pull the two apart again at the one
-- position where a non-digit is immediately followed by a digit.
with glued as (
    select e.ename || e.sal as txt
    from emp as e
),
parts as (
    -- grp[1] is the leading text, grp[2] the trailing digits.
    select regexp_matches(g.txt, '(.+)(?<=\D)(?=\d)(.+)') as grp
    from glued as g
)
select p.grp[1]      as ename,
       p.grp[2]::int as sal
from parts as p;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
14 rows affected.


ename,sal
SMITH,800
ALLEN,1600
WARD,1250
JONES,2975
MARTIN,1250
BLAKE,2850
CLARK,2450
SCOTT,3000
KING,5000
TURNER,1500


## 6.6 Determining Whether a String Is Alphanumeric

In [9]:
%%sql
-- Build a mixed set of strings (bare names, names with a formatted salary,
-- names with a department number) and keep only those made up entirely
-- of letters and digits.
with v as (
    select ename as data
    from emp
    where deptno = 10
    union all
    select format('%s, $%s.00', ename, sal) as data
    from emp
    where deptno = 20
    union all
    select format('%s%s', ename, deptno) as data
    from emp
    where deptno = 30
)
select data
from v
-- The \w class also accepts the underscore, which is not alphanumeric,
-- so underscores are excluded with a separate test.
where data ~ '^\w+$'
  and position('_' in data) = 0;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
9 rows affected.


data
CLARK
KING
MILLER
ALLEN30
WARD30
MARTIN30
BLAKE30
TURNER30
JAMES30


## 6.7 Extracting Initials from a Name

In [10]:
%%sql
-- Reduce every word of the name to its first letter followed by a period.
-- The rest of the word is matched with \w* (zero or more) so that a part
-- that is already a single letter is still turned into an initial.
select regexp_replace('Stewie Griffin', '(\w)\w*\s*', '\1.', 'g') as initials;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
1 rows affected.


initials
S.G.


## 6.8 Ordering by Parts of a String

In [11]:
%%sql
-- Order employees by the last two characters of their name.
-- right() is a plain scalar function. The set-returning regexp_matches
-- used here before would silently drop any row whose name is shorter
-- than two characters. The full name breaks ties so the order is stable.
select ename
from emp
order by right(ename, 2), ename;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
14 rows affected.


ename
ALLEN
MILLER
TURNER
JAMES
JONES
MARTIN
BLAKE
ADAMS
KING
FORD


## 6.9 Ordering by a Number in a String

In [12]:
%%sql
-- Build tab-separated strings of name, employee number and department
-- name, then order them by the first run of digits compared as text
-- (lexicographic, not numeric).
with v as (
    select format(e'%s\t%s\t%s', e.ename, e.empno, d.dname) as data
    from emp as e
    -- Explicit join key instead of NATURAL JOIN, so a same-named column
    -- added to either table cannot silently change the join.
    -- NOTE(review) assumes deptno is the only column emp and dept share - confirm.
    inner join dept as d using (deptno)
)
select data
from v
-- substring yields NULL when there is no digit run, whereas the
-- set-returning regexp_matches would drop such rows from the result.
order by substring(data from '\d+');

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
14 rows affected.


data
SMITH	7369	RESEARCH
ALLEN	7499	SALES
WARD	7521	SALES
JONES	7566	RESEARCH
MARTIN	7654	SALES
BLAKE	7698	SALES
CLARK	7782	ACCOUNTING
SCOTT	7788	RESEARCH
KING	7839	ACCOUNTING
TURNER	7844	SALES


In [13]:
%%sql
-- Build tab-separated strings of name, employee number and department
-- name, then order them numerically by the first run of digits.
with v as (
    select format(e'%s\t%s\t%s', e.ename, e.empno, d.dname) as data
    from emp as e
    -- Explicit join key instead of NATURAL JOIN, so a same-named column
    -- added to either table cannot silently change the join.
    -- NOTE(review) assumes deptno is the only column emp and dept share - confirm.
    inner join dept as d using (deptno)
)
select data
from v
-- substring yields NULL when there is no digit run, whereas the
-- set-returning regexp_matches would drop such rows from the result.
-- The cast makes the comparison numeric rather than lexicographic.
order by substring(data from '\d+')::int;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
14 rows affected.


data
SMITH	7369	RESEARCH
ALLEN	7499	SALES
WARD	7521	SALES
JONES	7566	RESEARCH
MARTIN	7654	SALES
BLAKE	7698	SALES
CLARK	7782	ACCOUNTING
SCOTT	7788	RESEARCH
KING	7839	ACCOUNTING
TURNER	7844	SALES


## 6.10 Creating a Delimited List from Table Rows

In [14]:
%%sql
-- One row per department with its employee names joined by commas,
-- the names inside each list sorted alphabetically.
select e.deptno,
       string_agg(e.ename, ',' order by e.ename) as emps
from emp as e
group by e.deptno
order by e.deptno;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
3 rows affected.


deptno,emps
10,"CLARK,KING,MILLER"
20,"ADAMS,FORD,JONES,SCOTT,SMITH"
30,"ALLEN,BLAKE,JAMES,MARTIN,TURNER,WARD"


## 6.11 Converting Delimited Data into a Multivalued IN-List

In [15]:
%%sql
-- Turn a comma-separated list of employee numbers into rows and use it
-- as the membership list for a lookup on emp.
with id_list as (
    select '7654,7698,7782,7788' as csv
)
select e.ename, e.sal, e.deptno
from emp as e
where e.empno in (
    -- One integer per comma-separated item.
    select regexp_split_to_table(l.csv, ',')::int
    from id_list as l
);

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
4 rows affected.


ename,sal,deptno
MARTIN,1250,30
BLAKE,2850,30
CLARK,2450,10
SCOTT,3000,20


## 6.12 Alphabetizing a String

In [16]:
%%sql
-- Rearrange the letters of every employee name into alphabetical order.
with letters as (
    -- One row per character of each name. The employee number travels
    -- along so letters are regrouped per employee, not per distinct name.
    select e.empno,
           e.ename as old_name,
           regexp_split_to_table(e.ename, '(?=)') as letter
    from emp as e
)
select old_name, string_agg(letter, '' order by letter) as new_name
from letters
-- Grouping by name alone would merge the letters of two employees who
-- happen to share a name.
-- NOTE(review) assumes empno identifies one employee - confirm.
group by empno, old_name
order by old_name, empno;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
14 rows affected.


old_name,new_name
ADAMS,AADMS
ALLEN,AELLN
BLAKE,ABEKL
CLARK,ACKLR
FORD,DFOR
JAMES,AEJMS
JONES,EJNOS
KING,GIKN
MARTIN,AIMNRT
MILLER,EILLMR


## 6.13 Identifying Strings That Can Be Treated as Numbers

In [17]:
%%sql
-- Build strings that are part digits, all digits, or no digits at all,
-- then list only those containing digits together with their first
-- run of digits as an integer.
with v as (
    select substr(e.ename, 1, 2) || e.deptno || substr(e.ename, 3, 2) as mixed
    from emp as e
    where e.deptno = 10
    union all
    select e.empno::varchar
    from emp as e
    where e.deptno = 20
    union all
    select e.ename
    from emp as e
    where e.deptno = 30
)
select v.mixed,
       hit.grp[1]::int as numbers
from v
-- The lateral join keeps only strings that have a match, which is what
-- filters out the purely alphabetic values.
cross join lateral regexp_matches(v.mixed, '\d+') as hit (grp);

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
8 rows affected.


mixed,numbers
CL10AR,10
KI10NG,10
MI10LL,10
7369,7369
7566,7566
7788,7788
7876,7876
7902,7902


## 6.14 Extracting the nth Delimited Substring

In [18]:
%%sql
-- Pick the second comma-separated item of each list by splitting the
-- list into an array and indexing it (arrays are 1-based).
with v (name) as (
    values
        ('mo,larry,curly'),
        ('tina,gina,jaunita,regina,leena')
)
select (regexp_split_to_array(v.name, ','))[2] as name
from v;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
2 rows affected.


name
larry
gina


In [19]:
%%sql
-- Pick the second comma-separated item of each list directly with
-- split_part, which takes the delimiter and a 1-based field number.
with v (name) as (
    values
        ('mo,larry,curly'),
        ('tina,gina,jaunita,regina,leena')
)
select split_part(v.name, ',', 2) as name
from v;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
2 rows affected.


name
larry
gina


## 6.15 Parsing an IP Address

In [20]:
%%sql
-- Split a dotted IPv4 address into its four numeric parts.
with ip as (select '111.22.3.4' as ip),
     octets as (
         -- The dots are escaped so they match a literal period only. An
         -- unescaped dot matches any character, which let a plain run of
         -- digits pass as an address. The anchors require the whole
         -- string to be the address. No match yields no row.
         select regexp_matches(ip, '^(\d+)\.(\d+)\.(\d+)\.(\d+)$')::int[] as octets
         from ip
     )
select octets[1] as a, octets[2] as b, octets[3] as c, octets[4] as d
from octets;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
1 rows affected.


a,b,c,d
111,22,3,4


## 6.16 Comparing Strings by Sound

In [21]:
%%sql
-- soundex comes from the fuzzystrmatch extension.
create extension if not exists fuzzystrmatch;

-- Pair up every two different surnames that share a soundex code.
with names (a_name) as (
    values
        ('Johnson'),
        ('Jonson'),
        ('Jonsen'),
        ('Jensen'),
        ('Johnsen'),
        ('Shakespeare'),
        ('Shakspear'),
        ('Shaekspir'),
        ('Shakespar')
)
select a.a_name, b.a_name, soundex(a.a_name) as soundex
from names as a
inner join names as b
    on soundex(b.a_name) = soundex(a.a_name)
   and b.a_name <> a.a_name
order by a.a_name, b.a_name;

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
Done.
24 rows affected.


a_name,a_name_1,soundex
Jensen,Johnsen,J525
Jensen,Johnson,J525
Jensen,Jonsen,J525
Jensen,Jonson,J525
Johnsen,Jensen,J525
Johnsen,Johnson,J525
Johnsen,Jonsen,J525
Johnsen,Jonson,J525
Johnson,Jensen,J525
Johnson,Johnsen,J525


## 6.17 Finding Text Not Matching a Pattern

In [22]:
%%sql
-- Return the comments that contain a phone number whose two separators
-- differ from each other.
-- NOTE(review) the doubled curly braces in the pattern presumably collapse
-- to single ones before the statement reaches the server (notebook
-- variable expansion) - confirm before running this outside the notebook.
with employee_comment as (
    select *
    from (values (7369, '126 Varnum, Edmore MI 48829, 989 313-5351'),
                 (7499, e'1105 McConnell Court\nCedar Lake MI 48812\n' ||
                        e'Home: 989-387-4321\nCell: (237) 438-3333'),
                 (9999, '906-387-1698, 313-535.8886'),
                 (7900, 'Cares for 100-year-old aunt during the day. ' ||
                        'Schedule only for evening and night shifts.')
         ) as t(emp_id, text)
)
select text
from employee_comment
-- The pattern is three digits (optionally parenthesised), up to two
-- separator characters, three digits, up to two separator characters and
-- four digits, not embedded in a longer digit run. The two capture groups
-- hold the separators. A row qualifies when any match has unequal ones.
where exists(select 1
             from regexp_matches(text, '(?<!\d)(?:\d{{3}}|\(\d{{3}}\))(.{{0,2}})\d{{3}}(.{{0,2}})\d{{4}}(?!\d)', 'g') as match
             where match[1] != match[2])

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
3 rows affected.


text
"126 Varnum, Edmore MI 48829, 989 313-5351"
1105 McConnell Court Cedar Lake MI 48812 Home: 989-387-4321 Cell: (237) 438-3333
"906-387-1698, 313-535.8886"


In [23]:
%%sql
-- Return the comments that still contain something shaped like a phone
-- number after every consistently formatted number has been removed.
-- NOTE(review) the doubled curly braces in the patterns presumably collapse
-- to single ones before the statement reaches the server (notebook
-- variable expansion) - confirm before running this outside the notebook.
with employee_comment as (
    select *
    from (values (7369, '126 Varnum, Edmore MI 48829, 989 313-5351'),
                 (7499, e'1105 McConnell Court\nCedar Lake MI 48812\n' ||
                        e'Home: 989-387-4321\nCell: (237) 438-3333'),
                 (9999, '906-387-1698, 313-535.8886'),
                 (7900, 'Cares for 100-year-old aunt during the day. ' ||
                        'Schedule only for evening and night shifts.')
         ) as t(emp_id, text)
)
select text
from employee_comment
-- First pattern: a number whose second separator repeats the first (back
-- reference). The g flag removes every such number. Without it only the
-- first one was removed, so a comment holding two well-formed numbers was
-- wrongly reported. Second pattern: any phone-like number, whatever its
-- separators.
where regexp_replace(text, '(?<!\d)(?:\d{{3}}|\(\d{{3}}\))(.{{0,2}})\d{{3}}\1\d{{4}}(?!\d)', '', 'g') ~
      '(?<!\d)(?:\d{{3}}|\(\d{{3}}\))(.{{0,2}})\d{{3}}(.{{0,2}})\d{{4}}(?!\d)';

 * postgresql://sql-cookbook:***@0.0.0.0:5432/sql-cookbook
3 rows affected.


text
"126 Varnum, Edmore MI 48829, 989 313-5351"
1105 McConnell Court Cedar Lake MI 48812 Home: 989-387-4321 Cell: (237) 438-3333
"906-387-1698, 313-535.8886"
