-
Notifications
You must be signed in to change notification settings - Fork 9
/
stringr.R
142 lines (110 loc) · 4.06 KB
/
stringr.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
## ---- include = FALSE----------------------------------------------------
# Setup: attach stringr, then set the knitr chunk options `collapse` and
# `comment` for every chunk that follows.
library(stringr)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
## ------------------------------------------------------------------------
# Length of a single string
str_length("abc")
## ------------------------------------------------------------------------
x <- c("abcdef", "ghifjk")
# The 3rd letter
str_sub(x, start = 3, end = 3)
# The 2nd to 2nd-to-last character (negative positions count from the end)
str_sub(x, start = 2, end = -2)
## ------------------------------------------------------------------------
# Replacement form: overwrite the 3rd letter of every element, then show x
str_sub(x, start = 3, end = 3) <- "X"
x
## ------------------------------------------------------------------------
# Duplicate each element; `times` is given one count per element of x
str_dup(x, times = c(2, 3))
## ------------------------------------------------------------------------
x <- c("abc", "defghi")
str_pad(x, width = 10) # default pads on left
str_pad(x, width = 10, side = "both")
## ------------------------------------------------------------------------
# Requested width (4) is smaller than the second string (6 characters)
str_pad(x, width = 4)
## ------------------------------------------------------------------------
x <- c("Short", "This is a long string")
# Truncate to width 10 first, then pad on the right to width 10
x %>%
  str_trunc(width = 10) %>%
  str_pad(width = 10, side = "right")
## ------------------------------------------------------------------------
x <- c(" a ", "b ", " c")
# Trim with the default side, then from the left only
str_trim(x)
str_trim(x, side = "left")
## ------------------------------------------------------------------------
# Concatenate the four fragments into one string, then wrap it to a width
# of 40 and write the result with cat().
jabberwocky <- str_c(
  "`Twas brillig, and the slithy toves ",
  "did gyre and gimble in the wabe: ",
  "All mimsy were the borogoves, ",
  "and the mome raths outgrabe. "
)
cat(str_wrap(string = jabberwocky, width = 40))
## ------------------------------------------------------------------------
x <- "I like horses."
# Case conversion with the default locale
str_to_upper(x)
str_to_title(x)
str_to_lower(x)
# Turkish has two sorts of i: with and without the dot
str_to_lower(x, locale = "tr")
## ------------------------------------------------------------------------
x <- c("y", "i", "k")
# Ordering and sorting with the default locale
str_order(x)
str_sort(x)
# In Lithuanian, y comes between i and k
str_sort(x, locale = "lt")
## ------------------------------------------------------------------------
# Sample inputs: one string without a phone number, two strings holding one
# number each, and one string holding two numbers.
strings <- c(
  "apple",
  "219 733 8965",
  "329-293-8753",
  "Work: 579-499-7527; Home: 543.355.3679"
)
# Three capture groups (3, 3 and 4 digits; the first digit must be 2-9),
# separated by a dash, a space or a dot.
phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"
## ------------------------------------------------------------------------
# Which strings contain phone numbers?
str_detect(strings, pattern = phone)
str_subset(strings, pattern = phone)
## ------------------------------------------------------------------------
# How many phone numbers in each string?
str_count(strings, pattern = phone)
## ------------------------------------------------------------------------
# Where in the string is the phone number located?
loc <- str_locate(strings, pattern = phone)
loc
str_locate_all(strings, pattern = phone)
## ------------------------------------------------------------------------
# What are the phone numbers?
str_extract(strings, pattern = phone)
str_extract_all(strings, pattern = phone)
str_extract_all(strings, pattern = phone, simplify = TRUE)
## ------------------------------------------------------------------------
# Pull out the three components of the match
str_match(strings, pattern = phone)
str_match_all(strings, pattern = phone)
## ------------------------------------------------------------------------
# Mask the first match only, then every match
str_replace(strings, pattern = phone, replacement = "XXX-XXX-XXXX")
str_replace_all(strings, pattern = phone, replacement = "XXX-XXX-XXXX")
## ------------------------------------------------------------------------
# Split on "-": unbounded list result, then a matrix limited to n = 2 pieces
str_split("a-b-c", pattern = "-")
str_split_fixed("a-b-c", pattern = "-", n = 2)
## ------------------------------------------------------------------------
# Two spellings of an accented "a": a1 is the single precomposed code point
# U+00E1, a2 is a plain "a" followed by the combining acute accent U+0301.
a1 <- "\u00e1"
a2 <- "a\u0301"
c(a1, a2)
a1 == a2
## ------------------------------------------------------------------------
# Match a1 against a2 with a fixed() pattern and with a coll() (collation)
# pattern.
str_detect(a1, fixed(a2))
str_detect(a1, coll(a2))
## ------------------------------------------------------------------------
# The non-ASCII letters are written as \u escapes (as a1/a2 are above) so
# the values do not depend on the encoding this file is read with:
#   \u0130 = capital I with dot above, \u0131 = small dotless i.
i <- c("I", "\u0130", "i", "\u0131")
i
# Case-insensitive collation match, default locale vs. Turkish locale
str_subset(i, coll("i", ignore_case = TRUE))
str_subset(i, coll("i", ignore_case = TRUE, locale = "tr"))
## ------------------------------------------------------------------------
x <- "This is a sentence."
# Split, count and extract using word boundaries as the pattern
str_split(x, pattern = boundary("word"))
str_count(x, pattern = boundary("word"))
str_extract_all(x, pattern = boundary("word"))
## ------------------------------------------------------------------------
# Same input with an empty pattern, which the stringr documentation
# describes as equivalent to boundary("character")
str_split(x, pattern = "")
str_count(x, pattern = "")