# 文字列

## 長さ

In [1]:
length("リプサム") # number of characters (4 here), not the number of bytes

4

In [2]:
sizeof("リプサム") # size of the string in bytes: 12 for these 4 characters

12

---
## 文字列の結合

In [3]:
"L"  * "ipsum" # strings are concatenated with the * operator

"Lipsum"

---
## 文字列の比較

In [4]:
"abc" == "def" # equality test

false

In [5]:
"abc" != "def" # inequality test

true

In [6]:
"abc" >  "def" # true only if "abc" comes after "def" in lexicographic order

false

In [7]:
"abc" >= "def" # true only if "abc" comes after, or is equal to, "def" in lexicographic order

false

In [8]:
"abc" <  "def" # true only if "abc" comes before "def" in lexicographic order
isless("abc","def") # equivalent to the line above; only this last value is displayed

true

In [9]:
"abc" <= "def" # true only if "abc" comes before, or is equal to, "def" in lexicographic order

true

In [10]:
cmp("abc","def") # three-way lexicographic comparison; -1 here because "abc" sorts before "def"

-1

---
## 文字列の繰り返し

In [11]:
"Lorem" ^ 3 # repeat the string 3 times
repeat("Lorem",3) # equivalent to the line above; only this last value is displayed

"LoremLoremLorem"

---
## 部分文字列

In [12]:
"LOREM IPSUM dolor sit amet"[2+1] # indices are 1-based, so 2+1 selects the third character

'R': ASCII/Unicode U+0052 (category Lu: Letter, uppercase)

In [13]:
SubString("LOREM IPSUM dolor sit amet",2+1,4+1) # characters at 0-based positions 2 through 4, i.e. 1-based indices 3 to 5
SubString("LOREM IPSUM dolor sit amet",2+1:4+1) # equivalent, passing the indices as a range

"REM"

In [14]:
first("LOREM IPSUM dolor sit amet",5) # the first 5 characters

"LOREM"

In [15]:
last("LOREM IPSUM dolor sit amet",4) # the last 4 characters

"amet"

---
## 検索

### 文字列の検索

In [16]:
occursin("dolor","LOREM IPSUM dolor sit amet") # does the second string contain "dolor"?

true

In [17]:
findfirst("M","LOREM IPSUM dolor sit amet") # index range of the first occurrence of "M"

5:5

In [18]:
findnext("M","LOREM IPSUM dolor sit amet",5+1) # first occurrence of "M" at or after index 6, i.e. skipping the "M" at index 5

11:11

In [19]:
findlast("M","LOREM IPSUM dolor sit amet") # index range of the last occurrence of "M"

11:11

In [20]:
startswith("LOREM IPSUM dolor sit amet","Lorem") # prefix test; case-sensitive, so "Lorem" does not match "LOREM"

false

In [21]:
endswith("LOREM IPSUM dolor sit amet","amet") # suffix test

true

### 正規表現での検索

In [22]:
occursin(r"[a-z]*([a-z]m)"i,"LOREM IPSUM dolor sit amet") # case-insensitive regex search (trailing i flag)

true

In [23]:
# Find the first case-insensitive match; the parenthesised group captures the
# last two characters of the match (any letter followed by "m").
m = match(r"[a-z]*([a-z]m)"i, "LOREM IPSUM dolor sit amet")
# Print the RegexMatch itself, the matched text, and the captured groups, one per line.
foreach(println, (m, m.match, m.captures))

RegexMatch("LOREM", 1="EM")
LOREM
Union{Nothing, SubString{String}}["EM"]


In [24]:
# Iterator over every case-insensitive match in the string.
mi = eachmatch(r"[a-z]*([a-z]m)"i, "LOREM IPSUM dolor sit amet")
# For each match, print the RegexMatch, the matched text, and the captured groups.
foreach(mi) do found
    foreach(println, (found, found.match, found.captures))
end

RegexMatch("LOREM", 1="EM")
LOREM
Union{Nothing, SubString{String}}["EM"]
RegexMatch("IPSUM", 1="UM")
IPSUM
Union{Nothing, SubString{String}}["UM"]
RegexMatch("am", 1="am")
am
Union{Nothing, SubString{String}}["am"]


---
## 置換

### 正規表現を使わない置換

In [25]:
replace("LOREM IPSUM dolor sit amet","M"=>"ℳ",count=1) # replace only the first "M" (count=1)

"LOREℳ IPSUM dolor sit amet"

In [26]:
replace("LOREM IPSUM dolor sit amet","M"=>"ℳ") # replace every "M"

"LOREℳ IPSUℳ dolor sit amet"

### 正規表現を使う置換

In [27]:
replace("LOREM IPSUM dolor sit amet",r"(\w)(m)"i=>s"\g<2>\g<1>",count=1) # swap the first "word character + m" pair (case-insensitive) using numbered group references

"LORME IPSUM dolor sit amet"

In [28]:
replace("LOREM IPSUM dolor sit amet",r"(\w)(m)"i=>s"\g<2>\g<1>") # swap every "word character + m" pair (case-insensitive)

"LORME IPSMU dolor sit maet"

正規表現中で
```Julia
	r"(\w)(m)"i
		=> s"\g<2>\g<1>"
```
とする代わりに
```Julia
	r"(?<former>\w)(?<latter>m)"i
		=> s"\g<latter>\g<former>"
```
のように、グループに名前を付けて置換文字列から名前で参照することもできる。

---
## 分割と結合

### 正規表現を使わない分割

In [29]:
split("LOREM IPSUM dolor sit amet"," ";limit=2,keepempty=true) # split on " " from the left into at most 2 pieces

2-element Array{SubString{String},1}:
 "LOREM"               
 "IPSUM dolor sit amet"

In [30]:
rsplit("LOREM IPSUM dolor sit amet"," ";limit=2,keepempty=true) # split on " " from the right into at most 2 pieces

2-element Array{SubString{String},1}:
 "LOREM IPSUM dolor sit"
 "amet"                 

In [31]:
split("LOREM IPSUM dolor sit amet"," ") # split on every " "

5-element Array{SubString{String},1}:
 "LOREM"
 "IPSUM"
 "dolor"
 "sit"  
 "amet" 

### 正規表現を使う分割

In [32]:
split("LOREM IPSUM dolor sit amet",r"(?i)\ [\w\ ]+\ ") # split on a regex delimiter: a space, a run of word characters/spaces, then a space

2-element Array{SubString{String},1}:
 "LOREM"
 "amet" 

### 結合

In [33]:
join(["L","I","D","S","A"],"_") # join the elements with "_" between them

"L_I_D_S_A"

---
## 文字列の反転

In [34]:
reverse("LOREM IPSUM dolor sit amet") # reverse the order of the characters

"tema tis rolod MUSPI MEROL"

---
## 大文字/小文字の切替

In [35]:
uppercase("LOREM IPSUM dolor sit amet") # convert every letter to uppercase

"LOREM IPSUM DOLOR SIT AMET"

In [36]:
lowercase("LOREM IPSUM dolor sit amet") # convert every letter to lowercase

"lorem ipsum dolor sit amet"

In [37]:
titlecase("LOREM IPSUM dolor sit amet") # capitalize the first letter of each word and lowercase the rest

"Lorem Ipsum Dolor Sit Amet"

In [38]:
uppercasefirst("LOREM IPSUM dolor sit amet") # uppercase only the first character (already uppercase here, so the string is unchanged)

"LOREM IPSUM dolor sit amet"

In [39]:
lowercasefirst("LOREM IPSUM dolor sit amet") # lowercase only the first character

"lOREM IPSUM dolor sit amet"

---
## 文字列を揃える

In [46]:
rpad("align",11,"-") # pad on the right with "-" up to width 11 (text ends up left-aligned)

"align------"

In [47]:
lpad("align",11,"-") # pad on the left with "-" up to width 11 (text ends up right-aligned)

"------align"

---
## 文字列の削除

### 文字単位の削除

In [43]:
chop("ええ すごいわ ええ") # remove the last character

"ええ すごいわ え"

In [44]:
chop("ええ すごいわ ええ",head=2,tail=3) # remove the first 2 and the last 3 characters

" すごいわ"

In [45]:
chomp("text with newline\n") # remove the trailing newline

"text with newline"

### 前後の空白の除去

In [40]:
strip("   redundant   ") # remove leading and trailing whitespace

"redundant"

In [41]:
lstrip("   redundant   ") # remove leading whitespace only

"redundant   "

In [42]:
rstrip("   redundant   ") # remove trailing whitespace only

"   redundant"

---
## 文字列 ⇄ バイト

In [48]:
Vector{UInt8}("LOREM") # the string's bytes as a Vector{UInt8}

5-element Array{UInt8,1}:
 0x4c
 0x4f
 0x52
 0x45
 0x4d

In [49]:
String(b"LOREM") # convert a byte-array literal (b"...") back into a String

"LOREM"

---
## 文字 ⇄ コードポイント

In [50]:
Char(74) # the character whose code point is 74

'J': ASCII/Unicode U+004a (category Lu: Letter, uppercase)

In [51]:
Int('J') # the character's code point as an integer

74

---
## エスケープ文字を表示

In [52]:
# Render control characters as visible escape sequences. The two tabs in the
# input are written as \t escapes instead of raw (invisible) tab characters, so
# the example keeps working if an editor converts tabs to spaces.
escape_string("a\tb\tc")

"a\\tb\\tc"