# 정규 표현식으로 해결하는 일반적인 문제들

## 북미 전화번호

In [1]:
str = "J. Doe: 248-555-1234\n" +
    "B. Smith: (313) 555-1234\n" +
    "A. Lee: (810)555-1234";

str.match(/\(?[2-9]\d\d\)?[ -]?[2-9]\d\d-\d{4}/g)

[Array] ["248-555-1234","(313) 555-1234","(810)555-1234"]

In [2]:
str = "J. Doe: 248-555-1234\n" +
    "B. Smith: (313) 555-1234\n" +
    "A. Lee: (810)555-1234\n" +
    "M. Jones: 734.555.9999";

str.match(/[\(.]?[2-9]\d\d[\).]?[ -]?[2-9]\d\d[-.]\d{4}/g)

[Array] ["248-555-1234","(313) 555-1234","(810)555-1234","734.555.9999"]

## 미국 우편번호

In [3]:
str = "999 1st Avenue, Bigtown, NY, 11222\n" +
    "123 Hight Street, Any City, MI 48034-1234";

str.match(/\d{5}(-\d{4})?/g)

[Array] ["11222","48034-1234"]

## 캐나다 우편번호

In [4]:
str = "123 4th Streeet, Toronto, Ontario, M1A 1A1\n" +
    "567 8th Avenue, Montreal, Quebec, H9Z 9Z9";

str.match(/[ABCEGHJKLMNPRSTVXY]\d[A-Z] \d[A-Z]\d/g)

[Array] ["M1A 1A1","H9Z 9Z9"]

## 영국 우편번호

In [5]:
str = "171 Kyverdale Road, London N16 6PS\n" +
    "33 Main Street, Portsmouth, P01 3AX\n" +
    "18 High Street, London NW11 8AB";

str.match(/[A-Z]{1,2}\d[A-Z\d]? \d[ABD-HJLNP-UW-Z]{2}/g)

[Array] ["N16 6PS","P01 3AX","NW11 8AB"]

## 미국 사회보장번호

In [6]:
str = "John Smith: 123-45-6789";

str.match(/\d{3}-\d{2}-\d{4}/g)

[Array] ["123-45-6789"]

## IP 주소

In [7]:
str = "localhost 127.0.0.1";

str.match(/(((\d{1,2})|(1\d{2})|(2[0-4]\d)|(25[0-5]))\.){3}((\d{1,2})|(1|\d{2})|(2[0-4]\d)|(25[0-5]))/g)

[Array] ["127.0.0.1"]

## URL

In [8]:
str = "http://www.forta.com/blog\n" +
    "https://www.forta.com:80/blog/index.cfm\n" +
    "http://www.forta.com\n" +
    "http://ben:password@www.forta.com\n" +
    "http://localhost/index.php?ab=1&c=2\n" +
    "http://localhost:8500";

str.match(/https?:\/\/[-\w.]+(:\d+)?(\/([\w/_.]*)?)?/g)

[Array] ["http://www.forta.com/blog","https://www.forta.com:80/blog/index.cfm","http://www.forta.com","http://ben","http://localhost/index.php","http://localhost:8500"]

## 완전한 URL

In [9]:
str = "http://www.forta.com/blog\n" +
    "https://www.forta.com:80/blog/index.cfm\n" +
    "http://www.forta.com\n" +
    "http://ben:password@www.forta.com\n" +
    "http://localhost/index.php?ab=1&c=2\n" +
    "http://localhost:8500";

str.match(/https?:\/\/(\w*:\w*@)?[-\w.]+(:\d+)?(\/([\w/_.]*(\?\S+)?)?)?/g)

[Array] ["http://www.forta.com/blog","https://www.forta.com:80/blog/index.cfm","http://www.forta.com","http://ben:password@www.forta.com","http://localhost/index.php?ab=1&c=2","http://localhost:8500"]

## 이메일 주소

In [10]:
str = "My name is Ben Forta, and my\n" +
    "email address is ben@forta.com.";

str.match(/(\w+\.)*\w+@(\w+\.)+[A-Za-z]+/g)

[Array] ["ben@forta.com"]

## HTML 주석

In [11]:
str = "<!-- Start of page -->\n" +
    "<html>\n" +
    "<!-- Start of head -->\n" +
    "<head>\n" +
    "<title>My Title</title><!-- Page title -->\n" +
    "</head>\n" +
    "<!-- Body -->\n" +
    "<body>";

str.match(/<!-{2,}.*?-{2,}>/g)

[Array] ["<!-- Start of page -->","<!-- Start of head -->","<!-- Page title -->","<!-- Body -->"]

## 자바스크립트 주석

In [12]:
str = "<script langulage=\"JavaScript\">\n" +
    "// Turn off fileds used only by replace\n" +
    "function hideReplaceFields() {\n" +
    "  document.getElementById('RegExReplace').disabled=true;'\n" +
    "  document.getELementById('replaceheader').diabled=true;\n" +
    "}\n" +
    "// Turn on fields used only by replace\n" +
    "function showReplaceFields() {\n" +
    "  document.getElementById('RegExReplace').disabled=false;'\n" +
    "  document.getELementById('replaceheader').diabled=false;\n" +
    "}";

str.match(/\/\/.*/g)

[Array] ["// Turn off fileds used only by replace","// Turn on fields used only by replace"]

## 신용카드 번호

### MasterCard

In [13]:
str = "MasterCard: 5212345678901234\n" +
    "Visa 1: 4123456789012\n" +
    "Visa 2: 4123456789012345\n" +
    "Amex: 371234567890123\n" +
    "Discover: 6011123456789012\n" +
    "Diners Club: 38812345678901";

str.match(/5[1-5]\d{14}/g)

[Array] ["5212345678901234"]

### Visa Card

In [14]:
str = "MasterCard: 5212345678901234\n" +
    "Visa 1: 4123456789012\n" +
    "Visa 2: 4123456789012345\n" +
    "Amex: 371234567890123\n" +
    "Discover: 6011123456789012\n" +
    "Diners Club: 38812345678901";

str.match(/4\d{12}(\d{3})?/g)

[Array] ["4123456789012","4123456789012345"]

### Amex

In [15]:
str = "MasterCard: 5212345678901234\n" +
    "Visa 1: 4123456789012\n" +
    "Visa 2: 4123456789012345\n" +
    "Amex: 371234567890123\n" +
    "Discover: 6011123456789012\n" +
    "Diners Club: 38812345678901";

str.match(/3[47]\d{13}/g)

[Array] ["371234567890123"]

### Discover

In [16]:
str = "MasterCard: 5212345678901234\n" +
    "Visa 1: 4123456789012\n" +
    "Visa 2: 4123456789012345\n" +
    "Amex: 371234567890123\n" +
    "Discover: 6011123456789012\n" +
    "Diners Club: 38812345678901";

str.match(/6011\d{12}/g)

[Array] ["6011123456789012"]

### Diners Club

In [17]:
str = "MasterCard: 5212345678901234\n" +
    "Visa 1: 4123456789012\n" +
    "Visa 2: 4123456789012345\n" +
    "Amex: 371234567890123\n" +
    "Discover: 6011123456789012\n" +
    "Diners Club: 38812345678901";

str.match(/(30[0-5]|36\d|38\d)\d{11}/g)

[Array] ["38812345678901"]

### All

In [18]:
str = "MasterCard: 5212345678901234\n" +
    "Visa 1: 4123456789012\n" +
    "Visa 2: 4123456789012345\n" +
    "Amex: 371234567890123\n" +
    "Discover: 6011123456789012\n" +
    "Diners Club: 38812345678901";

str.match(/(5[1-5]\d{14})|(4\d{12}(\d{3})?)|(3[47]\d{13})|(6011\d{12})|((30[0-5]|36\d|38\d)\d{11})/g)

[Array] ["5212345678901234","4123456789012","4123456789012345","371234567890123","6011123456789012","38812345678901"]

## 주민등록번호

In [19]:
str = "790814-1234567";

str.match(/\d+-\d+/g)

[Array] ["790814-1234567"]

In [20]:
str = "790814-1234567\n" +
    "135-600";

str.match(/\d+-\d+/g)

[Array] ["790814-1234567","135-600"]

In [21]:
str = "790814-1234567\n" +
    "135-600";

str.match(/\d{6}-\d{7}/g)

[Array] ["790814-1234567"]

In [22]:
str = "790814-1234567\n" +
    "135-600\n" +
    "799999-1234567\n" +
    "791231-1234567";

str.match(/\d{6}-\d{7}/g)

[Array] ["790814-1234567","799999-1234567","791231-1234567"]

In [23]:
str = "790814-1234567\n" +
    "135-600\n" +
    "799999-1234567\n" +
    "791231-1234567";

// First attempt, kept for comparison: [0-1]\d and [0-3]\d are too loose and
// would still accept values such as month 19 or day 39.
// str.match(/\d{2}[0-1]\d[0-3]\d-\d{7}/g)
// Stricter version: the month is limited to 01-12 and the day to 01-31.
str.match(/\d{2}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])-\d{7}/g)

[Array] ["790814-1234567","791231-1234567"]

In [24]:
str = "790814-1234567\n" +
    "135-600\n" +
    "799999-1234567\n" +
    "791231-1234567\n" +
    "830105-5678123";

str.match(/\d{2}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])-\d{7}/g)

[Array] ["790814-1234567","791231-1234567","830105-5678123"]

In [25]:
str = "790814-1234567\n" +
    "135-600\n" +
    "799999-1234567\n" +
    "791231-1234567\n" +
    "830105-5678123";

str.match(/\d{2}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])-[1-4]\d{6}/g)

[Array] ["790814-1234567","791231-1234567"]

## 미국식 날짜 형식을 국제표준(ISO 8601) 날짜 형식으로 바꾸기

In [26]:
str = "08/14/1979";

str.replace(/([\d]{2})\/([\d]{2})\/([\d]{4})/g, "$3-$1-$1")

"1979-08-08"

## 함수 이름 바꾸기

In [27]:
str = "def get_id(account):\n" +
    "return account['id']";

str.replace(/id/g, "userid")

"def get_userid(account):\nreturn account['userid']"

In [28]:
str = "def get_id(account):\n" +
    "      return account['id']\n" +
    "\n" +
    "  def get_idle_time():\n" +
    "      conf = load_conf('default.conf')\n" +
    "      return conf['idle_time']";

str.replace(/id/g, "userid")

"def get_userid(account):\n      return account['userid']\n\n  def get_useridle_time():\n      conf = load_conf('default.conf')\n      return conf['useridle_time']"

In [29]:
str = "def get_id(account):\n" +
    "      return account['id']\n" +
    "\n" +
    "  def get_idle_time():\n" +
    "      conf = load_conf('default.conf')\n" +
    "      return conf['idle_time']";

str.replace(/([_'])(id)(['(])/g, "$1userid$3")

"def get_userid(account):\n      return account['userid']\n\n  def get_idle_time():\n      conf = load_conf('default.conf')\n      return conf['idle_time']"

## 정규 표현식과 한글과 유니코드

### 한글 일치시키기

In [30]:
str = "안녕하세요? 나부군입니다.";

str.match(/나부군/g)

[Array] ["나부군"]

* `\w`, `\W`: 영문자·숫자·밑줄(`[A-Za-z0-9_]`)만 단어 문자로 취급하므로, 한글에는 `\w`가 일치하지 않는다.

In [31]:
str = "국어: 수\n" +
    "영어: 우\n" +
    "수학: 미\n" +
    "미술: 양\n" +
    "체육: 가";

str.match(/(수|우|미|양|가)/g)

[Array] ["수","우","수","미","미","양","가"]

* `[]`는 2바이트 문자를 인지하지 못한다고 알려져 있지만, 아래 예제에서 보듯 자바스크립트에서는 한글도 정상적으로 인식한다.

In [32]:
str = "국어: 수\n" +
    "영어: 우\n" +
    "수학: 미\n" +
    "미술: 양\n" +
    "체육: 가";

str.match(/[수우미양가]/g)

[Array] ["수","우","수","미","미","양","가"]

In [33]:
str = "국어: 수\n" +
    "영어: 우\n" +
    "수학: 미\n" +
    "미술: 양\n" +
    "체육: 가";

str.match(/\b(수|우|미|양|가)\b/g)

null

* `\b`, `\B`: 단어 경계를 `\w`(영문자·숫자·밑줄) 기준으로 판단하므로 한글 단어의 경계는 인식하지 못한다. 위 예제가 `null`을 반환하는 이유다.

In [34]:
str = "국어: 수\n" +
    "영어: 우\n" +
    "수학: 미\n" +
    "미술: 양\n" +
    "체육: 가";

// NOTE(review): the lookbehind version is left commented out, presumably because
// the JavaScript engine used for this notebook did not support (?<=...) — TODO confirm.
// With no match call evaluated, the recorded cell output is just the value of str.
// str.match(/(?<=\s)(수|우|미|양|가)(?=\s)/g)

"국어: 수\n영어: 우\n수학: 미\n미술: 양\n체육: 가"

In [35]:
str = "국어: 수\n" +
    "영어: 우\n" +
    "수학: 미\n" +
    "미술: 양\n" +
    "체육: 가";

str.match(/(\s)(수|우|미|양|가)(\s)/g)

[Array] [" 수\n"," 우\n"," 미\n"," 양\n"]

In [36]:
str = "국어:수\n" +
    "영어:우\n" +
    "수학:미\n" +
    "미술:양\n" +
    "체육:가";

// str.match(/(?<=\s)(수|우|미|양|가)(?=\s)/g)

"국어:수\n영어:우\n수학:미\n미술:양\n체육:가"

In [37]:
str = "국어:수\n" +
    "영어:우\n" +
    "수학:미\n" +
    "미술:양\n" +
    "체육:가";

str.match(/(\s)(수|우|미|양|가)(\s)/g)

null

In [38]:
str = "국어:수\n" +
    "영어:우\n" +
    "수학:미\n" +
    "미술:양\n" +
    "체육:가";

// str.match(/(?<=[^가-힣])(수|우|미|양|가)(?=[^가-힣])/g)

"국어:수\n영어:우\n수학:미\n미술:양\n체육:가"

In [39]:
str = "국어:수\n" +
    "영어:우\n" +
    "수학:미\n" +
    "미술:양\n" +
    "체육:가";

str.match(/([^가-힣])(수|우|미|양|가)([^가-힣])/g)

[Array] [":수\n",":우\n",":미\n",":양\n"]

### 유니코드 일치시키기

In [40]:
str = "Copyright © 2009 인사이트";

str.match(/\u00a9/g)

[Array] ["©"]