Skip to content

Latest commit



198 lines (161 loc) · 7.49 KB

File metadata and controls

198 lines (161 loc) · 7.49 KB

7.3 正则处理




7.3.1 正则匹配


func Match(pattern string, b []byte) (matched bool, err error)
func MatchReader(pattern string, r io.RuneReader) (matched bool, err error)
func MatchString(pattern string, s string) (matched bool, err error)


// ipFormatRe matches exactly four dot-separated groups of one to three
// decimal digits. It is compiled once at package initialisation instead of on
// every call; the pattern is a constant, so MustCompile cannot fail at run
// time.
var ipFormatRe = regexp.MustCompile(`^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$`)

// IsIp reports whether the input s has the textual shape of a dotted-quad
// IPv4 address.
//
// Only the number of digits in each group is checked, not the numeric range,
// so "999.999.999.999" is accepted. The empty string and anything with
// leading or trailing characters (including a trailing newline) is rejected.
func IsIp(s string) bool {
	return ipFormatRe.MatchString(s)
}
func main() {
	ip1 := ""
	ip2 := "dada513123"
	fmt.Println(ip1, " : ", regexpnote.IsIp(ip1))
	fmt.Println(ip2, " : ", regexpnote.IsIp(ip2))
}  :  true
dada513123  :  false

7.3.2 通过正则获取内容

func SimpleCrawler() {
	//Send Get request
	resp, err := http.Get("")
	myutils.CheckError(err, "Http Get Error")
	//Close Resp body
	defer resp.Body.Close()
	//Read resp data
	data, err := ioutil.ReadAll(resp.Body)
	myutils.CheckError(err, "Read data error")
	src := string(data)
	log.Println("Response : ", src)
	reg, _ := regexp.Compile(`<[\S\s]+?>`)
	src = reg.ReplaceAllStringFunc(src, strings.ToLower)
	reg, _ = regexp.Compile(`<style[\S\s]+?</style>`)
	src = reg.ReplaceAllString(src, "")
	reg, _ = regexp.Compile(`<script[\s\S]+?</script>`)
	src = reg.ReplaceAllString(src, "")
	reg, _ = regexp.Compile(`<[\S\s]+?>`)
	src = reg.ReplaceAllString(src, "\n")
	reg, _ = regexp.Compile(`\s{2,}`)
	src = reg.ReplaceAllString(src, "\n")

	log.Println("Response : ", src)
	myutils.WriteDataToFile("../resources/tmp/baidu.txt", src)


func Compile(expr string) (*Regexp, error)
func CompilePOSIX(expr string) (*Regexp, error)
func MustCompile(str string) *Regexp
func MustCompilePOSIX(str string) *Regexp


前缀为 Must 的函数在正则表达式不合法时会直接 panic,无 Must 前缀的函数则会返回错误。

搜索函数


func (re *Regexp) Find(b []byte) []byte
func (re *Regexp) FindAll(b []byte, n int) [][]byte
func (re *Regexp) FindAllIndex(b []byte, n int) [][]int
func (re *Regexp) FindAllString(s string, n int) []string
func (re *Regexp) FindAllStringIndex(s string, n int) [][]int
func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string
func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int
func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte
func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int
func (re *Regexp) FindIndex(b []byte) (loc []int)
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int)
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int
func (re *Regexp) FindString(s string) string
func (re *Regexp) FindStringIndex(s string) (loc []int)
func (re *Regexp) FindStringSubmatch(s string) []string
func (re *Regexp) FindStringSubmatchIndex(s string) []int
func (re *Regexp) FindSubmatch(b []byte) [][]byte
func (re *Regexp) FindSubmatchIndex(b []byte) []int

上面这 18 个函数按输入源(byte slice、string 和 io.RuneReader)的不同,可以进一步归并为下面这几个;其余函数只是输入源不同,功能基本一致。

func (re *Regexp) Find(b []byte) []byte
func (re *Regexp) FindAll(b []byte, n int) [][]byte
func (re *Regexp) FindAllIndex(b []byte, n int) [][]int
func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte
func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int
func (re *Regexp) FindIndex(b []byte) (loc []int)
func (re *Regexp) FindSubmatch(b []byte) [][]byte
func (re *Regexp) FindSubmatchIndex(b []byte) []int


func RegexpFind() {
	//Source String
	s := "Hello World Fuck World"
	src := []byte(s)

	//Regular Expression
	reg, err := regexp.Compile(`[A-Za-z]{2,4}`)
	myutils.CheckError(err, "Regexp error")

	first := reg.Find(src)
	log.Printf("first : %s \n", first)
	all := reg.FindAll(src, -1)
	log.Printf("All: %s \n", all)
	fi := reg.FindIndex(src)
	log.Printf("First index: %v", fi)
	ai := reg.FindAllIndex(src, -1)
	log.Printf("All index: %v", ai)

	reg2, err := regexp.Compile(`He(.*)Wo(.*)`)
	myutils.CheckError(err, "Regexp error")

	submatch := reg2.FindSubmatch(src)
	log.Printf("Submatch %v \n", submatch)
	for _, v := range submatch {
		log.Println("\t", string(v))

	si := reg2.FindSubmatchIndex(src)
	log.Printf("Submatch index %v \n", si)

	as := reg2.FindAllSubmatch(src, -1)
	log.Printf("All Submatch  %s \n", as)

} 匹配函数


func (re *Regexp) Match(b []byte) bool
func (re *Regexp) MatchReader(r io.RuneReader) bool
func (re *Regexp) MatchString(s string) bool

替换函数

func (re *Regexp) ReplaceAll(src, repl []byte) []byte
func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte
func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte
func (re *Regexp) ReplaceAllLiteralString(src, repl string) string
func (re *Regexp) ReplaceAllString(src, repl string) string
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string