Skip to content

Commit

Permalink
close #7
Browse files Browse the repository at this point in the history
  • Loading branch information
deemakuzovkin committed Sep 2, 2021
1 parent 6e93657 commit 5f8a231
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 38 deletions.
45 changes: 9 additions & 36 deletions cmd/distr/wscan.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,22 @@ package main
import (
"flag"
"fmt"
"github.com/PuerkitoBio/goquery"
"github.com/digital-technology-agency/web-scan/pkg/models"
"github.com/digital-technology-agency/web-scan/pkg/services/generators"
"github.com/digital-technology-agency/web-scan/pkg/services/json"
"github.com/digital-technology-agency/web-scan/pkg/services/page"
"github.com/digital-technology-agency/web-scan/pkg/utils"
"github.com/zenthangplus/goccm"
"net/http"
"runtime"
)

// Command-line flags and fixed settings for the scanner.
// Every flag is declared as a string flag.
var (
// coreCount is the `core_count` flag (default "1").
// NOTE(review): its use is outside the visible part of main — presumably a CPU/core limit; confirm.
coreCount = flag.String(`core_count`, "1", `Example 1`)
// alphabet is the `alphabet` flag; main passes it to the generator as Alphabet.
alphabet = flag.String(`alphabet`, "", `Example abcdefg`)
// urlLen is the `len` flag; main converts it with utils.Int and passes it to the generator as Len.
urlLen = flag.String(`len`, "", `Example 2`)
// concurrencyCount is the `concurrency` flag.
// NOTE(review): the two lines below are the before (default "10") and after (default "5")
// sides of the same diff hunk; the real file contains only one declaration.
// Its use is outside the visible part of main — presumably the goroutine limit; confirm.
concurrencyCount = flag.String(`concurrency`, "10", `Example 10`)
concurrencyCount = flag.String(`concurrency`, "5", `Example 5`)
// protocols lists the URL schemes tried for every generated domain name;
// the same names are used to create the per-protocol writers.
protocols = []string{"http", "https"}
)

// genWritersProtocols creates one json.EachRowWriter per name, backed by the
// file "<name>.txt", and returns the writers keyed by name.
//
// A name whose writer cannot be created is reported on stdout and left out of
// the map, so the failure is visible instead of a nil writer being stored
// silently. A lookup for such a name still yields nil.
func genWritersProtocols(names []string) map[string]*json.EachRowWriter {
	result := make(map[string]*json.EachRowWriter, len(names))
	for _, name := range names {
		writer, err := json.NewEachRowWriter(fmt.Sprintf("%s.txt", name))
		if err != nil {
			fmt.Printf("Err:[%s]\n", err.Error())
			continue
		}
		result[name] = writer
	}
	return result
}

func main() {
flag.Parse()
/*check flags*/
Expand All @@ -45,38 +34,25 @@ func main() {
Alphabet: *alphabet,
Len: utils.Int(*urlLen),
}
protocolWriters := genWritersProtocols(protocols)
protocolWriters := json.NewEachRowWriters(protocols)
for domenName := range gen.Gen() {
cuncurency.Wait()
total += 1
for _, protokol := range protocols {
go func(protokol, domen string, w *json.EachRowWriter) {
defer cuncurency.Done()
url := fmt.Sprintf("%s://%s.ru", protokol, domen)
res, err := http.Get(url)
if err != nil {
fmt.Printf("Err:[%s]\n", err.Error())
return
pageService := page.PageService{
Url: url,
}
defer res.Body.Close()
if res.StatusCode != 200 {
fmt.Printf("Status code [%d] error [%s]", res.StatusCode, res.Status)
item, err := pageService.ReadPage()
if err != nil {
return
}
doc, err := goquery.NewDocumentFromReader(res.Body)
if err != nil {
fmt.Printf("Err:[%s]\n", err.Error())
if item == nil {
fmt.Printf("Page is nil\n")
return
}
item := models.Page{}
doc.Find("title").Each(func(i int, s *goquery.Selection) {
item.Title = s.Text()
})
doc.Find("meta").Each(func(i int, s *goquery.Selection) {
if s.AttrOr("name", "") == "description" {
item.Description = s.AttrOr("content", "")
}
})
err = w.WriteLine(item)
if err != nil {
fmt.Printf("Write line err:[%s]\n", err.Error())
Expand All @@ -88,7 +64,4 @@ func main() {
}
cuncurency.WaitAllDone()
println(fmt.Sprintf("Total size:[%d] Result:[%d]", total, domenNames))
/* for key, value := range list {
fmt.Printf("Domen:[%s] Title:[%s] Description:[%s]\n", key, value.Title, value.Description)
}*/
}
6 changes: 4 additions & 2 deletions pkg/models/page.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package models

/*Page holds the data collected for one scanned URL.
NOTE(review): the two untagged Title/Description lines are the pre-change side of this diff;
the real struct contains only the four JSON-tagged fields.*/
type Page struct {
Title string
Description string
// Title is the text of the page's <title> element, as set by page.PageService.ReadPage.
Title string `json:"title"`
// Description is the content attribute of the <meta name="description"> tag.
Description string `json:"description"`
// Url is the address the page was requested from.
Url string `json:"url"`
// Robots is the raw body of "<Url>/robots.txt".
Robots string `json:"robots"`
}
10 changes: 10 additions & 0 deletions pkg/services/json/file-each-row-writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ type EachRowWriter struct {
file *os.File
}

// NewEachRowWriters creates one EachRowWriter per name, backed by the file
// "<name>.txt", and returns the writers keyed by name.
//
// A name whose writer cannot be created is reported on stdout and left out of
// the map, so the failure is visible instead of a nil writer being stored
// silently. A lookup for such a name still yields nil, so callers should check
// the writer before using it.
func NewEachRowWriters(names []string) map[string]*EachRowWriter {
	result := make(map[string]*EachRowWriter, len(names))
	for _, name := range names {
		writer, err := NewEachRowWriter(fmt.Sprintf("%s.txt", name))
		if err != nil {
			fmt.Printf("Err:[%s]\n", err.Error())
			continue
		}
		result[name] = writer
	}
	return result
}

// NewEachRowWriter new writer.
func NewEachRowWriter(path string) (*EachRowWriter, error) {
create, err := os.Create(path)
Expand Down
62 changes: 62 additions & 0 deletions pkg/services/page/page-service.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package page

import (
"fmt"
"github.com/PuerkitoBio/goquery"
"github.com/digital-technology-agency/web-scan/pkg/models"
"io/ioutil"
"net/http"
)

// PageService fetches a single web page and extracts its metadata.
type PageService struct {
// Url is the address of the page to read; ReadPage also requests "<Url>/robots.txt".
Url string
}

// ReadPage downloads the page at s.Url and its "<Url>/robots.txt" and returns
// the collected data as a models.Page.
//
// The returned page carries the requested URL, the text of the <title>
// element, the content of the <meta name="description"> tag and the raw
// robots.txt body. If several matching elements exist, the last one wins.
//
// A non-nil error is returned, and the page is nil, when either request
// fails, either response has a status other than 200, the HTML cannot be
// parsed, or the robots.txt body cannot be read. Every failure is also
// printed to stdout, as before.
func (s PageService) ReadPage() (*models.Page, error) {
	url := s.Url
	item := models.Page{
		Url: url,
	}
	res, err := http.Get(url)
	if err != nil {
		fmt.Printf("Err:[%s]\n", err.Error())
		return nil, err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		fmt.Printf("Status code [%d] error [%s]\n", res.StatusCode, res.Status)
		// err is nil here; build a real error so callers never get (nil, nil).
		return nil, fmt.Errorf("read page %q: unexpected status %s", url, res.Status)
	}
	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		fmt.Printf("Err:[%s]\n", err.Error())
		return nil, err
	}
	doc.Find("title").Each(func(_ int, sel *goquery.Selection) {
		item.Title = sel.Text()
	})
	doc.Find("meta").Each(func(_ int, sel *goquery.Selection) {
		if sel.AttrOr("name", "") == "description" {
			item.Description = sel.AttrOr("content", "")
		}
	})
	urlRobotTxt := fmt.Sprintf("%s/robots.txt", url)
	resRobots, err := http.Get(urlRobotTxt)
	if err != nil {
		fmt.Printf("Err:[%s]\n", err.Error())
		return nil, err
	}
	// The body must be closed on every path, otherwise the connection leaks.
	defer resRobots.Body.Close()
	if resRobots.StatusCode != http.StatusOK {
		fmt.Printf("Robots txt. Status:[%d]\n", resRobots.StatusCode)
		// err is nil here as well; report the bad status explicitly.
		return nil, fmt.Errorf("read robots.txt %q: unexpected status %s", urlRobotTxt, resRobots.Status)
	}
	allBytesRobotsTxt, err := ioutil.ReadAll(resRobots.Body)
	if err != nil {
		fmt.Printf("Err:[%s]\n", err.Error())
		return nil, err
	}
	item.Robots = string(allBytesRobotsTxt)
	return &item, nil
}

0 comments on commit 5f8a231

Please sign in to comment.