Skip to content

Commit

Permalink
har har har!
Browse files Browse the repository at this point in the history
  • Loading branch information
arp242 committed Feb 11, 2020
0 parents commit 4561ebc
Show file tree
Hide file tree
Showing 5 changed files with 228 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
*.har
12 changes: 12 additions & 0 deletions README.markdown
@@ -0,0 +1,12 @@
[![GoDoc](https://godoc.org/github.com/arp242/har?status.svg)](https://pkg.go.dev/arp242.net/har)

Read [HAR](https://en.wikipedia.org/wiki/HAR_(file_format)) ("HTTP Archive
format") archives.

Use `har.FromFile("path.har")` to read an archive into a `Har` struct. You can
then call its `Extract()` method to write the recorded response bodies to disk.

See `./cmd/unhar` for an example.

Install with `go get arp242.net/har/cmd/unhar`, which will put the binary at
`~/go/bin/unhar`.
29 changes: 29 additions & 0 deletions cmd/unhar/main.go
@@ -0,0 +1,29 @@
package main

import (
"fmt"
"os"

"arp242.net/har"
)

// main reads every HAR file named on the command line and extracts its
// contents. It exits with status 1 when no file is given, or as soon as one
// file cannot be read or extracted.
func main() {
	files := os.Args[1:]
	if len(files) == 0 {
		fmt.Fprintln(os.Stderr, "unhar: must give at least one filename")
		os.Exit(1)
	}

	for _, name := range files {
		archive, err := har.FromFile(name)
		if err != nil {
			fmt.Fprintf(os.Stderr, "unhar: reading %q: %s\n", name, err)
			os.Exit(1)
		}

		// Extraction is always run non-verbose from the command line.
		if err := archive.Extract(false); err != nil {
			fmt.Fprintf(os.Stderr, "unhar: extracting %q: %s\n", name, err)
			os.Exit(1)
		}
	}
}
3 changes: 3 additions & 0 deletions go.mod
@@ -0,0 +1,3 @@
module arp242.net/har

go 1.13
183 changes: 183 additions & 0 deletions har.go
@@ -0,0 +1,183 @@
package har

import (
"encoding/base64"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"time"
)

// Har is the in-memory form of an HTTP Archive (HAR) file, decoded from JSON.
//
// Only the fields declared here are decoded; everything lives under the
// archive's top-level "log" object. The quoted values in the trailing comments
// are example values.
type Har struct {
// File is the path this archive was read from. It is set by FromFile and is
// never read from or written to JSON (tag "-").
File string `json:"-"` // File this was read from.

Log struct {
Version string `json:"version"` // "1.2"
// presumably the application that wrote the archive — confirm against the
// HAR specification.
Creator struct {
Name string `json:"name"` // "Firefox"
Version string `json:"version"` // "72.0.2"
} `json:"creator"`
// presumably the browser the traffic was recorded in — confirm against the
// HAR specification.
Browser struct {
Name string `json:"name"` // "Firefox"
Version string `json:"version"` // "72.0.2"
} `json:"browser"`
// Pages lists the recorded pages; entries refer to a page by its ID
// (compare Entry.PageRef, which carries the same "page_1" style value).
Pages []struct {
StartedDateTime time.Time `json:"startedDateTime"`
ID string `json:"id"` // "page_1"
// NOTE(review): units are not visible here; presumably milliseconds as in
// the HAR format — confirm.
PageTimings struct {
OnContentLoad int `json:"onContentLoad"`
OnLoad int `json:"onLoad"`
} `json:"pageTimings"`
} `json:"pages"`

// Entries holds one element per recorded request/response pair; Extract
// writes one file for each of them.
Entries []Entry `json:"entries"`
} `json:"log"`
}

// Entry is one request/response pair from the archive's "entries" list.
//
// The JSON keys follow the HAR 1.2 format: header, cookie and query string
// records are {"name": ..., "value": ...} objects, and cookies live under their
// own "cookies" key. The Go field holding a record's name is called Key.
//
// NOTE(review): every numeric field is an int, so encoding/json rejects
// archives that store fractional numbers in them (for example in "time" or
// "timings") — confirm whether the producers you care about do that.
type Entry struct {
	PageRef         string    `json:"pageref"` // "page_1"
	StartedDateTime time.Time `json:"startedDateTime"`

	Request struct {
		BodySize    int    `json:"bodySize"`
		HeadersSize int    `json:"headersSize"`
		Method      string `json:"method"`
		URL         string `json:"url"`         // "http://localhost/..."
		HTTPVersion string `json:"httpVersion"` // "HTTP/1.1"
		Headers     []struct {
			Key   string `json:"name"`
			Value string `json:"value"`
		} `json:"headers"`
		// Cookies needs its own JSON key: two fields sharing one key at the same
		// level are both silently ignored by encoding/json.
		Cookies []struct {
			Key   string `json:"name"`
			Value string `json:"value"`
		} `json:"cookies"`
		QueryString []struct {
			Key   string `json:"name"`
			Value string `json:"value"`
		} `json:"queryString"`
	} `json:"request"`

	Response struct {
		HeadersSize int    `json:"headersSize"`
		BodySize    int    `json:"bodySize"`
		Status      int    `json:"status"`
		StatusText  string `json:"statusText"`  // "OK"
		HTTPVersion string `json:"httpVersion"` // "HTTP/1.1"
		RedirectURL string `json:"redirectURL"`
		Headers     []struct {
			Key   string `json:"name"`
			Value string `json:"value"`
		} `json:"headers"`
		// See Request.Cookies for why this must not reuse the "headers" key.
		Cookies []struct {
			Key   string `json:"name"`
			Value string `json:"value"`
		} `json:"cookies"`
		// Content is the response body. Text holds the body itself; when
		// Encoding is "base64" the text is base64-encoded.
		Content struct {
			Encoding string `json:"encoding"`
			Size     int    `json:"size"`
			Text     string `json:"text"`
		} `json:"content"`
	} `json:"response"`

	Cache struct {
		// TODO
	} `json:"cache"`

	Timings struct {
		Blocked int `json:"blocked"`
		DNS     int `json:"dns"`
		Connect int `json:"connect"`
		SSL     int `json:"ssl"`
		Send    int `json:"send"`
		Wait    int `json:"wait"`
		Receive int `json:"receive"`
	} `json:"timings"`

	Time            int    `json:"time"`
	SecurityState   string `json:"_securityState"`  // TODO
	ServerIPAddress string `json:"serverIPAddress"` // "::1"
	Connection      string `json:"connection"`      // "80"
}

// FromFile loads the HAR archive stored at path f.
//
// The whole file is read into memory and decoded as JSON. On success the
// returned Har has its File field set to f. Failures are wrapped with an
// "open: ", "read: " or "json: " prefix naming the step that went wrong.
func FromFile(f string) (*Har, error) {
	fp, err := os.Open(f)
	if err != nil {
		return nil, fmt.Errorf("open: %w", err)
	}
	// The file is only read from, so the result of Close is not checked.
	defer fp.Close()

	raw, err := ioutil.ReadAll(fp)
	if err != nil {
		return nil, fmt.Errorf("read: %w", err)
	}

	archive := new(Har)
	if err := json.Unmarshal(raw, archive); err != nil {
		return nil, fmt.Errorf("json: %w", err)
	}

	archive.File = f
	return archive, nil
}

// Extract writes the response body of every entry to disk.
//
// Files are placed below a directory named after h.File (base name without its
// extension), relative to the current working directory. The path below that
// directory is the request URL without its http:// or https:// prefix; a URL
// ending in "/" is stored as index.html. When the response carries a
// Content-Disposition header with a filename= parameter, that name replaces
// the last path element. Bodies whose content encoding is "base64" are decoded
// before writing.
//
// If verbose is true the URL-derived path of every entry is printed to stdout.
// A summary line is always printed to stdout on success.
//
// Extract stops at the first failure and returns it. An entry whose path would
// end up outside the output directory (for example through ".." elements in
// the URL) is treated as a failure rather than written.
func (h *Har) Extract(verbose bool) error {
	root := filepath.Base(h.File)
	if ext := filepath.Ext(root); ext != "" {
		root = root[:len(root)-len(ext)]
	}
	// A name such as ".har" leaves nothing behind; continuing would build
	// absolute paths like "/example.com/...".
	switch root {
	case "", ".", "..", string(filepath.Separator):
		return fmt.Errorf("cannot derive an output directory from file name %q", h.File)
	}

	for _, e := range h.Log.Entries {
		path := root + "/" + strings.TrimPrefix(strings.TrimPrefix(e.Request.URL, "http://"), "https://")
		if verbose {
			fmt.Println(" ", path)
		}

		for _, hdr := range e.Response.Headers {
			// Header names are case-insensitive; HTTP/2 captures use lower case.
			if !strings.EqualFold(hdr.Key, "Content-Disposition") {
				continue
			}
			if name := dispositionFilename(hdr.Value); name != "" {
				path = filepath.Dir(path) + "/" + name
			}
			break
		}
		if strings.HasSuffix(path, "/") {
			path += "index.html"
		}

		// The URL comes from the archive and is not trusted: resolve any ".."
		// elements and make sure the result is still below root.
		path = filepath.Clean(path)
		if !strings.HasPrefix(path, root+string(filepath.Separator)) {
			return fmt.Errorf("path %q for URL %q is outside of %q", path, e.Request.URL, root)
		}

		err := os.MkdirAll(filepath.Dir(path), 0755)
		if err != nil {
			return fmt.Errorf("mkdir: %w", err)
		}

		var resp []byte
		if e.Response.Content.Encoding == "base64" {
			resp, err = base64.StdEncoding.DecodeString(e.Response.Content.Text)
			if err != nil {
				return fmt.Errorf("base64: %w", err)
			}
		} else {
			resp = []byte(e.Response.Content.Text)
		}

		err = ioutil.WriteFile(path, resp, 0644)
		if err != nil {
			return fmt.Errorf("write: %w", err)
		}
	}

	fmt.Printf("Extracted %d files\n", len(h.Log.Entries))
	return nil
}

// dispositionFilename returns the file name given by the filename= parameter
// of a Content-Disposition header value, with surrounding quotes and any
// slashes removed. It returns "" when there is no usable name; if the
// parameter occurs more than once the last usable one wins.
func dispositionFilename(value string) string {
	var name string
	for _, param := range strings.Split(value, ";") {
		param = strings.TrimSpace(param)
		if !strings.HasPrefix(param, "filename=") {
			continue
		}

		n := strings.Trim(strings.TrimPrefix(param, "filename="), `"`)
		n = strings.ReplaceAll(filepath.Clean(n), "/", "")
		if n == "" || n == "." || n == ".." {
			continue
		}
		name = n
	}
	return name
}

0 comments on commit 4561ebc

Please sign in to comment.