/
programkind.go
167 lines (148 loc) · 4.07 KB
/
programkind.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
// Copyright 2024 Chainguard, Inc.
// SPDX-License-Identifier: Apache-2.0
package action
import (
"context"
"errors"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/chainguard-dev/clog"
"github.com/liamg/magic"
)
// archiveMap is the set of file extensions recognized as archives. getExt
// consults it to decide whether the last two dot-separated parts of a file
// name form a single compound extension.
var archiveMap = map[string]bool{
	// Single-part extensions.
	".apk": true,
	".jar": true,
	".tar": true,
	".tgz": true,
	".zip": true,
	// Compound (two-part) extensions.
	".tar.gz": true,
	".tar.xz": true,
}
// extMap maps a file extension (including the leading dot) to a program kind.
// An extension that is present with an empty kind is still "known" to
// byExtension; programKind then returns that empty kind as-is.
var extMap = map[string]string{
	// Extensions with a descriptive program kind.
	".c":       "C source",
	".cron":    "crontab",
	".crontab": "crontab",
	".expect":  "Expect script",
	".jar":     "Java program",
	".java":    "Java source",
	".js":      "Javascript",
	".php":     "PHP file",
	".pl":      "PERL script",
	".py":      "Python script",
	".rb":      "Ruby script",
	".scpt":    "compiled AppleScript",
	".scptd":   "compiled AppleScript",
	".service": "systemd",
	".sh":      "Shell script",
	".ts":      "Typescript",
	// Known extensions that map to an empty kind.
	".7z":   "",
	".asm":  "",
	".html": "",
	".json": "",
	".yaml": "",
	".yara": "",
	".yml":  "",
}
// programKind tries to identify if a path is a program.
//
// It returns a short description of the detected kind, or "" when the file
// cannot be opened or nothing matches. Heuristics are applied in this order,
// and the first one that matches wins:
//
//  1. paths ending in ".json" are never treated as programs;
//  2. the magic-number description of the file header, when it mentions
//     "executable", "mach-o" or "script";
//  3. substrings/suffixes of the whole path (not just the base name);
//  4. the file extension, via byExtension (which may yield an empty kind);
//  5. well-known strings found in the file header.
func programKind(ctx context.Context, path string) string {
	logger := clog.FromContext(ctx).With("path", path)

	f, err := os.Open(path)
	if err != nil {
		logger.Error("os.Open", slog.Any("error", err))
		return ""
	}
	defer f.Close()

	// Only the leading bytes of the file are inspected.
	var header [263]byte
	var desc, headerString string

	n, err := io.ReadFull(f, header[:])
	// A full read and a short read (io.ErrUnexpectedEOF) both leave n usable
	// bytes. On io.EOF (empty file) or any other error, desc and headerString
	// stay empty.
	if err == nil || errors.Is(err, io.ErrUnexpectedEOF) {
		headerString = string(header[:n])
		// Use a distinct name for the lookup error so the read error logged
		// below is not shadowed; a failed lookup just leaves desc empty.
		if kind, lookupErr := magic.Lookup(header[:n]); lookupErr == nil {
			desc = kind.Description
		}
	}

	// TODO: Is it safe to log unsanitized file stuff?
	logger.Debug("magic", slog.String("desc", desc), slog.String("header", headerString), slog.Any("err", err))

	// the magic library gets these wrong
	if strings.HasSuffix(path, ".json") {
		return ""
	}

	// By Magic
	d := strings.ToLower(desc)
	if strings.Contains(d, "executable") || strings.Contains(d, "mach-o") || strings.Contains(d, "script") {
		return desc
	}

	// By Filename
	switch {
	case strings.Contains(path, "systemd"):
		return "systemd"
	case strings.Contains(path, ".elf"):
		return "Linux ELF binary"
	case strings.Contains(path, ".xcoff"):
		return "XCOFF program"
	case strings.Contains(path, ".dylib"):
		return "macOS dynamic library"
	case strings.HasSuffix(path, "profile"):
		return "Shell script"
	}

	// By extension. A known extension is authoritative even when its kind is
	// empty, so the header string matching below is skipped for it.
	if found, kind := byExtension(path); found {
		return kind
	}

	// By string match
	switch {
	case strings.Contains(headerString, "import "):
		return "Python script"
	case strings.HasPrefix(headerString, "#!/bin/sh"),
		strings.HasPrefix(headerString, "#!/bin/bash"),
		strings.Contains(headerString, `echo "`),
		strings.Contains(headerString, `if [`),
		strings.Contains(headerString, `grep `),
		strings.Contains(headerString, "if !"):
		return "Shell script"
	case strings.HasPrefix(headerString, "#!"):
		return "script"
	case strings.Contains(headerString, "#include <"):
		return "C Program"
	}
	return ""
}
// byExtension looks up the final extension of path, as reported by
// filepath.Ext, in extMap. It returns true and the mapped kind (which may be
// empty) when the extension is known, and false with an empty kind otherwise.
func byExtension(path string) (bool, string) {
	if kind, known := extMap[filepath.Ext(path)]; known {
		return true, kind
	}
	return false, ""
}
// versionSuffixRe matches a trailing three-part version number such as
// "1.2.3", together with the dot that may directly precede it, at the very
// end of a file name. It is compiled once at package scope rather than on
// every getExt call.
var versionSuffixRe = regexp.MustCompile(`\.?\d+\.\d+\.\d+$`)

// getExt returns the extension of a file path
// and attempts to avoid including fragments of filenames with other dots before the extension.
//
// A version number at the end of the base name is removed first so that it is
// not mistaken for an extension, e.g. "pkg-1.2.3" -> "" and
// "lib.so.1.2.3" -> ".so". When the last two dot-separated parts form an
// extension listed in archiveMap, that compound extension is returned, e.g.
// "file1.2.3.tar.gz" -> ".tar.gz". Otherwise the result is whatever
// filepath.Ext reports for the (version-stripped) base name.
func getExt(path string) string {
	base := versionSuffixRe.ReplaceAllString(filepath.Base(path), "")

	ext := filepath.Ext(base)
	if ext == "" {
		return ""
	}

	// A non-empty ext guarantees base contains a dot. More than two parts
	// means there is a candidate for a compound extension such as ".tar.gz".
	parts := strings.Split(base, ".")
	if len(parts) > 2 {
		compound := fmt.Sprintf(".%s%s", parts[len(parts)-2], ext)
		if _, ok := archiveMap[compound]; ok {
			return compound
		}
	}
	return ext
}