/
sanitize_file.sh
executable file
·94 lines (82 loc) · 1.36 KB
/
sanitize_file.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/bin/sh
. "${0%/*}/common.in"
set -e
# Re-encode a file to UTF-8 in place with iconv.
# Arguments: $1 - source encoding name handed to `iconv -f`
#            $2 - path of the file to convert
# Outputs:   on success prints "iconv: FILE: ENC -> UTF-8" to stdout
# Returns:   0 when iconv succeeded and produced a non-empty result (the file
#            is then replaced by the converted copy); 1 otherwise, in which
#            case the file itself is left as it was.
utf8() {
  local from="$1" target="$2"
  local scratch="$target~"

  # Clear any leftover scratch file before converting.
  rm -f "$scratch"
  # iconv's own diagnostics are discarded; callers probe several encodings.
  if iconv -f "$from" -t "utf8" -o "$scratch" "$target" 2> /dev/null \
    && [ -s "$scratch" ]; then
    echo "iconv: $target: $from -> UTF-8"
    mv "$scratch" "$target"
    return 0
  fi

  # Conversion failed or yielded an empty file: discard the scratch copy.
  rm -f "$scratch"
  return 1
}
# Normalize one text file in place: convert it to UTF-8 when `file` does not
# already report us-ascii/utf-8, run `fromdos` on it, and strip trailing
# blanks/tabs from every line when the file changed (or when forced).
# Globals:   force (read) - executed as a command ("true"/"false"); when it
#            succeeds, trailing whitespace is stripped unconditionally.
# Arguments: $1 - path of the file to sanitize. When omitted, the caller's
#            $f is used, which is what earlier versions relied on.
# Outputs:   conversion notices from utf8 on stdout; problems are passed to
#            the external `err` helper.
# Returns:   status of the final `rm` of the "$f.orig" working copy.
sanitize_text_file() {
  local f="${1:-$f}" ok= x
  local enc enca=

  # Declared separately from the assignment so the value is never
  # word-split; -b prints only the encoding, so a ':' inside the file name
  # cannot confuse the parsing.
  enc=$(file -b --mime-encoding "$f") || true

  # Working copy used further down to detect whether anything changed.
  cp "$f" "$f.orig"

  # encoding
  case "$enc" in
    us-ascii|utf-8) ;;
    *)
      enca=$(enca -L zh -i -- "$f") || true
      case "$enca" in
        GBK|BIG5)
          # Tested explicitly: a bare failing call would abort the whole
          # script under `set -e` without any message and leave "$f.orig".
          if utf8 "$enca" "$f"; then
            ok=1
          fi
          ;;
        *)
          # enca gave no usable answer: probe the likely encodings in turn.
          for x in GBK GB18030 GB2312; do
            if utf8 "$x" "$f"; then
              ok=1
              break
            fi
          done
          ;;
      esac
      if [ -z "$ok" ]; then
        err "E: $f: unrecognized ($enc:$enca)"
      fi
      ;;
  esac

  fromdos "$f"
  # Only touch trailing whitespace when the file was modified above, unless
  # the caller forces it.
  if $force || ! cmp "$f" "$f.orig" > /dev/null; then
    sed -i 's,[ \t]\+$,,' "$f"
  fi
  rm "$f.orig"
}
# Sanitize a single path: fix its permission bits, then either normalize it
# as a text file or report it as non-text.
# Arguments: $1 - path to process
# Outputs:   non-text files are reported via the external `err` helper as
#            "E: PATH: MIME-TYPE".
# Returns:   0 immediately for symlinks and non-existent paths; otherwise the
#            status of the last command run.
sanitize() {
  local f="$1" type

  # Symlinks and missing paths are skipped silently. Two separate tests
  # replace the obsolescent, ambiguous `-o` operator of `[`.
  if [ -L "$f" ] || [ ! -e "$f" ]; then
    return 0
  fi

  # Build scripts stay executable; everything else becomes a plain file.
  case "$f" in
    build*.sh|*/build*.sh)
      chmod 0755 "$f" ;;
    *)
      chmod 0644 "$f" ;;
  esac

  # `textfile` comes from outside this block; non-empty output selects the
  # text-file path.
  if [ -n "$(textfile "$f")" ]; then
    sanitize_text_file "$f"
  else
    # -b prints only the MIME description, so a ':' inside the path cannot
    # corrupt the reported type.
    type=$(file -b -i "$f") || true
    err "E: $f: $type"
  fi
}
# Entry point.
# Usage: sanitize_file.sh [-f] [FILE...]
#   -f    sets force=true, which sanitize_text_file reads to strip trailing
#         whitespace unconditionally.
#   FILE  paths to sanitize; when none are given, paths are read from
#         standard input, one per line.
if [ "x$1" = "x-f" ]; then
  force=true
  shift
else
  force=false
fi

if [ $# -eq 0 ]; then
  # IFS= and -r keep each line verbatim (no trimming of leading/trailing
  # blanks, no backslash processing), so unusual file names survive intact.
  # The extra `[ -n "$f" ]` still processes a final line that lacks a
  # terminating newline.
  while IFS= read -r f || [ -n "$f" ]; do
    sanitize "$f"
  done
else
  for f; do
    sanitize "$f"
  done
fi