forked from linux4ever07/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hdd_dump.sh
executable file
·174 lines (127 loc) · 4.12 KB
/
hdd_dump.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/bin/bash
# This script will look for all files in directory given as first
# argument, sort them by smallest > largest, and put that list in an
# array. We will then go through that array and copy each file one by
# one to the output directory. The script will check the MD5 hashes of
# all the files to avoid copying duplicates (in order to save space in
# the output directory).
# This script can be useful when dumping the content of failing hard
# drives or broken partitions. The script outputs a list of files that
# were copied, and a list of files that couldn't be copied, in the
# output directory.
# Since the script copies the smallest files first, the highest possible
# number of files will be copied (preferably all of the files). This is
# because smaller files are faster to read / write, and there's
# statistically a smaller chance of a bad block / sector hitting a small
# file. By copying the smaller files first, if the hard drive really is
# about to fail, the largest possible number of files will be copied.
# If the script has problems reading a file, it will retry reading it a
# maximum of 10 times, 5 times to check the MD5 hash, and 5 times to
# copy the file.
# Permissions and modification dates of the input files are preserved in
# the output files by the script.
set -o pipefail
# Prints the usage message (script name, plus the two expected
# arguments) to stderr, and quits with a non-zero exit status, since
# this function is only called when the arguments are invalid.
usage () {
	printf '\n%s\n\n' "Usage: $(basename "$0") [in_dir] [out_dir]" >&2
	exit 1
}
# If the script isn't run with sudo / root privileges, then quit.
# The effective user ID is checked (rather than the user name), so the
# test doesn't depend on what the UID 0 account is called.
if [[ $EUID -ne 0 ]]; then
	printf '\n%s\n\n' 'You need to be root to run this script!' >&2
	exit 1
fi

# The input directory has to exist, and an output directory has to be
# given. The output directory is not allowed to be an existing file.
if [[ ! -d $1 || -z $2 ]]; then
	usage
elif [[ -f $2 ]]; then
	printf '\n%s\n\n' "\"${2}\" is a file!" >&2
	exit 1
fi

# Random suffix that is used in the names of the log files.
session="${RANDOM}-${RANDOM}"

in_dir=$(readlink -f "$1")

# Using 'readlink -m' here, because the output directory (and its parent
# directories) might not exist yet. It's created by 'mkdir -p' below.
out_dir=$(readlink -m "$2")

# Associative array, which has the MD5 hashes of files as keys.
declare -A md5s

cp_log="${out_dir}/hdd_dump_copied-${session}.txt"
error_log="${out_dir}/hdd_dump_errors-${session}.txt"

# Matches a line of 'du' output: size, blank characters, file name.
regex_du='^([0-9]+)([[:blank:]]+)(.*)$'

mkdir -p "$out_dir" || exit

# Compares the disk usage of the input directory (in bytes) with the
# available space (in bytes) on the file system of the output directory.
used=$(du --summarize --block-size=1 "$in_dir" | grep -Eo '^[0-9]+')
free=$(df --output=avail --block-size=1 "$out_dir" | tail -n +2 | tr -d '[:blank:]')

if [[ $used -gt $free ]]; then
	diff=$(( used - free ))

	cat <<USED >&2
Not enough free space in:
${out_dir}
Difference is ${diff} bytes.
USED

	exit 1
fi
# The 'md5copy' function checks the MD5 hash of the input file, and
# tries to copy the file. It will try 5 times in total, both for getting
# the MD5 hash, and for copying the file, sleeping 1 second between
# each try.
#
# Arguments:
#   $1 - input file
#   $2 - output file
# Globals:
#   md5s      (read / written) hashes of the files copied so far
#   cp_log    (appended) list of files that were copied
#   error_log (appended) list of files that couldn't be read or copied
# Outputs:
#   Progress messages ('copying: FILE... done' or 'error') on stdout.
md5copy () {
	local if_tmp of_tmp md5_if exit_status n

	if_tmp="$1"
	of_tmp="$2"

	# Gets the MD5 hash. The loop stops at the first successful read, so
	# the file is not read more times than necessary.
	for n in {1..5}; do
		md5_if=$(md5sum -b -- "$if_tmp" 2>/dev/null)
		exit_status="$?"

		if [[ $exit_status -eq 0 ]]; then
			break
		fi

		if [[ $n -eq 5 ]]; then
			printf '%s\n' "$if_tmp" >> "$error_log"
			return
		fi

		sleep 1
	done

	# Keeps only the hash. 'md5sum' starts the line with a backslash if
	# the file name had to be escaped, and that backslash is removed so
	# identical content always gives the same key.
	md5_if="${md5_if%% *}"

	if [[ $md5_if == \\* ]]; then
		md5_if="${md5_if:1}"
	fi

	# A file with the same content has already been copied.
	if [[ ${md5s[${md5_if}]:-0} -eq 1 ]]; then
		return
	fi

	printf '%s' "copying: ${if_tmp}... "

	for n in {1..5}; do
		cp -p -- "$if_tmp" "$of_tmp" 2>/dev/null
		exit_status="$?"

		if [[ $exit_status -eq 0 ]]; then
			# The hash is only remembered once the copy has succeeded, so
			# a failed copy doesn't prevent a readable duplicate from
			# being copied later.
			md5s["${md5_if}"]=1

			printf '%s\n' 'done'
			printf '%s\n' "$if_tmp" >> "$cp_log"
			return
		fi

		if [[ $n -eq 5 ]]; then
			printf '%s\n' 'error'
			printf '%s\n' "$if_tmp" >> "$error_log"

			# Removes the incomplete output file, if there is one.
			if [[ -f $of_tmp ]]; then
				rm -f -- "$of_tmp" 2>/dev/null
			fi

			return
		fi

		sleep 1
	done
}
# Creates both log files right away, in the output directory.
touch "$cp_log" "$error_log"

# Lists all the files in the input directory, sorted by size (smallest
# first). The list is NUL-delimited all the way ('du -0', 'sort -z',
# 'mapfile -d'), so file names that contain newlines are kept intact.
# Each array element has the form 'SIZE<TAB>NAME'.
mapfile -d '' -t files < <(find "$in_dir" -type f -exec du -b -0 {} + 2>/dev/null | sort -z -n)

for (( i = 0; i < ${#files[@]}; i++ )); do
	# Removes the size (and the TAB after it) from the list element.
	if_fn="${files[${i}]#*$'\t'}"

	# Removes the directory name from the beginning of the string. The
	# pattern is quoted, so weird characters in the directory name are
	# matched literally (and not interpreted as part of a pattern). A
	# trailing slash is removed first, in case the input directory is '/'.
	rel_fn="${if_fn#"${in_dir%/}"/}"

	of="${out_dir}/${rel_fn}"
	of_dn="${of%/*}"

	mkdir -p "$of_dn" || exit

	# Files that already exist in the output directory are left alone.
	if [[ ! -f $of ]]; then
		md5copy "$if_fn" "$of"
	fi
done