forked from linux4ever07/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rm_dup_lines.sh
executable file
·59 lines (41 loc) · 1.29 KB
/
rm_dup_lines.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/bash
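# This script removes consecutive duplicate lines from IRC logs (*.log
# and *.txt files, matched case-insensitively) found in the current
# directory and its subdirectories. Leading timestamps in Konversation,
# irssi and HexChat format are ignored when comparing lines. Files are
# rewritten in place, keeping their original modification time.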
# Leading-timestamp patterns (ERE) for the supported IRC clients. They
# are tried in this order and the first one that matches a line wins.
konversation_regex='^\[[[:alpha:]]+, [[:alpha:]]+ [0-9]+, [0-9]+\] \[[0-9]+:[0-9]+:[0-9]+ [[:alpha:]]+ [[:alpha:]]+\]'
irssi_regex='^[0-9]+:[0-9]+'
hexchat_regex='^[[:alpha:]]+ [0-9]+ [0-9]+:[0-9]+:[0-9]+'

# Private scratch directory. Created by main, removed by cleanup.
tmp_dir=''

#######################################
# Remove the scratch directory, if one was created.
# Globals:   tmp_dir (read, reset to empty)
#######################################
cleanup() {
  if [[ -n $tmp_dir ]]; then
    rm -rf -- "$tmp_dir"
    tmp_dir=''
  fi
}

#######################################
# Copy SRC to DST, dropping every line that is identical to the line
# directly before it once a leading timestamp has been stripped.
# Globals:   konversation_regex, irssi_regex, hexchat_regex (read)
# Arguments: $1 - log file to read
#            $2 - file to write the de-duplicated log to
# Returns:   0 on success, non-zero if SRC cannot be read or DST cannot
#            be written (DST must not be used in that case).
#######################################
dedup_file() {
  local src=$1 dst=$2
  local line key previous regex
  local have_previous=0
  local -a lines=()

  # Bail out on read failure, so that stale content is never written
  # over a file that could not be read.
  mapfile -t lines <"$src" || return 1

  for line in "${lines[@]}"; do
    key=$line

    for regex in "$konversation_regex" "$irssi_regex" "$hexchat_regex"; do
      if [[ $line =~ $regex ]]; then
        # All patterns are anchored with ^, so the match is a prefix of
        # the line and can be removed without spawning sed.
        key=${line#"${BASH_REMATCH[0]}"}
        break
      fi
    done

    # A line consisting only of a timestamp is compared in full.
    if [[ -z $key ]]; then
      key=$line
    fi

    if (( have_previous )) && [[ $key == "$previous" ]]; then
      continue
    fi

    previous=$key
    have_previous=1
    printf '%s\n' "$line"
  done >"$dst" || return 1
}

#######################################
# De-duplicate every *.log / *.txt regular file below the current
# directory, in place, keeping each file's modification time.
# Globals:   tmp_dir (written)
# Returns:   0 if every file was processed, 1 otherwise
#######################################
main() {
  local fn fn_out
  local -i idx=0 status=0
  local -a files=()

  # Prefer RAM-backed /dev/shm; fall back to the default temp location.
  tmp_dir=$(mktemp -d /dev/shm/rm_dup_lines-XXXXXXXXXX 2>/dev/null) \
    || tmp_dir=$(mktemp -d) \
    || {
      printf 'rm_dup_lines: cannot create temporary directory\n' >&2
      return 1
    }
  trap cleanup EXIT

  # The parentheses make -type f apply to both name tests. NUL
  # delimiters keep unusual file names intact.
  while IFS= read -r -d '' fn; do
    files+=("$fn")
  done < <(find . -type f \( -iname '*.log' -o -iname '*.txt' \) -print0 2>/dev/null)

  for fn in "${files[@]}"; do
    # The scratch directory is private, so a counter is collision-free.
    fn_out="${tmp_dir}/$(( idx++ )).log"

    if ! dedup_file "$fn" "$fn_out"; then
      printf 'rm_dup_lines: skipping %s (read or write failed)\n' "$fn" >&2
      status=1
      continue
    fi

    # Carry the original modification time over to the replacement.
    touch -r "$fn" "$fn_out"
    mv -- "$fn_out" "$fn" || status=1
  done

  cleanup
  return "$status"
}

main "$@"