-
Notifications
You must be signed in to change notification settings - Fork 1
/
randomMerge.sh
executable file
·70 lines (58 loc) · 1.5 KB
/
randomMerge.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Randomly merge multiple files
# i.e. for each line, randomly pick a version of that line from one of the files
# Files must be of equal length
# --- Argument handling and global setup -------------------------------------
# Globals written: inputFiles (array of input paths), numFiles (their count),
#                  numLines (line count of the first input file).
if [ "$#" -lt 1 ]; then
  # Diagnostics go to stderr: stdout carries the merged output.
  echo "Usage: $0 [FILES]" >&2
  exit 1
fi

inputFiles=("$@")
numFiles=${#inputFiles[@]}

# Fail early with a clear message instead of letting wc/cat/shuf fail later.
for f in "${inputFiles[@]}"; do
  if [ ! -r "$f" ]; then
    echo "$0: cannot read '$f'" >&2
    exit 1
  fi
done

# The first file defines how many lines the merged output will have.
# $(( ... )) strips any padding some wc implementations emit.
numLines=$(wc -l < "${inputFiles[0]}")
numLines=$((numLines))

# The files are expected to be of equal length; warn (do not abort) when they
# are not, since lines picked from a shorter file would be silently missing.
for f in "${inputFiles[@]:1}"; do
  if [ "$(($(wc -l < "$f")))" -ne "$numLines" ]; then
    echo "$0: warning: '$f' does not have $numLines lines like '${inputFiles[0]}'" >&2
  fi
done
get_fair_partition()
{
  # Print the offset into lineIdxs at which partition number $1 starts, when
  # the entries of lineIdxs are divided as evenly as possible across $numFiles
  # partitions (the first `total % numFiles` partitions get one extra entry).
  #
  # Globals:   lineIdxs (read), numFiles (read; must be non-zero)
  # Arguments: $1 - partition index; values below 0 are treated as 0 and
  #                 values above numFiles as numFiles, so passing numFiles
  #                 yields the total number of entries.
  # Outputs:   the cumulative length of partitions 0 .. $1-1 on stdout
  local idx=$1
  local total=${#lineIdxs[@]}
  local slots=$numFiles
  local base=$((total / slots))   # minimum entries per partition
  local extra=$((total % slots))  # partitions that receive one more entry

  if ((idx < 0)); then idx=0; fi
  if ((idx > slots)); then idx=slots; fi

  # Closed form of the cumulative sum: every preceding partition contributes
  # `base`, and the first `extra` of them contribute one more each.
  local bonus=$((idx < extra ? idx : extra))
  echo $((idx * base + bonus))
}
get_split()
{
index=$1
startPoint=$(get_fair_partition $index)
endPoint=`get_fair_partition $(($index+1))`
#echo "$startPoint to $endPoint"
outputStr=""
for ((i=$startPoint; i < $endPoint; i++)); do
l=${lineIdxs[$i]}
outputStr+="-e ${l}p "
done
echo "$outputStr"
}
get_seeded_random()
{
  # Write an unbounded, reproducible byte stream to stdout: the contents of
  # /dev/zero encrypted with AES-256-CTR, using "$1" as the pass phrase and
  # no salt, so the same seed always yields the same bytes.
  # Anything openssl prints on stderr is discarded.
  seed=$1
  openssl enc -aes-256-ctr -nosalt -pass "pass:${seed}" 2>/dev/null </dev/zero
}
# Get a randomly shuffled array of the line numbers 1..numLines. The random
# source is seeded with a fixed value, so the shuffle is the same on every run.
readarray -t lineIdxs < <(shuf -i "1-${numLines}" --random-source=<(get_seeded_random 42))

# For each input file, print (prefixed with its line number) only the lines
# assigned to that file's partition of the shuffled line numbers.
j=0
for f in "${inputFiles[@]}"; do
  # get_split prints "-e Np -e Mp ..."; split it into separate sed arguments.
  read -r -a sedArgs <<< "$(get_split "$j")"
  # A file may be assigned no lines (more files than lines); running sed
  # without any expression would be an error, so skip it.
  if [ "${#sedArgs[@]}" -gt 0 ]; then
    cat -n -- "$f" | sed -n "${sedArgs[@]}"
  fi
  j=$((j+1))
done | sort -n -k 1 | cut -f 2- # Sort back into order, then drop the line numbers