-
Notifications
You must be signed in to change notification settings - Fork 1
/
mkPopFile
executable file
·64 lines (51 loc) · 1.65 KB
/
mkPopFile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/bin/bash
set -u # Check for undefined variables
set -o errexit # Used to exit upon error, avoiding cascading errors
set -o nounset # Exposes unset variables
set -eo pipefail # Fail fast and be aware of exit codes
print_basic_help() {
local program_name=$(basename "$0")
printf "Usage: %s <ped_filename> <species_name>\n" "$program_name"
}
print_usage() {
cat << EOF
Generate a population file suitable for Structure_threader analysis
EOF
print_basic_help
cat << EOF
The options should specify
- The PED file name WITHOUT the .ped extension
- The species name valid for PLINK input
- The output file is named popfile
This software is licensed under the MIT License.
EOF
}
writePopFile() {
local inped="$1"
local species="$2"
local popfile="popfile"
local index=1
for fam in $(cut -f 1 "$inped".ped | uniq); do
tmpFilePrefix="$inped"_$fam
# Generate a .log file for each family with count information
# It could be done without PLINK but there is no guarantee that pedigree/individual information is sorted
echo $fam | plink \
--file "$inped" \
--out "$tmpFilePrefix" \
--"$species" \
--make-bed \
--keep-fam /dev/stdin
# Extract number of individuals remaining from the log file
reminds=$(grep "^\--keep-fam" "$tmpFilePrefix".log | tr -dc '0-9')
# Write population file line as described in https://github.com/StuntsPT/Structure_threader/blob/master/docs/usage.md
echo -e "$fam\t$reminds\t$index" >> "$popfile"
# Remove the temporary files
rm -f "$tmpFilePrefix".{log,bed,bim,fam,nosex}
index=$((index+1))
done
}
main() {
[[ $# -ne 2 ]] && { print_usage; exit 1; }
writePopFile "$@"
}
main "$@"