/
03-satsuma.sh
34 lines (23 loc) · 1.64 KB
/
03-satsuma.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/bin/bash
# Running Satsuma Synteny on NBW genome using the blue whale genome as model (from NCBI)
# download
#wget ftp://ftp.broadinstitute.org/distribution/software/spines/satsuma-3.0.tar.gz
#tar xvzf satsuma-3.0.tar.gz
# Because the genome is larger than 1.5 GB, Satsuma is run chromosome by chromosome
# t=target genome fasta; q=query genome fasta; n=number of cpus; o=output directory
# Stop at the first failing command, unset variable, or failed pipeline stage,
# so the conversion step never runs after a failed SatsumaSynteny run.
set -euo pipefail

# Directory holding the SatsumaSynteny and BlockDisplaySatsuma binaries.
satsuma_bin=/home/degreefe/programs/satsuma-code-0

# Output paths below are relative, so the working directory must be correct.
cd /home/degreefe/NBW/reference/satsuma || {
  printf 'error: cannot cd to %s\n' /home/degreefe/NBW/reference/satsuma >&2
  exit 1
}

model_ref=/home/degreefe/NBW/reference/bluewhale/bluewhale_chr1.fasta
NBW_genome=/home/degreefe/NBW/reference/Northern_bottlenose_whale_051018_shortLabel.fasta
out_dir=satsuma_out_chr1
block_display_out=BlockDisplaySatsuma_chr1
# Summary file that the conversion step reads from the output directory.
summary="${out_dir}/satsuma_summary.chained.out"

# run SatsumaSynteny
"${satsuma_bin}/SatsumaSynteny" -t "$model_ref" -q "$NBW_genome" -n 12 -o "$out_dir"

# Refuse to convert a missing or empty summary; otherwise the redirect below
# would leave a misleading output file behind.
if [[ ! -s "$summary" ]]; then
  printf 'error: %s is missing or empty; skipping BlockDisplaySatsuma\n' "$summary" >&2
  exit 1
fi

# convert satsuma summary output (satsuma_summary.chained.out) to MizBee format
"${satsuma_bin}/BlockDisplaySatsuma" -i "$summary" -t "$model_ref" -q "$NBW_genome" > "$block_display_out"
# I ended up reading the satsuma_summary.chained.out files into R instead, but below are some old notes on editing the BlockDisplay outputs to run in MizBee:
# may need to adjust the chr name from the target/model ref so it says "chr1", "chr2", etc. instead of "NC_......."
# this part was run manually on the command line for each chr because each one has a different name
# sed can be used to extract the full chr name from line 3:
#sed -n '3p' BlockDisplaySatsuma_chr1 | awk '{print $1}'
# Then replace the name with "chr#"
#sed -i 's/NC_045785.1_Balaenoptera_musculus_isolate_JJ_BM4_2016_0621_chromosome_1,_mBalMus1.pri.v3,_whole_genome_shotgun_sequence/chr1/g' BlockDisplaySatsuma_chr1