forked from AlexsLemonade/OpenPBTA-analysis
-
Notifications
You must be signed in to change notification settings - Fork 13
/
run-for-subtyping.sh
126 lines (97 loc) · 3.5 KB
/
run-for-subtyping.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env bash
#
# Driver for the molecular subtyping workflow: runs each subtyping module in
# turn, copies the integrated histology file and the independent specimen
# lists into the data and pre-release directories, and writes an md5sum
# manifest for the pre-release files.
#
# bash is required: the script relies on BASH_SOURCE and `set -o pipefail`,
# neither of which is guaranteed under a plain POSIX /bin/sh.

# Abort on any failing command, unset variable, or failing pipeline stage.
set -euo pipefail

printf "Start molecular subtyping...\n\n"

# Set locations for s3 bucket that contains release file.
# URL is only referenced by the (currently commented-out) upload step at the
# end of this script.
URL="s3://d3b-openaccess-us-east-1-prd-pbta/open-targets"
RELEASE="v14"

# Set the working directory to the directory of this file
cd "$(dirname "${BASH_SOURCE[0]}")"

# Get base directory of project (the parent of this file's directory).
# Resolved in a subshell so the working directory stays where it is and no
# `cd -` path is echoed into the output.
BASEDIR="$(cd .. && pwd)"

analyses_dir="$BASEDIR/analyses"
data_dir="$BASEDIR/data/$RELEASE"
scratch_dir="$BASEDIR/scratch"

# Compile all the files that need to be included in the release in one place
# in the scratch directory
release_dir="${scratch_dir}/analysis-files-pre-release"
mkdir -p "${release_dir}"
## Run subtyping modules

# Announce one subtyping step, change into its module directory under
# ${analyses_dir}, and run that module's driver script with bash.
#   $1 - label printed as "Run <label> subtyping"
#   $2 - module directory name, relative to ${analyses_dir}
#   $3 - driver script inside that directory
# Returns non-zero if the directory cannot be entered or the driver fails;
# with `set -e` active at the call site that aborts the whole run.
# The cd is done in the current shell, so the working directory remains the
# last module's directory after each call.
run_subtyping_module() {
  local label="$1"
  local module="$2"
  local driver="$3"

  printf 'Run %s subtyping\n' "${label}"
  cd "${analyses_dir}/${module}" || return
  bash "${driver}"
}

# Run MB subtyping
run_subtyping_module "MB" "molecular-subtyping-MB" "run-molecular-subtyping-mb.sh"

# Run CRANIO subtyping
run_subtyping_module "CRANIO" "molecular-subtyping-CRANIO" "run-molecular-subtyping-cranio.sh"

# Run EPN subtyping
run_subtyping_module "EPN" "molecular-subtyping-EPN" "run-molecular-subtyping-EPN.sh"

# Run Embryonal subtyping
run_subtyping_module "Embryonal" "molecular-subtyping-embryonal" "run-embryonal-subtyping.sh"

# Run chordoma subtyping
run_subtyping_module "chordoma" "molecular-subtyping-chordoma" "run-molecular-subtyping-chordoma.sh"

# Run EWS subtyping
run_subtyping_module "EWS" "molecular-subtyping-EWS" "run_subtyping.sh"

# Run neurocytoma subtyping
run_subtyping_module "neurocytoma" "molecular-subtyping-neurocytoma" "run_subtyping.sh"

# Run ATRT subtyping
run_subtyping_module "ATRT" "molecular-subtyping-ATRT" "run-molecular-subtyping-ATRT.sh"

# Run PB subtyping
run_subtyping_module "PB" "molecular-subtyping-PB" "run-molecular-subtyping-PB.sh"

# Run HGG subtyping
run_subtyping_module "HGG" "molecular-subtyping-HGG" "run-molecular-subtyping-HGG.sh"

# Run LGAT subtyping
run_subtyping_module "LGAT" "molecular-subtyping-LGAT" "run_subtyping.sh"

# Run NBL subtyping
run_subtyping_module "NBL" "molecular-subtyping-NBL" "run-molecular-subtyping-NBL.sh"

# Run compile subtyping
run_subtyping_module "compile" "molecular-subtyping-pathology" "run-subtyping-aggregation.sh"

# Run integrate subtyping
run_subtyping_module "integrate" "molecular-subtyping-integrate" "run-subtyping-integrate.sh"
# Copy over integrated subtyping - the *FULL* histology file - into both the
# data release directory and the pre-release staging directory.
# All directory variables are quoted so a checkout path containing spaces or
# glob characters is passed to cp as a single argument.
histologies_file="${analyses_dir}/molecular-subtyping-integrate/results/histologies.tsv"
cp "${histologies_file}" "${data_dir}"
cp "${histologies_file}" "${release_dir}"

# Create the independent sample list using the *FULL* histology file (i.e. - histologies.tsv)
echo "Create independent sample list"
cd "${analyses_dir}/independent-samples"
bash run-independent-samples.sh

# Copy over independent specimen lists.
# The directory part is quoted; the trailing `independent-specimens.*` is
# deliberately left outside the quotes so the shell still expands the glob.
cp "${analyses_dir}/independent-samples/results/"independent-specimens.* "${data_dir}"
cp "${analyses_dir}/independent-samples/results/"independent-specimens.* "${release_dir}"
# Create an md5sum file for all the files in the directory where the analysis
# files are compiled
cd "${release_dir}"

# Remove old md5sum release file if it exists, so it is not checksummed into
# the new manifest.
rm -f analysis_files_release_md5sum.txt

# Create a new md5sum release file.
# `--` ends option parsing so a file whose name starts with `-` is treated as
# a file rather than an md5sum option. The shell expands `*` before it sets
# up the redirection, so the manifest being written is not listed in itself.
md5sum -- * > analysis_files_release_md5sum.txt

# Upload all release files to the s3 bucket in their respective folders
#aws s3 cp "${release_dir}/" "$URL/$RELEASE/" --recursive

printf "\nDone running molecular subtyping...\n\n"