# This notebook describes the analysis of the TB dataset

In [None]:
# Convert the QIIME 2 genus-level (L6) relative-abundance artifact to a TSV table.
#
# Provenance: source data copied from
#   /sc/arion/projects/clemej05a/adam/downloads/southafrica_longitudinal_tb/
# to
#   /sc/arion/projects/clemej05a/hilary/longitudinal_tb
#
# Taxa summaries were generated by Adam in Qiime2.
# Final relative abundance artifact: relative_taxa_table_L6.qza

# Unzip the artifact; it extracts into a directory named by the artifact UUID.
# The UUID directory is then moved *into* the local relative_taxa_table_L6/ folder
# (relative path -- a leading "/" would target the filesystem root instead).
mkdir -p relative_taxa_table_L6
unzip relative_taxa_table_L6.qza
mv 7c7c0ad1-cda3-47af-8bdb-90454df84550 relative_taxa_table_L6/

# On Minerva
source activate qiime2-2020.8.0

# NOTE(review): assumes this runs from the same working directory as the unzip
# step above -- adjust the path if the extracted folder lives elsewhere on Minerva.
cd relative_taxa_table_L6/7c7c0ad1-cda3-47af-8bdb-90454df84550/data/
# Convert the BIOM feature table to tab-separated text and copy it to the project folder.
biom convert -i feature-table.biom -o feature-table-all-L6.tsv --to-tsv
cp feature-table-all-L6.tsv /sc/arion/projects/clemej05a/hilary/longitudinal_tb/

In [None]:
# Convert the genus-level table into per-taxon trajectories.

python sc_make_tb_trajs_Adam_relabd_parse-revamp.py

# Inputs used by the script:
#   Data file:     feature-table-all-L6.tsv
#   Metadata file: 20211117.bar.map.tsv

# Final trajectories are saved in the folder /taxa_trajs/time-corrected-trajs/
# These trajectories were then transferred to Minerva for clustering, into
# /sc/arion/projects/clemej05a/hilary/longitudinal_tb/time-corrected-trajs/

In [None]:
# Cluster trajectories

# Step 1: write one clustering command per job into TB_relabd_L6_commands_for_jobs.txt
python sc_create_j_clust.py \
    -i "/sc/arion/projects/clemej05a/hilary/longitudinal_tb/time-corrected-trajs/" \
    -b "/sc/arion/projects/clemej05a/hilary/longitudinal_tb/" \
    -s "/sc/arion/projects/clemej05a/hilary/repos/loclust/scripts" \
    -o TB_relabd_L6_commands_for_jobs.txt

# Step 2: turn that command list into LSF jobs and submit them (--submit)
python generate_lsf_loclust.py \
    -e loclust3pt8 \
    -c TB_relabd_L6_commands_for_jobs.txt \
    -o /sc/arion/projects/clemej05a/hilary/longitudinal_tb/jobs/ \
    -py /hpc/users/monach01/.conda/envs/loclust3pt8/lib/python3.8/ \
    --submit

In [None]:
# Silhouette scores, parsed from the job output folder
python sc_stdout_parse.py \
    -i '/sc/arion/projects/clemej05a/hilary/longitudinal_tb/jobs_lego7/'

# V-measure of the clusters against each metadata category given with -m
python sc_vmeasure_with_mdata.py \
    -i '/sc/arion/projects/clemej05a/hilary/longitudinal_tb/outputs/time-corrected-trajs_lego_interp/' \
    -m "prior_tb_c" \
    -m "outcome_c" \
    -m "hiv" \
    -m "current_smoker" \
    -m "tb_score_class"

# Chi squared results for the same clusters and metadata categories
python sc_chiSq.py \
    -i '/sc/arion/projects/clemej05a/hilary/longitudinal_tb/outputs/time-corrected-trajs_lego_interp/' \
    -m "prior_tb_c" \
    -m "outcome_c" \
    -m "hiv" \
    -m "current_smoker" \
    -m "tb_score_class"

In [None]:
# Selection of taxa for further analysis

# Merge silhouette score and v-measure metadata results for plotting
python sc_vmeasure_with_mdata_merge.py

# Make the silhouette score vs v-measure plot used to pick taxa.
# Note: this filters down to only plot results for the "ibd_subtype" metadata category.
# NOTE(review): "ibd_subtype" is not one of the TB metadata categories passed to
# sc_vmeasure_with_mdata.py (prior_tb_c, outcome_c, hiv, ...) -- confirm the filter
# inside the R script matches this dataset.
# Rscript (not bare `R`) is required to execute a script file non-interactively.
Rscript sc_tb_lego7_ibd_silhouette_vs_vmeasure.R

Review sc_stdout_parse.py outputs for selection of clusters by silhouette score. 

Corresponding info for the clusters of interest will be in the output files from sc_vmeasure_with_mdata.py and sc_chiSq.py

Transfer relevant cluster files as appropriate before plotting. 

In [None]:
# Plotting

# Convert clustered trajectories for plotting in R (presumably into the input
# format the R scripts below expect -- confirm against the converter script).
# Usage template (substitute the real input folder; kept as a comment because the
# angle-bracket placeholder would be parsed by the shell as redirections):
#   python sc_convert_trajs_for_R_plotting.py -i <input folder location> -m 'prior_tb' -m 'cluster'
# NOTE(review): the template spells the script "sc_convert_trajs_..." while the
# concrete call below uses "sc_convert_traj_..." -- confirm which file name exists.

# Concrete example, run on a local Windows checkout (path is machine-specific):
python sc_convert_traj_for_R_plotting.py -i 'C:/Users/mathr/Documents/GitHub/tb_mmeds_initial/outputs_targeted/sputum_trajs_shannon_trajs_timeInMonths_pc_4_num_clusters_9' -m 'prior_tb' -m 'cluster' -m 'hiv'

# Rscript (not bare `R`) is required to execute a script file non-interactively.
# Plot taxa trajs
Rscript sc_plot_tb_lego7_data.R
# Plot alpha diversity trajs
Rscript sc_plot_tb_alpha_data.R