# This notebook describes the analysis of the Knight lab IBD dataset

In [None]:
# Base file location in Minerva
# /sc/arion/projects/clemej05a/hilary/knight_ibd

# Generate taxa summaries - performed by Adam in Qiime2. Original files in: /sc/arion/projects/clemej05a/adam/downloads/knight_longitudinal_ibd/
# Final relative abundance artifact: relative_taxa_table_L6.qza, moved to /sc/arion/projects/clemej05a/hilary/knight_ibd
# Unzip the artifact; its contents extract into the UUID-named directory moved below
mkdir -p relative_taxa_table_L6
unzip relative_taxa_table_L6.qza
# Relative destination: a leading "/" would point at the filesystem root, not the folder created above
mv 90c1865a-d66a-4387-b7c7-1a5bafc9801e relative_taxa_table_L6/


# On Minerva
source activate qiime2-2020.8.0

# Convert the BIOM feature table to TSV from inside the artifact's data folder
# (assumes the working directory is still the one where the artifact was unzipped)
cd relative_taxa_table_L6/90c1865a-d66a-4387-b7c7-1a5bafc9801e/data/
biom convert -i feature-table.biom -o feature-table-all-L6.tsv --to-tsv
# NOTE(review): the destination is longitudinal_tb, not the knight_ibd base directory named at the top -- confirm this is intended
cp feature-table-all-L6.tsv /sc/arion/projects/clemej05a/hilary/longitudinal_tb/

In [None]:
# Convert the genus-level (L6) table to trajectories

python sc_create_trajs_knight.py

# Inputs, per the author's notes (the script takes no command-line arguments here):
# Data file: rel-table-L6.tsv
# Metadata file: mapping-file.tsv

# Final trajectories saved in folder /taxa_trajs/
# These trajectories were transferred to Minerva for clustering, into /sc/arion/projects/clemej05a/hilary/knight_ibd/taxa_trajs/

In [None]:
# Smooth trajs
python sc_interp_knight_ibd_trajs_lego.py

# Plot the results of the smoothing to pick a smoothing factor
# Rscript executes the file non-interactively; plain `R <file>` ignores the file argument and opens an interactive session
Rscript sc_smoothing_analysis.R

In [None]:
# Cluster trajectories
# NOTE: sc_create_j_clust.py and generate_lsf_loclust.py are located in the analysis_scripts folder in loclust

# Step 1: write one command list per trajectory folder (taxa_trajs, then taxa_trajs_lego_interp)
python sc_create_j_clust.py \
    -i "/sc/arion/projects/clemej05a/hilary/knight_ibd/taxa_trajs/" \
    -b "/sc/arion/projects/clemej05a/hilary/knight_ibd/" \
    -s "/sc/arion/projects/clemej05a/hilary/repos/loclust/scripts" \
    -o Knight_commands_for_jobs.txt
python sc_create_j_clust.py \
    -i "/sc/arion/projects/clemej05a/hilary/knight_ibd/taxa_trajs_lego_interp/" \
    -b "/sc/arion/projects/clemej05a/hilary/knight_ibd/" \
    -s "/sc/arion/projects/clemej05a/hilary/repos/loclust/scripts" \
    -o Knight_commands_lego_for_jobs.txt

# Step 2: turn each command list into jobs and submit them (--submit), using the loclust3pt8 conda environment
python generate_lsf_loclust.py \
    -e loclust3pt8 \
    -c Knight_commands_for_jobs.txt \
    -o /sc/arion/projects/clemej05a/hilary/knight_ibd/jobs/ \
    -py /hpc/users/monach01/.conda/envs/loclust3pt8/lib/python3.8/ \
    --submit
python generate_lsf_loclust.py \
    -e loclust3pt8 \
    -c Knight_commands_lego_for_jobs.txt \
    -o /sc/arion/projects/clemej05a/hilary/knight_ibd/jobs_lego/ \
    -py /hpc/users/monach01/.conda/envs/loclust3pt8/lib/python3.8/ \
    --submit

In [None]:
# Score the clustering results for both job sets (jobs/ and jobs_lego/)
# NOTE: These scripts are located in the analysis_scripts folder in loclust

# Silhouette scores, parsed from each jobs directory
python sc_stdout_parse.py \
    -i "/sc/arion/projects/clemej05a/hilary/knight_ibd/jobs/"
python sc_stdout_parse.py \
    -i "/sc/arion/projects/clemej05a/hilary/knight_ibd/jobs_lego/"

# V-measure against the uc_extent, year_diagnosed and ibd_subtype metadata fields
python sc_vmeasure_with_mdata.py \
    -i "/sc/arion/projects/clemej05a/hilary/knight_ibd/outputs/taxa_trajs/" \
    -m "uc_extent" -m "year_diagnosed" -m "ibd_subtype"
python sc_vmeasure_with_mdata.py \
    -i "/sc/arion/projects/clemej05a/hilary/knight_ibd/outputs/taxa_trajs_lego_interp/" \
    -m "uc_extent" -m "year_diagnosed" -m "ibd_subtype"

# Chi-squared results for the same three metadata fields
python sc_chiSq.py \
    -i "/sc/arion/projects/clemej05a/hilary/knight_ibd/outputs/taxa_trajs/" \
    -m "uc_extent" -m "year_diagnosed" -m "ibd_subtype"
python sc_chiSq.py \
    -i "/sc/arion/projects/clemej05a/hilary/knight_ibd/outputs/taxa_trajs_lego_interp/" \
    -m "uc_extent" -m "year_diagnosed" -m "ibd_subtype"


In [None]:
# Selection of taxa for further analysis

# Merge silhouette score and v-measure m-data for plotting
python sc_vmeasure_with_mdata_merge.py
# Make the silhouette score vs v-measure plot to pick taxa -- note this filters down to only plot results for the "ibd_subtype" metadata category
# Rscript executes the file non-interactively; plain `R <file>` ignores the file argument and opens an interactive session
Rscript sc_knight_ibd_silhouette_vs_vmeasure.R

Review sc_stdout_parse.py outputs for selection of clusters by silhouette score. 

Corresponding info for the clusters of interest will be in the output files from sc_vmeasure_with_mdata.py and sc_chiSq.py

Transfer relevant cluster files as appropriate before plotting. 

In [None]:
# Plotting

# Check the chi^2 results, pick taxa for visualization, transfer relevant files to the directory where you're storing final cluster results, convert the cluster outputs to R dataframes
# NOTE: This script is located in the analysis_scripts folder in loclust
# NOTE(review): the input path below is a user-specific local Windows path, unlike the Minerva paths used in earlier steps -- point it at your own copy of the transferred cluster outputs
python sc_convert_traj_for_R_plotting.py -i 'C:/Users/mathr/Documents/GitHub/knight_ibd/outputs/taxa_trajs_lego_interp/' -m "uc_extent" -m "year_diagnosed" -m "ibd_subtype" -m "cluster"

# Plot taxa figures
# Rscript executes the file non-interactively; plain `R <file>` ignores the file argument and opens an interactive session
Rscript sc_plot_knight_ibd_data.R

