# NeuFit Modeling Pipeline

#### Assumptions

- This Jupyter notebook runs best in Kernel: **neufit**

##### Import Packages

In [None]:
#Imports needed for Caitlin Functions
import pandas as pd
import qiime2 as q2
from biom import load_table #https://biom-format.org/documentation/generated/biom.load_table.html
from datetime import datetime
import os

#Imports needed for Neufit
import numpy as np
from lmfit import Parameters, Model, fit_report #conda-forge/lmfit
from scipy.stats import beta #[already installed]
from statsmodels.stats.proportion import proportion_confint #[already installed]
from os.path import splitext #[already installed]
from matplotlib import pyplot #[already installed]
from math import log10 #[already installed]
import scipy

##### File Locations

In [None]:
# Dataset variable naming pattern: keynameDatatypeCollob

# --- hutchKraken ---
# Directory holding the biom files produced by the kraken->biom pipeline
hutchKrakenAlex_biom = '/home/cguccion/rawData/01_11_2021_Hutch340_BE_Samples_LudmilAlexandrov/biom'

# --- TCGA_WGS ---
# Directory holding the raw data
tcgaEhnWGSgreg_ = '/home/cguccion/rawData/April2021_Greg_TCGA_WGS/raw_from_Greg'

# Metadata file. Two sources exist: (1) the original from Greg, (2) the version
# with TCGA incorporated. The inactive alternatives are kept here for reference:
#   (1) Greg original:
#       tcgaEhnWGSgreg_meta = f'{tcgaEhnWGSgreg_}/13722_20210405-101126-TCGA-WGS-Qiita-sample-metadata.txt'
#   (2) TCGA, file name ending in "_metaExpand.txt":
#       tcgaEhnWGSgreg_meta = '/home/cguccion/rawData/April2021_Greg_TCGA_WGS/meta_expansion/13722_20210405-101126-TCGA-WGS-Qiita-sample-metadata_esoph_hnc_metaExpand.txt'
# Active choice: (2) TCGA, file name ending in "_MetaExpand.txt"
tcgaEhnWGSgreg_meta = '/home/cguccion/rawData/April2021_Greg_TCGA_WGS/meta_expansion/13722_20210405-101126-TCGA-WGS-Qiita-sample-metadata_esoph_hnc_MetaExpand.txt'

# Taxonomy table, located inside the raw data directory
tcgaEhnWGSgreg_taxa = f'{tcgaEhnWGSgreg_}/wol_gotu_taxonomy.csv'

# --- Intermediate files ---
# Directory of the _data.csv and _tax.csv files used as Neufit input
neufit_input_path = '/home/cguccion/NeutralEvolutionModeling/ipynb/data_tax_csv'

# --- Output files ---
# Directory for all graphs and command line outputs from running Neufit
neufit_output_path = '/home/cguccion/NeutralEvolutionModeling/ipynb/neufit_output'

##### Import Functions

In [None]:
from nevo.neutral_fit import neufit
from nevo.utils import biom2data_tax

import matplotlib
%matplotlib inline

## Choose Dataset + Run Pipeline

### Hutch Kraken Dataset: Ludmil Alexandrov

##### Dataset Info

#### Combined Data (Everything in the dataset)

In [None]:
"""Output flags passed to Neufit_Pipeline_Main (set each to True or False):
    norm_graph    : Prints and saves the neutral evolution graph without any coloring
    colored_graph : Prints and saves the neutral evolution graph with coloring
                    (presumably -- the earlier note repeated the norm_graph text; confirm)
    non_neutral   : Prints and saves the most non-neutral microbes in a csv file
"""
norm_graph = colored_graph = non_neutral = True

# Run with no output files
Neufit_Pipeline_Main(
    'combined', 'hutchKraken', 'combined_biome',
    norm_graph, colored_graph, non_neutral,
    non_save=True,
)

# Alternative: run with all possible output files
# Neufit_Pipeline_Main('combined', 'hutchKraken', 'combined_biome', norm_graph, colored_graph, non_neutral, full_non_neutral=True)

#### Progressors Data (Patients who went on to progress to EAC)

In [None]:
"""Output flags passed to Neufit_Pipeline_Main (set each to True or False):
    norm_graph    : Prints and saves the neutral evolution graph without any coloring
    colored_graph : Prints and saves the neutral evolution graph with coloring
                    (presumably -- the earlier note repeated the norm_graph text; confirm)
    non_neutral   : Prints and saves the most non-neutral microbes in a csv file
"""
norm_graph = colored_graph = non_neutral = True

# Run with all possible output files
Neufit_Pipeline_Main(
    'progressors', 'hutchKraken', 'P_biome',
    norm_graph, colored_graph, non_neutral,
    full_non_neutral=True,
)

#### Progressor Data Time Point 1 Only (Patients who went on to progress to EAC, but were healthy at this timepoint (timepoint 1))

In [None]:
"""Output flags passed to Neufit_Pipeline_Main (set each to True or False):
    norm_graph    : Prints and saves the neutral evolution graph without any coloring
    colored_graph : Prints and saves the neutral evolution graph with coloring
                    (presumably -- the earlier note repeated the norm_graph text; confirm)
    non_neutral   : Prints and saves the most non-neutral microbes in a csv file
"""
norm_graph = colored_graph = non_neutral = True

# Run with all possible output files
Neufit_Pipeline_Main(
    'progressorsT1', 'hutchKraken', 'P_T1_biome',
    norm_graph, colored_graph, non_neutral,
    full_non_neutral=True,
)

#### Progressor Data Time Point 2 Only (Patients who went on to progress to EAC, and had cancer at this timepoint (timepoint 2))

In [None]:
"""Output flags passed to Neufit_Pipeline_Main (set each to True or False):
    norm_graph    : Prints and saves the neutral evolution graph without any coloring
    colored_graph : Prints and saves the neutral evolution graph with coloring
                    (presumably -- the earlier note repeated the norm_graph text; confirm)
    non_neutral   : Prints and saves the most non-neutral microbes in a csv file
"""
norm_graph = colored_graph = non_neutral = True

# Run with all possible output files
Neufit_Pipeline_Main(
    'progressorsT2', 'hutchKraken', 'P_T2_biome',
    norm_graph, colored_graph, non_neutral,
    full_non_neutral=True,
)

#### NonProgressor Data (Patients who never progressed to EAC)

In [None]:
"""Output flags passed to Neufit_Pipeline_Main (set each to True or False):
    norm_graph    : Prints and saves the neutral evolution graph without any coloring
    colored_graph : Prints and saves the neutral evolution graph with coloring
                    (presumably -- the earlier note repeated the norm_graph text; confirm)
    non_neutral   : Prints and saves the most non-neutral microbes in a csv file
"""
norm_graph = colored_graph = non_neutral = True

# Run with all possible output files
Neufit_Pipeline_Main(
    'nonProgressors', 'hutchKraken', 'NP_biome',
    norm_graph, colored_graph, non_neutral,
    full_non_neutral=True,
)

#### NonProgressor Data Time Point 1 Only (Patients who never progressed to EAC (timepoint 1))

In [None]:
"""Output flags passed to Neufit_Pipeline_Main (set each to True or False):
    norm_graph    : Prints and saves the neutral evolution graph without any coloring
    colored_graph : Prints and saves the neutral evolution graph with coloring
                    (presumably -- the earlier note repeated the norm_graph text; confirm)
    non_neutral   : Prints and saves the most non-neutral microbes in a csv file
"""
norm_graph = colored_graph = non_neutral = True

# Run with all possible output files
Neufit_Pipeline_Main(
    'nonProgressorsT1', 'hutchKraken', 'NP_T1_biome',
    norm_graph, colored_graph, non_neutral,
    full_non_neutral=True,
)

#### NonProgressor Data Time Point 2 Only (Patients who never progressed to EAC, now at their second check (timepoint 2))

In [None]:
"""Output flags passed to Neufit_Pipeline_Main (set each to True or False):
    norm_graph    : Prints and saves the neutral evolution graph without any coloring
    colored_graph : Prints and saves the neutral evolution graph with coloring
                    (presumably -- the earlier note repeated the norm_graph text; confirm)
    non_neutral   : Prints and saves the most non-neutral microbes in a csv file
"""
norm_graph = colored_graph = non_neutral = True

# Run with all possible output files
Neufit_Pipeline_Main(
    'nonProgressorsT2', 'hutchKraken', 'NP_T2_biome',
    norm_graph, colored_graph, non_neutral,
    full_non_neutral=True,
)

### TCGA WGS Esophagus / Head and Neck Cancer Dataset: Greg Poore

#### Esophagus Normal (Paired Tissue Normal)

In [None]:
"""Output flags passed to Neufit_Pipeline_Main (set each to True or False):
    norm_graph    : Prints and saves the neutral evolution graph without any coloring
    colored_graph : Prints and saves the neutral evolution graph with coloring
                    (presumably -- the earlier note repeated the norm_graph text; confirm)
    non_neutral   : Prints and saves the most non-neutral microbes in a csv file
"""
norm_graph = colored_graph = non_neutral = True

# Options: S1: normal/cancer; S2: e (esophagus) / hn (head and neck)

# Alternative: run with no output files
# Neufit_Pipeline_Main('normal', 'TCGA_WGS', 'e', norm_graph, colored_graph, non_neutral, non_save=True)

# Run with all possible output files
Neufit_Pipeline_Main(
    'normal', 'TCGA_WGS', 'e',
    norm_graph, colored_graph, non_neutral,
    full_non_neutral=True,
)



#### Esophagus Cancer: Squamous Cell Carcinoma

In [None]:
"""Output flags passed to Neufit_Pipeline_Main (set each to True or False):
    norm_graph    : Prints and saves the neutral evolution graph without any coloring
    colored_graph : Prints and saves the neutral evolution graph with coloring
                    (presumably -- the earlier note repeated the norm_graph text; confirm)
    non_neutral   : Prints and saves the most non-neutral microbes in a csv file
"""
norm_graph = colored_graph = non_neutral = True

# Options: S1: normal/cancer; S2: e (esophagus) / hn (head and neck) / e_scc (esophagus, squamous cell)

# Run with all possible output files
Neufit_Pipeline_Main(
    'cancer', 'TCGA_WGS', 'e_scc',
    norm_graph, colored_graph, non_neutral,
    full_non_neutral=True,
)

#### Head and Neck Normal (Paired Tissue Normal)

In [None]:
"""Output flags passed to Neufit_Pipeline_Main (set each to True or False):
    norm_graph    : Prints and saves the neutral evolution graph without any coloring
    colored_graph : Prints and saves the neutral evolution graph with coloring
                    (presumably -- the earlier note repeated the norm_graph text; confirm)
    non_neutral   : Prints and saves the most non-neutral microbes in a csv file
"""
norm_graph = colored_graph = non_neutral = True

# Options: S1: normal/cancer; S2: e (esophagus) / hn (head and neck)

# Run with all possible output files
Neufit_Pipeline_Main(
    'normal', 'TCGA_WGS', 'hn',
    norm_graph, colored_graph, non_neutral,
    full_non_neutral=True,
)

#### Head and Neck Cancer

In [None]:
"""Output flags passed to Neufit_Pipeline_Main (set each to True or False):
    norm_graph    : Prints and saves the neutral evolution graph without any coloring
    colored_graph : Prints and saves the neutral evolution graph with coloring
                    (presumably -- the earlier note repeated the norm_graph text; confirm)
    non_neutral   : Prints and saves the most non-neutral microbes in a csv file
"""
norm_graph = colored_graph = non_neutral = True

# Options: S1: normal/cancer; S2: e (esophagus) / hn (head and neck)

# Called with full_non_neutral=True
Neufit_Pipeline_Main(
    'cancer', 'TCGA_WGS', 'hn',
    norm_graph, colored_graph, non_neutral,
    full_non_neutral=True,
)