In [1]:
%load_ext autoreload
%autoreload 2
import ribo_util
import ribo_main
import ribo_analysis
import ribo_plot



'''Settings and Inputs'''

library_creator = 'DG'        #FM, KS, CW, Menkin, Li, etc... (initial of who made it)
organism        = 'Volcanii'      #Coli, Subtilis, Tuberculosis etc...

inputs = {
    
    'files' : [library_creator + str(i) for i in range(1, 2)],       #Files to analyze
    # for data renaming: useful to rename files from seq_facility - can be ignored
    'order_name' : 'none',    # to rename/concat FASTQ if needed, else set to 'none' or ignore
    
    # select which functions to run: 'yes' and 'no' 
    'run_filtering': 'no',
    'run_bowtie'   : 'no',
    'run_density'  : 'yes',
    'run_readQC'   : 'no',
    
    # cuttoff for readsize and quality for filtering and density
    'minlength'    : 10,
    'maxlength'    : 40,
    'phred_cutoff' : 10,
                        
    # linker-1 for FM = CTGTAGGCACCATCAATAGATCGGAAGAGCACACGTCTGAACTCCAGTCA
    # for SM          = CTGTAGGCACCATCAATTCGTATGCCGTCTTCTGCTTG
    # Gross           = CTGTAGGCACCATCAATATCTCGTATGCCGTCTTCTGCTTG
    # Zoya            = ATCTCGTATGCCGTCTTCTGCTTG
    'linker'       :   'CTGTAGGCACCATCAATAGATCGGAAGAGCACACGTCTGAACTCCAGTCA',
                      
    
    # CPU information for multithreading applications, 
    'multiprocess' : 'yes',
    'threads'      : 8,
    'cores'        : 4, 
    
    }

'''Input directories'''
# Can be customized to your liking 

path_pc     = '/Volumes/Diego_2TB/ribosome_profiling_final_libs/libraries/H98_WT_ctrl/'
inpath      = path_pc + 'reads/'
path_script = '/Users/DRG/Desktop/ribosome_profiling_first_library/2nd_libraries/leucine_analysis/scripts/'

paths_in = {
    'fastq_download' : inpath  + 'FASTQ/downloaded/',
    'path_fastq'     : inpath  + 'FASTQ/' + library_creator + '/',
    'path_gff'       : path_pc + 'annotations/'+organism+'/'+organism+'.gff',
    'path_gff_dict'  : path_pc + 'annotations/'+organism+'/'+organism+'_dict_for_pause',   #will be made from GFF
    'path_bowtie'    : path_script + 'bowtie/bowtie',
    #'btindex_ladder' : path_script + 'bowtie/indexes/ladder/ladder',
    #'btindex_trna'   : path_script + 'bowtie/indexes/'+organism+'/'+organism+'_tRNA',
    'btindex_rrna'   : path_script + 'bowtie/indexes/'+organism+'/'+organism+'_rRNA',
    'btindex_chr'    : path_script + 'bowtie/indexes/'+organism+'/'+organism+'_genome',
    }


### Output directories
paths_out = {
    'path_filter'       : inpath  + 'density/filtering_bowtie/filterdata/',
    #'path_ladder'       : inpath  + 'density/filtering_bowtie/alignments/ladder/',
    #'path_trna'         : inpath  + 'density/filtering_bowtie/alignments/tRNA/',
    'path_rrna'         : inpath  + 'density/filtering_bowtie/alignments/rRNA/',
    'path_chr'          : inpath  + 'density/filtering_bowtie/alignments/chr/',
    'path_temp'         : inpath  + 'density/filtering_bowtie/tmpds/',
    'path_density'      : inpath  + 'density/density/',
    'path_log'          : inpath  + 'density/logs/',
    'path_analysis_log' : inpath  + 'analysis/logs/',
    'path_analysis'     : inpath  + 'analysis/individual/',
    'path_figures'      : inpath  + 'figures/',
    }

gff_settings = {
    'path_out'         : 0,
    'feat_of_interest' : 'protein_coding',         #all, protein_coding, tRNA, rRNA
    'name_qual'        : 'Name',
    'name_qual_alt'    : 'gene_id',
    'biotype_qual'     : 'protein_coding',          #if biotype qualifier NA, biotype_qual = 'all'
    'aSD_seq'          : 'TCCTCC'
    }

# Modify FASTQ files downloaded from server, renaming and concatonating if necessary
#ribo_util.rename_FASTQ(inputs, library_creator, paths_in, paths_out)

# Check inputs, create output paths, and make gff dictionary if needed
step = 'density'
ribo_util.check_inputs(inputs, paths_in, step)
ribo_util.createpath(inputs, paths_out)

#ribo_util.GFF_to_dict(paths_in, gff_settings)

In [2]:
filterreads = ribo_main.run_filter(inputs, paths_in, paths_out)
bowtiereads = ribo_main.run_align(inputs, paths_in, paths_out)
densityreads = ribo_main.run_density(inputs, paths_in, paths_out)
analyzereads = ribo_analysis.readQC(inputs, paths_in, paths_out)

ERROR: DG1 has not been filtered, change run setting
-----FILTER-----

Files to filter: DG1
Filter parameters are: 
min length = 10 
max length = 40 
phred cutoff = 10 

	Started filtering at 2020-02-24 23:37:08.824702
	Finished filtering at 2020-02-24 23:37:08.824754
	COMPLETED FILTERING
DG1 has been aligned

------ALIGN------

Files to align: DG1

	Started Bowtie alignment at 2020-02-24 23:37:08.825170
	Finished ladder removal at 2020-02-24 23:37:08.825215
	Finished tRNA removal at 2020-02-24 23:37:08.825626
	Finished rRNA removal at 2020-02-24 23:37:08.825693
	Finished chromosome alignment at 2020-02-24 23:37:08.825747
	COMPLETED ALIGNING

-----DENSITY-----

Files to condense: DG1

	Started density at 2020-02-24 23:37:08.827025
	Finished density at 2020-02-24 23:38:11.345759
	COMPLETED DENSITY
ERROR:DG1 has not been aligned


In [3]:
log_data = ribo_util.get_filter_bowtie_log(inputs, paths_in, paths_out)
ribo_plot.plot_alignment_allocation(inputs, paths_in, paths_out)

IOError: [Errno 2] No such file or directory: '/Volumes/Diego_2TB/ribosome_profiling_final_libs/libraries/H98_WT_ctrl/reads/density/logs/DG1_filter'

In [None]:
read_size_distribution = ribo_plot.size_dist(inputs, paths_in, paths_out)

In [None]:
read_composition = ribo_plot.read_comp(inputs, paths_in, paths_out)