# In [None]:
import os

def random_kallisto(directory, files, res_dir, n=1):
    """
    Write a Kallisto command for a random sample of a project's read files.

    The parameter values for the ``-s``, ``-l``, ``-b`` and ``-t`` flags are
    read from the module-level names ``sigma``, ``length``, ``btstrp`` and
    ``thrds``; these (and ``np``) must exist before this function is called.

    Parameters
    ----------
    directory : str
        Folder holding the read files. A leading '../' is stripped when the
        file paths are written into the command.
    files : list
        File names found in `directory`. 'SampleSheet.csv' entries are never
        sampled. The list itself is left untouched.
    res_dir : str
        Project name; output goes to 'input/kallisto_all/<res_dir>/kallisto'.
    n : int, optional
        How many distinct files to draw. If fewer than `n` files are
        available, all of them are used.

    Returns
    -------
    info : str
        A '#' comment line naming `directory`.
    kallisto : str
        The 'kallisto quant' command, terminated by ';'.

    Raises
    ------
    ValueError
        If `directory` is not a str, `files` is not a list, or `files`
        holds nothing besides the sample sheet.
    """
    if not isinstance(directory, str):
        raise ValueError('directory must be a str')
    if not isinstance(files, list):
        raise ValueError('files must be a list')

    # work on a filtered copy so the caller's list is never modified
    candidates = [name for name in files if name != 'SampleSheet.csv']
    if not candidates:
        raise ValueError(
            'no sequence files to sample in {0}'.format(directory))

    # parts of each kallisto statement

    # information
    info = '# kallisto command for {0}'.format(directory)
    # transcript file location:
    k_head = 'kallisto quant -i input/transcripts.idx -o '

    # output file location
    k_output = 'input/kallisto_all/' + res_dir + '/kallisto '
    # parameter info:
    k_params = '--single -s {0} -l {1} -b {2} -t {3}'.format(sigma, length,
                                                             btstrp, thrds)

    # randomly select up to n files; replace=False keeps the same file from
    # being listed twice in one command
    n_draw = min(n, len(candidates))
    selected = np.random.choice(candidates, n_draw, replace=False)

    # paths in the command are written without a leading '../'
    d = directory[3:] if directory.startswith('../') else directory
    k_files = ''.join(' ' + d + '/' + name for name in selected)

    # all together now:
    kallisto = k_head + k_output + k_params + k_files + ';'
    return info, kallisto

def walk_seq_directories(directory):
    """
    Given a directory, find all the rna-seq folders and make kallisto commands.

    Directory format is predefined and must follow my rules.

    Every directory that os.walk finds below `directory` is treated as a
    project. For each project without a 'Kallisto_Info.csv' file, one read
    file is sampled through random_kallisto and the folder
    '../input/kallisto_sampler/<project>' is created if missing.

    Parameters
    ----------
    directory : str
        Top-level folder that contains the project folders.

    Returns
    -------
    str
        The info line and command of every sampled project, each followed
        by a newline. Empty when nothing below `directory` was found.
    """
    commands = []
    # directory contains all the projects, walk through it:
    for dirpath, _, filenames in os.walk(directory):
        # os.walk yields the parent itself first; it is not a project
        if dirpath == directory:
            continue

        # project name relative to the parent; relpath (unlike slicing off
        # len(directory)+1 characters) stays correct when `directory` is
        # given with a trailing path separator
        res_dir = os.path.relpath(dirpath, directory)

        # if this project has attributes explicitly written in
        # use those parameter specs:
        if 'Kallisto_Info.csv' in filenames:
            # NOTE(review): the result of explicit_kallisto is discarded, so
            # these projects add nothing to the returned script - confirm
            # whether skipping them is intended or the command should be
            # appended like the sampled ones below.
            info, command = explicit_kallisto(dirpath, filenames, res_dir)
            continue

        # otherwise, best guesses:
        info, command = random_kallisto(dirpath, filenames, res_dir, 1)
        commands.append(info + '\n' + command + '\n')

        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists test
        os.makedirs('../input/kallisto_sampler/' + res_dir, exist_ok=True)
    return ''.join(commands)

# Write the shell script: shebang, the transcript-index step, then every
# kallisto quant command produced by walking `directory`.
with open('../kallisto_downsampled_commands.sh', 'w') as f:
    # the index command is split in two pieces only to keep lines short
    s1 = 'kallisto index -i '
    s2 = 'input/transcripts.idx input/c_elegans_WBcel235.rel79.cdna.all.fa;\n'
    f.writelines(['#!/bin/bash\n', '# make transcript index\n', s1 + s2])

    # the walk happens after the header is written, while the file is open
    kallisto = walk_seq_directories(directory)
    f.write(kallisto)