## Mounting Ukko2 folders on the local computer

On Linux (and probably also on Mac) you can use sshfs:
```
mkdir /home/hajaalin/ukko2-proj
mkdir /home/hajaalin/ukko2-wrk/
sshfs ukko2.cs.helsinki.fi:/wrk/hajaalin /home/hajaalin/ukko2-wrk/
sshfs ukko2.cs.helsinki.fi:/proj/hajaalin /home/hajaalin/ukko2-proj/

```
On Windows:
- To mount \$WRKDIR, type \\\\ukko2-smb.cs.helsinki.fi\\YOUR_UH_USERNAME in File Explorer.

- To mount \$PROJ, type \\\\nas-fs2.cs.helsinki.fi\\proj\\YOUR_UH_USERNAME in File Explorer.

## Project settings
You might change these back and forth between different projects/runs as you run the cells below.

In [60]:
from jinja2 import Environment, FileSystemLoader
from pathlib import Path,PosixPath,PurePosixPath,WindowsPath

user = 'hajaalin'
email = 'harri.jaalinoja@helsinki.fi'

# Ukko2 $PROJ and $WRKDIR folders as mounted on the local computer.
# (The commented-out WindowsPath lines are the alternatives for a Windows mount.)
proj_local = PosixPath(f'/home/{user}/ukko2-proj')
#proj_local = WindowsPath('\\\\nas-fs2.cs.helsinki.fi\\proj\\%s' % (user))
wrk_local = PosixPath(f'/home/{user}/ukko2-wrk')
#wrk_local = WindowsPath('\\\\ukko2-smb.cs.helsinki.fi\\%s' % (user))
# The same two folders as they are addressed on Ukko2 itself ($PROJ and $WRKDIR).
proj_remote = PurePosixPath(f'/proj/{user}')
wrk_remote = PurePosixPath(f'/wrk/{user}')

project = 'tskarhu'
# Run identifier; previously used values are kept commented out for reference.
#run = '20190204b'
#run = '20190205a'
run = '20190205b'

# number of wells and sites per well
nwells = 36
sites_per_well = 4

# space-separated names of the objects whose measurements are combined
objects = 'Image Nuclei Cells'


# common prefix built from the project and run names
results_prefix = f'{project}_{run}_'

# Directories on the local mounts. All of them end in <project>/<run>.
sbatch_dir_local = proj_local.joinpath('Projects', 'CellProfiler', project, run)
cp_batch_dir_local = wrk_local.joinpath('CellProfiler', 'cp_batch_files', project, run)
cp_output_dir_local = wrk_local.joinpath('CellProfiler', 'output', project, run)

# The same directories, but as seen on the cluster.
sbatch_dir_remote = proj_remote.joinpath('Projects', 'CellProfiler', project, run)
cp_batch_dir_remote = wrk_remote.joinpath('CellProfiler', 'cp_batch_files', project, run)
cp_output_dir_remote = wrk_remote.joinpath('CellProfiler', 'output', project, run)

# print a directory name to show the settings
print(cp_output_dir_remote)

/wrk/hajaalin/CellProfiler/output/tskarhu/20190205b


## Create batch job script

In [61]:

###
### You should not need to modify anything below this line. ###
###

# total number of sites in the run: number of wells times sites per well
nsites = nwells * sites_per_well


#
# Memory options
#mem_per_cpu = 2048
#mem_per_cpu = 4096
mem_per_cpu = 6144
java_opts = "-Xmx512m"


#
# Parameters for splitting the run in batches.

# number of threads available, assuming we request 4 nodes, but using only 1 thread per core
# (see https://wiki.helsinki.fi/display/it4sci/Technical+Specifications)
nnodes = 4
cpus_per_node = 2
cores_per_cpu = 14
threads_available = nnodes * cpus_per_node * cores_per_cpu
# something on Ukko2 seems to not work when trying to use maximum number of cores,
# so let's settle for half.
# Floor division keeps the thread count an integer; true division would turn it
# into a float (56.0).
threads_available = threads_available // 2

#https://stackoverflow.com/questions/9761562/how-many-factors-in-an-integer
def factors(n):
    """Return every positive divisor of n, in ascending order.

    Candidates 1..n are tried one by one; for n < 1 there are no candidates
    and the result is an empty list.
    """
    return [candidate for candidate in range(1, n + 1) if n % candidate == 0]

# to keep all batches the same size, the number of batches must be a factor of nsites...
f = factors(nsites)
# ... and must not exceed the number of threads available
f2 = [i for i in f if i <= threads_available]
if not f2:
    # Happens when nsites < 1 or threads_available < 1; fail with a clear message
    # instead of the generic "max() arg is an empty sequence".
    raise ValueError("no usable number of batches for nsites=%d and threads_available=%d"
                     % (nsites, threads_available))
# use as many equally sized batches as the available threads allow
nbatches = max(f2)
#print(f)
#print(f2)
# nbatches divides nsites exactly, so integer division is exact and avoids a float round trip
batch_size = nsites // nbatches

# manual batch size settings
#nbatches = 36
#batch_size = 4

# one task per batch, one CPU per task
ntasks = nbatches
cpus_per_task = 1
threads_requested = ntasks * cpus_per_task

# presumably the 1-based index of the first site of the last batch; it is passed
# on to the sbatch template -- verify against the template
batch_last_start = (nbatches - 1) * batch_size + 1
# should equal nsites when the batches cover all sites exactly
nsites_sanity_check = nbatches * batch_size
print("threads_available: %d" % threads_available)
print("threads_requested: %d" % threads_requested)
print("nsites: %d" % nsites)
print("nbatches: %d" % nbatches)
print("nbatch_size: %d" % batch_size)
print("nsites_sanity_check: %d" % nsites_sanity_check)

#import os
#print(os.listdir(wrk_local))
#print(os.listdir(proj_local))
#print(os.listdir(proj_local / 'Projects' / 'CellProfiler'))

# Create the local directories for the sbatch script, the batch data file (.h5)
# and the output.
# NOTE(review): the sbatch directory is created with exist_ok=False, so running
# this cell again for an existing run raises FileExistsError -- confirm that this
# is meant as a guard against overwriting an earlier run.
sbatch_dir_local.mkdir(parents=True, exist_ok=False)
for directory in (cp_batch_dir_local, cp_output_dir_local):
    directory.mkdir(parents=True, exist_ok=True)


# Values substituted into the sbatch script template. All paths are the
# cluster-side (remote) ones.
context = {
    'email': email,
    'nnodes': nnodes,
    'ntasks': ntasks,
    'cpus_per_task': cpus_per_task,
    'mem_per_cpu': mem_per_cpu,
    'java_opts': java_opts,
    'workdir': cp_output_dir_remote,
    'outputroot': cp_output_dir_remote,
    'resultsdir': sbatch_dir_remote,
    'project': project,
    'run': run,
    'resultsprefix': results_prefix,
    'cp_batchfile': cp_batch_dir_remote / 'Batch_data.h5',
    'nwells': nwells,
    'sites_per_well': sites_per_well,
    'nsites': nsites,
    'nbatches': nbatches,
    'nsites_sanity_check': nsites_sanity_check,
    'batch_size': batch_size,
    'batch_last_start': batch_last_start,
    'objects': objects,
}

# Directory with sbatch script template
templatedir = '.'

# Create the jinja2 environment and render the template with the values above.
j2_env = Environment(loader=FileSystemLoader(templatedir))
template = j2_env.get_template('cp_sbatch_template.sh.j2')
script = template.render(context)

# Save the rendered script into the local sbatch directory. newline='\n' forces
# Unix line endings regardless of the local platform.
scriptfilename = Path(f"sbatch_{project}_{run}_{nsites:d}_{batch_size:d}.sh")
scriptfile = sbatch_dir_local / scriptfilename
with scriptfile.open("w", newline='\n') as fh:
    fh.write(script)
#print(scriptfile)

# Print the commands to run on the cluster; the script is referenced by its remote path.
remote_scriptfile = sbatch_dir_remote / scriptfilename
print("# To submit the batch job, run the following command on Ukko2:")
print("sbatch --exclude=ukko2-paavo,ukko2-pekka " + str(remote_scriptfile))
print("# To check the job que:")
print("squeue | grep " + user)




threads_available: 56
threads_requested: 48
nsites: 144
nbatches: 48
nbatch_size: 3
nsites_sanity_check: 144
# To submit the batch job, run the following command on Ukko2:
sbatch /proj/hajaalin/Projects/CellProfiler/tskarhu/20190205b/sbatch_tskarhu_20190205b_144_3.sh
# To check the job que:
squeue | grep hajaalin


## Create result files with local pathnames

In [62]:
import pandas as pd

# data location mappings, same as specified in CreateBatchFiles
# (key: path as written in the result files, value: path to use locally)
datamap = {'/wrk/hajaalin/data/tskarhu' : '/mnt/lmu-netapp/instruments/Nano/MDCStore/tskarhu'}

# Take a snapshot of the directory listing first: the loop writes new "*_local.csv"
# files into the same directory, and iterating a lazy glob while the directory
# changes is not reliable.
for csv_file in list(sbatch_dir_local.glob('*.csv')):
    # Skip the files produced by an earlier run of this cell. Only the file name
    # is checked, so directory names cannot trigger the skip.
    if csv_file.name.endswith('_local.csv'):
        continue
    print(csv_file)

    # 'round_trip' keeps floating point values unchanged when they are written back
    df = pd.read_csv(csv_file, float_precision='round_trip')
    paths = [column for column in df.columns if "PathName" in column]

    for p in paths:
        for remote_prefix, local_prefix in datamap.items():
            # regex=False: the keys are literal paths, not regular expressions.
            # The default of this parameter differs between pandas versions.
            df[p] = df[p].str.replace(remote_prefix, local_prefix, regex=False)

    # write next to the original as <name>_local.csv
    df.to_csv(csv_file.with_name(csv_file.stem + "_local" + csv_file.suffix), index=False)


/home/hajaalin/ukko2-proj/Projects/CellProfiler/tskarhu/20190205b/tskarhu_20190205b_Combined_Image.csv
/home/hajaalin/ukko2-proj/Projects/CellProfiler/tskarhu/20190205b/tskarhu_20190205b_Combined_Nuclei.csv
/home/hajaalin/ukko2-proj/Projects/CellProfiler/tskarhu/20190205b/tskarhu_20190205b_Combined_Cells.csv


## Create .properties file for CellProfiler Analyst

In [66]:
#
# Settings for CellProfiler Analyst
#

# CSV files referenced from the properties file.
# NOTE(review): the "_local" variants were assigned first and then immediately
# overwritten, so the files without "_local" are the ones actually used --
# confirm that this is intended.
#image_csv_file = sbatch_dir_local / (results_prefix + 'Combined_Image_local.csv')
#object_csv_file = sbatch_dir_local / (results_prefix + 'Combined_Cells_local.csv')
image_csv_file = sbatch_dir_local / (results_prefix + 'Combined_Image.csv')
object_csv_file = sbatch_dir_local / (results_prefix + 'Combined_Cells.csv')

# One PathName_/FileName_ column per image name. Every entry, including the
# last one, is followed by a comma.
image_names = 'Blue,Red,NucleiOverlay,CellsOverlay'
channel_names = image_names.split(',')
image_path_cols = ''.join('PathName_' + name + ',' for name in channel_names)
image_file_cols = ''.join('FileName_' + name + ',' for name in channel_names)

# create a properties file for CPA
context = {
    'image_csv_file': image_csv_file,
    'object_csv_file': object_csv_file,
    'cell_x_loc': 'Location_Center_X',
    'cell_y_loc': 'Location_Center_Y',
    'image_path_cols': image_path_cols,
    'image_file_cols': image_file_cols,
    'image_names': image_names,
    'image_channel_colors': 'blue,red,gray,gray',
    'object_name': 'cell, cells,',
    'plate_type': 96,
    'image_tile_size': 50,
}
properties_template = j2_env.get_template('DefaultDB_MyExpt.properties')
properties = properties_template.render(context)

# Save the rendered properties into the local sbatch directory, with Unix line endings.
properties_filename = Path(f"{project}_{run}_{nsites:d}_{batch_size:d}.properties")
properties_file = sbatch_dir_local / properties_filename
with properties_file.open("w", newline='\n') as fh:
    fh.write(properties)
print(properties_file)

/home/hajaalin/ukko2-proj/Projects/CellProfiler/tskarhu/20190205b/tskarhu_20190205b_144_3.properties
