## Mounting Ukko2 folders on the local computer

On Linux (and probably also on Mac) you can use sshfs:
```
mkdir /home/hajaalin/ukko2-proj
mkdir /home/hajaalin/ukko2-wrk/
sshfs ukko2.cs.helsinki.fi:/wrk/hajaalin /home/hajaalin/ukko2-wrk/
sshfs ukko2.cs.helsinki.fi:/proj/hajaalin /home/hajaalin/ukko2-proj/

```
On Windows:
- To mount \$WRKDIR, type \\\\ukko2-smb.cs.helsinki.fi\\YOUR_UH_USERNAME in File Explorer.

- To mount \$PROJ, type \\\\nas-fs2.cs.helsinki.fi\\proj\\YOUR_UH_USERNAME in File Explorer.

In [25]:
from jinja2 import Environment, FileSystemLoader
from pathlib import Path,PosixPath,PurePosixPath,WindowsPath

import os

# Ukko2 $PROJ and $WRKDIR folders as mounted on the local computer.
# pathlib refuses to instantiate WindowsPath on a POSIX system (and PosixPath
# on Windows), so choose the mount points matching the platform this notebook
# runs on instead of commenting lines in and out by hand.
if os.name == 'nt':
    # Windows: the SMB shares, addressed by their UNC paths.
    proj_local = WindowsPath('\\\\nas-fs2.cs.helsinki.fi\\proj\\hajaalin')
    wrk_local = WindowsPath('\\\\ukko2-smb.cs.helsinki.fi\\hajaalin')
else:
    # Linux/Mac: the sshfs mount points.
    proj_local = PosixPath('/home/hajaalin/ukko2-proj')
    wrk_local = PosixPath('/home/hajaalin/ukko2-wrk')

# $WRKDIR path as seen on Ukko2 itself. A *pure* POSIX path, because it is
# only used to build path strings and is never accessed from this computer.
wrk_remote = PurePosixPath('/wrk/hajaalin')

# Owner and identifier of the run; both are used in directory and file names.
user = 'tskarhu'
run = '201901231b_win'

# Objects whose measurements are to be combined.
objects = 'Image Nuclei Cells'

# Number of wells and sites per well.
nwells = 36
sites_per_well = 4
nsites = nwells * sites_per_well


#
# Memory options
# Alternative mem_per_cpu values: 2048, 4096.
mem_per_cpu = 6144
java_opts = "-Xmx512m"


#
# Parameters for splitting the run in batches.

# Number of threads available, assuming we request 4 nodes but use only
# 1 thread per core
# (see https://wiki.helsinki.fi/display/it4sci/Technical+Specifications).
nnodes = 4
cpus_per_node = 2
cores_per_cpu = 14
threads_available = nnodes * cpus_per_node * cores_per_cpu
# Something on Ukko2 seems not to work when trying to use the maximum number
# of cores, so let's settle for half. Floor division keeps the thread count
# an integer (plain "/" would turn it into a float).
threads_available //= 2

#https://stackoverflow.com/questions/9761562/how-many-factors-in-an-integer
def factors(n):
    """Return all positive divisors of *n* in ascending order.

    Every integer from 1 to n (inclusive) is tried, so the result is an
    empty list when n is smaller than 1.
    """
    return [candidate for candidate in range(1, n + 1) if n % candidate == 0]

# To keep all batches the same size, the number of batches must be a factor
# of nsites...
f = factors(nsites)
# ... and must not exceed the number of threads available.
f2 = [i for i in f if i <= threads_available]
# Use as many equally sized batches as the available threads allow.
nbatches = max(f2)
# nbatches divides nsites exactly, so floor division loses nothing and avoids
# a round trip through float.
batch_size = nsites // nbatches

# To set the batch size manually, override the computed values here, e.g.:
#nbatches = 36
#batch_size = 4

# One task per batch, one CPU per task.
ntasks = nbatches
cpus_per_task = 1
threads_requested = ntasks * cpus_per_task

# 1-based index of the first site of the last batch.
batch_last_start = (nbatches - 1) * batch_size + 1
# Must equal nsites if the batches cover all sites.
nsites_sanity_check = nbatches * batch_size
print("threads_available: %d" % threads_available)
print("threads_requested: %d" % threads_requested)
print("nsites: %d" % nsites)
print("nbatches: %d" % nbatches)
print("nbatch_size: %d" % batch_size)
print("nsites_sanity_check: %d" % nsites_sanity_check)

# Uncomment to check that the mounted folders are visible from this computer:
#import os
#print(os.listdir(wrk_local))
#print(os.listdir(proj_local))
#print(os.listdir(proj_local / 'Projects' / 'CellProfiler'))

# Create directories for the sbatch script, the batch data file (.h5) and the
# output.
sbatch_dir_local = proj_local / 'Projects' / 'CellProfiler' / user / run
# exist_ok=False: this raises FileExistsError if the directory for this run
# already exists (presumably to avoid overwriting an earlier run's script).
sbatch_dir_local.mkdir(parents=True, exist_ok=False)
cp_batch_dir_local = wrk_local / 'CellProfiler' / 'cp_batch_files' / user / run
cp_batch_dir_local.mkdir(parents=True, exist_ok=True)
cp_output_dir_local = wrk_local / 'CellProfiler' / 'output' / user / run
cp_output_dir_local.mkdir(parents=True, exist_ok=True)

# These are the same directories, but as seen on the cluster.
cp_batch_dir_remote = wrk_remote / 'CellProfiler' / 'cp_batch_files' / user / run
cp_output_dir_remote = wrk_remote / 'CellProfiler' / 'output' / user / run

# Values substituted into the sbatch script template. All paths are the
# cluster-side (remote) ones.
context = {
    'nnodes': nnodes,
    'ntasks': ntasks,
    'cpus_per_task': cpus_per_task,
    'mem_per_cpu': mem_per_cpu,
    'java_opts': java_opts,
    'workdir': cp_output_dir_remote,
    'outputroot': cp_output_dir_remote,
    'user': user,
    'run': run,
    'cp_batchfile': cp_batch_dir_remote / 'Batch_data.h5',
    'nwells': nwells,
    'sites_per_well': sites_per_well,
    'nsites': nsites,
    'nbatches': nbatches,
    'nsites_sanity_check': nsites_sanity_check,
    'batch_size': batch_size,
    'batch_last_start': batch_last_start,
    'objects': objects,
}

# Directory with the sbatch script template.
templatedir = '.'

# Create the jinja2 environment and render the template.
j2_env = Environment(loader=FileSystemLoader(templatedir))
script = j2_env.get_template('cp_sbatch_template.sh.j2').render(context)

# Save the result next to the other files of this run.
scriptfile = sbatch_dir_local / ("sbatch_%s_%s_%d_%d.sh" % (user, run, nsites, batch_size))
# newline='\n' writes Unix line endings even on Windows. The explicit
# encoding keeps the written bytes independent of the local default
# encoding.
with open(scriptfile, "w", newline='\n', encoding='utf-8') as fh:
    fh.write(script)
print(scriptfile)


threads_available: 56
threads_requested: 48
nsites: 144
nbatches: 48
nbatch_size: 3
nsites_sanity_check: 144
\\nas-fs2.cs.helsinki.fi\proj\hajaalin\Projects\CellProfiler\tskarhu\201901231b_win\sbatch_tskarhu_201901231b_win_144_3.sh


In [24]:
# Show the rendered sbatch script so it can be inspected before submitting.
print(script)

#!/bin/bash

#
## This template and the script to combine the results (see the end of the file) are based 
## on the example on https://portal.biohpc.swmed.edu/content/guides/cellprofiler-biohpc/
#

##SBATCH -p test
#SBATCH --workdir=/wrk/hajaalin/CellProfiler/output/tskarhu/201901231b_win
#SBATCH --output=tskarhu_201901231b_win.out

# Use 4 nodes total
#SBATCH -N 4

#SBATCH --ntasks=48

# Time Limit of 2 hours
#SBATCH -t 02:00:00

# Memory request per core, maximum 256000MB / 28 ~ 9140MB
#SBATCH --mem-per-cpu=6144  

JAVA_OPTS='-Xmx512m'

# CellProfiler creates lots of threads, raise our process limit
# or batches may fail
ulimit -u 4096

# Load the cellprofiler module
module use /proj/hajaalin/LMUModules/
module load Miniconda2
source activate cellprofiler-3.1.8

batch_data="/wrk/hajaalin/CellProfiler/cp_batch_files/tskarhu/201901231b_win/Batch_data.h5"
outputroot="/wrk/hajaalin/CellProfiler/output/tskarhu/201901231b_win"

# dataset size: 36 wells, 4 sites per well. 
# 36x4 = 144
# n