# Dividing the parameter space in different Jobs, each of them running H hours.

In [5]:
import numpy as np
import itertools

steps = 16
S_vals = np.round(np.linspace(0, 0.5, steps), 3)
b_vals = np.round(np.linspace(0, 120, steps), 3)
E_L_e_vals = np.round(np.linspace(-80, -60, steps), 3)
E_L_i_vals = np.round(np.linspace(-80, -60, steps), 3)
T_vals = np.round(np.linspace(5, 40, steps), 3)

S_vals = list(S_vals)
b_vals = list(b_vals)
E_L_i_vals = list(E_L_i_vals)
E_L_e_vals = list(E_L_e_vals)
T_vals = list(T_vals)

lst = [S_vals, b_vals, E_L_i_vals, E_L_e_vals, T_vals]

combinaison = np.array(list(itertools.product(*lst)))

print(combinaison.shape)

(1048576, 5)


In [8]:
# Let's divide it into different jobs
comb_div = np.array_split(combinaison, 22)

folder_chunks = './JUSUFlike/Data/Data/chunks_parsweep/'
for chunk_id, chunk in enumerate(comb_div):
    #print(chunk.shape)
    file_name = folder_chunks + 'chunk_' + str(chunk_id) + '.npy'
    np.save(file_name, chunk)
    combination = np.load(file_name).tolist()
    print(len(combination))
    
print(len(comb_div))

47663
47663
47663
47663
47663
47663
47663
47663
47663
47663
47663
47663
47662
47662
47662
47662
47662
47662
47662
47662
47662
47662
22


In [62]:
chunk = np.load(file_name)
print(chunk.shape)
print(chunk[-3000])

(47662, 5)
[  0.5   120.    -74.667 -74.667  23.667]


#### Chunks of parameter space have been divided and seem to be working. Now how to manage all of them manually and correctly will have to be done with caution

# How to manage files in order to account for errors in simulations and everything

## Comparing lengths of results and completed file folders to see if there is a need to scan them all

In [35]:
# Difference in time between len of long list and add iterator
steps = 10000000

In [36]:
%%timeit
list_1 = [*range(steps)]
len_list = len(list_1)

234 ms ± 1.58 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [37]:
%%timeit
len_list = 0

for i in range(steps):
    len_list += 1

349 ms ± 3.17 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


**There's little difference and I am not worried about time of computation but RAM. Since each element of the list will be a long string with numbers and letters, it will probably take a considerable amount of RAM. I prefer a method that might be sligthly slower but that  for sure won't give us memory problems.**

## Comparing the time needed to save a long .npy vector (the one with the metrics) vs the time to save an almost empty one saying COMPLETED vs a .txt file


In [39]:
file_folder = './test_files/'

In [41]:
vec1 = np.random.rand(55)
vec2 = np.array([])

In [42]:
%%timeit
np.save(file_folder + 'long.npy', vec1)

87.1 µs ± 857 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [43]:
%%timeit
np.save(file_folder + 'COMPLETED.npy', vec2)

83.8 µs ± 1.2 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [44]:
%%timeit
open(file_folder + 'COMPLETED.txt', 'a').close()

9.17 µs ± 620 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


Now that we know what we need to do, I will delete the folder. If this is to be re-run, create the folder again.
**In this case, it is evident that the empty .txt folder is superior. It weighs 0 bytes and it is created ~10 times faster than the other alternatives, making it way less possible to have errors. For this reason we will use this one as the completed indicator.**

### I will now modify the HPC_sim.py script with this indicator. It will need to have an indicator folder created in the same folder as the results.

## Comparing time taken in different types of indexing

In [55]:
S_vals = np.round(np.linspace(0, 0.5, steps), 3)
b_vals = np.round(np.linspace(0, 120, steps), 3)
E_L_e_vals = np.round(np.linspace(-80, -60, steps), 3)
E_L_i_vals = np.round(np.linspace(-80, -60, steps), 3)
T_vals = np.round(np.linspace(5, 40, steps), 3)

# Choose random values

In [90]:
%%timeit
random_choices = [np.random.choice(S_vals), np.random.choice(b_vals), np.random.choice(E_L_i_vals),
                  np.random.choice(E_L_e_vals), np.random.choice(T_vals)]
S_idx = combinaison[:, 0] == random_choices[0]
b_idx = combinaison[:, 1] == random_choices[1]
E_L_i_idx = combinaison[:, 2] == random_choices[2]
E_L_e_idx = combinaison[:, 3] == random_choices[3]
T_idx = combinaison[:, 4] == random_choices[4]
all_idx =  np.logical_and.reduce((S_idx, b_idx, E_L_i_idx, E_L_e_idx, T_idx))

15.4 ms ± 387 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [84]:
# Let's make sure that what we obtain is correct
for ii, val in enumerate(random_choices):
    print(val==combinaison[all_idx, :][0][ii])

True
True
True
True
True


In [91]:
# Looks like it. How much time would it take to sweep over all the elements in a chunk if done sequentially
15e-3*47663/60

# 12 minutes is quite a significant time. I think this process could also be parallelized but I don't know
# if it's worth it to do it rn.

11.91575

In [115]:
# How does scandir() work
import os

results_folder = '/home/master/Desktop/tests_hpc/JUSUFlike/Scratch/results/'

for file in os.scandir(results_folder):
    #print(type(file.name)) file.name is the string name of the file
    #print(file.name.split('_'))  # Returns a list with separated elements by '_'
    split_str = file.name.split('_')
    a = float(split_str[2])
    b = float(split_str[4])
    E_L_i = float(split_str[6])
    E_L_e = float(split_str[8])
    T = float(split_str[10][:-4])  # .npy is not separated.
    #print(a, b, E_L_i, E_L_e, T)

    S_idx = combinaison[:, 0] == a
    b_idx = combinaison[:, 1] == b
    E_L_i_idx = combinaison[:, 2] == E_L_i    
    E_L_e_idx = combinaison[:, 3] == E_L_e
    T_idx = combinaison[:, 4] == T - 1 
    all_idx =  np.logical_and.reduce((S_idx, b_idx, E_L_i_idx, E_L_e_idx, T_idx))
    print(all_idx.shape)
    print(np.sum(all_idx))

(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0
(1048576,)
0


In [117]:
results_folder = '/home/master/Desktop/tests_hpc/JUSUFlike/Data/Data/chunks_parsweep/'

for file in os.scandir(results_folder):
    #print(type(file.name)) file.name is the string name of the file
    chunk_n = int(file.name.split('_')[-1][:-4])
    print(chunk_n)

<class 'str'>
0
<class 'str'>
12
<class 'str'>
6
<class 'str'>
7
<class 'str'>
4
<class 'str'>
18
<class 'str'>
16
<class 'str'>
3
<class 'str'>
10
<class 'str'>
20
<class 'str'>
8
<class 'str'>
1
<class 'str'>
5
<class 'str'>
15
<class 'str'>
9
<class 'str'>
21
<class 'str'>
11
<class 'str'>
13
<class 'str'>
19
<class 'str'>
14
<class 'str'>
17
<class 'str'>
2
