In [3]:
import numpy as np
import pandas as pd
import sys, os
import time
import multiprocessing
from itertools import repeat, product

In [4]:
f_euclid_dist = lambda a,b: np.linalg.norm(a-b)

def f_h_step(x, a):
    return 1 if (x<=a) else 0

f_y = lambda k : -np.log10(k)

### Y Data Processor

In [5]:
'''
Create dataframes of log_10^y
'''
path = 'C:/Users/beryl/Documents/Computational Science/Kanazawa/Thesis/Dataset/PP/index/INDEX_general_PP.2018'
mol_units = {'uM':1.e-6, 'pM':1.e-12, 'fM':1.e-15, 'nM':1.e-9, 'mM':1.e-3}

#load the index file
l = []
with open(path, 'r') as f:
    for line in f:
        if not line.startswith('#'):
            clean_line = line.rstrip()
            l.append((line.rstrip()).split())
df_idx = (pd.DataFrame(l)).rename(columns={0:'id',3:'k'})
#print(df_idx)

#generate the -log_10k values
op_tokens = ['=','~','>','<']
logys = np.zeros(df_idx.shape[0])
for i in range(df_idx.shape[0]):
    string = df_idx.loc[i]['k']
    for s in string:
        if s in op_tokens:
            split_str = string.split(s)
            break
    logys[i] = f_y( float(split_str[-1][:-2]) * mol_units[split_str[-1][-2:]] )
df_idx["log_y"] = logys

print(df_idx.loc[9])
print(len(df_idx))

id              2pcb
1               2.80
2               1993
k            Kd=10uM
4                 //
5           2pcb.pdf
6          (104-mer)
7         cytochrome
8                 c,
9        Ka=10^5M-1,
10          Kd=10-5M
11              None
12              None
13              None
14              None
15              None
16              None
17              None
18              None
19              None
20              None
21              None
22              None
23              None
24              None
25              None
26              None
27              None
28              None
29              None
30              None
31              None
32              None
33              None
34              None
35              None
36              None
37              None
log_y              5
Name: 9, dtype: object
2416


### Complex Data Processor

In [6]:
'''
files loader
'''
path = 'C:/Users/beryl/Documents/Computational Science/Kanazawa/Thesis/Dataset/PP'


complex_files = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
print(len(complex_files))

test_file = path+'/'+complex_files[2]
print(test_file)

2416
C:/Users/beryl/Documents/Computational Science/Kanazawa/Thesis/Dataset/PP/1a3b.ent.pdb


In [8]:
'''
atom dataframe generator
'''

l =[]
with open(test_file, 'r') as f:
    for line in f:
        if line.startswith('ATOM') or line.startswith('TER'):
            clean_line = (line.rstrip()).split()
            #check for alignment mistakes within data, a row with spacing alignment error has 11 length after splitted by whitespace
            if len(clean_line) == 11:
                #split the 2nd last column by the 4th index (this inference is according to PDB file formatting)
                split = [clean_line[-2][:4], clean_line[-2][4:]]
                clean_line[-2] = split[1]
                clean_line.insert(-2, split[0])
            l.append(clean_line)
df_atoms = (pd.DataFrame(l)).rename(columns={0:'record', 6:'x_coor', 7:'y_coor', 8:'z_coor', 11:'atom_type'})

'''
print(len(l[2314]))
spl = [l[2314][-2][:4], l[2314][-2][4:]]
l[2314][-2] = spl[1]
l[2314].insert(-2, spl[0])
l[2314].insert(-2, l[2314][-2][4:])
print(l[2314])
'''
print(l[2241])

df_atoms

['ATOM', '2242', 'N', 'GLY', 'I', '9', '20.469', '-13.494', '37.284', '1.00', '107.45', 'N']


Unnamed: 0,record,1,2,3,4,5,x_coor,y_coor,z_coor,9,10,atom_type
0,ATOM,1,N,ALA,L,1B,15.110,18.115,15.087,1.00,36.99,N
1,ATOM,2,CA,ALA,L,1B,15.151,19.655,15.324,1.00,44.33,C
2,ATOM,3,C,ALA,L,1B,16.619,19.913,15.465,1.00,31.21,C
3,ATOM,4,O,ALA,L,1B,17.199,20.722,14.728,1.00,27.87,O
4,ATOM,5,CB,ALA,L,1B,14.328,19.790,16.582,1.00,36.57,C
5,ATOM,6,N,ASP,L,1A,17.075,19.131,16.396,1.00,24.26,N
6,ATOM,7,CA,ASP,L,1A,18.455,18.973,16.762,1.00,24.64,C
7,ATOM,8,C,ASP,L,1A,18.725,17.483,16.620,1.00,18.66,C
8,ATOM,9,O,ASP,L,1A,19.822,16.999,16.911,1.00,19.27,O
9,ATOM,10,CB,ASP,L,1A,18.645,19.446,18.202,1.00,31.66,C


In [9]:
'''
split dataframes based on chains ended by "TER"
'''
l_df = []
last_idx = 0
for idx in df_atoms.index[df_atoms['record'] == 'TER'].tolist():
    l_df.append(df_atoms.iloc[last_idx:idx])
    last_idx = idx+1

print(df_atoms.index[df_atoms['record'] == 'TER'].tolist())
print(l_df)


[222, 2240, 2327]
[    record    1    2    3  4    5  x_coor  y_coor  z_coor     9     10  \
0     ATOM    1    N  ALA  L   1B  15.110  18.115  15.087  1.00  36.99   
1     ATOM    2   CA  ALA  L   1B  15.151  19.655  15.324  1.00  44.33   
2     ATOM    3    C  ALA  L   1B  16.619  19.913  15.465  1.00  31.21   
3     ATOM    4    O  ALA  L   1B  17.199  20.722  14.728  1.00  27.87   
4     ATOM    5   CB  ALA  L   1B  14.328  19.790  16.582  1.00  36.57   
5     ATOM    6    N  ASP  L   1A  17.075  19.131  16.396  1.00  24.26   
6     ATOM    7   CA  ASP  L   1A  18.455  18.973  16.762  1.00  24.64   
7     ATOM    8    C  ASP  L   1A  18.725  17.483  16.620  1.00  18.66   
8     ATOM    9    O  ASP  L   1A  19.822  16.999  16.911  1.00  19.27   
9     ATOM   10   CB  ASP  L   1A  18.645  19.446  18.202  1.00  31.66   
10    ATOM   11   CG  ASP  L   1A  20.085  19.313  18.682  1.00  39.01   
11    ATOM   12  OD1  ASP  L   1A  21.043  19.254  17.822  1.00  60.26   
12    ATOM   13  OD

In [7]:
'''
calculate the combination of euclidian distance and heaviside step between chains in a protein, 
e.g chains=[A,B,C,D], hence the interactions are: [[A-B],[A-C],[A-D],[B-C],[B-D],[C-D]]

'atom_types' are the type of atoms used for calculation
'cutoff' is the distance cutoff between atoms for heaviside step function (in Angstrom)
'''

'''a = l_df[0].loc[l_df[0]['atom_type'] == 'N']
print(float(a.iloc[1]['x_coor'])*2)
print(a.shape)'''

atom_types = ['C','N','O','F','P','S','Cl','Br','I']
#atom_types = ['C','N','O','S']
cutoff = 12

def protein_interaction(df_protein_A, df_protein_B, atom_types, cutoff):
    type_len = len(atom_types)
    x_vector = np.zeros(type_len**2)
    idx = 0
    for a_type in atom_types:
        for b_type in atom_types:
            #calculate the interaction of each atoms:
            sum_interaction = 0
            a_atoms = df_protein_A.loc[df_protein_A['atom_type'] == a_type]
            b_atoms = df_protein_B.loc[df_protein_B['atom_type'] == b_type]
            for i in range(a_atoms.shape[0]):
                for j in range(b_atoms.shape[0]):
                    #get the (x,y,z):
                    a_atom = a_atoms.iloc[i]
                    b_atom = b_atoms.iloc[j]
                    a_coord = np.array([float(a_atom['x_coor']), float(a_atom['y_coor']), float(a_atom['z_coor'])]) 
                    b_coord = np.array([float(b_atom['x_coor']), float(b_atom['y_coor']), float(b_atom['z_coor'])])
                    #calculate the euclidean distance and heaviside step value:
                    sum_interaction += f_h_step(x=f_euclid_dist(a_coord, b_coord), a=cutoff) 
            x_vector[idx] = sum_interaction
            idx+=1
            print(x_vector)
    return x_vector




curr_time = time.time()

'''x_vector = np.zeros(len(atom_types)**2)
length = len(l_df)
for i in range(length):
    for j in range(length):
        if j>i:
            #sum each chain interaction values:
            print('protein chain :', i, j)
            x_vector += protein_interaction(l_df[i], l_df[j], atom_types)'''

print('value of x vector (R^N) = ', x_vector)
end_time = time.time()
print('time elapsed =',end_time-curr_time)

NameError: name 'x_vector' is not defined

In [26]:
'''
multiple protein data processor calculation,
do the steps of file loading until vector processing.
uses parallelization (soon)
'''

def minus(x, y):
    return x-y

list(map(minus, [1,2,3], repeat(2)))

sample_files = complex_files[0:5]

########################################

def data_processing(path,id_name, atom_types, cutoff):
    #dataframe loader:
    path_file = path+'/'+id_name
    l =[]
    with open(path_file, 'r') as f:
        for line in f:
            if line.startswith('ATOM') or line.startswith('TER'):
                clean_line = (line.rstrip()).split()
                #check for alignment mistakes within data, a row with spacing alignment error has 11 length after splitted by whitespace
                if len(clean_line) == 11:
                    #split the 2nd last column by the 4th index (this inference is according to PDB file formatting)
                    split = [clean_line[-2][:4], clean_line[-2][4:]]
                    clean_line[-2] = split[1]
                    clean_line.insert(-2, split[0])
                l.append(clean_line)
    df_atoms = (pd.DataFrame(l)).rename(columns={0:'record', 6:'x_coor', 7:'y_coor', 8:'z_coor', 11:'atom_type'})
    
    #dataframe splitter:
    l_df = []
    last_idx = 0
    for idx in df_atoms.index[df_atoms['record'] == 'TER'].tolist():
        l_df.append(df_atoms.iloc[last_idx:idx])
        last_idx = idx+1
        
    #vector calculation:
    x_vector = np.zeros(len(atom_types)**2)
    length = len(l_df)
    for i in range(length):
        for j in range(length):
            if j>i:
                #sum each chain interaction values:
                print('protein chain :', i, j)
                x_vector += protein_interaction(l_df[i], l_df[j], atom_types, cutoff)
    return {'id':id_name, 'x_vector':x_vector}


path = 'C:/Users/beryl/Documents/Computational Science/Kanazawa/Thesis/Dataset/PP'
id_file = complex_files[2]
atom_types = ['C','N','O','F','P','S','Cl','Br','I']
cutoff = 12

print(complex_files)
curr_time = time.time()
x_vector = data_processing(path, id_file, atom_types, cutoff)
print('value of x vector (R^N) = ', x_vector)
end_time = time.time()
print('time elapsed =',end_time-curr_time,'seconds')


['1a22.ent.pdb', '1a2k.ent.pdb', '1a3b.ent.pdb', '1a4y.ent.pdb', '1acb.ent.pdb', '1ahw.ent.pdb', '1ak4.ent.pdb', '1akj.ent.pdb', '1an1.ent.pdb', '1atn.ent.pdb', '1ava.ent.pdb', '1avg.ent.pdb', '1avx.ent.pdb', '1avz.ent.pdb', '1axi.ent.pdb', '1ay7.ent.pdb', '1azz.ent.pdb', '1b27.ent.pdb', '1b2s.ent.pdb', '1b2u.ent.pdb', '1b3s.ent.pdb', '1b6c.ent.pdb', '1bj1.ent.pdb', '1bql.ent.pdb', '1brs.ent.pdb', '1bth.ent.pdb', '1buh.ent.pdb', '1bvk.ent.pdb', '1bvn.ent.pdb', '1c9p.ent.pdb', '1c9t.ent.pdb', '1ca0.ent.pdb', '1cbw.ent.pdb', '1clv.ent.pdb', '1cmx.ent.pdb', '1cn4.ent.pdb', '1cz8.ent.pdb', '1d6r.ent.pdb', '1de4.ent.pdb', '1dee.ent.pdb', '1dfj.ent.pdb', '1dhk.ent.pdb', '1dit.ent.pdb', '1djs.ent.pdb', '1dp5.ent.pdb', '1dpj.ent.pdb', '1dqj.ent.pdb', '1du3.ent.pdb', '1dzb.ent.pdb', '1e3u.ent.pdb', '1e4k.ent.pdb', '1e6e.ent.pdb', '1e6j.ent.pdb', '1e96.ent.pdb', '1eaw.ent.pdb', '1eer.ent.pdb', '1ees.ent.pdb', '1efn.ent.pdb', '1efx.ent.pdb', '1eja.ent.pdb', '1emv.ent.pdb', '1eo8.ent.pdb', '1es0.e

[9766.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.]
[9766. 2688.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.]
[9766. 2688.

[9766. 2688. 2506.    0.    0.  122.    0.    0.    0. 2392.  674.  631.
    0.    0.   31.    0.    0.    0. 3023.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.]
[9766. 2688. 2506.    0.    0.  122.    0.    0.    0. 2392.  674.  631.
    0.    0.   31.    0.    0.    0. 3023.  856.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.]
[9766. 2688.

    0.    0.    0.    0.    0.    0.    0.    0.    0.]
[9766. 2688. 2506.    0.    0.  122.    0.    0.    0. 2392.  674.  631.
    0.    0.   31.    0.    0.    0. 3023.  856.  797.    0.    0.   36.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.]
[9766. 2688. 2506.    0.    0.  122.    0.    0.    0. 2392.  674.  631.
    0.    0.   31.    0.    0.    0. 3023.  856.  797.    0.    0.   36.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0

[4088.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.]
[4088.  645.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.]
[4088.  645.

[4088.  645. 1112.    0.    0.    0.    0.    0.    0. 1144.  180.  323.
    0.    0.    0.    0.    0.    0. 1069.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.]
[4088.  645. 1112.    0.    0.    0.    0.    0.    0. 1144.  180.  323.
    0.    0.    0.    0.    0.    0. 1069.  177.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.]
[4088.  645.

    0.    0.    0.    0.    0.    0.    0.    0.    0.]
[4088.  645. 1112.    0.    0.    0.    0.    0.    0. 1144.  180.  323.
    0.    0.    0.    0.    0.    0. 1069.  177.  289.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.   48.    8.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.]
[4088.  645. 1112.    0.    0.    0.    0.    0.    0. 1144.  180.  323.
    0.    0.    0.    0.    0.    0. 1069.  177.  289.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.   48.    8.   15.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
    0.    0.

In [23]:
'''
multiprocessing on interaction calculation
'''
def protein_interaction_mp(df_protein_A, df_protein_B, atom_types, cutoff):
    type_len = len(atom_types)
    x_vector = np.zeros(type_len**2)
    idx = 0
    for a_type in atom_types:
        for b_type in atom_types:
            #calculate the interaction of each atoms:
            sum_interaction = 0
            a_atoms = df_protein_A.loc[df_protein_A['atom_type'] == a_type].to_dict('records')
            b_atoms = df_protein_B.loc[df_protein_B['atom_type'] == b_type].to_dict('records')
            a_coords = np.array([[a_atom['x_coor'], a_atom['y_coor'], a_atom['z_coor']] for a_atom in a_atoms], dtype=float)
            b_coords = np.array([[b_atom['x_coor'], b_atom['y_coor'], b_atom['z_coor']] for b_atom in b_atoms], dtype=float)
            
            
            for i in range(a_atoms.shape[0]):
                for j in range(b_atoms.shape[0]):
                    #get the (x,y,z):
                    a_atom = a_atoms.iloc[i]
                    b_atom = b_atoms.iloc[j]
                    a_coord = np.array([float(a_atom['x_coor']), float(a_atom['y_coor']), float(a_atom['z_coor'])]) 
                    b_coord = np.array([float(b_atom['x_coor']), float(b_atom['y_coor']), float(b_atom['z_coor'])])
                    #calculate the euclidean distance and heaviside step value:
                    sum_interaction += f_h_step(x=f_euclid_dist(a_coord, b_coord), a=cutoff) 
            x_vector[idx] = sum_interaction
            idx+=1
            print(x_vector)
    return x_vector

def f_euc_mp(params):
    return np.linalg.norm(params[0]-params[1])

a = l_df[0].loc[l_df[0]['atom_type'] == 'C'].to_dict('records')
b = l_df[1].loc[l_df[1]['atom_type'] == 'C'].to_dict('records')
a_c = np.array([[a_['x_coor'], a_['y_coor'], a_['z_coor']] for a_ in a], dtype=float)
b_c = np.array([[b_['x_coor'], b_['y_coor'], b_['z_coor']] for b_ in b], dtype=float)
paramlist = list(product(a_c, b_c))
pool = multiprocessing.Pool(processes=6)

start_time = time.time()
euclid_dists = map(f_euc_mp, paramlist)

#print(list(euclid_dists)[0:100], len(paramlist))
a = list(euclid_dists)
print(np.array(a).shape)
end_time = time.time()
print('time elapsed =',end_time-start_time,'seconds')

(180320,)
time elapsed = 1.0159235000610352 seconds


In [None]:
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""

#!/usr/bin/env python3
import itertools
import multiprocessing
#Generate values for each parameter
a = range(10)
b = range(10)
c = range(10)
d = range(10)

#Generate a list of tuples where each tuple is a combination of parameters.
#The list will contain all possible combinations of parameters.
paramlist = list(itertools.product(a,b,c,d))

#A function which will process a tuple of parameters
def func(params):
  a = params[0]
  b = params[1]
  c = params[2]
  d = params[3]
  return a*b*c*d

if __name__ == '__main__':
    #Generate processes equal to the number of cores
    pool = multiprocessing.Pool()
    
    #Distribute the parameter sets evenly across the cores
    res  = pool.map(func,paramlist)
    print(res)