In [1]:
import pandas as pd
import numpy as np

#import rdkit
#from rdkit import Chem

import biopandas
from biopandas.pdb import PandasPdb
from biopandas.mol2 import PandasMol2

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import sys
from collections import Counter
import time

In [2]:
ANION_MOLECULE = "T2N"
ANION_ATOM_COUNT = 15
ANION_weight = 139.22

CATION_MOLECULE = "BMI"
CATION_ATOM_COUNT = 25
CATION_weight = 281.16

WATER_MOLECULE = "T5P"
WATER_ATOM_COUNT = 3
WATER_weight = 18.02


SODIUM_ATOM = "NA"
CHLORIDE_ATOM = "CL"

SURFACE_RANGE = 5
DIGGER_RANGE = 6

In [3]:
INPUT_FILE = "./FILE/BOX_Bmim_Tf2N_0_5M_all_components_amorphous_-_Frame_51.mol2"
INPUT_FILE_PATH = "./FILE/BOX_Bmim_Tf2N_0_5M_all_components_amorphous_-_Frame_51.mol2"

In [4]:
cnt = 0
find_word = "@<TRIPOS>BOND"
with open(INPUT_FILE, 'r') as file:    # hello.txt 파일을 읽기 모드(r)로 열기
    line = None    # 변수 line을 None으로 초기화
    while line != '':
        line = file.readline()
        cnt += 1
        if "@<TRIPOS>BOND" in line:
            FN = cnt
            
        if "@<TRIPOS>SUBSTRUCTURE" in line:
            EN = cnt
#===============================================================================           
f = open(INPUT_FILE_PATH, 'r')
F = f.readlines()[FN:EN-1]
Bond_list = []
for i in range(EN-FN-1):
    DATA = F[int(i)].strip().split()
    Bond_list.append(DATA)
#===============================================================================  
column_name=['Bond_Index', 'Bond_Atom1', 'Bond_Atom2', 'Bond_Case']
Bond_Data_Frame = pd.DataFrame(Bond_list, columns = column_name)
#===============================================================================
Bond_Data_Frame = Bond_Data_Frame.astype({'Bond_Atom1': int, 'Bond_Atom2': int})

In [5]:
pmol = PandasMol2().read_mol2(INPUT_FILE_PATH)  ### input
Total_system = pmol.df

In [6]:
condition = (pmol.df.subst_name == CATION_MOLECULE) 
Data_Selecte_subst_CATION = pmol.df[condition]
Data_Selecte_subst_CATION_index = pmol.df[condition].index

condition = (pmol.df.subst_name == ANION_MOLECULE) 
Data_Selecte_subst_ANION = pmol.df[condition]
Data_Selecte_subst_ANION_index = pmol.df[condition].index

condition = (pmol.df.subst_name == WATER_MOLECULE) 
Data_Selecte_subst_WATER = pmol.df[condition]
Data_Selecte_subst_WATER_index = pmol.df[condition].index

condition = (pmol.df.subst_name == SODIUM_ATOM) 
Data_Selecte_subst_NA = pmol.df[condition]
Data_Selecte_subst_NA_index = pmol.df[condition].index


condition = (pmol.df.subst_name == CHLORIDE_ATOM) 
Data_Selecte_subst_CL = pmol.df[condition]
Data_Selecte_subst_CL_index = pmol.df[condition].index

In [7]:
CATION_Residue_LIST = []
for i in range(1, int(len(Data_Selecte_subst_CATION_index)/CATION_ATOM_COUNT)+1):
    ATOM_COUNT = 0  ## while 반복문 초기화
    while ATOM_COUNT < 25:    ## 수치 적정 변수로 생각하기
        CATION_Residue_LIST.append("CATION_Res_"+str(i))
        ATOM_COUNT = ATOM_COUNT+1
        
        
ANION_Residue_LIST = []
for i in range(1, int(len(Data_Selecte_subst_ANION_index)/ANION_ATOM_COUNT)+1):
    ATOM_COUNT = 0  ## while 반복문 초기화
    while ATOM_COUNT < 15:    ## 수치 적정 변수로 생각하기
        ANION_Residue_LIST.append("ANION_Res_"+str(i))
        ATOM_COUNT = ATOM_COUNT+1       
        
        
WATER_Residue_LIST = []
for i in range(1, int(len(Data_Selecte_subst_WATER_index)/WATER_ATOM_COUNT)+1):
    ATOM_COUNT = 0  ## while 반복문 초기화
    while ATOM_COUNT < 3:    ## 수치 적정 변수로 생각하기
        WATER_Residue_LIST.append("WATER_Res_"+str(i))
        ATOM_COUNT = ATOM_COUNT+1        
        
        
NA_Residue_LIST = []
for i in range(1, int(len(Data_Selecte_subst_NA_index)/1)+1):
    ATOM_COUNT = 0  ## while 반복문 초기화
    while ATOM_COUNT < 1:    ## 수치 적정 변수로 생각하기
        NA_Residue_LIST.append("NA_Res_"+str(i))
        ATOM_COUNT = ATOM_COUNT+1        
        
        
CL_Residue_LIST = []
for i in range(1, int(len(Data_Selecte_subst_CL_index)/1)+1):
    ATOM_COUNT = 0  ## while 반복문 초기화
    while ATOM_COUNT < 1:    ## 수치 적정 변수로 생각하기
        CL_Residue_LIST.append("CL_Res_"+str(i))
        ATOM_COUNT = ATOM_COUNT+1        

In [8]:
SYSTEM_Reside_LIST = CATION_Residue_LIST + ANION_Residue_LIST + WATER_Residue_LIST + NA_Residue_LIST + CL_Residue_LIST

In [9]:
SYSTEM_Reside_DF = pd.DataFrame(SYSTEM_Reside_LIST)
SYSTEM_Reside_DF.columns=["Residue_number"]

In [10]:
Total_system_SYSTEM_Reside_DF = pd.concat([Total_system,SYSTEM_Reside_DF],axis=1)

In [11]:
### 1. 특정 ATOM을 선택하면 해당 index를 뽑고 해당 Residue_number를 추출한다.
### 2. 해당 Residue_number에 해당하는 분자를 찾는다.

In [12]:
len(Total_system_SYSTEM_Reside_DF)

42660

In [13]:
len(Total_system_SYSTEM_Reside_DF[(Total_system_SYSTEM_Reside_DF['subst_name'] == CATION_MOLECULE)])

9675

## Define Cluster

#### step 1. 선택한 molecule의 인근 (3A) ATOM 선택하기

In [14]:
condition = (pmol.df.subst_name == ANION_MOLECULE)
Data_Selecte_subst = pmol.df[condition]
Data_Selecte_subst_index = pmol.df[condition].index

In [15]:
start = time.time()

find_range = 3    ### input
list_set = []
list_set_array = np.array([], dtype= int)


for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system_SYSTEM_Reside_DF[(Total_system_SYSTEM_Reside_DF['x'] <= sq1_range["x"]) &
                                                       (Total_system_SYSTEM_Reside_DF['x'] >= sq3_range["x"]) &
                                                       (Total_system_SYSTEM_Reside_DF['y'] <= sq1_range["y"]) &
                                                       (Total_system_SYSTEM_Reside_DF['y'] >= sq3_range["y"]) &
                                                       (Total_system_SYSTEM_Reside_DF['z'] <= sq1_range["z"]) &
                                                       (Total_system_SYSTEM_Reside_DF['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List = Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()
        
print("time :",time.time() - start)

time : 16.0209538936615


#### step 2. 선택한 molecule에서 Cation Residue만 선택하기

In [16]:
# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

#### step 3. 선택한 molecule의 인근 (3A) ATOM 선택하기

In [17]:
Data_Selecte_subst = result[result['subst_name'] == CATION_MOLECULE]

In [18]:
Data_Selecte_subst_index = result[result['subst_name'] == CATION_MOLECULE].index

In [19]:
start = time.time()

find_range = 3    ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()

print("time :",time.time() - start)

time : 27.240888833999634


In [20]:
# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

#### step 4. IL Cluster Define

In [21]:
IL_Cluster = result[(result['subst_name'] == CATION_MOLECULE)|(result['subst_name'] == ANION_MOLECULE)]

In [22]:
IL_Cluster

Unnamed: 0,atom_id,atom_name,x,y,z,atom_type,subst_id,subst_name,charge,Residue_number
0,1,C,21.7125,12.9756,-21.7713,C.3,1,BMI,-0.3305,CATION_Res_1
1,2,N,22.9275,12.3355,-21.4824,N.pl3,1,BMI,0.1939,CATION_Res_1
2,3,C,22.9824,11.0176,-21.2151,C.ar,1,BMI,-0.1044,CATION_Res_1
3,4,N,24.2717,10.6997,-20.8984,N.pl3,1,BMI,0.2896,CATION_Res_1
4,5,C,25.0390,11.8696,-20.9131,C.ar,1,BMI,-0.3225,CATION_Res_1
...,...,...,...,...,...,...,...,...,...,...
15475,15476,F,18.5276,30.0993,-18.6566,F,2,T2N,-0.1518,ANION_Res_387
15476,15477,O,17.4059,33.6747,-17.9482,O.2,2,T2N,-0.6559,ANION_Res_387
15477,15478,O,16.6167,31.5197,-16.9102,O.2,2,T2N,-0.6559,ANION_Res_387
15478,15479,O,15.8857,34.0225,-20.4904,O.2,2,T2N,-0.6559,ANION_Res_387


## Select 5A Surface

#### step 1. Select 5A surface

In [23]:
Data_Selecte_subst = IL_Cluster
Data_Selecte_subst_index = IL_Cluster.index

In [24]:
start = time.time()

find_range = SURFACE_RANGE    ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()

print("time :",time.time() - start)

time : 47.76913118362427


In [25]:
# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

In [26]:
IL_Cluster_surface = result[(result['subst_name'] == CATION_MOLECULE)|
                            (result['subst_name'] == ANION_MOLECULE)|
                            (result['subst_name'] == WATER_MOLECULE)|
                            (result['subst_name'] == SODIUM_ATOM)|
                            (result['subst_name'] == CHLORIDE_ATOM)]

#### step 2. Invert

In [27]:
IL_Cluster_surface_NAME = IL_Cluster_surface["Residue_number"].drop_duplicates().squeeze().to_list()

In [28]:
IL_Cluster_surface_invert = Total_system_SYSTEM_Reside_DF[~Total_system_SYSTEM_Reside_DF['Residue_number'].isin(IL_Cluster_surface_NAME)]

#### step 3. Select 5A surface From Invert moleuce --range1

In [39]:
Data_Selecte_subst = IL_Cluster_surface_invert
Data_Selecte_subst_index = IL_Cluster_surface_invert.index

In [40]:
start = time.time()


find_range = DIGGER_RANGE    ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()



# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result1 = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

print("time :",time.time() - start)

time : 73.99800944328308


#### step 3. Select 5A surface From Invert moleuce --range2

In [41]:
start = time.time()


find_range = DIGGER_RANGE+1    ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()



# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result2 = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

print("time :",time.time() - start)

time : 85.17514133453369


#### step 3. Select 5A surface From Invert moleuce --range3

In [42]:
start = time.time()


find_range = DIGGER_RANGE+2    ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()



# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result3 = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

print("time :",time.time() - start)

time : 100.39217901229858


#### step 3. Select 5A surface From Invert moleuce --range4

In [43]:
start = time.time()


find_range = DIGGER_RANGE+3    ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()



# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result4 = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

print("time :",time.time() - start)

time : 121.33368182182312


#### step 3. Select 5A surface From Invert moleuce --range5

In [44]:
start = time.time()


find_range = DIGGER_RANGE+4    ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()



# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result5 = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

print("time :",time.time() - start)

time : 145.38730430603027


#### step 3. Select 5A surface From Invert moleuce --range6

In [45]:
start = time.time()


find_range = DIGGER_RANGE +5   ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()



# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result6 = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

print("time :",time.time() - start)

time : 173.8938913345337


#### step 3. Select 5A surface From Invert moleuce --range7

In [46]:
start = time.time()


find_range = DIGGER_RANGE+6    ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()



# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result7 = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

print("time :",time.time() - start)

time : 204.4238464832306


#### step 3. Select 5A surface From Invert moleuce --range8

In [47]:
start = time.time()


find_range = DIGGER_RANGE+7   ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()



# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result8 = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

print("time :",time.time() - start)

time : 239.98254680633545


#### step 3. Select 5A surface From Invert moleuce --range9

In [48]:
start = time.time()


find_range = DIGGER_RANGE+8    ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()



# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result9 = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

print("time :",time.time() - start)

time : 275.80307364463806


#### step 3. Select 5A surface From Invert moleuce --range10

In [49]:
start = time.time()


find_range = DIGGER_RANGE +9   ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()



# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result10 = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

print("time :",time.time() - start)

time : 324.4491653442383


#### step 3. Select 5A surface From Invert moleuce --range11

In [50]:
start = time.time()


find_range = DIGGER_RANGE+10   ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()



# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result11 = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

print("time :",time.time() - start)

time : 374.7162094116211


#### step 3. Select 5A surface From Invert moleuce --range12

In [51]:
start = time.time()


find_range = DIGGER_RANGE+11   ### input
list_set = []
list_set_array = np.array([], dtype= int)

for i in range(0, len(Data_Selecte_subst_index)):
    select_dataframe = None  ## 선택한 데이터 프레임 초기화
    select_dataframe = Data_Selecte_subst.iloc[i]


    sq1_range = (select_dataframe[['x','y','z']]+find_range)
    sq3_range = (select_dataframe[['x','y','z']]-find_range)

    selected_dataframe = Total_system[(Total_system['x'] <= sq1_range["x"]) &
                                      (Total_system['x'] >= sq3_range["x"]) &
                                      (Total_system['y'] <= sq1_range["y"]) &
                                      (Total_system['y'] >= sq3_range["y"]) &
                                      (Total_system['z'] <= sq1_range["z"]) &
                                      (Total_system['z'] >= sq3_range["z"])]
                                      

    cal_x = (float(select_dataframe["x"]) - selected_dataframe["x"]).pow(2)
    cal_y = (float(select_dataframe["y"]) - selected_dataframe["y"]).pow(2)
    cal_z = (float(select_dataframe["z"]) - selected_dataframe["z"]).pow(2)
    Distance = (cal_x+cal_y+cal_z)**0.5
    
    #Distance

    Distance_index = Distance[(Distance <= find_range)].index
    Select_Atom_List=Distance_index.values.tolist()
    Select_Atom_List_NP = np.array(Select_Atom_List,dtype=int)
    
    list_set_array = np.append(list_set_array,Select_Atom_List_NP)
list_set_array= np.unique(list_set_array)
list_set = list_set_array.tolist()



# 포함하고자 하는 문자열 리스트 생성
Select_Residue_List = Total_system_SYSTEM_Reside_DF.iloc[list_set][["Residue_number"]].drop_duplicates().squeeze().to_list()

# 해당 list_set 있는 잔기를 선택
result12 = Total_system_SYSTEM_Reside_DF[Total_system_SYSTEM_Reside_DF['Residue_number'].isin(Select_Residue_List)]

print("time :",time.time() - start)

time : 432.091890335083


In [52]:
IL_Cluster_digger1 = result1[(result1['subst_name'] == CATION_MOLECULE)|
                             (result1['subst_name'] == ANION_MOLECULE)|
                             (result1['subst_name'] == WATER_MOLECULE)|
                             (result1['subst_name'] == SODIUM_ATOM)|
                             (result1['subst_name'] == CHLORIDE_ATOM)]

In [53]:
IL_Cluster_digger2 = result2[(result2['subst_name'] == CATION_MOLECULE)|
                             (result2['subst_name'] == ANION_MOLECULE)|
                             (result2['subst_name'] == WATER_MOLECULE)|
                             (result2['subst_name'] == SODIUM_ATOM)|
                             (result2['subst_name'] == CHLORIDE_ATOM)]

In [54]:
IL_Cluster_digger3 = result3[(result3['subst_name'] == CATION_MOLECULE)|
                             (result3['subst_name'] == ANION_MOLECULE)|
                             (result3['subst_name'] == WATER_MOLECULE)|
                             (result3['subst_name'] == SODIUM_ATOM)|
                             (result3['subst_name'] == CHLORIDE_ATOM)]

In [55]:
IL_Cluster_digger4 = result4[(result4['subst_name'] == CATION_MOLECULE)|
                             (result4['subst_name'] == ANION_MOLECULE)|
                             (result4['subst_name'] == WATER_MOLECULE)|
                             (result4['subst_name'] == SODIUM_ATOM)|
                             (result4['subst_name'] == CHLORIDE_ATOM)]

In [56]:
IL_Cluster_digger5 = result5[(result5['subst_name'] == CATION_MOLECULE)|
                             (result5['subst_name'] == ANION_MOLECULE)|
                             (result5['subst_name'] == WATER_MOLECULE)|
                             (result5['subst_name'] == SODIUM_ATOM)|
                             (result5['subst_name'] == CHLORIDE_ATOM)]

In [57]:
IL_Cluster_digger6 = result6[(result6['subst_name'] == CATION_MOLECULE)|
                             (result6['subst_name'] == ANION_MOLECULE)|
                             (result6['subst_name'] == WATER_MOLECULE)|
                             (result6['subst_name'] == SODIUM_ATOM)|
                             (result6['subst_name'] == CHLORIDE_ATOM)]

In [58]:
IL_Cluster_digger7 = result7[(result7['subst_name'] == CATION_MOLECULE)|
                             (result7['subst_name'] == ANION_MOLECULE)|
                             (result7['subst_name'] == WATER_MOLECULE)|
                             (result7['subst_name'] == SODIUM_ATOM)|
                             (result7['subst_name'] == CHLORIDE_ATOM)]

In [59]:
IL_Cluster_digger8 = result8[(result8['subst_name'] == CATION_MOLECULE)|
                             (result8['subst_name'] == ANION_MOLECULE)|
                             (result8['subst_name'] == WATER_MOLECULE)|
                             (result8['subst_name'] == SODIUM_ATOM)|
                             (result8['subst_name'] == CHLORIDE_ATOM)]

In [60]:
IL_Cluster_digger9 = result9[(result9['subst_name'] == CATION_MOLECULE)|
                             (result9['subst_name'] == ANION_MOLECULE)|
                             (result9['subst_name'] == WATER_MOLECULE)|
                             (result9['subst_name'] == SODIUM_ATOM)|
                             (result9['subst_name'] == CHLORIDE_ATOM)]

In [61]:
IL_Cluster_digger10 = result10[(result10['subst_name'] == CATION_MOLECULE)|
                               (result10['subst_name'] == ANION_MOLECULE)|
                               (result10['subst_name'] == WATER_MOLECULE)|
                               (result10['subst_name'] == SODIUM_ATOM)|
                               (result10['subst_name'] == CHLORIDE_ATOM)]

In [62]:
IL_Cluster_digger11 = result11[(result11['subst_name'] == CATION_MOLECULE)|
                               (result11['subst_name'] == ANION_MOLECULE)|
                               (result11['subst_name'] == WATER_MOLECULE)|
                               (result11['subst_name'] == SODIUM_ATOM)|
                               (result11['subst_name'] == CHLORIDE_ATOM)]

In [63]:
IL_Cluster_digger12 = result12[(result12['subst_name'] == CATION_MOLECULE)|
                               (result12['subst_name'] == ANION_MOLECULE)|
                               (result12['subst_name'] == WATER_MOLECULE)|
                               (result12['subst_name'] == SODIUM_ATOM)|
                               (result12['subst_name'] == CHLORIDE_ATOM)]

#### step 4. Define inside Water using invert

In [64]:
IL_Cluster_digger_NAME = IL_Cluster_digger1["Residue_number"].drop_duplicates().squeeze().to_list()
IL_Cluster_digger_INSIDE = Total_system_SYSTEM_Reside_DF[~Total_system_SYSTEM_Reside_DF['Residue_number'].isin(IL_Cluster_digger_NAME)]
INSIDE_water1 = IL_Cluster_digger_INSIDE[(IL_Cluster_digger_INSIDE['subst_name'] == WATER_MOLECULE)]

In [65]:
IL_Cluster_digger_NAME = IL_Cluster_digger2["Residue_number"].drop_duplicates().squeeze().to_list()
IL_Cluster_digger_INSIDE = Total_system_SYSTEM_Reside_DF[~Total_system_SYSTEM_Reside_DF['Residue_number'].isin(IL_Cluster_digger_NAME)]
INSIDE_water2 = IL_Cluster_digger_INSIDE[(IL_Cluster_digger_INSIDE['subst_name'] == WATER_MOLECULE)]

In [66]:
IL_Cluster_digger_NAME = IL_Cluster_digger3["Residue_number"].drop_duplicates().squeeze().to_list()
IL_Cluster_digger_INSIDE = Total_system_SYSTEM_Reside_DF[~Total_system_SYSTEM_Reside_DF['Residue_number'].isin(IL_Cluster_digger_NAME)]
INSIDE_water3 = IL_Cluster_digger_INSIDE[(IL_Cluster_digger_INSIDE['subst_name'] == WATER_MOLECULE)]

In [67]:
IL_Cluster_digger_NAME = IL_Cluster_digger4["Residue_number"].drop_duplicates().squeeze().to_list()
IL_Cluster_digger_INSIDE = Total_system_SYSTEM_Reside_DF[~Total_system_SYSTEM_Reside_DF['Residue_number'].isin(IL_Cluster_digger_NAME)]
INSIDE_water4 = IL_Cluster_digger_INSIDE[(IL_Cluster_digger_INSIDE['subst_name'] == WATER_MOLECULE)]

In [68]:
IL_Cluster_digger_NAME = IL_Cluster_digger5["Residue_number"].drop_duplicates().squeeze().to_list()
IL_Cluster_digger_INSIDE = Total_system_SYSTEM_Reside_DF[~Total_system_SYSTEM_Reside_DF['Residue_number'].isin(IL_Cluster_digger_NAME)]
INSIDE_water5 = IL_Cluster_digger_INSIDE[(IL_Cluster_digger_INSIDE['subst_name'] == WATER_MOLECULE)]

In [69]:
IL_Cluster_digger_NAME = IL_Cluster_digger6["Residue_number"].drop_duplicates().squeeze().to_list()
IL_Cluster_digger_INSIDE = Total_system_SYSTEM_Reside_DF[~Total_system_SYSTEM_Reside_DF['Residue_number'].isin(IL_Cluster_digger_NAME)]
INSIDE_water6 = IL_Cluster_digger_INSIDE[(IL_Cluster_digger_INSIDE['subst_name'] == WATER_MOLECULE)]

In [70]:
IL_Cluster_digger_NAME = IL_Cluster_digger7["Residue_number"].drop_duplicates().squeeze().to_list()
IL_Cluster_digger_INSIDE = Total_system_SYSTEM_Reside_DF[~Total_system_SYSTEM_Reside_DF['Residue_number'].isin(IL_Cluster_digger_NAME)]
INSIDE_water7 = IL_Cluster_digger_INSIDE[(IL_Cluster_digger_INSIDE['subst_name'] == WATER_MOLECULE)]

In [71]:
IL_Cluster_digger_NAME = IL_Cluster_digger8["Residue_number"].drop_duplicates().squeeze().to_list()
IL_Cluster_digger_INSIDE = Total_system_SYSTEM_Reside_DF[~Total_system_SYSTEM_Reside_DF['Residue_number'].isin(IL_Cluster_digger_NAME)]
INSIDE_water8 = IL_Cluster_digger_INSIDE[(IL_Cluster_digger_INSIDE['subst_name'] == WATER_MOLECULE)]

In [72]:
IL_Cluster_digger_NAME = IL_Cluster_digger9["Residue_number"].drop_duplicates().squeeze().to_list()
IL_Cluster_digger_INSIDE = Total_system_SYSTEM_Reside_DF[~Total_system_SYSTEM_Reside_DF['Residue_number'].isin(IL_Cluster_digger_NAME)]
INSIDE_water9 = IL_Cluster_digger_INSIDE[(IL_Cluster_digger_INSIDE['subst_name'] == WATER_MOLECULE)]

In [73]:
IL_Cluster_digger_NAME = IL_Cluster_digger10["Residue_number"].drop_duplicates().squeeze().to_list()
IL_Cluster_digger_INSIDE = Total_system_SYSTEM_Reside_DF[~Total_system_SYSTEM_Reside_DF['Residue_number'].isin(IL_Cluster_digger_NAME)]
INSIDE_water10 = IL_Cluster_digger_INSIDE[(IL_Cluster_digger_INSIDE['subst_name'] == WATER_MOLECULE)]

In [74]:
IL_Cluster_digger_NAME = IL_Cluster_digger11["Residue_number"].drop_duplicates().squeeze().to_list()
IL_Cluster_digger_INSIDE = Total_system_SYSTEM_Reside_DF[~Total_system_SYSTEM_Reside_DF['Residue_number'].isin(IL_Cluster_digger_NAME)]
INSIDE_water11 = IL_Cluster_digger_INSIDE[(IL_Cluster_digger_INSIDE['subst_name'] == WATER_MOLECULE)]

In [75]:
IL_Cluster_digger_NAME = IL_Cluster_digger12["Residue_number"].drop_duplicates().squeeze().to_list()
IL_Cluster_digger_INSIDE = Total_system_SYSTEM_Reside_DF[~Total_system_SYSTEM_Reside_DF['Residue_number'].isin(IL_Cluster_digger_NAME)]
INSIDE_water12 = IL_Cluster_digger_INSIDE[(IL_Cluster_digger_INSIDE['subst_name'] == WATER_MOLECULE)]

## ANALYSIS

In [76]:
import sys
from collections import Counter

In [77]:
# (CMC) CATION_MOLECULE_COUNT
CMC = Counter(IL_Cluster['subst_name'])[CATION_MOLECULE] / CATION_ATOM_COUNT

In [78]:
# (AMC) ANION_MOLECULE_COUNT
AMC = Counter(IL_Cluster['subst_name'])[ANION_MOLECULE] / ANION_ATOM_COUNT

In [79]:
# (WMC) WATER_MOLECULE_COUNT
WMC1 = Counter(INSIDE_water1['subst_name'])['T5P'] / WATER_ATOM_COUNT
WMC2 = Counter(INSIDE_water2['subst_name'])['T5P'] / WATER_ATOM_COUNT
WMC3 = Counter(INSIDE_water3['subst_name'])['T5P'] / WATER_ATOM_COUNT
WMC4 = Counter(INSIDE_water4['subst_name'])['T5P'] / WATER_ATOM_COUNT
WMC5 = Counter(INSIDE_water5['subst_name'])['T5P'] / WATER_ATOM_COUNT
WMC6 = Counter(INSIDE_water6['subst_name'])['T5P'] / WATER_ATOM_COUNT
WMC7 = Counter(INSIDE_water7['subst_name'])['T5P'] / WATER_ATOM_COUNT
WMC8 = Counter(INSIDE_water8['subst_name'])['T5P'] / WATER_ATOM_COUNT
WMC9 = Counter(INSIDE_water9['subst_name'])['T5P'] / WATER_ATOM_COUNT
WMC10 = Counter(INSIDE_water10['subst_name'])['T5P'] / WATER_ATOM_COUNT
WMC11 = Counter(INSIDE_water11['subst_name'])['T5P'] / WATER_ATOM_COUNT
WMC12 = Counter(INSIDE_water12['subst_name'])['T5P'] / WATER_ATOM_COUNT

In [81]:
Absortion1 = (WMC1*WATER_weight)/((CMC*CATION_weight)+(AMC*ANION_weight)+(WMC1*WATER_weight))*100
Absortion2 = (WMC2*WATER_weight)/((CMC*CATION_weight)+(AMC*ANION_weight)+(WMC2*WATER_weight))*100
Absortion3 = (WMC3*WATER_weight)/((CMC*CATION_weight)+(AMC*ANION_weight)+(WMC3*WATER_weight))*100
Absortion4 = (WMC4*WATER_weight)/((CMC*CATION_weight)+(AMC*ANION_weight)+(WMC4*WATER_weight))*100
Absortion5 = (WMC5*WATER_weight)/((CMC*CATION_weight)+(AMC*ANION_weight)+(WMC5*WATER_weight))*100
Absortion6 = (WMC6*WATER_weight)/((CMC*CATION_weight)+(AMC*ANION_weight)+(WMC6*WATER_weight))*100
Absortion7 = (WMC7*WATER_weight)/((CMC*CATION_weight)+(AMC*ANION_weight)+(WMC7*WATER_weight))*100
Absortion8 = (WMC8*WATER_weight)/((CMC*CATION_weight)+(AMC*ANION_weight)+(WMC8*WATER_weight))*100
Absortion9 = (WMC9*WATER_weight)/((CMC*CATION_weight)+(AMC*ANION_weight)+(WMC9*WATER_weight))*100
Absortion10 = (WMC10*WATER_weight)/((CMC*CATION_weight)+(AMC*ANION_weight)+(WMC10*WATER_weight))*100
Absortion11 = (WMC11*WATER_weight)/((CMC*CATION_weight)+(AMC*ANION_weight)+(WMC11*WATER_weight))*100
Absortion12 = (WMC12*WATER_weight)/((CMC*CATION_weight)+(AMC*ANION_weight)+(WMC12*WATER_weight))*100


In [83]:
print(Absortion1,
      Absortion2,
      Absortion3,
      Absortion4,
      Absortion5,
      Absortion6,
      Absortion7,
      Absortion8,
      Absortion9,
      Absortion10,
      Absortion11,
      Absortion12)

4.662457000994847 3.424185397970309 2.54547067674524 2.121390454654562 1.6398668061716262 1.4243321854445723 1.2295418034100192 1.0992533007794938 0.914088424667614 0.6514956985278076 0.5086760027438016 0.2991993309309161
