### Generate concentration state set (the reaction pool) in 6D within the concentration constraints

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm import tqdm
from Data.datasets import save_obj, load_obj
import itertools

# Extreme volume conditions: in every row a single reagent fills the whole 300 uL of solution.
# R1: DMF, R2:DMSO, R3:GBL, R4:morph/Pb in DMF, R5:morph in DMF, R6:FAH, R7:H2O, R8:DCM (R8 is separate vial, not counted for total solution volume)
R_vol_vertice = pd.DataFrame(
    data=np.diag(np.full(7, 300)),
    columns=['R%d' % k for k in range(1, 8)],
)

In [None]:
# Convert the extreme volume conditions into the matching extreme concentration conditions.

# Stock-solution parameters: _a is PbI2, _b is morph, _c is solvent vol fraction (e.g. DMSO / DMSO solution)
############
R4_a, R4_b, R4_c = 2.32, 2.91, 0.494
############
R5, R5_c = 2.36, 0.731
############

# Total volume of solvents. This is NOT the total volume of solution (fixed at 300 uL):
# the R4 and R5 stocks only contribute their solvent fractions R4_c and R5_c.
totvol_solv = (
    R_vol_vertice[['R1', 'R2', 'R3']].sum(axis=1)
    + R_vol_vertice['R4'] * R4_c
    + R_vol_vertice['R5'] * R5_c
)

R_conc_vertice = pd.DataFrame({
    # Pb only comes from R4; morph comes from both R4 and R5 (per 300 uL of solution)
    'Pb': (R_vol_vertice['R4'] * R4_a) / 300,
    'morph': (R_vol_vertice['R4'] * R4_b + R_vol_vertice['R5'] * R5) / 300,
    # solvent volume fractions, e.g. vol[DMSO] / (total solvent volume)
    'DMSO': R_vol_vertice['R2'] / totvol_solv,
    'GBL': R_vol_vertice['R3'] / totvol_solv,
    # NOTE(review): 1.22 / 46 and 0.998 / 18 look like density / molar mass of
    # formic acid and water - confirm; 300/1000 rescales the 300 uL total volume.
    'FAH': ((R_vol_vertice['R6'] * 1.22) / 46) / (300 / 1000),
    'H2O': ((R_vol_vertice['R7'] * 0.998) / 18) / (300 / 1000),
}).fillna(0)  # any NaN (e.g. a 0/0 solvent fraction) is replaced by 0

In [None]:
# Display the volume table R_vol_vertice as the cell output
R_vol_vertice

In [None]:
# Display the concentration table R_conc_vertice as the cell output
R_conc_vertice

### Calculate the convex hull

In [None]:
from scipy.spatial import ConvexHull
import matplotlib as mpl
from matplotlib.font_manager import FontProperties
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection, Line3DCollection

# Calculate the convex hull of the extreme concentration conditions (R_conc_vertice)
convexhull = ConvexHull(R_conc_vertice)
# Number of facets of the convex hull; note that "convexhull.simplices" only holds
# the indices of the points that make up each facet, not their coordinates.
n_facet = np.shape(convexhull.simplices)[0]

# Coordinates of the facets, shaped as
# (number of facets) x (number of points per facet) x (number of coordinates per point).
# Indexing the hull's points with the simplices array gathers every facet at once and
# does not hard-code the dimensionality.
facets = convexhull.points[convexhull.simplices]

# Sanity check: make sure that the vertices of the convex hull are the extreme concentration conditions.
vertices = convexhull.points[convexhull.vertices] # it is np.array
vertices = pd.DataFrame(columns = R_conc_vertice.columns, data = vertices) # converted to dataframe
print('The vertices of the convexhull are extreme concentration conditions?')
# The builtin all() applied to a DataFrame iterates over its column labels, which are
# non-empty strings and therefore always truthy, so it cannot detect a mismatch.
# Compare the underlying values instead; array_equal also returns False when the
# two tables have different shapes.
np.array_equal(vertices.to_numpy(), R_conc_vertice.to_numpy())

### Generate the grid points

In [None]:
### Define functions to (1) generate grid points in a hypercube and (2) check whether a point lies inside a convex hull.

# Generate meshgrid points of certain size and location
def gridgen(n=(10,)*6, x1=(0,3), x2=(0,4), x3=(0,10), x4=(0,3), x5=(0,3), x6=(0,3)):
    """Return every point of a regular 6-D mesh grid as an (N, 6) float array.

    Parameters
    ----------
    n : sequence of 6 ints
        Number of evenly spaced samples along each of the six axes
        (passed to ``np.linspace``).
    x1, x2, x3, x4, x5, x6 : sequence of 2 numbers
        ``[start, stop]`` bounds (both included) of the corresponding axis.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n[0]*n[1]*n[2]*n[3]*n[4]*n[5], 6)``; column ``k`` holds
        the coordinate along axis ``k+1``.  Rows follow the C-order flattening
        of ``np.meshgrid`` with its default 'xy' indexing: the x2 coordinate
        varies slowest, then x1, x3, x4, x5, and x6 varies fastest.
    """
    bounds = (x1, x2, x3, x4, x5, x6)
    axes = [np.linspace(b[0], b[1], n[k]) for k, b in enumerate(bounds)]

    # copy=False yields broadcastable views, so the only full-size allocation
    # is the output array itself (the grid can hold hundreds of millions of points).
    mesh = np.meshgrid(*axes, copy=False)
    points = np.empty(mesh[0].shape + (len(axes),))
    for k, coords in enumerate(mesh):
        # Broadcast each axis' coordinates into its own column in one vectorized step
        points[..., k] = coords

    # Collapse the six grid dimensions into one row index (no copy: points is contiguous)
    return points.reshape(-1, len(axes))

# Check if point is in convexhull
def point_in_hull(point, hull, tolerance = 1e-12):
    """Return True when `point` satisfies every row of `hull.equations`.

    Each row of `hull.equations` is read as (normal..., offset); the point
    passes a row when ``dot(normal, point) + offset <= tolerance``.
    The function returns False as soon as one row is violated.
    """
    for plane in hull.equations:
        normal, offset = plane[:-1], plane[-1]
        # Written as "not (<=)" so that a NaN distance counts as outside
        if not (np.dot(normal, point) + offset <= tolerance):
            return False
    return True

In [None]:
# Bounding box of the hull in 6D: [min, max] of each concentration over the hull vertices
Pb_range, morph_range, DMSO_range, GBL_range, FAH_range, H2O_range = (
    [min(vertices[col]), max(vertices[col])]
    for col in ('Pb', 'morph', 'DMSO', 'GBL', 'FAH', 'H2O')
)

FAH_conc_lim = 16 # we used the maximum concentration of formic acid for most of our study
H2O_conc_lim = 13.5 # it is approximately 25% water, more than 25% of water could result in bilayer when DCM diffuse into the solution.

# Number of grid samples per axis. Pb, morph, DMSO and GBL (not concentration-limited) get n;
# FAH and H2O get n scaled by (maximum of the range / concentration limit).
n = 20
n_FAH = int(FAH_range[1]/FAH_conc_lim*n)
n_H2O = int(H2O_range[1]/H2O_conc_lim*n)

# Generate the mesh-grid points spanning the bounding box
grid_points = gridgen(
    n=[n, n, n, n, n_FAH, n_H2O],
    x1=Pb_range,
    x2=morph_range,
    x3=DMSO_range,
    x4=GBL_range,
    x5=FAH_range,
    x6=H2O_range,
)
print("Finished generating grid points in 6D space")
print("*"*20)

In [None]:
# Keep only the mesh-grid points that lie inside the convex hull (tqdm shows the progress)
point_in_box = np.array(
    [list(candidate) for candidate in tqdm(grid_points) if point_in_hull(candidate, convexhull)]
)

In [None]:
# Assemble the concentration state space from the grid points found inside the hull
R_conc_statespace = pd.DataFrame(data=point_in_box, columns=R_conc_vertice.columns)

# Keep only states with non-zero morph and Pb whose FAH and H2O concentrations
# do not exceed their limits
R_conc_statespace = R_conc_statespace[
    R_conc_statespace['morph'].ne(0)
    & R_conc_statespace['Pb'].ne(0)
    & R_conc_statespace['FAH'].le(FAH_conc_lim)
    & R_conc_statespace['H2O'].le(H2O_conc_lim)
]

# Persist both the full grid and the constrained state space
save_obj(grid_points, '8R homogeneous concentration grid points')
save_obj(R_conc_statespace, '8R homogeneous concentration statespace (Pb2, morph, H2O and FAH constrained)')

In [None]:
# Display the concentration state space table as the cell output
R_conc_statespace

### Plot the state space in 3D

In [None]:
# Reload the concentration state space stored under this name
# (presumably so the cells below can run without regenerating the grid - confirm)
R_conc_statespace = load_obj('8R homogeneous concentration statespace (Pb2, morph, H2O and FAH constrained)')

In [None]:
### Visualize data points in 3D space
from Models import plot
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm import tqdm
from Data.datasets import save_obj, load_obj
import itertools


df_plot = R_conc_statespace.filter(['DMSO','GBL','H2O'])
df_plot['crystal score'] = [5]*(R_conc_statespace.shape[0])
df_plot = df_plot.drop_duplicates()

%matplotlib notebook
plot.plot3d2d(point=np.array(df_plot), x_range = [0, 1], y_range = [0, 1], z_range = [0, 16], \
              xy_loc = -8, xz_loc = 0.5, yz_loc = -1,\
              x_step = 0.2, y_step = 0.2, z_step = 2, elev = 30, azim = -60, name = '8R_DMSO_GBL_H2O')

In [None]:
from sklearn.preprocessing import StandardScaler
# Fit the standardization on the concentration state space and save the fitted scaler.
std_scaler = StandardScaler()
std_scaler.fit(R_conc_statespace)
save_obj(std_scaler,'std_scaler_8 reagent')

# Standardized copy of the concentration state space (same index and columns as the
# original table) for machine learning modeling, saved under its own name.
R_conc_statespace_scal = pd.DataFrame(
    data=std_scaler.transform(R_conc_statespace),
    index=R_conc_statespace.index,
    columns=R_conc_statespace.columns,
)
save_obj(
    R_conc_statespace_scal,
    '8R homogeneous concentration statespace_standardized (Pb2, morph, H2O and FAH constrained)',
)

In [None]:
# Size of the state space values in memory: bytes / 1024 / 1024
R_conc_statespace.to_numpy().nbytes/1024/1024