In [1]:
import sys, time
import pickle
import sklearn.preprocessing
from scipy import stats
import pandas as pd
import numpy as np
from _plotly_future_ import v4_subplots
import plotly.graph_objs as go
import plotly.io as pio
import plotly
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.figure_factory as ff
# Tell plotly where the orca executable used for static image export lives,
# then switch on inline notebook rendering.
# NOTE(review): this is an absolute, machine-specific path — adjust it for the
# environment the notebook is run in.
plotly.io.orca.config.executable = '/anaconda2/envs/pytorch/bin/orca'
init_notebook_mode(connected=True)

# Styling shared by the x and y axes of every figure in this notebook:
# no grid or zero line, inward ticks, thick black mirrored axis lines.
_axis_style = dict(
    showgrid=False,
    zeroline=False,
    ticks="inside",
    showline=True,
    tickwidth=3,
    linewidth=3,
    ticklen=10,
    mirror="allticks",
    color="black",
)

# Base layout that each plotting cell copies into its own go.Layout.
glob_layout = go.Layout(
    font=dict(family='Helvetica', size=24, color='black'),
    margin=dict(l=100, r=10, t=10, b=100),
    xaxis=dict(_axis_style),
    yaxis=dict(_axis_style),
    legend_orientation="v",
)

In [2]:
from perato_utils import is_pareto, area_under_pareto
from plot_utils import *
from gpr import process_generation_2DEI, gp_predict
from ei import getEiVec2D_aug, getPiVec2D_aug, get_ei_samples_kmedoids

In [3]:
# Load the cached file-name list and the main data frame.
# NOTE(security): pickle.load can execute arbitrary code; only load pickle
# files that come from a trusted source.
# Context managers make sure the file handles are closed after loading.
with open('fnames.pkl', 'rb') as fh:
    fnames = pickle.load(fh)
# df = pd.read_json('df_RAC155_homogap.json', orient='records', lines=True)
with open('df_RAC155_homogap_all_1strow.pkl', 'rb') as fh:
    df = pickle.load(fh, encoding='latin1')

# Keep only rows that have both target properties.
df = df.dropna(subset=['alphaHOMO', 'gap'])

# Optional down-sampling: percentage of rows to keep (100 keeps all rows, in
# a shuffled order fixed by random_state=0).
whole_percentage = 100
df = df.sample(n=int(whole_percentage*0.01*len(df)), random_state=0)
# reset_index() keeps the previous index as an 'index' column.
df = df.reset_index()
# The string 'undef' is treated as a missing value.
df = df.replace('undef', np.nan)

# Derived columns: 'charge' is ligcharge + ox; 'natom' is the number of
# newline-separated lines in the 'init_geo' string.
df['charge'] = df['ligcharge'].values + df['ox'].values
df['natom'] = [len(geo.split('\n')) for geo in df['init_geo']]

In [4]:
# Charge-scaled HOMO: divide alphaHOMO by (|charge + 1| + 1) and flip the sign
# for rows whose charge is below -1.
homo_scaled = [
    (homo if charge >= -1 else -homo) / (abs(charge + 1) + 1)
    for charge, homo in zip(df['charge'], df['alphaHOMO'])
]
df['homo_scaled'] = homo_scaled

# Size-scaled gap: gap multiplied by sqrt(natom).
df['gap_scaled'] = df['gap'].values * np.sqrt(df['natom'].values)

# Drop every row whose 'ligstr' is "DEJBIF".
df = df[df["ligstr"] != "DEJBIF"]

In [5]:
# "Good" rows pass the geometry flag, the ss flag and the convergence check.
# Each comparison is parenthesised: `&` binds tighter than `==`, so the
# unparenthesised form `a & b & df['converged']==True` would be evaluated as
# `(a & b & df['converged']) == True`, which only gives the intended mask when
# 'converged' is a strict boolean column.
df_good = df[
    (df['geo_flag'] == 1)
    & (df['ss_flag'] == 1)
    & (df['converged'] == True)
]
df_conv = df[df['converged'] == True]
print(len(df_good), len(df_conv), len(df))

21544 28535 28535


In [6]:
# Names of the two data-frame columns used as plot axes / Pareto objectives
# throughout the notebook.
y1l, y2l = 'alphaHOMO', 'gap'
# Alternative: use the scaled properties instead.
# y1l, y2l = 'homo_scaled', 'gap_scaled'

In [None]:
# Pareto analysis of the "good" rows in the (y1l, y2l) plane; is_pareto
# returns the selected row indices together with the corresponding points.
pareto_inds, pareto_points = is_pareto(
    points=df_good.loc[:, [y1l, y2l]].values,
)

In [None]:
# Rows of df_good selected by the positional indexer returned from is_pareto.
# NOTE(review): presumably pareto_inds holds integer positions (or a boolean
# mask) aligned with df_good's row order — confirm against perato_utils.
df_pareto = df_good.iloc[pareto_inds]

In [None]:
# Display the Pareto-front rows for inspection.
df_pareto

In [None]:
# Pareto-front scatter in the (y1l, y2l) plane, coloured by the 'ox' column.
# Hovering a marker shows the row's 'unique_name'.
trace0 = go.Scatter(
    x=df_pareto[y1l],
    y=df_pareto[y2l],
    text=df_pareto['unique_name'],
    mode='markers',
    opacity=1,
    marker=dict(
        size=8,
        color=df_pareto['ox'],
        colorscale='Picnic',
        colorbar=dict(
            title="ox"
        ),
        symbol='circle',
    ),
)
data = [trace0]
layout = go.Layout()
layout.update(glob_layout)
# Axis titles follow the plotted columns, so they stay correct when y1l / y2l
# are switched to the scaled properties (previously hard-coded strings).
layout["xaxis"].update({'title': y1l})
layout["yaxis"].update({'title': y2l})
layout.legend.update(x=0.6, y=1.0, bgcolor='rgba(0,0,0,0)')
layout.update(height=500, width=600, showlegend=False)
fig = dict(data=data, layout=layout)
iplot(fig)
# pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/pareto_front_colored_ox.pdf')

In [None]:
# Pareto-front scatter in the (y1l, y2l) plane, coloured by the 'ligcharge'
# column. Hovering a marker shows the row's 'unique_name'.
trace0 = go.Scatter(
    x=df_pareto[y1l],
    y=df_pareto[y2l],
    text=df_pareto['unique_name'],
    mode='markers',
    opacity=1,
    marker=dict(
        size=8,
        color=df_pareto['ligcharge'],
        colorscale='Picnic',
        colorbar=dict(
            title="ligcharge"
        ),
        symbol='circle',
    ),
)
data = [trace0]
layout = go.Layout()
layout.update(glob_layout)
# Axis titles follow the plotted columns, so they stay correct when y1l / y2l
# are switched to the scaled properties (previously hard-coded strings).
layout["xaxis"].update({'title': y1l})
layout["yaxis"].update({'title': y2l})
layout.legend.update(x=0.6, y=1.0, bgcolor='rgba(0,0,0,0)')
layout.update(height=500, width=600, showlegend=False)
fig = dict(data=data, layout=layout)
iplot(fig)
# pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/pareto_front_colored_ligcharge.pdf')

In [None]:
# Pareto-front scatter in the (y1l, y2l) plane, coloured by the total charge
# (ligcharge + ox). Hovering a marker shows the row's 'unique_name'.
trace0 = go.Scatter(
    x=df_pareto[y1l],
    y=df_pareto[y2l],
    text=df_pareto['unique_name'],
    mode='markers',
    opacity=1,
    marker=dict(
        size=8,
        color=df_pareto['ligcharge'].values + df_pareto['ox'].values,
        colorscale='Picnic',
        colorbar=dict(
            title="charge"
        ),
        symbol='circle',
    ),
)
data = [trace0]
layout = go.Layout()
layout.update(glob_layout)
# Axis titles follow the plotted columns, so they stay correct when y1l / y2l
# are switched to the scaled properties (previously hard-coded strings).
layout["xaxis"].update({'title': y1l})
layout["yaxis"].update({'title': y2l})
layout.legend.update(x=0.6, y=1.0, bgcolor='rgba(0,0,0,0)')
layout.update(height=500, width=600, showlegend=False)
fig = dict(data=data, layout=layout)
iplot(fig)
# pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/pareto_front_colored_charge.pdf')

In [None]:
# Pareto-front scatter in the (y1l, y2l) plane, coloured by the 'natom'
# column. Hovering a marker shows the row's 'unique_name'.
trace0 = go.Scatter(
    x=df_pareto[y1l],
    y=df_pareto[y2l],
    text=df_pareto['unique_name'],
    mode='markers',
    opacity=1,
    marker=dict(
        size=8,
        color=df_pareto['natom'].values,
        colorscale='Picnic',
        colorbar=dict(
            title="natom"
        ),
        symbol='circle',
    ),
)
data = [trace0]
layout = go.Layout()
layout.update(glob_layout)
# Axis titles follow the plotted columns, so they stay correct when y1l / y2l
# are switched to the scaled properties (previously hard-coded strings).
layout["xaxis"].update({'title': y1l})
layout["yaxis"].update({'title': y2l})
layout.legend.update(x=0.6, y=1.0, bgcolor='rgba(0,0,0,0)')
layout.update(height=500, width=600, showlegend=False)
fig = dict(data=data, layout=layout)
iplot(fig)
# The export target is named after natom; the previous commented path was a
# copy of the charge figure's path and would have overwritten that file.
# pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/pareto_front_colored_natom.pdf')

In [None]:
# Scatter of every df_good row in the (y1l, y2l) plane, coloured by the 'ox'
# column; the figure is written to PDF rather than shown inline.
marker_style = dict(
    size=4,
    color=df_good['ox'].values,
    colorscale='Picnic',
    colorbar=dict(title="ox"),
)
trace0 = go.Scatter(
    x=df_good[y1l].values,
    y=df_good[y2l].values,
    mode='markers',
    opacity=1,
    marker=marker_style,
)
data = [trace0]

# Start from the shared notebook layout, then set fixed axis ranges and size.
layout = go.Layout()
layout.update(glob_layout)
layout.xaxis.update(title=y1l, range=[-1.05, 0])
layout.yaxis.update(title=y2l, range=[0, 0.35])
layout.legend.update(dict(x=0, y=1.0, bgcolor='rgba(0,0,0,0)'))
layout.update(dict(height=500, width=600, showlegend=False))

fig = {'data': data, 'layout': layout}
# iplot(fig)
pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/space_good_colored_ox.pdf')

In [None]:
# Scatter of every df_good row in the (y1l, y2l) plane, coloured by the
# 'ligcharge' column; the figure is written to PDF.
marker_style = dict(
    size=4,
    color=df_good['ligcharge'].values,
    colorscale='Picnic',
    colorbar=dict(title="ligcharge"),
)
trace0 = go.Scatter(
    x=df_good[y1l].values,
    y=df_good[y2l].values,
    mode='markers',
    opacity=1,
    marker=marker_style,
)
data = [trace0]

# Start from the shared notebook layout, then set fixed axis ranges and size.
layout = go.Layout()
layout.update(glob_layout)
layout.xaxis.update(title=y1l, range=[-1.05, 0])
layout.yaxis.update(title=y2l, range=[0, 0.35])
layout.legend.update(dict(x=0, y=1.0, bgcolor='rgba(0,0,0,0)'))
layout.update(dict(height=500, width=600, showlegend=False))

fig = {'data': data, 'layout': layout}
pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/space_good_colored_ligcharge.pdf')

In [None]:
# Scatter of the df_good rows with charge below 5 in the (y1l, y2l) plane,
# coloured by the 'charge' column; the figure is written to PDF.
_df_good = df_good[df_good['charge'] < 5]
marker_style = dict(
    size=4,
    color=_df_good['charge'].values,
    colorscale='Picnic',
    colorbar=dict(title="charge"),
    symbol='circle',
)
trace0 = go.Scatter(
    x=_df_good[y1l],
    y=_df_good[y2l],
    mode='markers',
    opacity=1,
    marker=marker_style,
)
data = [trace0]

# Start from the shared notebook layout, then set fixed axis ranges and size.
layout = go.Layout()
layout.update(glob_layout)
layout.xaxis.update(title=y1l, range=[-1.05, 0])
layout.yaxis.update(title=y2l, range=[0, 0.35])
layout.legend.update(dict(x=0.6, y=1.0, bgcolor='rgba(0,0,0,0)'))
layout.update(xaxis={}, yaxis={})
layout.update(dict(height=500, width=600, showlegend=False))

fig = {'data': data, 'layout': layout}
pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/space_good_colored_charge.pdf')

In [18]:
import plotly.figure_factory as ff

# Distribution of the charge-scaled HOMO for every total charge in [-4, 4]
# that has more than 10 "good" rows.
hist_data, group_labels = [], []
for c in range(-4, 5):
    homo_c = df_good.loc[df_good['charge'] == c, 'alphaHOMO'].values
    if len(homo_c) <= 10:
        continue
    # Same scaling as the 'homo_scaled' column: divide by (|charge + 1| + 1)
    # and flip the sign for charges below -1. The negative branch previously
    # divided by (|charge| + 2), which disagreed with both the column
    # definition and the axis label below.
    sign = 1 if c >= -1 else -1
    hist_data.append(sign * homo_c / (abs(c + 1) + 1))
    group_labels.append('charge=%d' % c)  # name of the dataset

fig = ff.create_distplot(hist_data, group_labels, show_hist=False)
layout = go.Layout()
layout.update(glob_layout)
# NOTE(review): the label mentions sqrt(...), but no square root is applied in
# the scaling above — confirm which form is intended.
layout["xaxis"].update({'title': "(2H(-1)-1)HOMO/sqrt(|charge+1|+1)"})
layout["yaxis"].update({'title': 'frequency'})
fig.update(layout=layout)
fig.show()
# pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/distribution_HOMO_(2H(-1)-1)HOMO_div_sqrt(|charge+1|+1).pdf')

In [11]:
import plotly.figure_factory as ff

# Distribution of alphaHOMO (shifted by +0.2) for every total charge in
# [-4, 4] among the "good" rows.
# NOTE(review): the +0.2 offset is applied to the data but is not reflected in
# the axis label — confirm that it is intended.
hist_data, group_labels = [], []
for c in range(-4, 5):
    homo_c = df_good.loc[df_good['charge'] == c, 'alphaHOMO'].values
    # A density curve cannot be estimated from fewer than two samples, and
    # create_distplot fails on an empty group, so such charges are skipped.
    if len(homo_c) < 2:
        continue
    hist_data.append(homo_c + 0.2)
    group_labels.append('charge=%d' % c)  # name of the dataset

fig = ff.create_distplot(hist_data, group_labels, show_hist=False)
layout = go.Layout()
layout.update(glob_layout)
layout["xaxis"].update({'title': "HOMO"})
layout["yaxis"].update({'title': 'frequency'})
fig.update(layout=layout)
fig.show()
# pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/distribution_HOMO.pdf')

In [None]:
# Recompute the charge-scaled HOMO column on df: alphaHOMO divided by
# (|charge + 1| + 1), with the sign flipped for charges below -1.
homo_scaled = [
    (homo if charge >= -1 else -homo) / (abs(charge + 1) + 1)
    for charge, homo in zip(df['charge'], df['alphaHOMO'])
]
df['homo_scaled'] = homo_scaled

In [None]:
# Scatter of the scaled properties (homo_scaled vs gap_scaled) of the "good"
# rows, one trace per total charge in [-4, 4].
data = []
for ii in range(-4, 5):
    # Filter once per charge and reuse the subset for both axes.
    df_charge = df_good[df_good['charge'] == ii]
    trace0 = go.Scatter(
        x=df_charge['homo_scaled'],
        y=df_charge['gap_scaled'],
        mode='markers',
        opacity=1,
        name="c=%d"%(ii),
        marker=dict(
            size=4,
            symbol='circle',
        ),
    )
    data += [trace0]
layout = go.Layout()
layout.update(glob_layout)
# The axes show the scaled columns, so label them as such; the titles were
# previously y1l / y2l, which name the unscaled properties by default.
layout["xaxis"].update({'title': 'homo_scaled'})
layout["yaxis"].update({'title': 'gap_scaled'})
layout.legend.update(x=0.8, y=1.0, bgcolor='rgba(0,0,0,0)')
layout.update(height=500, width=600, showlegend=True)
fig = dict(data=data, layout=layout)
iplot(fig)
pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/scaledprop_space_good_colored_charge.pdf')

In [None]:
import plotly.figure_factory as ff

# Distribution of the size-scaled gap (gap * sqrt(natom)) of the "good" rows,
# grouped into natom bins of width 20.
# assign() returns a new frame, which avoids writing into a filtered slice of
# df (pandas' chained-assignment warning).
df_good = df_good.assign(
    gap_scaled=df_good['gap'].values*np.sqrt(df_good['natom'].values)
)
hist_data, group_labels = [], []
for c in range(11):
    # Bin c covers 20*c < natom <= 20*(c+1).
    _df = df_good[(df_good['natom'] > 20* c) & (df_good['natom'] <= 20* (c+1))]
    # A density curve cannot be estimated from fewer than two samples, and
    # create_distplot fails on an empty group, so such bins are skipped.
    if len(_df) < 2:
        continue
    hist_data += [_df['gap_scaled'].values]
    group_labels += ['size=[%d, %d]'%(20*c, 20*(c+1))] # name of the dataset

fig = ff.create_distplot(hist_data, group_labels, show_hist=False)
layout = go.Layout()
layout.update(glob_layout)
# Label the axis with the plotted column (it previously read "gap", the same
# label as the unscaled distribution plot).
layout["xaxis"].update({'title': "gap_scaled"})
layout["yaxis"].update({'title': 'frequency'})
fig.update(layout=layout)
# fig.show()
# NOTE(review): the file name says "div_sqrt(natom)" while the column is
# gap * sqrt(natom) — confirm the intended scaling.
pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/distribution_gap_div_sqrt(natom).pdf')

In [None]:
import plotly.figure_factory as ff

# Distribution of the raw gap of the "good" rows, grouped into natom bins of
# width 20.
hist_data, group_labels = [], []
for c in range(11):
    # Bin c covers 20*c < natom <= 20*(c+1).
    _df = df_good[(df_good['natom'] > 20* c) & (df_good['natom'] <= 20* (c+1))]
    # A density curve cannot be estimated from fewer than two samples, and
    # create_distplot fails on an empty group, so such bins are skipped.
    if len(_df) < 2:
        continue
    hist_data += [_df['gap'].values]
    group_labels += ['size=[%d, %d]'%(20*c, 20*(c+1))] # name of the dataset

fig = ff.create_distplot(hist_data, group_labels, show_hist=False)
layout = go.Layout()
layout.update(glob_layout)
layout["xaxis"].update({'title': "gap"})
layout["yaxis"].update({'title': 'frequency'})
fig.update(layout=layout)
# fig.show()
pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/distribution_gap.pdf')

In [None]:
# Scatter of all rows whose 'tag' is not 'rfb' in the (y1l, y2l) plane,
# coloured by the 'ox' column; the figure is written to PDF.
# NOTE(review): the output file name says "space_good", but the rows are taken
# from df, not df_good — confirm which frame is intended.
_df = df.loc[df['tag'] != 'rfb']
marker_style = dict(
    size=4,
    color=_df['ox'].values,
    colorscale='Picnic',
    colorbar=dict(title="ox"),
    symbol='circle',
)
trace0 = go.Scatter(
    x=_df[y1l],
    y=_df[y2l],
    mode='markers',
    opacity=1,
    marker=marker_style,
)
data = [trace0]

# Start from the shared notebook layout, then set fixed axis ranges and size.
layout = go.Layout()
layout.update(glob_layout)
layout.xaxis.update(title=y1l, range=[-1.05, 0])
layout.yaxis.update(title=y2l, range=[0, 0.35])
layout.legend.update(dict(x=0.6, y=1.0, bgcolor='rgba(0,0,0,0)'))
layout.update(xaxis={}, yaxis={})
layout.update(dict(height=500, width=600, showlegend=False))

fig = {'data': data, 'layout': layout}
# iplot(fig)
pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/space_good_colored_ox_nonrfb.pdf')

In [None]:
# Scatter of all rows whose 'tag' is not 'rfb' and whose charge is below 5 in
# the (y1l, y2l) plane, coloured by the 'charge' column; written to PDF.
# NOTE(review): the output file name says "space_good", but the rows are taken
# from df, not df_good — confirm which frame is intended.
_df = df[(df['tag'] != 'rfb') & (df['charge'] < 5)]
marker_style = dict(
    size=4,
    color=_df['charge'].values,
    colorscale='Picnic',
    colorbar=dict(title="charge"),
    symbol='circle',
)
trace0 = go.Scatter(
    x=_df[y1l],
    y=_df[y2l],
    mode='markers',
    opacity=1,
    marker=marker_style,
)
data = [trace0]

# Start from the shared notebook layout, then set fixed axis ranges and size.
layout = go.Layout()
layout.update(glob_layout)
layout.xaxis.update(title=y1l, range=[-1.05, 0])
layout.yaxis.update(title=y2l, range=[0, 0.35])
layout.legend.update(dict(x=0.6, y=1.0, bgcolor='rgba(0,0,0,0)'))
layout.update(xaxis={}, yaxis={})
layout.update(dict(height=500, width=600, showlegend=False))

fig = {'data': data, 'layout': layout}
# iplot(fig)
pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/space_good_colored_charge_nonrfb.pdf')

In [None]:
# Load the full data set.
# NOTE(security): pickle.load can execute arbitrary code; only load pickle
# files that come from a trusted source.
# The context manager makes sure the file handle is closed after loading.
with open('df_RAC155_homogap_all.pkl', 'rb') as fh:
    df_all = pickle.load(fh, encoding='latin1')

# metal_row is 1 for cr/mn/fe/co and 2 for every other metal label.
df_all['metal_row'] = [1 if x in ['cr', 'mn', 'fe', 'co'] else 2 for x in df_all['metal'].values]

# Work on an explicit copy of the converged rows so that adding columns below
# does not write into a slice of df_all (pandas' chained-assignment warning).
df_allconv = df_all[df_all['converged']==True].copy()
df_allconv['charge'] = df_allconv['ligcharge'].values + df_allconv['ox'].values

In [None]:
# List the distinct values of the 'metal' column (np.unique returns them sorted).
np.unique(df_all['metal'].values)

In [None]:
# flag_score is the sum of the geometry flag and the ss flag for each row.
# assign() returns a new frame, so nothing is written into a filtered slice
# of df_all (pandas' chained-assignment warning).
df_allconv = df_allconv.assign(
    flag_score=df_allconv['geo_flag'].values + df_allconv['ss_flag'].values
)

In [None]:
# Number of converged rows with metal_row equal to 2.
int((df_allconv['metal_row'] == 2).sum())

In [None]:
# Total number of converged rows.
df_allconv.shape[0]

In [None]:
# df_allconv.to_csv("df_allconv.csv", index=False)

In [None]:
# Scatter of all converged rows in the (y1l, y2l) plane, coloured by the
# 'charge' column; the figure is written to PDF.
marker_style = dict(
    size=4,
    color=df_allconv['charge'].values,
    colorscale='Picnic',
    colorbar=dict(title="charge"),
    symbol='circle',
)
trace0 = go.Scatter(
    x=df_allconv[y1l],
    y=df_allconv[y2l],
    mode='markers',
    opacity=1,
    marker=marker_style,
)
data = [trace0]

# Start from the shared notebook layout, then set fixed axis ranges and size.
layout = go.Layout()
layout.update(glob_layout)
layout.xaxis.update(title=y1l, range=[-1.05, 0])
layout.yaxis.update(title=y2l, range=[0, 0.35])
layout.legend.update(dict(x=0.6, y=1.0, bgcolor='rgba(0,0,0,0)'))
layout.update(xaxis={}, yaxis={})
layout.update(dict(height=500, width=600, showlegend=False))

fig = {'data': data, 'layout': layout}
pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/space_converged_colored_charge.pdf')

In [None]:
# Scatter of all converged rows in the (y1l, y2l) plane, one trace per
# metal_row value (1 in blue, 2 in green); the figure is written to PDF.
color_list = ['blue', 'green']
data = []
for metal_row, row_color in enumerate(color_list, start=1):
    row_subset = df_allconv[df_allconv['metal_row'] == metal_row]
    data.append(
        go.Scatter(
            x=row_subset[y1l],
            y=row_subset[y2l],
            mode='markers',
            opacity=1,
            name="metal-row=%d" % metal_row,
            marker=dict(size=4, color=row_color, symbol='circle'),
        )
    )

# Start from the shared notebook layout, then set fixed axis ranges and size.
layout = go.Layout()
layout.update(glob_layout)
layout.xaxis.update(title=y1l, range=[-1.06, 0.01])
layout.yaxis.update(title=y2l, range=[-0.01, 0.4])
layout.legend.update(dict(x=0.5, y=1.0, bgcolor='rgba(0,0,0,0)'))
layout.update(xaxis={}, yaxis={})
layout.update(dict(height=500, width=500, showlegend=True))

fig = {'data': data, 'layout': layout}
# iplot(fig)
pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/space_converged_colored_metalrow.pdf')

In [None]:
# Scatter of all converged rows in the (y1l, y2l) plane, one trace per
# flag_score value. Traces are added for scores 2, 1, 0 in that order and are
# coloured green, orange and red respectively; the figure is written to PDF.
color_list = ['red', 'orange', 'green']
data = []
for score in (2, 1, 0):
    score_subset = df_allconv[df_allconv['flag_score'] == score]
    data.append(
        go.Scatter(
            x=score_subset[y1l],
            y=score_subset[y2l],
            mode='markers',
            opacity=1,
            name=score,
            marker=dict(size=4, color=color_list[score], symbol='circle'),
        )
    )

# Start from the shared notebook layout, then set fixed axis ranges and size.
layout = go.Layout()
layout.update(glob_layout)
layout.xaxis.update(title=y1l, range=[-1.06, 0.01])
layout.yaxis.update(title=y2l, range=[-0.01, 0.4])
layout.legend.update(dict(x=0.8, y=1.0, bgcolor='rgba(0,0,0,0)'))
layout.update(xaxis={}, yaxis={})
layout.update(dict(height=500, width=500, showlegend=True))

fig = {'data': data, 'layout': layout}
# iplot(fig)
pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/space_converged_colored_flagstatus.pdf')

In [None]:
# Pareto analysis over the converged rows whose flag_score is 0, 1 or 2,
# after dropping every row that contains a missing value in any column.
_flag_mask = df_allconv['flag_score'].isin([0, 1, 2])
_df = df_allconv[_flag_mask].dropna()
pareto_inds_all, pareto_points_all = is_pareto(points=_df[[y1l, y2l]].values)
# Row counts after / before dropna, to show how many rows were discarded.
len(_df), len(df_allconv[_flag_mask])

In [None]:
# Rows of _df selected by the positional indexer returned from is_pareto.
# NOTE(review): relies on _df still being the frame built in the Pareto cell
# for the converged rows; _df is reused as a scratch name in other cells, so
# run that cell immediately before this one.
df_convall_pareto = _df.iloc[pareto_inds_all]

In [None]:
# Overlay of two Pareto fronts in the (y1l, y2l) plane:
#   green markers + solid line  -> front of the "good" rows (df_pareto)
#   red markers + dashed line   -> front of all converged rows
# The line coordinates come from get_front_line_points.
good_marker = dict(size=5, color='green', symbol='circle')
all_marker = dict(size=5, color='red', symbol='circle')

trace0 = go.Scatter(
    x=df_pareto[y1l],
    y=df_pareto[y2l],
    text=df_pareto['unique_name'],
    mode='markers',
    opacity=1,
    marker=good_marker,
)
pp = get_front_line_points(pareto_points)
trace1 = go.Scatter(
    x=pp[:, 0],
    y=pp[:, 1],
    mode='lines',
    opacity=1,
    line=dict(color='green', width=2),
)
trace2 = go.Scatter(
    x=pareto_points_all[:, 0],
    y=pareto_points_all[:, 1],
    text=df_convall_pareto['unique_name'],
    mode='markers',
    opacity=1,
    marker=all_marker,
)
pp = get_front_line_points(pareto_points_all)
trace3 = go.Scatter(
    x=pp[:, 0],
    y=pp[:, 1],
    mode='lines',
    opacity=1,
    line=dict(color='red', width=2, dash='dash'),
)
data = [trace0, trace1, trace2, trace3]

# Start from the shared notebook layout, then set fixed axis ranges and size.
layout = go.Layout()
layout.update(glob_layout)
layout.xaxis.update(title=y1l, range=[-1.07, 0.01])
layout.yaxis.update(title=y2l, range=[-0.01, 0.35])
layout.legend.update(dict(x=0.6, y=1.0, bgcolor='rgba(0,0,0,0)'))
layout.update(xaxis={}, yaxis={})
layout.update(dict(height=500, width=500, showlegend=False))

fig = {'data': data, 'layout': layout}
pio.write_image(fig, '2DEI_alphaHOMO_gap/demo/Pareto_fronts/pareto_front_good&converged.pdf')