In [3]:
import os
import sys
# Put the parent directory on the import path (the `modules` package used by
# this notebook is imported from there) and then work from that directory so
# the relative paths used by later cells ('misc/...', 'dataset/...') resolve.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
# NOTE(review): this chdir is relative, so re-running the cell in the same
# kernel moves one more level up. Run it exactly once per kernel session.
os.chdir('..')

In [2]:
import pandas as pd
import numpy as np
import pickle
from modules.functions import pymatgen_comp, data_generator_vec, check_cuda
from modules.pytorch_models import Generator, Discriminator
from modules.trained_models import get_uts_without_grain
import matplotlib.pyplot as plt
import torch
import torch.optim as optim
from sklearn.neighbors import KernelDensity
import torch.nn as nn
import tqdm
import seaborn as sns
import umap
import json

2023-01-20 01:29:47.319525: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [3]:
# Element list handed to data_generator_vec / get_comp elsewhere in the notebook
# (presumably the element order of the composition vectors - confirm).
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# were produced by this project.
el_list_loc = 'misc/element_order_uts.pkl'
with open(el_list_loc, 'rb') as el_file:
    el_list = pickle.load(el_file)

# Pre-fitted scaler; its .transform() is applied to the property columns.
with open('misc/scaler_y_multi.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# GAN

In [4]:
# Property columns, in the order expected by `scaler`.
properties = ['uts1200C', 'price', 'density']

# Keep only the rows with a strictly positive 'uts1200C' value.
dataset = pd.read_csv('dataset/synthetic_dataset.csv', index_col=0)
to_train_df = dataset.loc[dataset['uts1200C'] > 0].copy()

# Vectorised compositions for the retained rows.
comp_dset = data_generator_vec(to_train_df['Composition'], el_list=el_list)
vec_comps = comp_dset.real_data

# (n_rows, n_properties) float32 matrix of targets, then scaled.
y = (
    to_train_df[properties]
    .to_numpy()
    .reshape(-1, len(properties))
    .astype('float32')
)
y_scaled = scaler.transform(y)

# Gaussian KDE over the scaled properties; prop_sampler draws from it.
kde = KernelDensity(bandwidth=0.5, kernel='gaussian')
v = kde.fit(y_scaled)  # `v` keeps whatever fit() returns

In [5]:
def prop_sampler(n_samples):
    """Draw `n_samples` rows from the notebook-level `kde` and return them as float32."""
    drawn = kde.sample(n_samples)
    return drawn.astype('float32')

def noise_sampler(N, z_dim):
    """Return an (N, z_dim) float32 array of standard-normal noise.

    Samples come from NumPy's global random state, so results are only
    reproducible if the caller seeds `np.random` beforehand.
    """
    shape = (N, z_dim)
    gaussian = np.random.normal(size=shape)
    return gaussian.astype('float32')


def plot(points, dset, title, thresh=0.01):
    """Show a 2x2 diagnostic figure for a 2-D array of generated points.

    Panels, in row-major order:
      1. violin plot of every column of `points`, ticks labelled with
         `dset.elements`;
      2. histogram of the row sums (fixed bins over [0.990, 1.010] when the
         sums span less than 0.010);
      3. histogram of all values in `points`;
      4. histogram of the number of entries per row that exceed `thresh`.

    Parameters
    ----------
    points : array with at least `shape`, `sum`, `flatten` (2-D, rows = samples)
    dset : object exposing `size` and `elements`
    title : title placed on the first panel
    thresh : value above which an entry counts as present (default 0.01)
    """
    fig, axes = plt.subplots(2, 2, figsize=(8, 8))
    violin_ax, sum_ax, value_ax, count_ax = axes.flatten()

    # Panel 1: per-column distributions.
    violin_ax.set_title(title)
    column_positions = np.arange(points.shape[1])
    violin_ax.violinplot(points, column_positions)
    violin_ax.set_xticks(np.arange(dset.size))
    violin_ax.set_xticklabels(dset.elements)

    # Panel 2: row sums; zoom the bins in when all sums are nearly identical.
    row_sums = points.sum(axis=1)
    spread = np.max(row_sums) - np.min(row_sums)
    if spread < 0.010:
        sum_ax.hist(row_sums, np.linspace(0.990, 1.010, 6))
    else:
        sum_ax.hist(row_sums)

    # Panel 3: all individual values.
    value_ax.hist(points.flatten())

    # Panel 4: count of above-threshold entries per row (bins cover 0..11).
    n_present = (points > thresh).sum(axis=1)
    count_ax.hist(n_present, np.arange(0, 12), width=0.80)

    plt.show()
    plt.close()


In [6]:
# --- Batch sizes ---
batch_size = 1000
minibatch_size = batch_size

# --- Dimensions ---
latent_dim = 4                    # width of the noise vector
prop_dim = y_scaled.shape[1]      # number of scaled property columns
gen_in = prop_dim + latent_dim    # presumably the generator input width (noise + properties)
gen_out = comp_dset.size          # taken from the composition dataset

# --- Hidden layers ---
n_hidden = 3
hidden = 2 * gen_in

# --- Device ---
cuda = check_cuda()

  return torch._C._cuda_getDeviceCount() > 0


In [None]:
# Load the TorchScript generator onto the CPU.
#
# The check and the load now use ONE path. Previously the cell tested for
# 'GAN_generator_multi.pt' but loaded 'GAN_generator_multi_test.pt', so the
# guard could pass while the load failed, or the reverse.
# NOTE(review): the file that was actually loaded is kept; confirm whether the
# '_test' suffix is intended.
model_path = 'saved_models'
generator_file = os.path.join(model_path, 'GAN_generator_multi_test.pt')
if not os.path.exists(generator_file):
    # Fail here with a clear message instead of printing and leaving
    # `generator` undefined for the cells that follow.
    raise FileNotFoundError(f'train model first! (missing {generator_file})')
generator = torch.jit.load(generator_file, map_location='cpu')

In [None]:
import json
from modules.mongodb_rom import calculate_density, calculate_price
from modules.functions import get_comp
# Read the stored starting point.
with open('misc/starting_comp.json', 'r') as json_file:
    start_comp_dict = json.load(json_file)

# Values stored in the file.
start_comp = start_comp_dict['start_comp']
start_uts = start_comp_dict['start_uts']

# Decode the stored vector with the element list and derive price and density
# from the decoded composition.
start_comp_decoded = get_comp(np.array(start_comp), el_list)
start_price = calculate_price(start_comp_decoded)
start_density = calculate_density(start_comp_decoded)

In [None]:
# Generate one sample conditioned on a hand-picked property target.

# One latent vector; the width comes from the configuration cell instead of a
# repeated literal, so changing `latent_dim` cannot silently desynchronise it.
lc = noise_sampler(1, latent_dim)

# Target values, in the column order of `properties` (the order `scaler` was
# applied to), as a single row.
c_unscaled = np.array([1.4, 31.54, 8.24]).reshape(1, -1)
assert c_unscaled.shape[1] == prop_dim, 'one target value per property is required'
c_scaled = scaler.transform(c_unscaled)

# Torch inputs: float32 noise and float32 condition, both of shape (1, dim).
tc_t = torch.from_numpy(lc)
c = torch.from_numpy(np.array(c_scaled, dtype='float32'))

# Run the generator and bring the result back as a NumPy array.
g_fake = generator(tc_t, c).to('cpu').detach().numpy()

In [None]:
# Evaluate the first generated vector: predicted UTS, then price and density
# of its decoded composition (decoded once and reused).
sample_vec = g_fake[0]
sample_comp = get_comp(sample_vec, el_list)
(
    get_uts_without_grain(sample_vec.reshape(1, -1)).item(),
    calculate_price(sample_comp),
    calculate_density(sample_comp),
)

In [None]:
from modules.mongodb_rom import calculate_entropy_mixing
# Decode the first generated vector and show it next to its mixing entropy.
first_sample = g_fake[0]
t = get_comp(first_sample, el_list)
mixing_entropy = calculate_entropy_mixing(t)
(t, mixing_entropy)

In [None]:
# Save the first generated composition as the new starting point.
decoded_comp = [get_comp(x, el_list) for x in g_fake]
first_vec = g_fake[0]

json_dict = {
    'decoded_comp': decoded_comp[0].reduced_formula,
    'start_comp': first_vec.tolist(),
    'start_uts': get_uts_without_grain(first_vec.reshape(1, -1)).item(),
    # Last entry of the vector; converted because NumPy scalars are not
    # JSON serialisable.
    'start_w_content': float(first_vec[-1]),
}

# Write real JSON in text mode. The target is a .json file and the sibling
# 'misc/starting_comp.json' is read with json.load, so the previous binary
# pickle.dump produced a file that JSON readers could not parse.
with open('misc/starting_comp_multi.json', 'w') as fid:
    json.dump(json_dict, fid)