In [4]:
from torchsummary import summary
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
from utils.utils import *

In [2]:
# Read the training and testing report tables from the local data/ folder.
train_csv, test_csv = 'data/training_set.csv', 'data/testing_set.csv'
df_train, df_test = pd.read_csv(train_csv), pd.read_csv(test_csv)

In [7]:
# Collect every distinct sentence found in the training "Findings" reports,
# keeping first-seen order.
# A companion set makes the duplicate check O(1); testing membership on the
# growing list itself is quadratic in the number of sentences.
# Assumes tokenize_report returns hashable sentences (strings) as its second
# element -- TODO confirm against utils.utils.
seen_sentences = set()
sentence_dict = []
for report in df_train["Findings"]:
    _ignored, report_sentences = tokenize_report(report)
    for sent in report_sentences:
        if sent not in seen_sentences:
            seen_sentences.add(sent)
            sentence_dict.append(sent)

In [18]:
## Load Model (Sbert)
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Sentences to encode: every collected sentence except the empty string.
# Building a filtered copy (instead of aliasing sentence_dict and calling
# .remove('')) leaves sentence_dict untouched and keeps this cell re-runnable:
# list.remove raises ValueError once the empty string is already gone.
sentences = [sent for sent in sentence_dict if sent != '']

# Sentences are encoded by calling model.encode()
embeddings = model.encode(sentences)

In [21]:
# Sanity check: show the embedding matrix shape next to the sentence count.
embedding_shape = embeddings.shape
print(embedding_shape, len(sentences))

(5401, 384) 5401


In [20]:
# Export the sentence embeddings for the TensorBoard projector, using each
# sentence's text as the per-point metadata label.
# The context manager closes the writer on exit (even if add_embedding raises),
# so its file handles are released before `writer` is rebound later on.
with SummaryWriter() as writer:
    writer.add_embedding(embeddings, metadata=sentences)

# TMLCC TensorBoard Example

In [15]:
# Latent-feature table (one row per MOF, with 'target', 'predict' and 'mofname'
# columns alongside the remaining feature columns).
df = pd.read_csv('Latent_feat_last_hope_train.csv')

# Lookup tables indexed by MOF name: the cleaned table and the topology column.
df_num = pd.read_csv('clean_train.csv').set_index('MOFname')
df_topo = pd.read_csv('train.csv').set_index('MOFname')['topology']

# Matrix handed to the projector: every column except 'target' and 'mofname'.
# NOTE(review): 'predict' is not dropped here, so it stays in `y` -- confirm
# that this is intended.
y = df.drop(columns=['target', 'mofname']).to_numpy()

# Ground-truth values and the absolute prediction error per row.
label = df['target'].to_numpy()
error = np.abs(label - df['predict'].to_numpy())

In [16]:
# Attach the cleaned columns, then the topology, matching each row's 'mofname'
# against the lookup tables' index.
# Note: `df` is rebound to the joined frame, so this cell is meant to run once
# per load of `df`.
with_numeric = df.join(df_num, on='mofname')
surface_area = with_numeric['surface_area [m^2/g]']

df = with_numeric.join(df_topo, on='mofname')
topo = df['topology']

In [17]:
# Per-point metadata columns for the projector; insertion order defines the
# column order of the exported metadata.
metadata_dict = {
    'target': label,
    'error': error,
    'surface area': surface_area,
    'topology': topo,
}

In [18]:
# Extra metadata columns: everything in df_num from position 10 onward, minus
# the two columns listed below (surface area is already added under its own key).
excluded_columns = ('surface_area [m^2/g]', 'CO2_working_capacity [mL/g]')
feat = df_num.columns[10:].tolist()
for column_name in excluded_columns:
    feat.remove(column_name)
print(feat)

['volume [A^3]', 'weight [u]', 'density [g/cm^3]', 'void_fraction', 'void_volume [cm^3/g]', 'functional_groups', 'metal_linker', 'organic_linker1', 'organic_linker2', 'catalog CO2/N2', 'CO2/N2_selectivity', 'heat_adsorption_CO2_P0.15bar_T298K [kcal/mol]']


In [19]:
# Add one metadata entry per selected feature column, taken from the joined frame.
metadata_dict.update({name: df[name].values for name in feat})

In [20]:
# Export the latent-feature matrix with one metadata row per point.
# zip(*values) transposes the per-column sequences into per-row tuples, and the
# header is passed as a real list (the documented type) rather than a dict_keys
# view.
# The context manager closes the writer on exit, even if add_embedding raises.
with SummaryWriter() as writer:
    writer.add_embedding(
        y,
        metadata=list(zip(*metadata_dict.values())),
        metadata_header=list(metadata_dict),
    )

In [62]:
import matplotlib.pyplot as plt

In [120]:
# List the metadata column names, one per line, in insertion order.
for header in metadata_dict.keys():
    print(header)

target
error
surface area
topology
volume [A^3]
weight [u]
density [g/cm^3]
void_fraction
void_volume [cm^3/g]
functional_groups
metal_linker
organic_linker1
organic_linker2
catalog CO2/N2
CO2/N2_selectivity
heat_adsorption_CO2_P0.15bar_T298K [kcal/mol]


In [None]:
%tensorboard --logdir=runs