In [115]:
#all imports
#for reading json file
import json
#for dataframes
import pandas as pd
import numpy as np
#for implementing widgets
import ipywidgets as widgets
from IPython.display import clear_output
#for allowing user to download multiple data CSVs
from hublib.ui import FileUpload, Download
#for making dataframe into a qgrid
import qgrid

In [116]:
def create_dataframe(json_path='/home/corebuffalo/elliotsnitzer/tools/mofmap/bin/data.json'):
    """Build the per-MOF property table from the pairwise JSON data file.

    The JSON file holds a list of records. Each record names a 'source' and a
    'target' MOF and stores every property as a two-element sequence: element
    0 is paired with the source, element 1 with the target.

    Parameters
    ----------
    json_path : str, optional
        Location of the JSON file. Defaults to the path the notebook has
        always used, so calling ``create_dataframe()`` is unchanged.

    Returns
    -------
    pandas.DataFrame
        Sorted and indexed by 'CSD Identifier', with the property columns
        plus two derived columns:

        * 'Correlated Molecules' -- the set of targets recorded for the MOF
          when it appears as a source, or the string 'None' when it never does.
        * 'Metals' -- space-separated symbols from the ``metals`` table below
          that occur as substrings of the molecular formula ('' when none do).

    Raises
    ------
    FileNotFoundError
        If ``json_path`` does not exist.
    KeyError
        If a record is missing one of the expected keys.
    """
    #JSON key -> dataframe column, in the order the columns should appear
    property_columns = {
        'LCD': 'Largest Cavity Diameter',
        'PLD': 'Pore-Limiting Diameter',
        'area': 'Surface Area',
        'density': 'Density',
        'formula': 'Molecular Formula',
        'space_group': 'Space Group',
        'vol_frac': 'Volume Fraction',
    }

    #read in data from JSON file
    with open(json_path) as json_file:
        pairs = json.load(json_file)

    #one row per MOF mention (source first, then target) and, in the same pass,
    #a source -> {targets} lookup so relations need not be rescanned per MOF
    rows = []
    targets_by_source = {}
    for pair in pairs:
        targets_by_source.setdefault(pair['source'], set()).add(pair['target'])
        for position, role in enumerate(('source', 'target')):
            row = {'CSD Identifier': pair[role]}
            for key, column in property_columns.items():
                row[column] = pair[key][position]
            rows.append(row)

    #actually creating dataframe
    #a MOF is listed once per pair it takes part in, so drop the repeated rows,
    #then index by csd_id
    column_order = ['CSD Identifier'] + list(property_columns.values())
    df = pd.DataFrame(rows, columns=column_order)
    df = df.drop_duplicates()
    df = df.set_index('CSD Identifier')
    df = df.sort_index()

    #get correlated molecules for every MOF (only the source -> target direction)
    csd_relations = []
    for csd in df.index.tolist():
        related = targets_by_source.get(csd)
        #copy the set so rows never share one mutable object
        csd_relations.append(set(related) if related else 'None')

    #get metals from the molecular formulas of each MOF
    #only the symbols (keys) are used for matching; the names document them
    metals = {'Zn':'Zinc','Cu':'Copper','Cr':'Chromium','Mn':'Manganese',
              'Ni':'Nickel','V':'Vanadium','Fe':'Iron','U':'Uranium','Co':'Cobalt',
              'Cd':'Cadmium','W':'Tungsten','In':'Indium','Ag':'Silver','Pb':'Lead',
              'Dy':'Dysprosium','Gd':'Gadolinium','Pr':'Praseodymium','Nd':'Neodymium','Eu':'Europium',
              'Tb':'Terbium','Er':'Erbium','Yb':'Ytterbium','Hg':'Mercury',
              'Sr':'Strontium','Au':'Gold','Sm':'Samarium','Al':'Aluminum','Zr':'Zirconium',
              'Ce':'Cerium'}
    #plain case-sensitive substring test, symbols kept in table order
    metals_list = [' '.join(symbol for symbol in metals if symbol in form)
                   for form in df['Molecular Formula']]

    #add columns to the dataframe to complete the data
    df['Correlated Molecules'] = csd_relations
    df['Metals'] = metals_list

    #return the completed dataframe
    return df

In [117]:
#check if complete dataset already exists
#if it does, read the file into a dataframe
file_name = '/home/corebuffalo/elliotsnitzer/notebooks/complete_mof_data.csv'

def _parse_correlated(raw):
    """Turn one saved 'Correlated Molecules' cell back into a list of names.

    Returns the string 'None' when the cell holds no relations. Accepts the
    text form of a set (``{'A', 'B'}``) as well as of a list (``['A', 'B']``),
    because the column holds lists once it has been reloaded and saved again.
    """
    #a missing value arrives as a float NaN, not a string; read_csv produces
    #it for an empty cell and for any text it treats as an NA marker
    if not isinstance(raw, str):
        return 'None'
    stripped = raw.strip().strip('{}[]').replace('\'','').replace('"','')
    names = [name.strip() for name in stripped.split(',')]
    names = [name for name in names if name]
    if not names or names == ['None']:
        return 'None'
    return names

try:
    #read_csv opens and closes the file itself, so no handle is left open;
    #a missing file still raises FileNotFoundError
    df = pd.read_csv(file_name)
    df = df.set_index('CSD Identifier')
    #this is necessary to convert the string of correlated mofs read from the file back into a list
    #without this the mof_correlation_widget will not work
    df['Correlated Molecules'] = [_parse_correlated(c) for c in df['Correlated Molecules']]
    #an empty Metals string is written as an empty cell and read back as NaN;
    #restore '' so it matches what create_dataframe produces
    df['Metals'] = df['Metals'].fillna('')
#if it doesn't just create the dataframe from scratch using the JSON file
except FileNotFoundError:
    df = create_dataframe()

In [118]:
#folder holding one '<CSD id>_lo.png' fingerprint image per MOF
image_file_root = '/home/corebuffalo/elliotsnitzer/tools/mofmap/bin/finger_prints_lo/'

def show_image(img_id):
    """Load the fingerprint image for ``img_id`` into ``image_widget``.

    The file read is ``image_file_root + img_id + '_lo.png'``; its raw bytes
    become the widget's ``value``.

    Raises
    ------
    FileNotFoundError
        If no image file exists for ``img_id``.
    """
    image_file = image_file_root+img_id+'_lo.png'
    #the context manager closes the handle even when the read fails
    with open(image_file, 'rb') as file:
        image = file.read()
    image_widget.value = image
    
def show_info(img_id):
    """Refresh the info panel and the correlated-MOF selector for ``img_id``.

    Looks the MOF up in ``df`` by index label, passes its row to
    ``correlations``, and prints the text built by ``info_formatter`` inside
    ``info_widget`` after clearing the widget's previous output.
    """
    #wipe whatever the previously selected MOF left behind
    info_widget.clear_output()
    mof_row = df.loc[img_id]
    #update the selector before building the text for this MOF
    correlations(mof_row)
    summary_text = info_formatter(mof_row, img_id)
    #printing inside the widget's context sends the text to the panel
    with info_widget:
        print(summary_text)
        
def correlations(row_info):
    """Fill ``mof_selection_widget`` with the MOFs correlated to one row.

    ``row_info['Correlated Molecules']`` is either a collection of CSD
    identifiers (a set from ``create_dataframe``, a list after reloading from
    CSV) or a non-collection placeholder such as the string 'None'. Anything
    that is not a list, tuple or set produces an empty option list.
    """
    related = row_info['Correlated Molecules']
    #test the type directly rather than guessing from the first element's length:
    #that guess raised IndexError on an empty collection and threw away every
    #relation whenever the first identifier was a single character
    if isinstance(related, (list, tuple, set, frozenset)):
        correlated_mofs = list(related)
    else:
        correlated_mofs = []
    mof_selection_widget.options = correlated_mofs
    
def info_formatter(row_info,img_id):
    """Return the multi-line text shown in the info panel for one MOF.

    The first line is ``img_id`` wrapped in the ANSI bold on/off codes and
    padded on the left with spaces; each following line is
    ``'<column>: <value>'`` for a fixed sequence of columns of ``row_info``.
    """
    #columns in display order; each label is identical to its column name
    shown_columns = ('Surface Area',
                     'Pore-Limiting Diameter',
                     'Largest Cavity Diameter',
                     'Density',
                     'Volume Fraction',
                     'Space Group',
                     'Metals',
                     'Molecular Formula')
    heading = '                 ' + '\033[1m' + img_id + '\033[0m'
    detail_lines = [column + ': ' + str(row_info[column]) for column in shown_columns]
    return '\n'.join([heading] + detail_lines)

In [119]:
#the grid only shows a three-column view of the complete dataframe
modified_dataframe = df[['Surface Area',
                         'Pore-Limiting Diameter',
                         'Metals']].copy()

#placeholder for the fingerprint image; show_image assigns its value
image_widget = widgets.Image(format='png', width=400, height=500)

#bordered output area that show_info prints into
info_widget = widgets.Output(layout={'border': '1px solid black'})

#label + multi-select listing the MOFs correlated to the current one
correlation_label = widgets.Label(value='Correlated MOFs:')
mof_selection_widget = widgets.SelectMultiple()
correlation_widget = widgets.HBox([correlation_label, mof_selection_widget])

#options handed to qgrid as grid_options
col_opts = dict(editable=False, toolTip='Not Editable')
qgrid_widget = qgrid.show_grid(modified_dataframe, grid_options=col_opts)

def handle_row_selection(change):
    """Show the image and info for the first row selected in the grid.

    Parameters
    ----------
    change : dict
        Change notification from the grid widget; only notifications whose
        'name' is '_selected_rows' are acted upon.
    """
    if change['name']!='_selected_rows':
        return
    temp_df = qgrid_widget.get_selected_df()
    #clearing the selection also fires this handler, with nothing selected;
    #indexing [0] into that empty frame would raise IndexError
    if len(temp_df.index)==0:
        return
    img_id = temp_df.index[0]
    show_image(img_id)
    show_info(img_id)
        
#register the row-selection handler on the grid; no `names` filter is passed
#here, the handler itself checks change['name'] for '_selected_rows'
qgrid_widget.observe(handle_row_selection)


def handle_correlation_selection(change):
    """Show the first MOF currently selected in ``mof_selection_widget``.

    The ``change`` argument is not inspected; the selection is read from the
    widget itself. Nothing happens while the selection is empty.
    """
    chosen = mof_selection_widget.value
    if len(chosen)==0:
        return
    #only the first selected entry is displayed
    first_choice = chosen[0]
    show_image(first_choice)
    show_info(first_choice)
    
#react only to changes of the selector's `value` trait
mof_selection_widget.observe(handle_correlation_selection, names='value')

#info panel stacked above the correlated-MOF selector, placed right of the image
info_box = widgets.VBox(children=[info_widget, correlation_widget])
image_plus_info = widgets.HBox(children=[image_widget, info_box])
display(image_plus_info)
#last expression of the cell, so the grid renders below the image/info row
qgrid_widget

In [120]:
def current_df():
    """Write the grid's current dataframe to 'modified_mof_data.csv'.

    The frame comes from ``qgrid_widget.get_changed_df()`` and the file is
    (over)written in the working directory. Returns nothing.
    """
    qgrid_widget.get_changed_df().to_csv(
        r'modified_mof_data.csv',
        sep=',',
        mode='w',
    )
    
#download button for the grid's current data; current_df is passed as the
#callback and writes the same file name this button is created with
modified_download = Download(
    'modified_mof_data.csv',
    label='Modified Dataset',
    tooltip='DOWNLOAD PARTIAL DATA FILE',
    icon='arrow-circle-down',
    style='info',
    cb=current_df,
)


In [121]:
#allow user to download complete dataset
def complete_df():
    """Write the complete dataframe ``df`` to 'complete_mof_data.csv'.

    The file is (over)written in the working directory. Returns nothing.
    """
    output_name = r'complete_mof_data.csv'
    df.to_csv(output_name, sep=',', mode='w')
    
#download button for the full dataset; complete_df is passed as the callback
#and writes the same file name this button is created with
complete_download = Download(
    'complete_mof_data.csv',
    label='Complete Dataset',
    tooltip='DOWNLOAD COMPLETE DATA FILE',
    icon='download',
    style='success',
    cb=complete_df,
)


In [124]:
#both download buttons side by side; `.w` is the widget each Download exposes
downloads_widget = widgets.HBox(children=[modified_download.w,
                                          complete_download.w])
downloads_widget