In [18]:
import numpy as np
import pandas as pd
import pickle

In [2]:
core_df = pd.read_csv('CoRE-MOF Data/CoRE2019_alldata.csv')
qmof_df = pd.read_csv('QMOF Data/qmof.csv')

  qmof_df = pd.read_csv('QMOF Data/qmof.csv')


In [3]:
CSD_to_QMOF = {}

for i in qmof_df['name']:
    csd_refCode = i.split('_')[0]
    CSD_to_QMOF[csd_refCode] = i

CSD_to_CORE = {}

for i in core_df['MOFname']:
    csd_refCode = i.split('_')[0]
    CSD_to_CORE[csd_refCode] = i

In [4]:
len(CSD_to_CORE), len(CSD_to_QMOF)

(9326, 16049)

In [19]:
with open('CSD code map/CSD_to_CORE.pickle', 'wb') as handle:
    pickle.dump(CSD_to_CORE, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('CSD code map/CSD_to_QMOF.pickle', 'wb') as handle:
    pickle.dump(CSD_to_QMOF, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [20]:
#3 sets: intersection, CoRE-MOF only, QMOF only
CoRE_QMOF_int = list(set(CSD_to_CORE.keys()).intersection(set(CSD_to_QMOF.keys())))
CoRE_only = list(set(CSD_to_CORE.keys()).difference(set(CSD_to_QMOF.keys())))
QMOF_only = list(set(CSD_to_QMOF.keys()).difference(set(CSD_to_CORE.keys())))

len(CoRE_QMOF_int), len(CoRE_only), len(QMOF_only)

(1164, 8162, 14885)

In [21]:
with open('CSD code map/sets/CoRE_QMOF_intersection.pickle', 'wb') as handle:
    pickle.dump(CoRE_QMOF_int, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('CSD code map/sets/CoRE_only.pickle', 'wb') as handle:
    pickle.dump(CoRE_only, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('CSD code map/sets/QMOF_only.pickle', 'wb') as handle:
    pickle.dump(QMOF_only, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [6]:
core_properties = ['Di', 'total_SA_volumetric', 'pure_uptake_CO2_298.00_15000', 'pure_uptake_CO2_298.00_1600000', 'pure_uptake_methane_298.00_6500000', 'logKH_CO2']
qmof_properties = ['outputs.pbe.bandgap']

In [7]:
property_dict = {'CSD code' : [],
                 'Largest included sphere diameter (A)' : [],
                 'Total volumetric surface area (m^2/m^3)' : [],
                 'CO2 uptake at LP (mol/kg)' : [],
                 'CO2 uptake at HP (mol/kg)' : [],
                 'CH4 uptake at HP (mol/kg)' : [],
                 'logKH_CO2' : [],
                 'Band gap (eV)' : []
                 }

In [8]:
#case 1: in both CoRE and QMOF
for code in CoRE_QMOF_int:
    core_id= CSD_to_CORE[code]
    qmof_id = CSD_to_QMOF[code]

    #properties
    Di = core_df[core_df['MOFname'] == core_id]['Di'].values[0]
    SA = core_df[core_df['MOFname'] == core_id]['total_SA_volumetric'].values[0]
    uptake_co2_LP = core_df[core_df['MOFname'] == core_id]['pure_uptake_CO2_298.00_15000'].values[0]
    uptake_co2_HP = core_df[core_df['MOFname'] == core_id]['pure_uptake_CO2_298.00_1600000'].values[0]
    uptake_ch4_HP = core_df[core_df['MOFname'] == core_id]['pure_uptake_methane_298.00_6500000'].values[0]
    logKH_CO2 = core_df[core_df['MOFname'] == core_id]['logKH_CO2'].values[0]
    bandGap = qmof_df[qmof_df['name'] == qmof_id]['outputs.pbe.bandgap'].values[0]

    #append to dict
    property_dict['CSD code'].append(code)
    property_dict['Largest included sphere diameter (A)'].append(Di)
    property_dict['Total volumetric surface area (m^2/m^3)'].append(SA)
    property_dict['CO2 uptake at LP (mol/kg)'].append(uptake_co2_LP)
    property_dict['CO2 uptake at HP (mol/kg)'].append(uptake_co2_HP)
    property_dict['CH4 uptake at HP (mol/kg)'].append(uptake_ch4_HP)
    property_dict['logKH_CO2'].append(logKH_CO2)
    property_dict['Band gap (eV)'].append(bandGap)

In [9]:
#case 2: in CoRE-MOF only
for code in CoRE_only:
    core_id= CSD_to_CORE[code]

    #properties
    Di = core_df[core_df['MOFname'] == core_id]['Di'].values[0]
    SA = core_df[core_df['MOFname'] == core_id]['total_SA_volumetric'].values[0]
    uptake_co2_LP = core_df[core_df['MOFname'] == core_id]['pure_uptake_CO2_298.00_15000'].values[0]
    uptake_co2_HP = core_df[core_df['MOFname'] == core_id]['pure_uptake_CO2_298.00_1600000'].values[0]
    uptake_ch4_HP = core_df[core_df['MOFname'] == core_id]['pure_uptake_methane_298.00_6500000'].values[0]
    logKH_CO2 = core_df[core_df['MOFname'] == core_id]['logKH_CO2'].values[0]
    bandGap = np.nan #no band gap label in CoRE-MOF

    #append to dict
    property_dict['CSD code'].append(code)
    property_dict['Largest included sphere diameter (A)'].append(Di)
    property_dict['Total volumetric surface area (m^2/m^3)'].append(SA)
    property_dict['CO2 uptake at LP (mol/kg)'].append(uptake_co2_LP)
    property_dict['CO2 uptake at HP (mol/kg)'].append(uptake_co2_HP)
    property_dict['CH4 uptake at HP (mol/kg)'].append(uptake_ch4_HP)
    property_dict['logKH_CO2'].append(logKH_CO2)
    property_dict['Band gap (eV)'].append(bandGap)

In [13]:
#case 3: in QMOF only
for code in QMOF_only:
    qmof_id= CSD_to_QMOF[code]

    #properties
    Di = qmof_df[qmof_df['name'] == qmof_id]['info.lcd'].values[0]
    SA = np.nan
    uptake_co2_LP = np.nan
    uptake_co2_HP = np.nan
    uptake_ch4_HP = np.nan
    logKH_CO2 = np.nan
    bandGap = qmof_df[qmof_df['name'] == qmof_id]['outputs.pbe.bandgap'].values[0]

    #append to dict
    property_dict['CSD code'].append(code)
    property_dict['Largest included sphere diameter (A)'].append(Di)
    property_dict['Total volumetric surface area (m^2/m^3)'].append(SA)
    property_dict['CO2 uptake at LP (mol/kg)'].append(uptake_co2_LP)
    property_dict['CO2 uptake at HP (mol/kg)'].append(uptake_co2_HP)
    property_dict['CH4 uptake at HP (mol/kg)'].append(uptake_ch4_HP)
    property_dict['logKH_CO2'].append(logKH_CO2)
    property_dict['Band gap (eV)'].append(bandGap)

In [14]:
propertyDF = pd.DataFrame(property_dict)
propertyDF

Unnamed: 0,CSD code,Largest included sphere diameter (A),Total volumetric surface area (m^2/m^3),CO2 uptake at LP (mol/kg),CO2 uptake at HP (mol/kg),CH4 uptake at HP (mol/kg),logKH_CO2,Band gap (eV)
0,QONLAI,6.10211,1909.4500,0.624173,10.315793,8.403936,-4.376034,0.485636
1,BUKYAJ,5.10209,752.8280,2.442599,6.769549,6.642334,-3.589223,3.866396
2,QAGDIO,12.81046,1833.5900,0.440013,16.889565,13.095273,-4.533132,0.340761
3,XUYDIF,3.88622,7.3351,0.363202,1.301727,1.374730,-4.342234,1.785168
4,YEZFIU,7.76134,1275.6500,0.318448,6.437679,5.797507,-4.685284,4.751611
...,...,...,...,...,...,...,...,...
24206,QAZKIN,4.20402,,,,,,3.292749
24207,WEZXAB01,2.28741,,,,,,4.832323
24208,XANKED,1.94585,,,,,,0.418669
24209,AKINIS,2.42584,,,,,,2.156904


In [15]:
propertyDF.to_csv('CoRE_or_QMOF.csv', index = False)

In [16]:
qmof_df['info.doi']

0        https://doi.org/10.1016/j.molstruc.2004.03.051
1                     https://doi.org/10.1021/ja048624i
2                      https://doi.org/10.1039/b404485a
3                      https://doi.org/10.1039/b404485a
4                https://doi.org/10.1002/slct.201600331
                              ...                      
20370           https://doi.org/10.1021/acs.cgd.7b00848
20371           https://doi.org/10.1021/acs.cgd.7b00848
20372           https://doi.org/10.1021/acs.cgd.7b00848
20373           https://doi.org/10.1021/acs.cgd.7b00848
20374           https://doi.org/10.1021/acs.cgd.7b00848
Name: info.doi, Length: 20375, dtype: object