In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
import glob
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.io as sio
from matplotlib import colors
import matplotlib.pyplot as plt
from toolz.curried import pipe, curry

In [4]:
flist = sorted(glob.glob("poreData/relaxed_bulk_structures_pore_0-1/*_pore.mat"))
print(len(flist))

217


In [5]:
cifs = np.asarray([fname.split("/")[-1].split("zz_")[-1].split("ff_")[0] for fname in flist])[:,None]

In [6]:
%%time
columns = ["cif", "pld", "lcd", "asa", "av", "psd mean", "psd std", "n_paths", "paths mean", "paths std", "xdim", "ydim", "zdim", "n_channels"]
table = np.zeros((len(flist), 13))

paths_list = []
psd_list = []
dim_list = []
area_list = []
h_list = []
for i, fname in enumerate(flist):
    s = sio.loadmat(fname)
    table[i, 0] = s['pld'][0]
    table[i, 1] = s['lcd'][0]
    table[i, 2] = s['asa'][0]
    table[i, 3] = s['av'][0]
    psd_list.append(s['psd'][0])
    table[i, 4] = np.mean(s['psd'][0])
    table[i, 5] = np.std(s['psd'][0])
    if len(s['paths']) is not 0:
        table[i, 6] = len(s['paths'][0])
        paths_list.append(s['paths'][0])
    table[i, 7] = np.mean(s['paths'])
    table[i, 8] = np.std(s['paths'])
    table[i, 9] = s['dim'][0][0]
    table[i, 10] = s['dim'][0][1]
    table[i, 11] = s['dim'][0][2]
    dim_list.append(s['dim'][0][0]*s['dim'][0][1]*s['dim'][0][2])
    area_list.append(s['dim'][0][0]*s['dim'][0][1]+ s['dim'][0][0]*s['dim'][0][2] + s['dim'][0][1]*s['dim'][0][2])
    if len(s['paths']) is not 0:
        table[i, 12] = s['n_paths'][0]
table = np.concatenate([cifs, table], axis=1)
vols = np.asarray(dim_list)
areas = np.asarray(area_list)

CPU times: user 368 ms, sys: 160 ms, total: 528 ms
Wall time: 2.72 s


In [7]:
df = pd.DataFrame(data=table, index=table[:,0])
for col in columns[1:]:
    df[col] = df[col].astype(float)
df.head(5)

KeyError: 'pld'

In [None]:
df.describe()

In [None]:
df.to_csv("iza_rlaxed_bulk.csv", sep=',')

In [None]:
l1 = np.array(df["pld"])
l2 = np.array(df["lcd"])

plt.figure(figsize=(6, 6))
plt.scatter(l1, l2, c="r")
plt.title("LCDs and PLDs for Relaxed Bulk IZA Structures\n")
plt.xlabel("pld $(\mathrm{\AA})$")
plt.ylabel("lcd $(\mathrm{\AA})$")
plt.xlim([-0.2, 18.0])
plt.ylim([-0.2, 18.0])
plt.plot([-0.2, 18.0], [-0.2, 18.0], ls="--", c=".3")
plt.show()

In [None]:
l = np.array(df["asa"])
sns.distplot(l/areas, bins=30, kde=False, color="r")
plt.title("Accessible Surface Area$(\mathrm{\AA}^2)$, probe=0.1$\mathrm{\AA}$")
plt.xlabel("Surface Area")
plt.ylabel("Structure Count")
plt.show()

l = np.array(df["av"])
sns.distplot(l/vols, bins=30, kde=False, color="r")
plt.title("Accessible Volume $(\mathrm{\AA}^3)$, probe=0.1$\mathrm{\AA}$")
plt.xlabel("Volume")
plt.ylabel("Structure Count")
plt.show()

l = np.array(df["pld"])
sns.distplot(l, bins=30, kde=False, color="r")
plt.title("Pore Limiting Diameter $(\mathrm{\AA})$")
plt.xlabel("Pore Limiting Diameter")
plt.ylabel("Structure Count")
plt.show()

l = np.array(df["lcd"])
sns.distplot(l, bins=30, kde=False, color="r")
plt.title("Largest Cavity Diameter $(\mathrm{\AA})$")
plt.xlabel("Largest Cavity Diameter")
plt.ylabel("Structure Count")
plt.show()

l = np.array(df["n_channels"])
sns.distplot(l, bins=30, kde=False, color="r")
plt.title("Z direction unique paths in a $(2\cdot 2\cdot 2)$ super cell, probe=0.1$\mathrm{\AA}$")
plt.xlabel("No. of Channels")
plt.ylabel("Structure Count")
plt.show()

l = np.array(df["paths mean"])
l[np.isnan(l)] = 0
sns.distplot(l, bins=30, kde=False, color="r")
plt.title("Mean Path Length, probe=0.1$\mathrm{\AA}$")
plt.xlabel("Mean Path Length")
plt.ylabel("Structure Count")
plt.show()

l = vols.copy()
l[np.isnan(l)] = 0
sns.distplot(np.log10(l/8), bins=30, kde=False, color="r")
plt.title("Unit Cell Volumes, probe=0.1$\mathrm{\AA}$")
plt.xlabel("log of Unit Cell Volume")
plt.ylabel("Structure Count")
plt.show()

In [None]:
%%time
for i in range(217):
    psd = psd_list[i]
    sns.distplot(psd, bins=30, kde=False)
plt.title("Overlay of accessible Pore Size Distribution for few structures")
plt.xlabel("Pore size in $\mathrm{\AA}$")
plt.ylabel("Pixel Count")
plt.show()

In [None]:
nbins = 100
psd_norm = np.zeros((len(psd_list), nbins))

for indx, psd in enumerate(psd_list):
    psd_norm[indx, :] = (np.histogram(psd, bins=nbins, range=(0,10))[0] / len(psd))

In [None]:
for i in range(217):
    psd = psd_norm[i]
    plt.plot(psd)
plt.show()