In [None]:
from data import DATA_3DCD, DATA_MP

In [None]:
# Execute the shared helper notebook and the data-location script inside this
# kernel so that the names they define are available to the cells below.
# NOTE(review): this notebook never imports np, pd, sns, plt, colors, json, ase,
# tqdm, Atoms, CrystalNN, pi, get_r, get_pymatgen, point_group or inh_symm itself,
# so they presumably come from modules.ipynb — TODO confirm.
%run ./modules.ipynb
# NOTE(review): DATA_3DCD / DATA_MP are already imported from `data` in the first
# cell; running data.py again here looks redundant — TODO confirm before removing.
%run ./data.py

In [None]:
# Global plot styling for the whole notebook.
# Two-colour scheme; the bar charts further down map "non-magic" to mediumblue
# and "magic" to red.
c = ["mediumblue", "red"]
my_c = colors.ListedColormap(list(c))

# Apply the white seaborn theme first, then override its default palette with
# the two colours above (the order of these two calls matters).
sns.set(style="white", palette="muted", color_codes=True)
sns.set_palette(sns.color_palette(c))

In [None]:
from pathlib import Path

# Display order of the point groups, read from the "point_groups" entry of
# point_groups.json (path is relative to the notebook's working directory).
_pg_json_text = Path("point_groups.json").read_text()
_pg_json = json.loads(_pg_json_text)
pg_list_order = _pg_json["point_groups"]

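The geometric descriptor file (`geo.npz`) already exists for the example data, so the cells below can load it directly to create the figures. If it does not exist, the cells below compute and save it first; run them again afterwards to create the figures.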

3DCD data analysis

In [None]:
# Geometric analysis of the 3DCD data set.
#
# Cache miss (DATA_3DCD.geo does not exist): compute the per-structure geometric
# descriptors and store them in DATA_3DCD.geo.
# Cache hit: load DATA_3DCD.geo and draw the figures from the stored arrays only,
# so the plotting branch works on a fresh kernel and never picks up stale
# variables left behind by another cell.
if not DATA_3DCD.geo.exists():

    # my_frames = ase.io.read(DATA_3DCD.structures, index="::200")
    my_frames = ase.io.read(DATA_3DCD.structures, index="::1000")  # DEBUG

    n_frames = len(my_frames)
    packing = np.zeros(n_frames)  # packing fraction
    n_species = np.zeros(n_frames)  # number of distinct elements in the structure
    cn = np.zeros(n_frames)  # CrystalNN coordination number averaged over the atoms
    alpha = np.zeros(n_frames)  # alpha parameter: smallest radius / largest radius
    x = np.zeros(n_frames)  # x parameter: n_smallest / (n_smallest + n_largest)
    std_ratio = np.zeros(n_frames)  # standard deviation of atomic radii
    magic = np.zeros(n_frames)  # 1 if the number of atoms is a multiple of 4, else 0
    natoms = np.zeros(n_frames)  # number of atoms within a structure
    red_frame = []  # reduced frames, keeping only the atoms with the largest radius
    pg_list = []  # point group of every structure

    # The neighbour finder is configured identically for every atom, so build it once.
    crystal_nn = CrystalNN(
        weighted_cn=False,
        distance_cutoffs=None,
        x_diff_weight=0.0,
        porous_adjustment=False,
    )

    for k, frame in enumerate(tqdm(my_frames)):
        frame.wrap(eps=1e-10)
        natoms[k] = len(frame)
        if len(frame) % 4 == 0:
            magic[k] = 1
        frame_py = get_pymatgen(frame)
        v = frame.get_volume()  # unit = A^3
        el = frame.get_chemical_symbols()
        n_species[k] = len(np.unique(el))
        rad = get_r(el)

        # Indices of the atoms with the largest radius; the maximum is computed
        # once instead of once per atom.
        r_max = max((r for r in rad if r is not None), default=None)
        indexes = [idx for idx, r in enumerate(rad) if r == r_max]
        max_el = np.array(el)[indexes]
        max_pos = np.array(frame.get_positions())[indexes]
        new = Atoms(max_el, positions=max_pos, pbc=True, cell=frame.get_cell())
        red_frame.append(new)
        # NOTE(review): sg_red is computed but not used anywhere in this cell.
        sg_red = ase.spacegroup.get_spacegroup(new, symprec=1e-05)
        sg = ase.spacegroup.get_spacegroup(frame, symprec=1e-05)
        # TODO: not sure about number or space group name? needs checking
        pg = point_group(sg.no)
        pg_list.append(pg)

        # Total volume of the atoms treated as spheres.
        # NOTE(review): the /100 presumably converts radii from pm to A — confirm.
        v_atom = sum((4 / 3) * pi * (r / 100) ** 3 for r in rad)

        nn = np.zeros(len(el))
        for l in range(len(el)):
            try:
                nn[l] = crystal_nn.get_cn(frame_py, l)
            except Exception:
                # Best effort: an atom whose coordination number cannot be
                # determined keeps the value 0 and still enters the mean below.
                # Unlike a bare `except`, this lets KeyboardInterrupt through.
                continue
        cn[k] = nn.mean()

        rad = np.array(rad)
        n_s = np.count_nonzero(rad == np.min(rad))
        n_l = np.count_nonzero(rad == np.max(rad))
        alpha[k] = np.min(rad) / np.max(rad)
        x[k] = n_s / (n_s + n_l)
        std_ratio[k] = np.std(rad)
        packing[k] = v_atom / v

    # Save to the same path that the existence check and np.load use, so the
    # cache that is written is the cache that is found on the next run.
    np.savez(
        DATA_3DCD.geo,
        magic=magic,
        cn=cn,
        n_species=n_species,
        natoms=natoms,
        packing=packing,
        alpha=alpha,
        x=x,
        std_ratio=std_ratio,
        pg_list=pg_list,
    )
else:  # generate figure directly

    # NOTE(review): allow_pickle=True can execute arbitrary code when the file is
    # untrusted; only load caches produced by this notebook.
    npzfile = np.load(DATA_3DCD.geo, allow_pickle=True)

    dict_ = {
        r"$\alpha$": npzfile["alpha"],
        r"$x$": npzfile["x"],
        r"$\sigma_{radii}$": npzfile["std_ratio"],
        r"$N_{atoms,cell}$": npzfile["natoms"].ravel(),
        r"$PF$": npzfile["packing"],
        "magic": npzfile["magic"].ravel(),
        r"$N_{species}$": npzfile["n_species"],
        "point_group": npzfile["pg_list"],
    }
    df = pd.DataFrame(dict_)

    # First set of histograms generation

    fig, axes = plt.subplots(1, 4, figsize=(15, 5))

    # (column, bins, restrict x to [0, 1] and drop the y label)
    # The species bin count comes from the loaded data, not from a leftover
    # `n_species` variable; "auto" is the seaborn default for `bins`.
    hist_specs = [
        (r"$N_{species}$", int(df[r"$N_{species}$"].max()), False),
        (r"$x$", "auto", True),
        (r"$\alpha$", 20, True),
        (r"$PF$", "auto", True),
    ]
    for ax, (column, bins, unit_range) in zip(axes, hist_specs):
        sns.histplot(
            ax=ax,
            data=df,
            x=column,
            hue="magic",
            stat="probability",
            legend=False,
            common_norm=False,
            multiple="dodge",
            shrink=0.8,
            bins=bins,
            kde=True,
        )
        if unit_range:
            ax.set_ylabel(None)
            ax.set_xlim(0, 1)

    sns.despine(left=True, bottom=True)
    plt.show()
    plt.close()

    # Second horizontal histogram: inherited symmetries
    # The "point_group" column already comes from the cache file, so it is not
    # overwritten with a `pg_list` variable that only exists after the
    # generation branch has run in the same kernel.

    m = df.loc[df["magic"] == 1]
    nm = df.loc[df["magic"] == 0]

    inh_symm_m = inh_symm(m["point_group"])
    inh_symm_whole = inh_symm(df["point_group"])
    inh_symm_nm = inh_symm(nm["point_group"])

    inh_symm_m = inh_symm_m.reindex(pg_list_order)
    inh_symm_nm = inh_symm_nm.reindex(pg_list_order)
    inh_symm_whole = inh_symm_whole.reindex(pg_list_order)

    # Normalise by the row of point group "1".
    inh_symm_m = inh_symm_m / inh_symm_m.loc["1"]
    inh_symm_nm = inh_symm_nm / inh_symm_nm.loc["1"]

    inh_symm_m.columns = ["magic"]
    inh_symm_m["non-magic"] = inh_symm_nm["point_group"]

    ax_symm = inh_symm_m.plot.barh(
        figsize=(8, 9), color={"magic": "red", "non-magic": "mediumblue"}
    )
    ax_symm.set_ylabel("point group")
    ax_symm.set_xlabel("proportion of inherited symmetries")
    sns.despine(left=True, bottom=True)

MP data analysis 

In [None]:
# Geometric analysis of the MP data set.
#
# Cache miss (DATA_MP.geo does not exist): compute the per-structure geometric
# descriptors and store them in DATA_MP.geo.
# Cache hit: load DATA_MP.geo and draw the figures from the stored arrays only,
# so the plotting branch works on a fresh kernel and never picks up stale
# variables (e.g. `n_species` or `pg_list` left behind by the 3DCD cell).
if not DATA_MP.geo.exists():

    # my_frames = ase.io.read(DATA_MP.structures, index="::200")
    my_frames = ase.io.read(DATA_MP.structures, index="::1000")  # DEBUG

    n_frames = len(my_frames)
    packing = np.zeros(n_frames)  # packing fraction
    n_species = np.zeros(n_frames)  # number of distinct elements in the structure
    cn = np.zeros(n_frames)  # CrystalNN coordination number averaged over the atoms
    alpha = np.zeros(n_frames)  # alpha parameter: smallest radius / largest radius
    x = np.zeros(n_frames)  # x parameter: n_smallest / (n_smallest + n_largest)
    std_ratio = np.zeros(n_frames)  # standard deviation of atomic radii
    magic = np.zeros(n_frames)  # 1 if the number of atoms is a multiple of 4, else 0
    natoms = np.zeros(n_frames)  # number of atoms within a structure
    red_frame = []  # reduced frames, keeping only the atoms with the largest radius
    pg_list = []  # point group of every structure

    # The neighbour finder is configured identically for every atom, so build it once.
    crystal_nn = CrystalNN(
        weighted_cn=False,
        distance_cutoffs=None,
        x_diff_weight=0.0,
        porous_adjustment=False,
    )

    for k, frame in enumerate(tqdm(my_frames)):
        frame.wrap(eps=1e-10)
        natoms[k] = len(frame)
        if len(frame) % 4 == 0:
            magic[k] = 1
        frame_py = get_pymatgen(frame)
        v = frame.get_volume()  # unit = A^3
        el = frame.get_chemical_symbols()
        n_species[k] = len(np.unique(el))
        rad = get_r(el)

        # Indices of the atoms with the largest radius; the maximum is computed
        # once instead of once per atom.
        r_max = max((r for r in rad if r is not None), default=None)
        indexes = [idx for idx, r in enumerate(rad) if r == r_max]
        max_el = np.array(el)[indexes]
        max_pos = np.array(frame.get_positions())[indexes]
        new = Atoms(max_el, positions=max_pos, pbc=True, cell=frame.get_cell())
        red_frame.append(new)
        # NOTE(review): sg_red is computed but not used anywhere in this cell.
        sg_red = ase.spacegroup.get_spacegroup(new, symprec=1e-05)
        sg = ase.spacegroup.get_spacegroup(frame, symprec=1e-05)
        # TODO: not sure about number or space group name? needs checking
        pg = point_group(sg.no)
        pg_list.append(pg)

        # Total volume of the atoms treated as spheres.
        # NOTE(review): the /100 presumably converts radii from pm to A — confirm.
        v_atom = sum((4 / 3) * pi * (r / 100) ** 3 for r in rad)

        nn = np.zeros(len(el))
        for l in range(len(el)):
            try:
                nn[l] = crystal_nn.get_cn(frame_py, l)
            except Exception:
                # Best effort: an atom whose coordination number cannot be
                # determined keeps the value 0 and still enters the mean below.
                # Unlike a bare `except`, this lets KeyboardInterrupt through.
                continue
        cn[k] = nn.mean()

        rad = np.array(rad)
        n_s = np.count_nonzero(rad == np.min(rad))
        n_l = np.count_nonzero(rad == np.max(rad))
        alpha[k] = np.min(rad) / np.max(rad)
        x[k] = n_s / (n_s + n_l)
        std_ratio[k] = np.std(rad)
        packing[k] = v_atom / v

    # Save to the same path that the existence check and np.load use, so the
    # cache that is written is the cache that is found on the next run.
    np.savez(
        DATA_MP.geo,
        magic=magic,
        cn=cn,
        n_species=n_species,
        natoms=natoms,
        packing=packing,
        alpha=alpha,
        x=x,
        std_ratio=std_ratio,
        pg_list=pg_list,
    )
else:  # generate figure directly

    # NOTE(review): allow_pickle=True can execute arbitrary code when the file is
    # untrusted; only load caches produced by this notebook.
    npzfile = np.load(DATA_MP.geo, allow_pickle=True)

    dict_ = {
        r"$\alpha$": npzfile["alpha"],
        r"$x$": npzfile["x"],
        r"$\sigma_{radii}$": npzfile["std_ratio"],
        r"$N_{atoms,cell}$": npzfile["natoms"].ravel(),
        r"$PF$": npzfile["packing"],
        "magic": npzfile["magic"].ravel(),
        r"$N_{species}$": npzfile["n_species"],
        "point_group": npzfile["pg_list"],
    }
    df = pd.DataFrame(dict_)

    # First set of histograms generation

    fig, axes = plt.subplots(1, 4, figsize=(15, 5))

    # (column, bins, restrict x to [0, 1] and drop the y label)
    # The species bin count comes from the loaded data, not from a leftover
    # `n_species` variable; "auto" is the seaborn default for `bins`.
    hist_specs = [
        (r"$N_{species}$", int(df[r"$N_{species}$"].max()), False),
        (r"$x$", "auto", True),
        (r"$\alpha$", 20, True),
        (r"$PF$", "auto", True),
    ]
    for ax, (column, bins, unit_range) in zip(axes, hist_specs):
        sns.histplot(
            ax=ax,
            data=df,
            x=column,
            hue="magic",
            stat="probability",
            legend=False,
            common_norm=False,
            multiple="dodge",
            shrink=0.8,
            bins=bins,
            kde=True,
        )
        if unit_range:
            ax.set_ylabel(None)
            ax.set_xlim(0, 1)

    sns.despine(left=True, bottom=True)
    plt.show()
    plt.close()

    # Second horizontal histogram: inherited symmetries
    # The "point_group" column already comes from the cache file, so it is not
    # overwritten with a `pg_list` variable that only exists after a generation
    # branch has run in the same kernel.

    m = df.loc[df["magic"] == 1]
    nm = df.loc[df["magic"] == 0]

    inh_symm_m = inh_symm(m["point_group"])
    inh_symm_whole = inh_symm(df["point_group"])
    inh_symm_nm = inh_symm(nm["point_group"])

    inh_symm_m = inh_symm_m.reindex(pg_list_order)
    inh_symm_nm = inh_symm_nm.reindex(pg_list_order)
    inh_symm_whole = inh_symm_whole.reindex(pg_list_order)

    # Normalise by the row of point group "1".
    inh_symm_m = inh_symm_m / inh_symm_m.loc["1"]
    inh_symm_nm = inh_symm_nm / inh_symm_nm.loc["1"]

    inh_symm_m.columns = ["magic"]
    inh_symm_m["non-magic"] = inh_symm_nm["point_group"]

    ax_symm = inh_symm_m.plot.barh(
        figsize=(8, 9), color={"magic": "red", "non-magic": "mediumblue"}
    )
    ax_symm.set_ylabel("point group")
    ax_symm.set_xlabel("proportion of inherited symmetries")
    sns.despine(left=True, bottom=True)

In [None]:
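# # LEGACY (this entire cell is commented out): an earlier draft of the MP analysis.
# # It reads DATA_MP.structures but its first np.savez targets "../r4data/3DCD/geo.npz";
# # do not re-enable it as is.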
# if Path("../r4data").exists():

#     my_frames = ase.io.read(DATA_MP.structures, index="::200")

#     packing = np.zeros(len(my_frames))
#     n_species = np.zeros(len(my_frames))
#     cn = np.zeros(len(my_frames))
#     alpha = np.zeros(len(my_frames))
#     x = np.zeros(len(my_frames))
#     std_ratio = np.zeros(len(my_frames))
#     magic = np.zeros(len(my_frames))
#     natoms = np.zeros(len(my_frames))
#     pg_list=[]
#     red_frame = []

#     for i, k in zip(my_frames, range(len(my_frames))):
#         i.wrap(eps=1e-10)
#         natoms[k] = len(i)
#         if len(i) % 4 == 0:
#             magic[k] = 1
#         frame_py = get_pymatgen(i)
#         v = i.get_volume()  # unit = A^^3
#         el = i.get_chemical_symbols()
#         n_species[k] = len(np.unique(el))
#         rad = get_r(el)
#         indexes = [
#             i for i, x in enumerate(rad) if x == max(i for i in rad if i is not None)
#         ]
#         max_el = np.array(i.get_chemical_symbols())[indexes]
#         max_pos = np.array(i.get_positions())[indexes]
#         new = Atoms(max_el, positions=max_pos, pbc=True, cell=i.get_cell())
#         red_frame.append(new)
#         sg_red = ase.spacegroup.get_spacegroup(new, symprec=1e-05)
#         sg = ase.spacegroup.get_spacegroup(i, symprec=1e-05)
#         sg = sg.no # not sure about this line, do I need number or space group name?
#         pg = point_group(sg)
#         pg_list.append(pg)
            
#         v_atom = sum([(4 / 3) * pi * (rad[i] / 100) ** 3 for i in range(len(rad))])
#         nn = np.zeros(len(el))
#         for l in range(len(el)):
#             try:
#                 nn[l] = CrystalNN(
#                     weighted_cn=False,
#                     distance_cutoffs=None,
#                     x_diff_weight=0.0,
#                     porous_adjustment=False,
#                 ).get_cn(frame_py, l)
#             except:
#                 pass
#         cn[k] = nn.mean()
#         rad = np.array(rad)
#         n_s = np.count_nonzero(rad == np.min(rad))
#         n_l = np.count_nonzero(rad == np.max(rad))
#         alpha[k] = np.min(rad) / np.max(rad)
#         x[k] = n_s / (n_s + n_l)
#         std_ratio[k] = np.std(rad)
#         packing[k] = v_atom / v
#     file = np.savez(
#     "../r4data/3DCD/geo.npz",
#     magic=magic,
#     cn=cn,
#     n_species=n_species,
#     natoms=natoms,
#     packing=packing,
#     alpha=alpha,
#     x=x,
#     std_ratio=std_ratio,
#     pg_list=pg_list,
# )


#         dict = {
#             r"$\alpha$": alpha,
#             r"$x$": x,
#             r"$\sigma_{radii}$": std_ratio,
#             r"$CN_{avg}$": cn,
#             r"$N_{atoms,cell}$": natoms.ravel(),
#             r"$PF$": packing,
#             "magic": magic.ravel(),
#             r"$N_{species}$": n_species,
#             "FCC$_{tot}$": fcc,
#             "FCC$_{red}$": fcc_red,
#             "HCP$_{tot}$": hcp,
#             "HCP$_{red}$": hcp_red,
#         }
#         df = pd.DataFrame(dict)

#         print(
#             "total structure: between HCP, "
#             + str(
#                 len(df.loc[((df["magic"] == 1) & (df[r"HCP$_{tot}$"] == 1))])
#                 / len(df.loc[((df[r"HCP$_{tot}$"] == 1))])
#             )
#             + " percent are magic, reduced structure: between HCP, "
#             + str(
#                 len(df.loc[((df["magic"] == 1) & (df[r"HCP$_{red}$"] == 1))])
#                 / len(df.loc[((df[r"HCP$_{red}$"] == 1))])
#             )
#             + " percent are magic",
#             "total structure: between FCC, "
#             + str(
#                 len(df.loc[((df["magic"] == 1) & (df[r"FCC$_{tot}$"] == 1))])
#                 / len(df.loc[((df[r"FCC$_{tot}$"] == 1))])
#             )
#             + " percent are magic, reduced structure: between FCC, "
#             + str(
#                 len(df.loc[((df["magic"] == 1) & (df[r"FCC$_{red}$"] == 1))])
#                 / len(df.loc[((df[r"FCC$_{red}$"] == 1))])
#             )
#             + " percent are magic",
#         )

#         if not DATA_MP.geo.exists():
#             file = np.savez(
#                 "../r4data/MP/geo.npz",
#                 magic=magic,
#                 cn=cn,
#                 n_species=n_species,
#                 natoms=natoms,
#                 fcc=fcc,
#                 hcp=hcp,
#                 packing=packing,
#                 alpha=alpha,
#                 x=x,
#                 std_ratio=std_ratio,
#             )

#         %run ./data.ipynb
#         npzfile = np.load(DATA_MP.geo, allow_pickle=True)

#         n_species = npzfile["n_species"]
#         dict = {
#             r"$\alpha$": npzfile["alpha"],
#             r"$x$": npzfile["x"],
#             r"$\sigma_{radii}$": npzfile["std_ratio"],
#             r"$N_{atoms,cell}$": npzfile["natoms"].ravel(),
#             r"$PF$": npzfile["packing"],
#             "magic": npzfile["magic"].ravel(),
#             r"$N_{species}$": npzfile["n_species"],
#         }
#         df = pd.DataFrame(dict)

#         fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(15, 5))

#         sns.histplot(
#             ax=ax1,
#             data=df,
#             x=r"$N_{species}$",
#             hue="magic",
#             stat="probability",
#             legend=False,
#             common_norm=False,
#             multiple="dodge",
#             shrink=0.8,
#             bins=int(max(n_species)),
#             kde=True,
#         )

#         sns.histplot(
#             ax=ax2,
#             data=df,
#             x=r"$x$",
#             hue="magic",
#             stat="probability",
#             legend=False,
#             common_norm=False,
#             multiple="dodge",
#             shrink=0.8,
#             kde=True,
#         )
#         ax2.set_ylabel(None)
#         ax2.set_xlim(0, 1)

#         sns.histplot(
#             ax=ax3,
#             data=df,
#             x=r"$\alpha$",
#             hue="magic",
#             stat="probability",
#             legend=False,
#             common_norm=False,
#             multiple="dodge",
#             shrink=0.8,
#             bins=20,
#             kde=True,
#         )
#         ax3.set_xlim(0, 1)
#         ax3.set_ylabel(None)

#         sns.histplot(
#             ax=ax4,
#             data=df,
#             x=r"$PF$",
#             hue="magic",
#             stat="probability",
#             legend=False,
#             common_norm=False,
#             multiple="dodge",
#             shrink=0.8,
#             kde=True,
#         )
#         ax4.set_ylabel(None)
#         ax4.set_xlim(0, 1)

#         sns.despine(left=True, bottom=True)

#         sg_list = []
#         pg_list = []
#         for j in range(len(my_frames)):
#             frame = my_frames[j]
#             sg = ase.spacegroup.get_spacegroup(frame, symprec=1e-05)
#             sg = sg.no
#             sg_list.append(sg)
#             sp_group = point_group(sg)
#             pg_list.append(pg)

#         df["point_group"] = pg_list
#         m = df.loc[df["magic"] == 1]
#         nm = df.loc[df["magic"] == 0]

#         inh_symm_m = inh_symm(m["point_group"])
#         inh_symm_whole = inh_symm(df["point_group"])
#         inh_symm_nm = inh_symm(nm["point_group"])

#         inh_symm_m = inh_symm_m.reindex(pg_list_order)
#         inh_symm_nm = inh_symm_nm.reindex(pg_list_order)
#         inh_symm_whole = inh_symm_whole.reindex(pg_list_order)

#         inh_symm_m = inh_symm_m / inh_symm_m.loc["1"]
#         inh_symm_nm = inh_symm_nm / inh_symm_nm.loc["1"]

#         inh_symm_m.columns = ["magic"]
#         inh_symm_m["non-magic"] = inh_symm_nm["point_group"]

#         fig = inh_symm_m.plot.barh(
#             figsize=(8, 9), color={"magic": "red", "non-magic": "mediumblue"}
#         )
#         plt.ylabel("point group")
#         plt.xlabel("proportion of inherited symmetries")
#         sns.despine(left=True, bottom=True)