In [3]:
import os
import networkx as nx
import matplotlib.pyplot as plt
from scipy.spatial import cKDTree
import pickle
import numpy as np

# Output folder that will hold one pickled graph per structure.
directory = 'all_graphs_gb_atoms'

# Make sure the folder is there, and report which case we hit.
if os.path.exists(directory):
    print("Directory already exists:", directory)
else:
    os.makedirs(directory)
    print("Directory created:", directory)

# Read the pickled collection of structures from the parent directory.
# NOTE: pickle.load can execute arbitrary code, so only open files you trust.
with open('../gbs_ase.pickle', 'rb') as pickle_file:
    my_ase = pickle.load(pickle_file)

def distance(location1, location2):
    """Return the straight-line (Euclidean) distance between two points.

    The two coordinate sequences are paired element-by-element with ``zip``,
    so any trailing coordinates of the longer sequence are ignored.
    """
    squared_total = 0
    for coord_a, coord_b in zip(location1, location2):
        squared_total += (coord_a - coord_b) ** 2
    return squared_total ** 0.5

# Neighbor cutoff: two kept atoms are bonded when they lie within this distance
# (same length units as the stored positions -- presumably Angstrom, confirm).
radius = 3.74
# Atoms whose "c_csym" value exceeds this threshold become graph nodes.
# NOTE(review): presumably a centro-symmetry value used to pick out the
# grain-boundary atoms -- confirm against the data source.
csym_threshold = .1


def _build_neighbor_graph(positions, cutoff):
    """Build a distance-weighted neighbor graph from atom coordinates.

    Parameters
    ----------
    positions : array-like of shape (n_atoms, 3)
        Cartesian atom coordinates. The input is copied, never modified.
    cutoff : float
        Atoms within this distance of each other are connected.

    Returns
    -------
    networkx.Graph
        One node per atom (ids ``0 .. n_atoms - 1``, with a ``location``
        attribute holding the shifted coordinates) and one edge per neighbor
        pair carrying ``weight = 1 / distance``.

    Notes
    -----
    The neighbor search is periodic in x and y and effectively open in z
    (box length 1e10). The periodic box is taken from the extent of the
    given atoms, not from the simulation cell.
    NOTE(review): if the true cell lengths are available they would be a
    better choice for the box -- confirm.
    """
    positions = np.array(positions, dtype=float)

    # A periodic cKDTree needs every coordinate inside [0, boxsize).
    # Subtracting the minimum puts the lowest atom at 0 whatever its sign;
    # adding abs(minimum) only did so when the minimum was negative.
    positions -= positions.min(axis=0)

    # Box = atom extent plus a small pad so the largest coordinate stays
    # strictly below the box length; z is made non-periodic in practice.
    box = positions.max(axis=0) + .0001
    box[2] = 1e10

    graph = nx.Graph()
    for atom_id, location in enumerate(positions):
        graph.add_node(atom_id, location=location)

    tree = cKDTree(positions, boxsize=box)
    for j, neighbors in enumerate(tree.query_ball_point(positions, r=cutoff)):
        # Every pair shows up in both atoms' lists; keep it once (this also
        # drops the atom itself).
        later = [neighbor for neighbor in neighbors if neighbor > j]
        if not later:
            continue
        # Minimum-image separation, so a bond found through the periodic
        # boundary is weighted by the same distance the tree used to find it.
        delta = positions[later] - positions[j]
        delta -= box * np.round(delta / box)
        weights = 1 / np.sqrt((delta ** 2).sum(axis=1))
        for neighbor, weight in zip(later, weights.tolist()):
            graph.add_edge(j, neighbor, weight=weight)
    return graph


# Build and save one graph per structure.
for i in range(len(my_ase)):
    atoms = my_ase[i]
    keep = atoms.arrays["c_csym"].reshape(-1) > csym_threshold
    G = _build_neighbor_graph(atoms[keep].positions, radius)

    # Write into the folder prepared earlier in this cell, named by compute_id.
    with open(os.path.join(directory, f'{atoms.compute_id}.pkl'), 'wb') as f:
        pickle.dump(G, f)
    print(f"finished graph {atoms.compute_id}")


finished graph struct_S103e_fcc_N119_19_205_Al_M99_200714.3134
finished graph struct_S103e_fcc_N133_203_11_Al_M99_200714.2934
finished graph struct_S103e_fcc_N142_130_107_Al_M99_210622.63
finished graph struct_S103e_fcc_N167_282_99_Al_M99_210624.2015
finished graph struct_S103e_fcc_N176_106_195_Al_M99_210622.2199
finished graph struct_S103e_fcc_N18_60_89_Al_M99_210625.1551
finished graph struct_S103e_fcc_N18_n43_89_Al_M99_200714.782
finished graph struct_S103e_fcc_N199_629_97_Al_M99_210624.1414
finished graph struct_S103e_fcc_N19_29_31_Al_M99_200714.175
finished graph struct_S103e_fcc_N251_253_171_Al_M99_210624.1655


In [5]:
import os
from concurrent.futures import ProcessPoolExecutor

def _read_pickle(file_path):
    """Unpickle and return the single object stored in ``file_path``."""
    with open(file_path, 'rb') as fh:
        return pickle.load(fh)


# Folder containing the pickled graphs.
path = 'all_graphs_gb_atoms'
# Sort the directory listing so the graphs are always loaded in the same order.
graphs = np.sort(os.listdir(path))
path_graphs = [os.path.join(path, name) for name in graphs]
# One loaded object per entry, in the same order as ``graphs``.
g_list = [_read_pickle(graph_file) for graph_file in path_graphs]

In [7]:
from karateclub import Graph2Vec
# Fit Graph2Vec with its default settings on the loaded graphs.
model = Graph2Vec()
model.fit(g_list)

# Fetch the learned embedding and report its dimensions.
X = model.get_embedding()
embedding_shape = np.shape(X)
print(embedding_shape)

(10, 128)
