In [2]:
from typing import Dict

import numpy as np

In [63]:
class Embedding(object):
    """Embedding matrix loaded from a flat binary file of float32 values.

    The file is read with ``np.fromfile`` and reshaped to
    ``(n_rows, dimensions)``. An optional index file maps string keys to row
    numbers so rows can be fetched by key as well as by position.
    """

    def __init__(self, embedding_path: str, dimensions: int, index_path: str = None):
        """Load the embeddings and, if ``index_path`` is given, the key index.

        Args:
            embedding_path: Path of the raw float32 file.
            dimensions: Number of floats per embedding row.
            index_path: Optional path of a text file with one
                ``<row>,<key>`` entry per line.
        """
        self.dimensions = dimensions
        self.embeddings = self.load_embeddings(embedding_path)
        self.index: Dict[str, int] = {}
        if index_path:
            self.load_index(index_path)

    def load_embeddings(self, file_name: str) -> np.ndarray:
        """Read ``file_name`` as float32 and reshape it to 2-D.

        Also records the resulting ``[n_rows, dimensions]`` list in
        ``self.embedding_shape``.

        Raises:
            AssertionError: If the number of floats in the file is not a
                multiple of ``self.dimensions``.
        """
        print("Loading embeddings...")
        embeddings = np.fromfile(file_name, dtype=np.float32)
        length = embeddings.shape[0]
        assert length % self.dimensions == 0, (
            f"The number of floats ({length}) in the embeddings is not divisible by "
            f"the number of dimensions ({self.dimensions})!"
        )
        # Integer division: the assertion above guarantees an exact quotient.
        embedding_shape = [length // self.dimensions, self.dimensions]
        self.embedding_shape = embedding_shape
        embeddings = embeddings.reshape(embedding_shape)
        print(f"Done loading embeddings (shape: {embeddings.shape}).")
        return embeddings

    def load_index(self, index_path: str) -> None:
        """Populate ``self.index`` from a ``<row>,<key>`` text file.

        Each line is split on the first comma only, so keys may themselves
        contain commas. Blank lines are skipped.
        """
        print("Loading uri index...")
        with open(index_path, "r") as file:
            # Iterate the file lazily instead of materialising every line.
            for raw_line in file:
                line = raw_line.strip()
                if not line:
                    continue
                index, uri = line.split(",", 1)
                self.index[uri] = int(index)
        print(f"Done loading {len(self.index)} items.")

    def __getitem__(self, item) -> np.ndarray:
        """Return the row(s) for ``item``.

        A string is resolved through ``self.index`` when an index has been
        loaded; anything else is passed straight to the underlying array.
        """
        if self.index and isinstance(item, str):
            return self.embeddings[self.index[item]]
        return self.embeddings[item]

    def save_emb(self, path):
        """Save the full 2-D embedding matrix to ``path`` with ``np.save``."""
        # The matrix is already 2-D; stacking rows one at a time was quadratic,
        # failed on an empty matrix and flattened a single-row matrix to 1-D.
        np.save(path, self.embeddings)

In [64]:
# Raw binary embedding file; Embedding.load_embeddings reads it as float32.
embedding_file = "/auto/grad_space/zihend1/Share2ZHD/brg/brg.bin"

In [65]:
# Each embedding row holds 32 floats.
embeddings = Embedding(embedding_file, dimensions=32)

Loading embeddings...
Done loading embeddings (shape: (2329, 32)).


In [66]:
embeddings

<__main__.Embedding at 0x7f1b307e2c50>

In [67]:
# Write the loaded embedding matrix to a .npy file.
embeddings.save_emb("/auto/grad_space/zihend1/Share2ZHD/brg/Embedding/corr_0_1_VERSE_32.npy")
    

In [42]:
embeddings[0]

array([ 0.02037145,  0.13432764,  0.01309271,  0.39039606, -0.76974106,
        1.0238374 , -0.58959   ,  0.41066727,  0.01498546,  0.7730111 ,
       -0.2811818 ,  0.99664915, -0.11794065, -0.5930427 ,  0.6803926 ,
        0.7091467 ,  0.33965772, -0.29503277,  0.13692263, -0.17506674,
        0.26678145, -0.14825505,  0.8479237 ,  0.762377  , -0.63523185,
        0.79347587,  0.5205625 ,  0.3945681 ,  0.17571825,  0.3311888 ,
        0.22888757, -0.29438573], dtype=float32)

In [9]:
import networkx as nx

In [28]:
# Dataset name: used to build the correlation-matrix path and the edgelist
# filename in the cells below.
# data = "hfc"
data = "brg"

In [29]:
# Location of the correlation matrix for the selected dataset.
# path = "/auto/grad_space/zihend1/"+data+"/Matrix/corr_0_1.npy"
path = f"/auto/grad_space/zihend1/Share2ZHD/{data}/Matrix/corr_0_1.npy"

In [30]:
# Load the correlation matrix; it is used below as a weighted adjacency matrix.
corr = np.load(path)

In [31]:
corr.shape

(2329, 2329)

In [32]:
np.where(corr==0)

(array([   0,    0,    0, ..., 2328, 2328, 2328]),
 array([  48,   59,   66, ..., 2283, 2284, 2285]))

In [33]:
# Wrap each matrix entry in a one-field record so networkx stores it under the
# "weight" edge attribute.
dt = [("weight", float)]
A = np.array(corr, dtype=dt)
# nx.from_numpy_matrix was removed in networkx 3.0; from_numpy_array accepts
# the same structured array and uses its field names as edge attribute keys.
G = nx.from_numpy_array(A)

In [34]:
G[0][1]

{'weight': 0.9715059084824061}

In [35]:
# Write the weighted edge list to "<data>.weighted.edgelist" in the working directory.
edgelist_file = f"{data}.weighted.edgelist"
nx.write_weighted_edgelist(G, edgelist_file)

In [25]:
# with open('weighted_edgelist.txt', 'w') as f:
#     for i in range(corr.shape[0]):
#         for j in range(corr.shape[1]):
#             if corr[i][j]>0:
#                 f.write(str(i)+" "+str(j)+" "+str(corr[i][j])+"\n")

In [6]:
# NOTE(review): this path has no "Share2ZHD" segment, unlike the other cells,
# and points at a "_temp" file rather than the file written by save_emb above.
# Confirm this is the intended file to inspect.
a=np.load("/auto/grad_space/zihend1/brg/Embedding/corr_0_1_VERSE_32_temp.npy")

In [7]:
a.shape

(10, 32)