In [None]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import scipy
import scipy.sparse
import sklearn
import sklearn.cluster
import sklearn.datasets
import sklearn.feature_extraction.text
import sklearn.metrics
import sklearn.preprocessing
import sklearn.utils
import torch
import tqdm

# 1. Model definition

In [None]:
class AutoEncoder(torch.nn.Module):
    """One-hidden-layer auto-encoder: a linear encoder and a linear decoder, each followed by a sigmoid.

    Attributes:
        encoder: ``Linear(input_dim, hidden_dim)``.
        decoder: ``Linear(hidden_dim, input_dim)``.
    """

    def __init__(self, input_dim, hidden_dim):
        super(AutoEncoder, self).__init__()
        make_linear = torch.nn.Linear
        # The encoder is built before the decoder so the parameter order
        # (and therefore seeded initialisation) stays encoder-first.
        self.encoder = make_linear(input_dim, hidden_dim)
        self.decoder = make_linear(hidden_dim, input_dim)

    def forward(self, x):
        """Return the sigmoid reconstruction of ``x`` after a round trip through the hidden layer."""
        hidden_code = self.encoder(x).sigmoid()
        return self.decoder(hidden_code).sigmoid()

In [None]:
class GraphEncoder(torch.nn.Module):
    """Stack of sigmoid auto-encoders that can be trained layer by layer or end to end.

    Layer ``i`` is an ``AutoEncoder`` mapping the previous width to ``hidden_dims[i]``
    (the first layer starts from ``input_dim``). Encoding applies every encoder in
    order; decoding applies every decoder in reverse order.
    """

    def __init__(self, input_dim, hidden_dims):
        """Create one ``AutoEncoder`` per entry of ``hidden_dims``, chained on their widths."""
        super().__init__()
        self.autoencoders = torch.nn.ModuleList()
        prev_dim = input_dim
        for hidden_dim in hidden_dims:
            self.autoencoders.append(AutoEncoder(prev_dim, hidden_dim))
            prev_dim = hidden_dim

    def _encode_all(self, X):
        """Apply every encoder (with sigmoid) in stacking order."""
        for autoencoder in self.autoencoders:
            X = torch.sigmoid(autoencoder.encoder(X))
        return X

    def _decode_all(self, X):
        """Apply every decoder (with sigmoid) from the deepest layer back to the first."""
        for autoencoder in reversed(self.autoencoders):
            X = torch.sigmoid(autoencoder.decoder(X))
        return X

    def forward(self, X, train_mode, **kwargs):
        """Return ``(encoded, decoded)`` for the requested mode.

        Args:
            X: input batch; for ``'layerwise'`` it must already be the input of the
                selected layer.
            train_mode: ``'layerwise'`` (only the layer given by the ``layer_number``
                keyword is used) or ``'endtoend'`` (the whole stack is used).
            **kwargs: ``layer_number`` is read in ``'layerwise'`` mode.

        Raises:
            ValueError: if ``layer_number`` is missing or out of range in layerwise
                mode, or if ``train_mode`` is not one of the two supported values.
        """
        if train_mode == 'layerwise':
            layer_number = kwargs.get('layer_number', None)
            if layer_number is None or layer_number < 0 or layer_number >= len(self.autoencoders):
                raise ValueError("Invalid layer number for layerwise training")
            layer = self.autoencoders[layer_number]
            encoded = torch.sigmoid(layer.encoder(X))
            decoded = torch.sigmoid(layer.decoder(encoded))
            return encoded, decoded

        if train_mode == 'endtoend':
            encoded = self._encode_all(X)
            decoded = self._decode_all(encoded)
            return encoded, decoded

        # Previously an unknown mode fell through and returned None, which only
        # surfaced later as an unpacking error at the call site.
        raise ValueError(f"Unknown train_mode {train_mode!r}; expected 'layerwise' or 'endtoend'")

    @staticmethod
    def _sample_batch(X, batch_size):
        """Return a contiguous window of ``batch_size`` rows starting at a random offset."""
        start = torch.randint(0, X.shape[0] - batch_size + 1, (1,)).item()
        return X[start:start + batch_size]

    @staticmethod
    def _sparsity_penalty(encoded, rho, eps=1e-6):
        """Summed KL divergence between target activation ``rho`` and the batch-mean activation.

        The mean activation is clamped to ``[eps, 1 - eps]`` so a saturated sigmoid
        (exactly 0 or 1 in floating point) cannot produce inf/NaN in the logarithms.
        """
        rho_hat = torch.mean(encoded, dim=0).clamp(eps, 1 - eps)
        return torch.sum(rho * torch.log(rho / rho_hat) + (1 - rho) * torch.log((1 - rho) / (1 - rho_hat)))

    def _optimization_step(self, train_model, optimizer, X_batch, rho, beta, **forward_kwargs):
        """Run one optimiser step on ``X_batch``: sum-reduced MSE plus ``beta`` times the sparsity penalty."""
        optimizer.zero_grad()
        encoded, decoded = train_model(X_batch, **forward_kwargs)
        loss_1 = torch.nn.functional.mse_loss(decoded, X_batch, reduction='sum')
        loss_2 = self._sparsity_penalty(encoded, rho)
        loss = loss_1 + beta * loss_2
        loss.backward()
        optimizer.step()

    def train(self,
              X=True,
              compile=None,
              train_mode=None,
              iters=None,
              optimizer=None,
              rho=0.01,
              beta=1.0,
              batch_size=None):
        """Fit the stack on ``X``, or toggle training mode when called like ``nn.Module.train``.

        This method shadows ``torch.nn.Module.train(mode=True)``, which ``eval()``
        calls internally. When the first argument is a ``bool`` the call is
        forwarded to the base class so ``model.eval()`` / ``model.train()`` keep
        working; otherwise the arguments below describe a training run.

        Args:
            X: data matrix whose rows are samples (or a ``bool`` mode flag, see above).
            compile: ``True`` or the string ``"True"`` (case-insensitive) wraps the
                model with ``torch.compile``; any other value runs eagerly.
            train_mode: ``'layerwise'`` trains each auto-encoder in turn for ``iters``
                steps and then feeds its encoding to the next one; ``'endtoend'``
                trains the whole stack for ``iters`` steps.
            iters: number of optimisation steps (per layer in layerwise mode).
            optimizer: optimiser over this module's parameters.
            rho: target mean activation of the sparsity penalty.
            beta: weight of the sparsity penalty.
            batch_size: rows per batch; defaults to ``X.shape[0] - 1`` and is clamped
                to ``[1, X.shape[0]]``.

        Returns:
            ``self`` when used as a mode switch, otherwise ``None``.

        Raises:
            TypeError: if a training run is requested without ``train_mode``,
                ``iters`` or ``optimizer``.
            ValueError: if ``train_mode`` is unknown or ``X`` has no rows.
        """
        if isinstance(X, bool):
            return super().train(X)

        if train_mode is None or iters is None or optimizer is None:
            raise TypeError("train() needs train_mode, iters and optimizer when X is a data tensor")
        if train_mode not in ('layerwise', 'endtoend'):
            raise ValueError(f"Unknown train_mode {train_mode!r}; expected 'layerwise' or 'endtoend'")

        n_rows = X.shape[0]
        if n_rows == 0:
            raise ValueError("Cannot train on an empty data matrix")
        if batch_size is None:
            batch_size = n_rows - 1
        # A batch larger than the data made torch.randint fail (empty range) and a
        # one-row input produced an empty default batch; clamp to a usable size.
        batch_size = max(1, min(batch_size, n_rows))

        use_compile = compile is True or (isinstance(compile, str) and compile.strip().lower() == "true")
        train_model = torch.compile(self) if use_compile else self

        if train_mode == 'layerwise':
            for layer_number in range(len(self.autoencoders)):
                for _ in tqdm.tqdm(range(iters), desc=f"Training layer {layer_number}"):
                    X_batch = self._sample_batch(X, batch_size)
                    self._optimization_step(train_model, optimizer, X_batch, rho, beta,
                                            train_mode='layerwise', layer_number=layer_number)
                # The next layer trains on this layer's codes; no gradient is needed
                # here, so skip building the autograd graph altogether.
                with torch.no_grad():
                    X = torch.sigmoid(self.autoencoders[layer_number].encoder(X))
        else:
            for _ in tqdm.tqdm(range(iters)):
                X_batch = self._sample_batch(X, batch_size)
                self._optimization_step(train_model, optimizer, X_batch, rho, beta,
                                        train_mode='endtoend')

    @torch.no_grad()
    def encode(self, X):
        """Return the deepest-layer code of ``X`` (no gradients tracked)."""
        return self._encode_all(X)

    @torch.no_grad()
    def encode_decode(self, X):
        """Return the full-stack reconstruction of ``X`` (no gradients tracked)."""
        return self._decode_all(self._encode_all(X))

## 1.1. Kickstart test

In [None]:
# Load the digits data set as NumPy arrays: X holds one flattened image per row, y the labels.
X, y = sklearn.datasets.load_digits(return_X_y=True, as_frame=False)
# load_digits stores pixel intensities as values in 0..16 (not 0..255), so divide by 16
# to bring the inputs into [0, 1], the range the sigmoid decoder can reproduce.
X = X / 16.0

In [None]:
# Seed both RNGs so weight initialisation and batch sampling are repeatable.
torch.manual_seed(0)
np.random.seed(0)

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
NB_EPOCHS = 5000
BATCH_SIZE = 256
# as_tensor accepts both the NumPy array and an already-converted tensor, so re-running
# this cell does not trigger torch.tensor's copy-construct warning.
X = torch.as_tensor(X, dtype=torch.float32).to(DEVICE)

graph_encoder = GraphEncoder(input_dim=X.shape[1], hidden_dims=[32, 16, 8]).to(DEVICE)
optimizer = torch.optim.Adam(graph_encoder.parameters(), lr=0.01)
# One "epoch" is counted as X.shape[0] // BATCH_SIZE optimisation steps.
nb_iters = NB_EPOCHS * (X.shape[0] // BATCH_SIZE)
graph_encoder.train(
    X,
    # GraphEncoder.train enables torch.compile for the string "True"; the former value
    # "compile" never matched it, so training already ran eagerly. False says so explicitly.
    compile=False,
    train_mode="endtoend",
    iters=nb_iters,
    optimizer=optimizer,
    rho=0.01,
    beta=1.0,
    batch_size=BATCH_SIZE,
)

In [None]:
# Reconstruct every digit through the full encoder/decoder stack and bring the result to host memory.
Xh = graph_encoder.encode_decode(X).cpu()

In [None]:
# Top row: the first ten input digits; bottom row: their reconstructions.
fig, axes = plt.subplots(2, 10, figsize=(15, 4))
panel_rows = (("Original", X), ("enc-dec", Xh))
for row_index, (row_title, images) in enumerate(panel_rows):
    for column in range(10):
        ax = axes[row_index, column]
        ax.imshow(images[column].to("cpu").reshape(8, 8), cmap='gray')
        ax.set_title(row_title)
plt.show()

# 2. Test on paper benchmarks

## 2.1. Wine

In [None]:
# Loading Wine
# Features are min-max scaled per column, turned into a sample-by-sample cosine-similarity
# matrix, and each row of that matrix is divided by its sum.
X, Y_wine = sklearn.datasets.load_wine(return_X_y=True, as_frame=False)
wine_scaler = sklearn.preprocessing.MinMaxScaler()
X = wine_scaler.fit_transform(X)
S_wine = sklearn.metrics.pairwise.cosine_similarity(X, X)
wine_row_sums = S_wine.sum(axis=1, keepdims=True)
NTS_wine = S_wine / wine_row_sums
print("NTS_wine.shape:", NTS_wine.shape)
print("Y_wine.shape:", Y_wine.shape, "with classes:", np.unique(Y_wine))

In [None]:
# Seed both RNGs so weight initialisation and batch sampling are repeatable.
torch.manual_seed(0)
np.random.seed(0)

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
NB_EPOCHS = 5000
BATCH_SIZE = 178
X = torch.tensor(NTS_wine, dtype=torch.float32).to(DEVICE)

graph_encoder = GraphEncoder(input_dim=X.shape[1], hidden_dims=[178, 128, 64]).to(DEVICE)
optimizer = torch.optim.Adam(graph_encoder.parameters(), lr=0.01)
# One "epoch" is counted as X.shape[0] // BATCH_SIZE optimisation steps.
nb_iters = NB_EPOCHS * (X.shape[0] // BATCH_SIZE)
graph_encoder.train(
    X,
    # GraphEncoder.train enables torch.compile for the string "True"; the former value
    # "compile" never matched it, so training already ran eagerly. False says so explicitly.
    compile=False,
    train_mode="layerwise",
    iters=nb_iters,
    optimizer=optimizer,
    rho=0.01,
    beta=1.0,
    batch_size=BATCH_SIZE,
)

In [None]:
# Cluster the learned codes with k-means (one cluster per class) and score against the labels.
latent = graph_encoder.encode(X).cpu()
print("Latent shape:", latent.shape)
n_wine_classes = len(set(Y_wine))
kmeans = sklearn.cluster.KMeans(n_clusters=n_wine_classes, random_state=42)
y_pred = kmeans.fit_predict(latent)
score = sklearn.metrics.normalized_mutual_info_score(Y_wine, y_pred)
print(f"NMI: {score:.4f}")

## 2.2. 20 Newsgroups

In [None]:
torch.manual_seed(0)
# scikit-learn has no `sklearn.random` module (the old call raised AttributeError).
# Its estimators and helpers draw from NumPy's global RNG when no random_state is given,
# so seeding NumPy is what makes the sampling and shuffling below repeatable.
np.random.seed(0)

In [None]:
print("Loading 20 Newsgroups dataset...")
data_vectorized = sklearn.datasets.fetch_20newsgroups_vectorized(
    data_home="./20-newsgroup",
    subset="all",
)

print("Converting to TF-IDF representation...")
tfidf_transformer = sklearn.feature_extraction.text.TfidfTransformer()
X_tfidf = tfidf_transformer.fit_transform(data_vectorized.data)

print("Creating NG3, NG6, and NG9 subsets...")
# Number of documents drawn from each category.
n_samples = 200

# Category lists defining the three benchmark subsets.
ng3_cats = [
    "comp.graphics",
    "rec.sport.baseball",
    "talk.politics.guns",
]
ng6_cats = [
    "alt.atheism",
    "comp.sys.mac.hardware",
    "rec.motorcycles",
    "rec.sport.hockey",
    "soc.religion.christian",
    "talk.religion.misc",
]
ng9_cats = [
    "talk.politics.mideast",
    "talk.politics.misc",
    "comp.os.ms-windows.misc",
    "comp.sys.ibm.pc.hardware",
    "sci.electronics",
    "sci.crypt",
    "sci.med",
    "sci.space",
    "misc.forsale",
]

def create_ng_subset(ng_subset_cats):
    """Build a shuffled, row-normalised cosine-similarity matrix for the given categories.

    Reads the notebook globals ``X_tfidf``, ``data_vectorized`` and ``n_samples``.
    For each category, ``n_samples`` rows are drawn without replacement; the draws
    are stacked, compared pairwise with cosine similarity, and every row of the
    similarity matrix is divided by its sum.

    Args:
        ng_subset_cats: category names, each present in ``data_vectorized["target_names"]``.

    Returns:
        The result of ``sklearn.utils.shuffle`` on (normalised similarity matrix, labels),
        where a label is the position of the row's category in ``ng_subset_cats``.
    """
    all_targets = data_vectorized["target"]
    all_target_names = data_vectorized["target_names"]

    sampled_blocks = []
    for category in ng_subset_cats:
        category_rows = X_tfidf[all_targets == all_target_names.index(category)]
        picked = np.random.choice(category_rows.shape[0], n_samples, replace=False)
        sampled_blocks.append(category_rows[picked])

    stacked = scipy.sparse.vstack(sampled_blocks)
    similarity = sklearn.metrics.pairwise.cosine_similarity(stacked, stacked)
    normalised = similarity / similarity.sum(axis=1, keepdims=True)
    # Blocks were stacked category by category, so labels are runs of n_samples equal values.
    labels = np.repeat(np.arange(len(ng_subset_cats)), n_samples)
    return sklearn.utils.shuffle(normalised, labels)

# Build the three benchmark subsets in a fixed order (the order matters for the seeded RNG).
NTS_ng3, Y_ng3 = create_ng_subset(ng3_cats)
print(NTS_ng3.shape, Y_ng3.shape)

NTS_ng6, Y_ng6 = create_ng_subset(ng6_cats)
print(NTS_ng6.shape, Y_ng6.shape)

NTS_ng9, Y_ng9 = create_ng_subset(ng9_cats)
print(NTS_ng9.shape, Y_ng9.shape)

In [None]:
# Train
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
NB_EPOCHS = 5000
BATCH_SIZE = 178
X = torch.tensor(NTS_ng3, dtype=torch.float32).to(DEVICE)

graph_encoder = GraphEncoder(input_dim=X.shape[1], hidden_dims=[600, 512, 256]).to(DEVICE)
optimizer = torch.optim.Adam(graph_encoder.parameters(), lr=0.01)
# One "epoch" is counted as X.shape[0] // BATCH_SIZE optimisation steps.
nb_iters = NB_EPOCHS * (X.shape[0] // BATCH_SIZE)
graph_encoder.train(
    X,
    # GraphEncoder.train enables torch.compile for the string "True"; the former value
    # "compile" never matched it, so training already ran eagerly. False says so explicitly.
    compile=False,
    train_mode="layerwise",
    iters=nb_iters,
    optimizer=optimizer,
    rho=0.01,
    beta=1.0,
    batch_size=BATCH_SIZE,
)

# Test
# Cluster the learned codes with k-means (one cluster per class) and score against the labels.
latent = graph_encoder.encode(X).to('cpu')
print("Latent shape:", latent.shape)
kmeans = sklearn.cluster.KMeans(n_clusters=len(set(Y_ng3)), random_state=42)
y_pred = kmeans.fit_predict(latent)
score = sklearn.metrics.normalized_mutual_info_score(Y_ng3, y_pred)
print(f"NMI: {score:.4f}")

In [None]:
# Train
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
NB_EPOCHS = 5000
BATCH_SIZE = 178
X = torch.tensor(NTS_ng6, dtype=torch.float32).to(DEVICE)

graph_encoder = GraphEncoder(input_dim=X.shape[1], hidden_dims=[1200, 1024, 512, 256, 128]).to(DEVICE)
optimizer = torch.optim.Adam(graph_encoder.parameters(), lr=0.01)
# One "epoch" is counted as X.shape[0] // BATCH_SIZE optimisation steps.
nb_iters = NB_EPOCHS * (X.shape[0] // BATCH_SIZE)
graph_encoder.train(
    X,
    # GraphEncoder.train enables torch.compile for the string "True"; the former value
    # "compile" never matched it, so training already ran eagerly. False says so explicitly.
    compile=False,
    train_mode="layerwise",
    iters=nb_iters,
    optimizer=optimizer,
    rho=0.01,
    beta=1.0,
    batch_size=BATCH_SIZE,
)

# Test
# Cluster the learned codes with k-means (one cluster per class) and score against the labels.
latent = graph_encoder.encode(X).to('cpu')
print("Latent shape:", latent.shape)
kmeans = sklearn.cluster.KMeans(n_clusters=len(set(Y_ng6)), random_state=42)
y_pred = kmeans.fit_predict(latent)
score = sklearn.metrics.normalized_mutual_info_score(Y_ng6, y_pred)
print(f"NMI: {score:.4f}")

In [None]:
# Train
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
NB_EPOCHS = 5000
BATCH_SIZE = 178
X = torch.tensor(NTS_ng9, dtype=torch.float32).to(DEVICE)

# NOTE(review): these widths are identical to the NG3 cell although the NG9 input has
# 9 * n_samples columns; the Wine, NG3 and NG6 cells all start with a layer as wide as
# their input. Possibly a copy-paste leftover; confirm the intended architecture.
graph_encoder = GraphEncoder(input_dim=X.shape[1], hidden_dims=[600, 512, 256]).to(DEVICE)
optimizer = torch.optim.Adam(graph_encoder.parameters(), lr=0.01)
# One "epoch" is counted as X.shape[0] // BATCH_SIZE optimisation steps.
nb_iters = NB_EPOCHS * (X.shape[0] // BATCH_SIZE)
graph_encoder.train(
    X,
    # GraphEncoder.train enables torch.compile for the string "True"; the former value
    # "compile" never matched it, so training already ran eagerly. False says so explicitly.
    compile=False,
    train_mode="layerwise",
    iters=nb_iters,
    optimizer=optimizer,
    rho=0.01,
    beta=1.0,
    batch_size=BATCH_SIZE,
)

# Test
# Cluster the learned codes with k-means (one cluster per class) and score against the labels.
latent = graph_encoder.encode(X).to('cpu')
print("Latent shape:", latent.shape)
kmeans = sklearn.cluster.KMeans(n_clusters=len(set(Y_ng9)), random_state=42)
y_pred = kmeans.fit_predict(latent)
score = sklearn.metrics.normalized_mutual_info_score(Y_ng9, y_pred)
print(f"NMI: {score:.4f}")

## 2.3. DIP

## 2.4. BioGrid