In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split
from sklearn.model_selection import train_test_split

from synthetic import generate_dataset
from MLP import MLP_classifier
from GNN import GNN_classifier
from graphs import convert_graph_dataset

## Generate a synthetic dataset

Current generative process:
* For each feature, draw from a Gaussian with mean 0 and variance 1, and take the absolute value.
* Each class is defined by 3 characteristic features. For each characteristic feature, the value "signal" is added.
* Normalize the features
* Diffuse the values through the graph: for each edge $(a,b)$, the value $x_{a}$ of node $a$ is decreased by $\text{diffusion_coefficient} \times x_{a}/\text{degree}(a)$ and the value $x_{b}$ of node $b$ is increased by the same amount.
* Add Gaussian noise with mean 0 and variance "noise" on top of these values.

In [41]:
# Size of the synthetic classification problem.
n_classes = 3     # number of classes
n_features = 100  # number of features
n_obs = 500       # number of observations, per class

In [69]:
# Create a synthetic dataset and split it between train and test.
X, y, graph = generate_dataset(
    n_classes,
    n_obs,
    n_features,
    nb_edges=3,
    nb_characteristic_features=3,
    signal=1.0,
    diffusion_coefficient=0.5,
    model="ER",
    noise=0.2,
)

# Hold out 33% of the observations for testing. Fixing random_state makes the
# split itself repeatable for a given (X, y) instead of changing on every run.
# NOTE(review): generate_dataset is presumably random as well; seed whatever
# RNG it uses if the whole experiment must be reproducible -- TODO confirm.
split_seed = 0
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=split_seed
)

## Train and test a fully-connected MLP

Note: for now we actually use a single-layer perceptron (`n_layers=1`) rather than a deeper MLP, because it performs better than MLPs.

In [86]:
# Build PyTorch DataLoaders for the train and test splits.

# torch.as_tensor with an explicit dtype converts in a single step: features
# become float32 and labels become int64 directly, instead of going through
# the legacy torch.Tensor constructor (which first casts everything to float32).
tensor_x_train = torch.as_tensor(X_train, dtype=torch.float32)
tensor_x_test = torch.as_tensor(X_test, dtype=torch.float32)
tensor_y_train = torch.as_tensor(y_train, dtype=torch.long)
tensor_y_test = torch.as_tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(tensor_x_train, tensor_y_train)
test_dataset = TensorDataset(tensor_x_test, tensor_y_test)

# Only the training batches are shuffled; the test set is iterated in a fixed
# order so that evaluation runs are repeatable.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=32)

In [87]:
# n_layers=1: just a perceptron
classifier = MLP_classifier(
    n_features=n_features,
    n_classes=n_classes,
    n_layers=1,
)

In [None]:
# Train the MLP classifier on the training DataLoader for 800 epochs,
# with verbose output enabled.
classifier.fit(
    train_dataloader,
    epochs=800,
    verbose=True,
)

[1] loss: 1.090
[81] loss: 0.697
[161] loss: 0.614
[241] loss: 0.579
[321] loss: 0.559
[401] loss: 0.547
[481] loss: 0.539
[561] loss: 0.532
[641] loss: 0.528


In [None]:
# Evaluate the trained MLP classifier on the held-out test DataLoader.
classifier.eval(
    test_dataloader,
    verbose=True,
)

## Train and test a GNN

In [None]:
# Convert the train/test splits into graph datasets built on the shared `graph`.
# Note: this rebinds `train_dataloader` and `test_dataloader`, replacing the
# tensor-based loaders used for the MLP; re-run the MLP DataLoader cell before
# re-running the MLP cells.
train_dataloader = convert_graph_dataset(
    graph,
    X_train,
    y_train,
)
test_dataloader = convert_graph_dataset(
    graph,
    X_test,
    y_test,
)

In [None]:
# Instantiate the GNN classifier (this rebinds `classifier`, replacing the MLP).
classifier = GNN_classifier(
    n_features=n_features,
    n_classes=n_classes,
)

In [None]:
# Train the GNN classifier on the graph training data for 150 epochs,
# with verbose output enabled.
classifier.fit(
    train_dataloader,
    epochs=150,
    verbose=True,
)

In [None]:
# Evaluate the trained GNN classifier on the graph test data.
classifier.eval(
    test_dataloader,
    verbose=True,
)