In [None]:
import pickle
import torch
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import r2_score, mean_absolute_error, accuracy_score
import sys
sys.path.append("../scripts")
from models import *
from tg_functions import *
# Identifier of the graph directory under ../data/graphs/ to load.
graph_num = 17

# NOTE: pickle.load can execute arbitrary code; only open trusted files.
linegraph_path = f'../data/graphs/{graph_num}/linegraph_tg.pkl'
with open(linegraph_path, 'rb') as f:
    data = pickle.load(f)



In [None]:
import torch_geometric.transforms as T
import numpy as np
from torch_geometric.nn.models import Node2Vec

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Node2Vec model built over the graph's edge_index.
n2v = Node2Vec(
    data.edge_index,
    embedding_dim=64,
    walk_length=50,
    context_size=10,
    walks_per_node=10,
    num_negative_samples=1,
    p=2.0,
    q=0.5,
    sparse=True,  # sparse gradients, paired with SparseAdam below
)
n2v = n2v.to(device)

# The loader yields (positive, negative) random-walk batches consumed by train().
loader = n2v.loader(batch_size=128, shuffle=True)
optimizer = torch.optim.SparseAdam(list(n2v.parameters()), lr=0.0001)

def train():
    """Run one pass over `loader` and return the mean per-batch loss.

    Uses the notebook-level `n2v`, `loader`, `optimizer` and `device`.
    """
    n2v.train()
    batch_losses = []
    for pos_rw, neg_rw in loader:
        optimizer.zero_grad()
        batch_loss = n2v.loss(pos_rw.to(device), neg_rw.to(device))
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(loader)

# Train for 50 epochs, logging the mean loss returned by train() after each one.
epoch_log = 'Epoch: {:02d}, Loss: {:.4f}'
for i in range(1, 50 + 1):
    loss = train()
    print(epoch_log.format(i, loss))

In [None]:
from sklearn.linear_model import LinearRegression

# Reload the graph object from disk.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open(f'../data/graphs/{graph_num}/linegraph_tg.pkl', 'rb') as f:
    data = pickle.load(f)

y = data.y

# Node2Vec.forward takes an optional batch of *node indices*; calling it with
# no argument returns the full embedding matrix (one row per node). Passing
# edge_index here would index the embedding table with the 2 x E edge tensor
# instead of producing per-node embeddings.
x = n2v().detach().cpu().numpy()

# Keep only the entries whose target is strictly positive.
y_raw = data.y[data.y > 0]
# NOTE(review): the evaluation code that follows uses the stored features
# data.x, not the Node2Vec embeddings `x` computed above — confirm whether
# the embeddings were meant to be evaluated instead.
x_raw = data.x[data.y > 0].numpy()

def split_data(x, y):
    """Split features and targets into train / validation / test subsets.

    30% of the samples are held out first, then that hold-out is halved into
    validation and test sets. Both splits use a fixed random_state of 100.

    Returns:
        (X_train, X_valid, X_test, y_train, y_valid, y_test)
    """
    seed = 100
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        x, y, test_size=0.3, random_state=seed
    )
    X_valid, X_test, y_valid, y_test = train_test_split(
        X_holdout, y_holdout, test_size=0.5, random_state=seed
    )
    return X_train, X_valid, X_test, y_train, y_valid, y_test

# Regression on the continuous targets.
x_train, x_valid, x_test, y_train, y_valid, y_test = split_data(x_raw, y_raw)

model = LinearRegression()
model.fit(x_train, y_train)
print('Linear Regression')

# (features, targets) pairs in train / valid / test order.
splits = ((x_train, y_train), (x_valid, y_valid), (x_test, y_test))

# R^2 on each split.
for r2_label, (feats, target) in zip(("Train R2: ", "Valid R2: ", "Test R2: "), splits):
    print(r2_label, model.score(feats, target))

# Mean absolute error on each split.
for mae_label, (feats, target) in zip(("Train MAE: ", "Valid MAE: ", "Test MAE: "), splits):
    print(mae_label, mean_absolute_error(target, model.predict(feats)))


print('Binary classification')

# Two classes from a single boundary at 3000: torch.bucketize (right=False)
# maps targets <= 3000 to 0 and targets > 3000 to 1.
threshold = torch.tensor([3000])
y_bin = torch.bucketize(y_raw, boundaries=threshold).numpy()

model = LogisticRegression(solver='saga', max_iter=100_000, random_state=100)

x_train, x_valid, x_test, y_train, y_valid, y_test = split_data(x_raw, y_bin)
model.fit(x_train, y_train)

# Accuracy on each split, in train / valid / test order.
for acc_label, feats, target in (
    ('Train Accuracy: ', x_train, y_train),
    ('Valid Accuracy: ', x_valid, y_valid),
    ('Test Accuracy: ', x_test, y_test),
):
    print(acc_label, accuracy_score(target, model.predict(feats)))

print('Multi-class classification')
model = LogisticRegression(max_iter=int(1e5), solver='saga', random_state=100)

# Nine boundaries -> torch.bucketize assigns every target to one of ten
# classes (labels 0..9).
bins_10 = torch.tensor([400, 800, 1300, 2100, 3000, 3700, 4700, 7020, 9660])
y_multi = torch.bucketize(y_raw, boundaries=bins_10).numpy()

# Split on the ten-class labels. Using y_bin here would silently repeat the
# binary experiment and report its accuracy under the multi-class heading.
x_train, x_valid, x_test, y_train, y_valid, y_test = split_data(x_raw, y_multi)
model.fit(x_train, y_train)

print('Train Accuracy: ', accuracy_score(y_train, model.predict(x_train)))
print('Valid Accuracy: ', accuracy_score(y_valid, model.predict(x_valid)))
print('Test Accuracy: ', accuracy_score(y_test, model.predict(x_test)))