In [1]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import scipy.sparse as sp
import itertools
import wandb
import os

In [3]:
# Log in to the wandb client before any run is created.
wandb.login()



True

In [2]:
# Open a wandb run in the "link_model" project; the config dict records the
# hyperparameters as run metadata.
# NOTE(review): learning_rate here is 0.01, while the training cell later in
# this notebook uses lr = 0.001 and starts its own run -- confirm which value
# is intended.
run = wandb.init(
    project="link_model",
    config=dict(learning_rate=0.01, epochs=10),
)

[34m[1mwandb[0m: Currently logged in as: [33mamaboh[0m ([33mavri[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
import dgl.data

# Load the Cora citation dataset bundled with DGL and take the graph stored
# at index 0; `g` is the full graph every later cell starts from.
dataset = dgl.data.CoraGraphDataset()
g = dataset[0]

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [5]:
# Split edge set for training and testing
# NOTE(review): no random seed is set in the visible cells, so the split
# differs between kernel runs.
u, v = g.edges()
num_nodes = g.number_of_nodes()
num_edges = g.number_of_edges()

# Shuffle the edge ids; the first 10% are held out as positive test edges.
eids = np.random.permutation(np.arange(num_edges))
test_size = int(len(eids) * 0.1)
train_size = num_edges - test_size
test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]]
train_pos_u, train_pos_v = u[eids[test_size:]], v[eids[test_size:]]

# Find all negative edges and split them for training and testing.
# The shape is passed explicitly so the adjacency matrix is always N x N;
# without it the matrix is sized from the largest node id that has an edge and
# the subtraction against np.eye(N) below fails on a shape mismatch.
adj = sp.coo_matrix(
    (np.ones(len(u)), (u.numpy(), v.numpy())),
    shape=(num_nodes, num_nodes),
)
# Entries are non-zero only where there is neither an edge nor a self pair.
adj_neg = 1 - adj.todense() - np.eye(num_nodes)
neg_u, neg_v = np.where(adj_neg != 0)

# Sample as many negatives as there are edges. replace=False guarantees that
# no negative pair is drawn twice, so the test and train negatives are disjoint.
neg_eids = np.random.choice(len(neg_u), num_edges, replace=False)
test_neg_u, test_neg_v = neg_u[neg_eids[:test_size]], neg_v[neg_eids[:test_size]]
train_neg_u, train_neg_v = neg_u[neg_eids[test_size:]], neg_v[neg_eids[test_size:]]

In [6]:
# Training graph: the full graph with the held-out positive test edges
# (the first `test_size` shuffled edge ids) removed.
train_g = dgl.remove_edges(g, eids[:test_size])

In [7]:
from dgl.nn import SAGEConv

# ----------- 2. create model -------------- #
# build a two-layer GraphSAGE model
class GraphSAGE(nn.Module):
    """Two stacked SAGEConv layers with a ReLU in between.

    Args:
        in_feats: size of the input node features.
        h_feats: size of the hidden representation, also used as the
            output size of the second layer.
    """

    def __init__(self, in_feats, h_feats):
        super().__init__()
        # Both layers are created with the 'mean' aggregator type.
        self.conv1 = SAGEConv(in_feats, h_feats, 'mean')
        self.conv2 = SAGEConv(h_feats, h_feats, 'mean')

    def forward(self, g, in_feat):
        """Run both layers on graph `g` and return the second layer's output.

        No activation is applied after the second layer.
        """
        hidden = F.relu(self.conv1(g, in_feat))
        return self.conv2(g, hidden)

In [8]:
# One graph per edge set (train/test x positive/negative). Each graph is built
# with the node count of the full graph `g`, so node ids stay consistent
# between them.
n_nodes = g.number_of_nodes()

train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=n_nodes)
train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=n_nodes)

test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=n_nodes)
test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=n_nodes)

In [9]:
import dgl.function as fn

class DotPredictor(nn.Module):
    """Scores every edge of a graph by the dot product of its endpoint features."""

    def forward(self, g, h):
        """Return one score per edge of `g`, given node features `h`.

        The features are attached inside `g.local_scope()`, so the 'h' and
        'score' fields written here do not persist on `g` after the call.
        """
        with g.local_scope():
            g.ndata['h'] = h
            # Writes an edge feature 'score' holding the dot product of the
            # source node's 'h' and the destination node's 'h'.
            g.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            edge_scores = g.edata['score']
        # Each edge carries a 1-element vector; take column 0 to flatten it.
        return edge_scores[:, 0]

In [10]:
# Encoder: its input width is read from the node features stored on the
# training graph; the hidden/output width is 16.
feat_dim = train_g.ndata['feat'].shape[1]
model = GraphSAGE(feat_dim, 16)
# Edge scorer. An MLP-based predictor (e.g. `MLPPredictor(16)`) could be used
# instead, but no such class is defined in the visible part of this notebook.
pred = DotPredictor()

def compute_loss(pos_score, neg_score):
    """Binary cross-entropy (with logits) over positive and negative edge scores.

    Args:
        pos_score: tensor of raw scores for positive edges (label 1).
        neg_score: tensor of raw scores for negative edges (label 0).

    Returns:
        A scalar tensor holding the mean loss over all scores.
    """
    scores = torch.cat([pos_score, neg_score])
    # Build the labels from the score tensors themselves so they share the
    # scores' device, dtype and shape instead of always being CPU float32.
    labels = torch.cat([torch.ones_like(pos_score), torch.zeros_like(neg_score)])
    return F.binary_cross_entropy_with_logits(scores, labels)

def compute_auc(pos_score, neg_score):
    """Area under the ROC curve for positive vs. negative edge scores.

    The original body called `roc_auc_score`, which is never imported in this
    notebook. The AUC is computed here from the rank-sum (Mann-Whitney U)
    statistic: the probability that a positive scores above a negative, with
    ties counted as one half.

    Args:
        pos_score: 1-D tensor of scores for positive edges.
        neg_score: 1-D tensor of scores for negative edges.

    Returns:
        The AUC as a Python float in [0, 1].

    Raises:
        ValueError: if either score tensor is empty (AUC is undefined).
    """
    from scipy.stats import rankdata

    # detach() lets this be called on scores that still require grad.
    pos = pos_score.detach().cpu().numpy()
    neg = neg_score.detach().cpu().numpy()
    n_pos, n_neg = len(pos), len(neg)
    if n_pos == 0 or n_neg == 0:
        raise ValueError("AUC is undefined without both positive and negative scores")

    # Average ranks over ties; the positives occupy the first n_pos entries.
    ranks = rankdata(np.concatenate([pos, neg]))
    u_stat = ranks[:n_pos].sum() - n_pos * (n_pos + 1) / 2.0
    return float(u_stat / (n_pos * n_neg))

In [27]:
# Training hyperparameters: number of passes over the training graph and the
# Adam learning rate. Both are read by the wandb config and the training loop.
epochs, lr = 10, 0.001

In [29]:
# Start the run that the training loop logs into. It is tagged so it can be
# filtered later, and the hyperparameters above are recorded as its config.
run = wandb.init(
    project="link_model",
    tags=['baseline', 'production'],
    config=dict(learning_rate=lr, epochs=epochs),
)

VBox(children=(Label(value='0.001 MB of 0.015 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.058553…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016752479166704384, max=1.0…

In [30]:
# The loss is computed inside the training loop below. A single optimizer
# updates the parameters of both the encoder and the edge predictor.
optimizer = torch.optim.Adam(itertools.chain(model.parameters(), pred.parameters()), lr=lr)

# ----------- 4. training -------------------------------- #
all_logits = []  # never appended to in this loop; kept in case later cells read it
for e in range(epochs):
    # forward: embed the nodes on the training graph, then score both edge sets
    h = model(train_g, train_g.ndata['feat'])
    pos_score = pred(train_pos_g, h)
    neg_score = pred(train_neg_g, h)
    loss = compute_loss(pos_score, neg_score)

    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report plain values: a Python float for the loss and a detached tensor
    # for the scores, so nothing handed to print/wandb stays attached to the
    # autograd graph.
    loss_value = loss.item()
    print('In epoch {}, loss: {}'.format(e, loss_value))
    wandb.log({"positive score": pos_score.detach(), "loss": loss_value})

In epoch 0, loss: 0.7030053734779358
In epoch 1, loss: 0.7018020749092102
In epoch 2, loss: 0.7007216811180115
In epoch 3, loss: 0.6997526288032532
In epoch 4, loss: 0.6988801956176758
In epoch 5, loss: 0.6980986595153809
In epoch 6, loss: 0.6973963379859924
In epoch 7, loss: 0.6967640519142151
In epoch 8, loss: 0.6961938738822937
In epoch 9, loss: 0.6956766843795776


## Using wandb to retrieve metrics

In [33]:
# Stop here unless the wandb API key is present in the environment.
assert os.getenv('WANDB_API_KEY'), 'You must set the WANDB_API_KEY environment variable'

In [34]:
# Open a run with job type "CI/CD" and declare the job artifact as its input.
run = wandb.init(job_type="CI/CD")

# The artifact reference is built as "<entity>/<project>/<name>:<alias>".
path = 'avri/link_model/'
model_name = 'job-https___github.com_amaboh_wandb_Mlops_project_Untitled.ipynb'
version = 'prod candidate'
artifact_ref = f'{path}{model_name}:{version}'

artifact = run.use_artifact(artifact_ref, type='job')

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
loss,█▇▆▅▄▃▃▂▁▁

0,1
loss,0.69568


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016752902083438433, max=1.0…

In [35]:
# Look up the run that logged the artifact. This rebinds `run`, which until
# now referred to the run opened by wandb.init(job_type="CI/CD").
run = artifact.logged_by()

In [44]:
# Query the wandb public API for runs carrying any of the given tags.
api = wandb.Api()
tags = ['baseline']

# The filter uses MongoDB-style query syntax: "$in" matches runs whose tags
# include at least one entry of `tags`.
# NOTE(review): the path below appends the job name to "entity/project";
# confirm that api.runs accepts this form rather than just 'avri/link_model'.
runs_filter = {"tags": {"$in": tags}}
baseline_runs = api.runs(
    'avri/link_model/job-https___github.com_amaboh_wandb_Mlops_project_Untitled.ipynb',
    runs_filter,
)

In [46]:
import wandb
import wandb.apis.reports as wr

[34m[1mwandb[0m: Thanks for trying out the Report API!
[34m[1mwandb[0m: For a tutorial, check out https://colab.research.google.com/drive/1CzyJx1nuOS4pdkXa2XPaRQyZdmFmLmXV
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Try out tab completion to see what's available.
[34m[1mwandb[0m:   ∟ everything:    `wr.<tab>`
[34m[1mwandb[0m:       ∟ panels:    `wr.panels.<tab>`
[34m[1mwandb[0m:       ∟ blocks:    `wr.blocks.<tab>`
[34m[1mwandb[0m:       ∟ helpers:   `wr.helpers.<tab>`
[34m[1mwandb[0m:       ∟ templates: `wr.templates.<tab>`
[34m[1mwandb[0m:       
[34m[1mwandb[0m: For bugs/feature requests, please create an issue on github: https://github.com/wandb/wandb/issues


In [53]:
# Entity and project that own the report and the runs it compares.
ENTITY = 'avri'
PROJECT = 'link_model'

# Create the report object; it is saved in a later cell.
report = wr.Report(
    project=PROJECT,
    entity=ENTITY,
    description="comparing runs of model",
    title='Compare Runs',
)

In [54]:
# Run set limited to the two named runs that should be compared.
compared_runs = wr.Runset(ENTITY, PROJECT, "Run Comparison").set_filters_with_python_expr(
    "Name in ['fallen-sky-5', 'apricot-shape-4']"
)
# One panel that compares the selected runs.
comparer_panel = wr.RunComparer(diff_only='split', layout={'w': 24, 'h': 15})

pg = wr.PanelGrid(runsets=[compared_runs], panels=[comparer_panel])

# Insert the grid as the second block, keeping the existing blocks around it,
# then save the report.
existing_blocks = report.blocks
report.blocks = existing_blocks[:1] + [pg] + existing_blocks[1:]
report.save()

In [55]:
# Display the URL of the saved report as the cell output.
report.url

'https://wandb.ai/avri/link_model/reports/Compare-Runs--Vmlldzo1MTg2NDc3'

--- Logging error ---
Traceback (most recent call last):
  File "/Users/ama/mambaforge/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/Users/ama/mambaforge/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 5] Input/output error
Call stack:
  File "/Users/ama/mambaforge/lib/python3.10/threading.py", line 973, in _bootstrap
    self._bootstrap_inner()
  File "/Users/ama/mambaforge/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/Users/ama/mambaforge/lib/python3.10/site-packages/wandb/sdk/internal/internal_util.py", line 49, in run
    self._run()
  File "/Users/ama/mambaforge/lib/python3.10/site-packages/wandb/sdk/internal/internal_util.py", line 100, in _run
    self._process(record)
  File "/Users/ama/mambaforge/lib/python3.10/site-packages/wandb/sdk/internal/internal.py", line 279, in _process
    self._hm.handle(record)
  File "/Users/ama/mambaforge/lib/python3.10/site

--- Logging error ---
Traceback (most recent call last):
  File "/Users/ama/mambaforge/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/Users/ama/mambaforge/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 5] Input/output error
Call stack:
  File "/Users/ama/mambaforge/lib/python3.10/threading.py", line 973, in _bootstrap
    self._bootstrap_inner()
  File "/Users/ama/mambaforge/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/Users/ama/mambaforge/lib/python3.10/site-packages/wandb/sdk/service/streams.py", line 48, in run
    self._target(**self._kwargs)
  File "/Users/ama/mambaforge/lib/python3.10/site-packages/wandb/sdk/internal/internal.py", line 174, in wandb_internal
    logger.error(f"Thread {thread.name}:", exc_info=exc_info)
Message: 'Thread SenderThread:'
Arguments: ()
Thread SenderThread:
Traceback (most recent call last):
  File "/Users/ama/mambaforge/lib/pyth

BrokenPipeError: [Errno 32] Broken pipe
