# Graphstorm Standalone Mode Demonstration

## Setup (option 0): 

Run the code in this notebook in a Python REPL instead of a Jupyter notebook. Follow the instructions for building a GraphStorm Docker image, then start a container from that image:
```bash
nvidia-docker run \
    -v /mnt/efs/gsf-data:/data \
    --network=host \
    --name test \
    -d graphstorm:0712
```

After you have a running Docker container, run the following to start the Python REPL:

```bash
docker exec -it test /usr/bin/python3
```

## Setup (option 1): 
Run this notebook in a GraphStorm Docker image with Jupyter support.

### Build a GraphStorm Docker image from source code

Please use the following command to build a Docker image from source:

```bash
git clone https://github.com/awslabs/graphstorm.git
cd /path-to-graphstorm/docker/
docker build --no-cache -f docker/Dockerfile.jupyter . -t graphstorm:jupyter
```

### Create a GraphStorm Jupyter Server

To start the image, run the following command:

```bash
nvidia-docker run --network=host -v /dev/shm:/dev/shm/ -d --name test graphstorm:jupyter
```

The Jupyter server, with GraphStorm installed, will be listening on port 8888.

### Connect notebook to the Jupyter server

TODO


In [None]:
%%bash
# Run node-prediction training through the gsgnn_np.py command-line script,
# using the MovieLens partition config and the ml_nc.yaml training config.
# ${GS_HOME:?...} aborts with a clear message when GS_HOME is unset or empty,
# and the quotes keep a path containing spaces as a single argument.
python3 "${GS_HOME:?GS_HOME must point to the GraphStorm source tree}/python/graphstorm/run/gsgnn_np/gsgnn_np.py" \
    --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json \
    --cf "${GS_HOME}/training_scripts/gsgnn_np/ml_nc.yaml"


## Node classification

In [None]:
import os
import torch as th
# import gs related stuffs
import graphstorm as gs
# from graphstorm.config import get_argument_parser
# from graphstorm.config import GSConfig
from graphstorm.trainer import GSgnnNodePredictionTrainer
from graphstorm.dataloading import GSgnnNodeTrainData, GSgnnNodeDataLoader
from graphstorm.eval import GSgnnAccEvaluator
from graphstorm.eval import GSgnnRegressionEvaluator
from graphstorm.model.utils import save_embeddings
from graphstorm.model import do_full_graph_inference
from graphstorm.utils import rt_profiler, sys_tracker
# setup_device
# from graphstorm.run.gsgnn_np import get_evaluator

In [None]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if th.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

In [None]:
# Initialise GraphStorm as a one-process cluster on localhost:
# this process is rank 0 of a world of size 1.
os.environ.update({
    'RANK': '0',
    'WORLD_SIZE': '1',
    'MASTER_ADDR': '127.0.0.1',
    'MASTER_PORT': '4321',
})
gs.initialize(backend='gloo', ip_config='127.0.0.1')

In [None]:
# Read the YAML training configuration and expose its 'gsf' section as attributes.
import yaml
from argparse import Namespace

config_path = 'graphstorm/training_scripts/gsgnn_np/ml_nc.yaml'
with open(config_path, mode="r", encoding='utf-8') as config_file:
    yaml_config = yaml.safe_load(config_file)

# Namespace turns the entries of the 'gsf' mapping into attributes of `config`.
config = Namespace(**yaml_config['gsf'])


In [None]:
# Load the graph partition config (JSON); its 'graph_name' entry is used
# later when the training data is constructed.
import json

part_config_path = '/data/movielen_100k_train_val_1p_4t/movie-lens-100k.json'
# Open the file in a context manager so the handle is closed as soon as
# parsing finishes instead of being left to the garbage collector.
with open(part_config_path, 'r', encoding='utf-8') as part_config_file:
    data_config = json.load(part_config_file)
# Display the parsed config as the cell output.
data_config

In [None]:
# Build the training dataset for the node type named in the YAML config.
node_task_config = config.node_classification
train_data = GSgnnNodeTrainData(
    data_config['graph_name'],
    part_config_path,
    train_ntypes=node_task_config['target_ntype'],
    eval_ntypes=node_task_config['target_ntype'],
    node_feat_field=None,  # no node feature field is passed in this demo
    label_field=node_task_config['label_field'],
)

In [None]:
# Model settings for this demo, applied on top of the values loaded from YAML.
model_settings = {
    # task
    'task_type': 'node_classification',
    'num_classes': 19,
    'multilabel': False,
    'multilabel_weights': None,
    'imbalance_class_weights': None,
    # encoder
    'model_encoder_type': 'rgcn',
    'hidden_size': 16,
    'num_layers': 1,
    'num_bases': -1,
    'dropout': 0.1,
    'use_self_loop': True,
    # node inputs and language-model related options (all disabled here)
    'node_feat_name': None,
    'node_lm_configs': None,
    'use_node_embeddings': False,
    'glem': None,
    # learning rates and l2-norm coefficients
    'lr': 0.01,
    'sparse_optimizer_lr': 0.01,
    'lm_tune_lr': 0.01,
    'alpha_l2norm': 0.,
    'wd_l2norm': 0.,
}
# `config` is an argparse.Namespace, so updating vars(config) sets its attributes.
vars(config).update(model_settings)

# Create the built-in node GNN model on the training graph, in training mode.
model = gs.create_builtin_node_gnn_model(train_data.g, config, train_task=True)

In [None]:
# Wrap the model in a node-prediction trainer bound to this process's rank.
rank = gs.get_rank()
trainer = GSgnnNodePredictionTrainer(model, rank, topk_model_to_save=1)


In [None]:
# Hand the device chosen earlier ('cuda:0' or 'cpu') to the trainer.
trainer.setup_device(device=device)

In [None]:
# Attach an accuracy evaluator to the trainer.
multilabel = config.multilabel
evaluator = GSgnnAccEvaluator(
    100,  # NOTE(review): presumably the evaluation frequency in iterations — confirm
    ['accuracy'],
    multilabel,
)
trainer.setup_evaluator(evaluator)


In [None]:
# Mini-batch loaders for the training and validation node sets; no test loader is used.
config.fanout = [4]
config.batch_size = 32

# Options shared by both loaders.
loader_options = {
    'fanout': config.fanout,
    'batch_size': config.batch_size,
    'device': device,
}
dataloader = GSgnnNodeDataLoader(train_data, train_data.train_idxs,
                                 train_task=True, **loader_options)
val_dataloader = GSgnnNodeDataLoader(train_data, train_data.val_idxs,
                                     train_task=False, **loader_options)
test_dataloader = None

In [None]:
# Prepare the model's input layer before training or inference.
# The input layer may pre-compute node features in this step when needed,
# for example pre-computing all BERT embeddings.
model.prepare_input_encoder(train_data)


In [None]:
# Train for two epochs with the training and validation loaders;
# save_model_path is None, so no checkpoint path is given.
config.num_epochs = 2
trainer.fit(
    train_loader=dataloader,
    val_loader=val_dataloader,
    test_loader=test_dataloader,
    num_epochs=config.num_epochs,
    save_model_path=None,
)