# TigerGraph Data Science Library 101 - Classification Algorithms
This notebook shows examples of using the most common classification algorithms in the TigerGraph Graph Data Science Library. More detailed explanations of these algorithms can be found in the official documentation (https://docs.tigergraph.com/graph-ml/current/classification-algorithms/).


## Step 1: Setting things up
- Connect and Load data
- Visualize the graph schema 
- Get basic stats, e.g., counts of nodes & edges

### Create connection

In [1]:
import json
import pandas as pd
from pyTigerGraph import TigerGraphConnection

# Database settings (host and credentials, among others) are kept in a JSON
# file one directory above this notebook.
with open('../config.json', "r") as config_file:
    config = json.load(config_file)

# Only the host/username/password entries are forwarded to the connection.
conn = TigerGraphConnection(
    **{key: config[key] for key in ("host", "username", "password")}
)

### Download movie dataset

In [2]:
from pyTigerGraph.datasets import Datasets

# Obtain the "movie" dataset by name; it is ingested into the database in the next step.
dataset_movie = Datasets("movie")

Downloading:   0%|          | 0/2623 [00:00<?, ?it/s]

### Ingest data

In [3]:
from pyTigerGraph.visualization import drawSchema

# Load the movie dataset into the database. The getToken flag is read from the
# config file rather than hard-coded here.
conn.ingestDataset(dataset_movie, getToken=config["getToken"])

---- Checking database ----
A graph with name movie already exists in the database. Skip ingestion.


### Visualize schema

In [4]:
# Fetch the graph schema and render it as a widget.
# NOTE(review): force=True presumably bypasses a cached schema copy - confirm against pyTigerGraph docs.
drawSchema(conn.getSchema(force=True))

CytoscapeWidget(cytoscape_layout={'name': 'circle', 'animate': True, 'padding': 1}, cytoscape_style=[{'selecto…

### Print graph stats

In [5]:
# Print one line per vertex type with its count, followed by the grand total.
vertices = conn.getVertexTypes()
total_count = 0
for vertex in vertices:
    vertex_cnt = conn.getVertexCount(vertex)
    total_count = total_count + vertex_cnt
    print("Node count: ({} : {}) ".format(vertex, vertex_cnt))
print("Total node count: ", total_count)

Node count: (Person : 7) 
Node count: (Movie : 9) 
Total node count:  16


In [6]:
import pprint
# Overall edge total first, then the per-edge-type breakdown.
edge_count = conn.getEdgeCount()
print("Edges count: total ", sum(edge_count.values()))
print(pprint.pformat(edge_count))

Edges count: total  30
{'Likes': 15, 'Similarity': 0, 'reverse_Likes': 15}


In [7]:
# Featurizer obtained from the connection; the cells below use it to install
# and run the graph algorithms.
feat = conn.gds.featurizer()

In [13]:
import multiprocessing 

def install_algos(algo_list):
    """Install every named algorithm through the module-level featurizer ``feat``.

    Args:
        algo_list: Iterable of algorithm names. Each name is passed to
            ``feat.installAlgorithm`` in iteration order.

    Returns:
        None. One progress line is printed before each installation.
    """
    for algo_name in algo_list:
        print("install: {}".format(algo_name))
        feat.installAlgorithm(algo_name)

def check_asyncFeaturizerResult(asyncFeaturizerResult, process_id, process_return, algoname):
    """Wait for an asynchronous algorithm run and report its outcome on a queue.

    Exactly one boolean is always put on ``process_return``, so a consumer that
    reads one item per worker never blocks on a worker that produced nothing.

    Args:
        asyncFeaturizerResult: Handle exposing ``wait()`` and a ``results``
            attribute.
        process_id: Identifier of the calling worker; used only in the printed
            message.
        process_return: Queue-like object with ``put``; receives ``True`` when
            ``wait()`` returned a truthy value and ``results`` is non-empty,
            otherwise ``False``.
        algoname: Algorithm name; used only in the printed message.

    Raises:
        Whatever ``wait()`` or ``json.dumps`` raises. ``False`` is still put on
        the queue before the exception propagates.
    """
    succeeded = False
    try:
        if asyncFeaturizerResult.wait():
            results = asyncFeaturizerResult.results
            rendered = json.dumps(results, indent = 1)
            print("process({}) finished, run {} Results:{}\n".format(process_id, algoname, rendered))
            # Judge success on the results themselves: the JSON text is never
            # an empty string, so testing it would always report success.
            succeeded = bool(results)
    finally:
        process_return.put(succeeded)


def run_algo1(process_id, process_return):
    """Start ``tg_knn_cosine_ss`` with ``runAsync=True`` and hand the returned
    handle to ``check_asyncFeaturizerResult``.

    ``process_id`` and ``process_return`` are forwarded unchanged.
    """
    algo_name = "tg_knn_cosine_ss"
    pending = feat.runAlgorithm(
        algo_name,
        params={
            "source": {"id": "Neil", "type": "Person"},
            "v_type_set": ["Person"],
            "e_type_set": ["Likes"],
            "reverse_e_type_set": ["reverse_Likes"],
            "weight_attribute": "weight",
            "label": "known_label",
            "top_k": 5,
            "print_results": True,
            "file_path": "",
            "result_attribute": "predicted_label",
        },
        runAsync=True,
    )
    check_asyncFeaturizerResult(pending, process_id, process_return, algo_name)

def run_algo2(process_id, process_return):
    """Start ``tg_knn_cosine_all`` with ``runAsync=True`` and hand the returned
    handle to ``check_asyncFeaturizerResult``.

    ``process_id`` and ``process_return`` are forwarded unchanged.
    """
    algo_name = "tg_knn_cosine_all"
    pending = feat.runAlgorithm(
        algo_name,
        params={
            "v_type_set": ["Person"],
            "e_type_set": ["Likes"],
            "reverse_e_type_set": ["reverse_Likes"],
            "weight_attribute": "weight",
            "label": "known_label",
            "top_k": 3,
            "print_results": True,
            "file_path": "",
            "result_attribute": "predicted_label",
        },
        runAsync=True,
    )
    check_asyncFeaturizerResult(pending, process_id, process_return, algo_name)
    
def run_algo3(process_id, process_return):
    """Start ``tg_knn_cosine_cv`` with ``runAsync=True`` and hand the returned
    handle to ``check_asyncFeaturizerResult``.

    ``process_id`` and ``process_return`` are forwarded unchanged.
    """
    algo_name = "tg_knn_cosine_cv"
    pending = feat.runAlgorithm(
        algo_name,
        params={
            "v_type_set": ["Person"],
            "e_type_set": ["Likes"],
            "reverse_e_type_set": ["reverse_Likes"],
            "weight_attribute": "weight",
            "label": "known_label",
            "min_k": 2,
            "max_k": 5,
        },
        runAsync=True,
    )
    check_asyncFeaturizerResult(pending, process_id, process_return, algo_name)
    
def run_algo4(process_id, process_return):
    """Start ``tg_cosine_nbor_ss`` with ``runAsync=True`` and hand the returned
    handle to ``check_asyncFeaturizerResult``.

    ``process_id`` and ``process_return`` are forwarded unchanged.
    """
    algo_name = "tg_cosine_nbor_ss"
    pending = feat.runAlgorithm(
        algo_name,
        params={
            "source": {"id": "Alex", "type": "Person"},
            "e_type_set": ["Likes"],
            "reverse_e_type_set": ["reverse_Likes"],
            "weight_attribute": "weight",
            "top_k": 5,
            "print_limit": 5,
            "print_results": True,
            "file_path": "",
            "similarity_edge": "Similarity",
        },
        runAsync=True,
    )
    check_asyncFeaturizerResult(pending, process_id, process_return, algo_name)
    
def run_algo5(process_id, process_return):
    """Start ``tg_jaccard_nbor_ss`` with ``runAsync=True`` and hand the returned
    handle to ``check_asyncFeaturizerResult``.

    ``process_id`` and ``process_return`` are forwarded unchanged.
    """
    algo_name = "tg_jaccard_nbor_ss"
    pending = feat.runAlgorithm(
        algo_name,
        params={
            "source": {"id": "Neil", "type": "Person"},
            "e_type": "Likes",
            "reverse_e_type": "reverse_Likes",
            "top_k": 5,
            "print_results": True,
            "similarity_edge_type": "Similarity",
            "file_path": "",
        },
        runAsync=True,
    )
    check_asyncFeaturizerResult(pending, process_id, process_return, algo_name)
    
def run_algo6(process_id, process_return):
    """Start ``tg_jaccard_nbor_ap_batch`` with ``runAsync=True`` and hand the
    returned handle to ``check_asyncFeaturizerResult``.

    ``process_id`` and ``process_return`` are forwarded unchanged.
    """
    algo_name = "tg_jaccard_nbor_ap_batch"
    pending = feat.runAlgorithm(
        algo_name,
        params={
            "top_k": 10,
            "v_type_set": ["Person"],
            "feat_v_type": ["Movie"],
            "e_type_set": ["Likes"],
            "reverse_e_type_set": ["reverse_Likes"],
            "similarity_edge": "Similarity",
            "src_batch_num": 50,
            "nbor_batch_num": 10,
            "print_results": True,
            "print_limit": 50,
            "file_path": "",
        },
        runAsync=True,
    )
    check_asyncFeaturizerResult(pending, process_id, process_return, algo_name)
    
def run_algo7(process_id, process_return):
    """Start ``tg_pagerank`` with ``runAsync=True`` and hand the returned handle
    to ``check_asyncFeaturizerResult``.

    ``process_id`` and ``process_return`` are forwarded unchanged.
    """
    algo_name = "tg_pagerank"
    pending = feat.runAlgorithm(
        algo_name,
        params={
            "v_type": "Person",
            "e_type": "Similarity",
            "max_change": 0.001,
            "maximum_iteration": 25,
            "damping": 0.85,
            "top_k": 100,
            "print_results": True,
            "result_attribute": "pagerank",
            "file_path": "",
            "display_edges": False,
        },
        runAsync=True,
    )
    check_asyncFeaturizerResult(pending, process_id, process_return, algo_name)
    
def run_algo8(process_id, process_return):
    """Start ``tg_article_rank`` with ``runAsync=True`` and hand the returned
    handle to ``check_asyncFeaturizerResult``.

    ``process_id`` and ``process_return`` are forwarded unchanged.
    """
    algo_name = "tg_article_rank"
    pending = feat.runAlgorithm(
        algo_name,
        params={
            "v_type": "Person",
            "e_type": "Similarity",
            "max_change": 0.001,
            "maximum_iteration": 25,
            "damping": 0.85,
            "top_k": 100,
            "print_results": True,
            "result_attribute": "article_rank",
            "file_path": "",
        },
        runAsync=True,
    )
    check_asyncFeaturizerResult(pending, process_id, process_return, algo_name)
    
def run_algo9(process_id, process_return):
    """Start ``tg_maximal_indep_set`` with ``runAsync=True`` and hand the
    returned handle to ``check_asyncFeaturizerResult``.

    ``process_id`` and ``process_return`` are forwarded unchanged.
    """
    algo_name = "tg_maximal_indep_set"
    pending = feat.runAlgorithm(
        algo_name,
        params={
            "v_type": "Person",
            "e_type": "Similarity",
            "maximum_iteration": 100,
            "print_results": True,
            "file_path": "",
        },
        runAsync=True,
    )
    check_asyncFeaturizerResult(pending, process_id, process_return, algo_name)
    
def run_algo10(process_id, process_return):
    """Start ``tg_greedy_graph_coloring`` with ``runAsync=True`` and hand the
    returned handle to ``check_asyncFeaturizerResult``.

    ``process_id`` and ``process_return`` are forwarded unchanged.
    """
    algo_name = "tg_greedy_graph_coloring"
    pending = feat.runAlgorithm(
        algo_name,
        params={
            "v_type_set": ["Person"],
            "e_type_set": ["Similarity", "Likes"],
            "max_colors": 999999,
            "print_color_count": True,
            "print_stats": True,
            "file_path": "",
        },
        runAsync=True,
    )
    check_asyncFeaturizerResult(pending, process_id, process_return, algo_name)
    
def check_multiprocessing_status(algoname_fun_dict):
    """Run every worker function in its own process and print a success summary.

    Each function is started as ``fun(i, queue)``, where ``i`` is its position
    in the mapping and ``queue`` is a shared ``multiprocessing.Queue`` on which
    the worker is expected to put one truthy/falsy outcome.

    Outcomes are read from the queue *before* the workers are joined, and each
    read uses a timeout. A worker that exits without reporting is therefore
    counted as a failure instead of blocking this function forever.

    Args:
        algoname_fun_dict: Mapping of algorithm name to worker function. Only
            the order and the functions are used here.

    Returns:
        None. Two summary lines are printed.
    """
    import queue  # local import: only the Empty exception is needed

    poll_seconds = 1  # how long a single queue read may wait

    process_returns = multiprocessing.Queue()
    jobs = []
    for i, fun in enumerate(algoname_fun_dict.values()):
        p = multiprocessing.Process(target=fun, args=(i, process_returns))
        jobs.append(p)
        p.start()

    # Drain the queue first; joining a process that still has queued items
    # buffered is a documented deadlock hazard.
    process_results = []
    workers_gone = False
    while len(process_results) < len(jobs):
        try:
            process_results.append(process_returns.get(timeout=poll_seconds))
        except queue.Empty:
            if workers_gone:
                # Every worker had already exited before this (empty) read,
                # so nothing more can arrive.
                break
            # One more read follows after this flag is set, to pick up an
            # item put between the timeout and the liveness check.
            workers_gone = not any(j.is_alive() for j in jobs)

    for j in jobs:
        j.join()

    print("original multiprocessing num:{}, and success job num:{}".format(len(algoname_fun_dict),sum(process_results)))
    print("check success status:{}".format(len(algoname_fun_dict)==sum(process_results)))
    

    
# Algorithm name -> worker function that runs it. Insertion order matters: it
# is the installation order and also determines the process id each worker
# receives from check_multiprocessing_status.
algoname_fun_dict = {
    "tg_knn_cosine_ss": run_algo1,
    "tg_knn_cosine_all": run_algo2,
    "tg_knn_cosine_cv": run_algo3,
    "tg_cosine_nbor_ss": run_algo4,
    "tg_jaccard_nbor_ss": run_algo5,
    "tg_jaccard_nbor_ap_batch": run_algo6,
    "tg_pagerank": run_algo7,
    "tg_article_rank": run_algo8,
    "tg_maximal_indep_set": run_algo9,
    "tg_greedy_graph_coloring": run_algo10,
}

# Install all algorithms first, then run them concurrently and summarise.
install_algos(algoname_fun_dict.keys())
check_multiprocessing_status(algoname_fun_dict)


intall: tg_knn_cosine_ss
intall: tg_knn_cosine_all
intall: tg_knn_cosine_cv
intall: tg_cosine_nbor_ss
intall: tg_jaccard_nbor_ss
intall: tg_jaccard_nbor_ap_batch
intall: tg_pagerank
intall: tg_article_rank
intall: tg_maximal_indep_set
intall: tg_greedy_graph_coloring
process(3) finished, run tg_cosine_nbor_ss Results:[
 {
  "neighbours": [
   {
    "v_id": "Kevin",
    "v_type": "Person",
    "attributes": {
     "neighbours.@sum_similarity": 0.14248
    }
   },
   {
    "v_id": "Jing",
    "v_type": "Person",
    "attributes": {
     "neighbours.@sum_similarity": 0.42173
    }
   }
  ]
 }
]

process(2) finished, run tg_knn_cosine_cv Results:[
 {
  "@@correct_rate_list": [
   0.25,
   0.25,
   0.25,
   0.25
  ]
 },
 {
  "best_k": 2
 }
]

process(4) finished, run tg_jaccard_nbor_ss Results:[
 {
  "Others": [
   {
    "v_id": "Kat",
    "v_type": "Person",
    "attributes": {
     "Others.@sum_similarity": 0.5
    }
   },
   {
    "v_id": "Kevin",
    "v_type": "Person",
    "attributes"