# TigerGraph Data Science Library 101 - Classification Algorithms
This notebook shows examples of using the most common classification algorithms in the TigerGraph Graph Data Science Library. More detailed explanations of these algorithms can be found in the official documentation (https://docs.tigergraph.com/graph-ml/current/classification-algorithms/).


## Step 1: Setting things up
- Connect and Load data
- Visualize the graph schema 
- Get basic stats, e.g., counts of nodes & edges

### Create connection

In [19]:
import json
import queue

import pandas as pd
from pyTigerGraph import TigerGraphConnection

# Load the database connection settings from the JSON config file one directory up.
with open('../config.json', "r") as config_file:
    config = json.loads(config_file.read())

# Pick out only the fields the connection needs and open the connection with them.
connection_settings = {key: config[key] for key in ("host", "username", "password")}
conn = TigerGraphConnection(**connection_settings)

### Download movie dataset

In [20]:
from pyTigerGraph.datasets import Datasets

# Fetch the built-in "movie" example dataset; the download is skipped when a
# local copy already exists (see the message printed below this cell).
dataset_movie = Datasets("movie")

A folder with name movie already exists in ./tmp. Skip downloading.


### Ingest data

In [21]:
from pyTigerGraph.visualization import drawSchema

# Load the movie dataset into the database; ingestion is skipped when a graph
# with the same name already exists (see the message printed below this cell).
# NOTE(review): getToken is read from config.json — presumably it controls
# whether an auth token is requested; confirm against the pyTigerGraph docs.
conn.ingestDataset(dataset_movie, getToken=config["getToken"])

---- Checking database ----
A graph with name movie already exists in the database. Skip ingestion.


### Visualize schema

In [22]:
# Fetch the graph schema (force=True presumably bypasses any cached copy —
# confirm) and render it; the widget is the cell's displayed output.
schema = conn.getSchema(force=True)
drawSchema(schema)

CytoscapeWidget(cytoscape_layout={'name': 'circle', 'animate': True, 'padding': 1}, cytoscape_style=[{'selecto…

### Print graph stats

In [23]:
# Look up the number of vertices for every vertex type in the graph.
vertices = conn.getVertexTypes()
counts_by_type = [(vertex_type, conn.getVertexCount(vertex_type)) for vertex_type in vertices]

# Report each per-type count, then the grand total across all types.
for vertex, vertex_cnt in counts_by_type:
    print("Node count: ({} : {}) ".format(vertex, vertex_cnt))
total_count = sum(count for _, count in counts_by_type)
print("Total node count: ", total_count)

Node count: (Person : 7) 
Node count: (Movie : 9) 
Total node count:  16


In [24]:
import pprint
# Edge counts keyed by edge type. The total below adds up every entry, so
# reverse edge types (e.g. reverse_Likes) are included in it.
edge_count = conn.getEdgeCount()
total_edges = sum(edge_count.values())
print("Edges count: total ", total_edges)
pprint.pprint(edge_count)

Edges count: total  42
{'Likes': 15, 'Similarity': 12, 'reverse_Likes': 15}


In [25]:
# Featurizer handle used by the following cell to install and run algorithms.
feat = conn.gds.featurizer()

In [26]:
import multiprocessing 


# Install the tg_knn_cosine_cv algorithm once, up front, before any worker
# process tries to run it.
feat.installAlgorithm("tg_knn_cosine_cv")

# Define a worker function that runs the installed algorithm
def run_tg_knn_cosine_cv(process_id, process_return):
    """Run tg_knn_cosine_cv asynchronously once and report success on a queue.

    Exactly one boolean is always put on ``process_return``, even when the
    algorithm yields no result or an exception is raised, so a parent that
    expects one item per worker can never block forever waiting for it.

    Args:
        process_id: Identifier of this worker; only used in the printed message.
        process_return: Queue-like object with a ``put`` method that receives
            ``True`` when the run produced results and ``False`` otherwise.
    """
    params = {
        "v_type_set": ["Person"],
        "e_type_set": ["Likes"],
        "reverse_e_type_set": ["reverse_Likes"],
        "weight_attribute": "weight",
        "label": "known_label",
        "min_k": 2,
        "max_k": 5
    }

    succeeded = False
    try:
        asyncFeaturizerResult = feat.runAlgorithm("tg_knn_cosine_cv", params=params, runAsync=True)
        # wait() blocks until the asynchronous run is over; a truthy return
        # is treated as "results are available".
        if asyncFeaturizerResult.wait():
            results = asyncFeaturizerResult.results
            y = json.dumps(results, indent = 1)
            print("process({}) finished, Results:{}\n\n".format(process_id, y))
            succeeded = True
    finally:
        # Report the outcome on every path. Any exception still propagates
        # afterwards, so the worker's traceback stays visible.
        process_return.put(succeeded)

# Number of concurrent worker processes, each running the algorithm once.
multiprocessing_num = 10
process_returns = multiprocessing.Queue()

# Launch every worker; each one reports a boolean on the shared queue.
jobs = []
for i in range(multiprocessing_num):
    p = multiprocessing.Process(target=run_tg_knn_cosine_cv, args=(i, process_returns))
    jobs.append(p)
    p.start()

# Drain the queue BEFORE joining: a process that has put items on a queue may
# not terminate until they are consumed, so join-then-get can deadlock.
# The timeout plus liveness check stops the loop if a worker dies without
# reporting, instead of blocking forever on get().
process_results = []
while len(process_results) < multiprocessing_num:
    # Sample liveness before the get: if no worker was alive and the queue
    # is still empty after the timeout, no further result can arrive.
    workers_running = any(job.is_alive() for job in jobs)
    try:
        process_results.append(process_returns.get(timeout=1))
    except queue.Empty:
        if not workers_running:
            break

# All results are collected (or known to be missing); now reap the workers.
for job in jobs:
    job.join()

# A missing report counts as a failure because it adds nothing to the sum.
print("original multiprocessing num:{}, and success job num:{}".format(multiprocessing_num,sum(process_results)))
print("check success status:{}".format(multiprocessing_num==sum(process_results)))

process(4) finished, Results:[
 {
  "@@correct_rate_list": [
   0.25,
   0.25,
   0.25,
   0.25
  ]
 },
 {
  "best_k": 2
 }
]

process(0) finished, Results:[
 {
  "@@correct_rate_list": [
   0.25,
   0.25,
   0.25,
   0.25
  ]
 },
 {
  "best_k": 2
 }
]

process(5) finished, Results:[
 {
  "@@correct_rate_list": [
   0.25,
   0.25,
   0.25,
   0.25
  ]
 },
 {
  "best_k": 2
 }
]




process(6) finished, Results:[
 {
  "@@correct_rate_list": [
   0.25,
   0.25,
   0.25,
   0.25
  ]
 },
 {
  "best_k": 2
 }
]


process(8) finished, Results:[
 {
  "@@correct_rate_list": [
   0.25,
   0.25,
   0.25,
   0.25
  ]
 },
 {
  "best_k": 2
 }
]

process(2) finished, Results:[
 {
  "@@correct_rate_list": [
   0.25,
   0.25,
   0.25,
   0.25
  ]
 },
 {
  "best_k": 2
 }
]



process(3) finished, Results:[
 {
  "@@correct_rate_list": [
   0.25,
   0.25,
   0.25,
   0.25
  ]
 },
 {
  "best_k": 2
 }
]


process(9) finished, Results:[
 {
  "@@correct_rate_list": [
   0.25,
   0.25,
   0.25,
   0.25
  ]
 },
