# Exploring Algorithms

Using TigerGraph schema-free public algorithms at https://github.com/tigergraph/gsql-graph-algorithms


In [None]:
import os

import pandas as pd
# Set up pyTigerGraph access
import pyTigerGraph as tg

# Prepare to pull our GSQL algorithms from GitHub
import requests


# --- TigerGraph connection settings ------------------------------------------
# Each credential can be overridden from the environment so real secrets do not
# have to live in the notebook; the literals are only the fallbacks that were
# already in this file, so a run without those variables behaves as before.
# NOTE(review): credentials committed to a notebook should be rotated, and the
# literal fallbacks removed once the environment variables are in place.
host       = "http://protomolecule.magichome" # must include the protocol http or https
username   = "tigergraph"
password   = os.environ.get("TG_PASSWORD", "Tigergraph")
restppPort = 9000       # default 9000
gsPort     = 14240       # default 14240

# Settings for other graphs, kept for reference only (not active):
# graphName  = "Northwind" # leave blank to use Global
# mysecret   = "7vcupj59t7p4ji9t0k3s72nm4bheilig"
# token="4vsf41mpl57e6m19pf0c5q1eah6h6jm5"
#
# graphName = "Patents"
# mysecret = "iksujn9605n4ltklm012a6uiann87rrn"
# token = "15bg00akkb2ume2of295ld38h6ge9270"

# Active graph for the cells that follow. Assigned exactly once so the value in
# effect is obvious to the reader.
graphName = "social"
mysecret = os.environ.get("TG_SECRET_SOCIAL", "ifn4pc4jnkktim9u80ahhvebkj38g71n")
token = os.environ.get("TG_TOKEN_SOCIAL", "9icvamj2undegsfc72e5uph8udj2f3nf")

# First establish a basic connection using a secret.  Do *not* do this if you already have a token
# conn = tg.TigerGraphConnection(host=host, restppPort=restppPort, gsPort=gsPort, graphname=graphName, password=password)
# token = conn.getToken(mysecret, setToken=True, lifetime=None)[0]

# Next use the new token to establish a full access connection for use with GSQL.
# `username` is now passed explicitly instead of being declared and then ignored.
conn = tg.TigerGraphConnection(
    host=host,
    restppPort=restppPort,
    gsPort=gsPort,
    graphname=graphName,
    username=username,
    password=password,
    apiToken=token,
)
# token


# Algorithms

## PageRank - basic

- [PageRank on Wikipedia](https://en.wikipedia.org/wiki/PageRank#Algorithm)
- [TigerGraph algos link for PageRank](https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Centrality/pagerank)
- [How To Calculate PageRank](http://www.seopt.com/2007/10/how-to-calculate-pagerank/)
- [How is PageRank calculated?](https://www.scribd.com/document/54984557/How-is-PageRank-Calculated)

> What happens when I link to your page and you link to mine? Then I need your PageRank to calculate mine, but you must know mine to calculate yours. The calculation seems to break down.
>
> The answer is that the PageRank formula must be calculated several times–it must be reiterated. Only after several iterations can we find any one page PageRank.

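PageRank is a centrality algorithm: it scores each vertex by how many other vertices link to it and how highly those vertices are scored themselves.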

## TigerGraph's schema-free basic PageRank

In [None]:
# Download the tg_pagerank source from GitHub (curl in silent mode) and pipe it
# through pygmentize for syntax-highlighted display.
# NOTE(review): `-l gsql` needs a Pygments lexer registered under the name
# "gsql" — confirm one is installed in this environment.
!curl -s 'https://raw.githubusercontent.com/tigergraph/gsql-graph-algorithms/master/algorithms/Centrality/pagerank/tg_pagerank.gsql' | pygmentize -l gsql

In [None]:
# Fetch the tg_pagerank definition from GitHub, then create and install it on
# the active graph in one GSQL session.
pagerank_url = 'https://raw.githubusercontent.com/tigergraph/gsql-graph-algorithms/master/algorithms/Centrality/pagerank/tg_pagerank.gsql'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being sent to GSQL as if it
# were query source.
pagerank_response = requests.get(pagerank_url, timeout=30)
pagerank_response.raise_for_status()

# Join with explicit newlines so INSTALL QUERY always starts on its own line,
# even when the downloaded file does not end with a newline.
print(conn.gsql('\n'.join([
    'USE GRAPH ' + graphName,
    pagerank_response.text,
    'INSTALL QUERY tg_pagerank',
])))

In [None]:
# Run tg_pagerank over Person vertices connected by Friend edges and show the
# top-scoring vertices as a DataFrame.
#
# Query inputs; the trailing comments give the query's own defaults.
max_change    = 0.001     # default 0.001
max_iter      = 50        # default 25; this run uses 50 iterations
damping       = 0.85      # default 0.85
top_k         = 100       # default 100
print_accum   = True      # default TRUE
result_attr   = ""        # default ""
file_path     = ""        # default ""
display_edges = False     # default FALSE

# Other vertex/edge type pairs previously tried in this cell (presumably for
# the Northwind and Patents graphs — not valid on the active graph):
#   'Reps' / 'rep_order'
#   'Application' / 'has_parent'
vtype = "Person"
etype = "Friend"

# Every declared setting is passed through, so editing a value above actually
# changes the run (previously max_iter was hard-coded here and max_change was
# never sent).
pagerankj = conn.runInstalledQuery('tg_pagerank',
                                   params={'v_type': vtype,
                                           'e_type': etype,
                                           'max_change': max_change,
                                           'max_iter': max_iter,
                                           'damping': damping,
                                           'top_k': top_k,
                                           'print_accum': print_accum,
                                           'result_attr': result_attr,
                                           'file_path': file_path,
                                           'display_edges': display_edges
                                          })
pd.DataFrame(pagerankj[0]['@@top_scores_heap'])

In [None]:
# Get all vertex types for the graph the current connection points at; the
# result is the cell's displayed output.
conn.getVertexTypes()

In [None]:
# Re-run of tg_pagerank over Person vertices connected by Friend edges (same
# inputs as the earlier PageRank cell), shown as a DataFrame of top scores.
#
# Query inputs; the trailing comments give the query's own defaults.
max_change    = 0.001     # default 0.001
max_iter      = 50        # default 25; this run uses 50 iterations
damping       = 0.85      # default 0.85
top_k         = 100       # default 100
print_accum   = True      # default TRUE
result_attr   = ""        # default ""
file_path     = ""        # default ""
display_edges = False     # default FALSE

# Other vertex/edge type pairs previously tried in this cell (presumably for
# the Northwind and Patents graphs — not valid on the active graph):
#   'Reps' / 'rep_order'
#   'Application' / 'has_parent'
vtype = "Person"
etype = "Friend"

# Every declared setting is passed through, so editing a value above actually
# changes the run (previously max_iter was hard-coded here and max_change was
# never sent).
pagerankj = conn.runInstalledQuery('tg_pagerank',
                                   params={'v_type': vtype,
                                           'e_type': etype,
                                           'max_change': max_change,
                                           'max_iter': max_iter,
                                           'damping': damping,
                                           'top_k': top_k,
                                           'print_accum': print_accum,
                                           'result_attr': result_attr,
                                           'file_path': file_path,
                                           'display_edges': display_edges
                                          })
pd.DataFrame(pagerankj[0]['@@top_scores_heap'])

## Dudas

Here is something that mystifies me: only a few nodes in the graph are connected — currently just these:
<div>
<img src="images/simple-friend-graph-01.png" width="500"/>
</div>

**Only** Bob, Chase, Alex, Fiona and Justin have friends.  Justin, with two incoming edges and no outgoing edges, has a score of 1, as do **all** unconnected nodes.  It appears that this version of PageRank gives a score of 1 to all nodes with zero outdegree.  

In [None]:
# A cell renders only its last expression, so the vertex table has to be shown
# explicitly; otherwise its result is computed and silently discarded.
display(conn.getVertexDataframe('Person'))

# Edges for the Person vertex with id 'Fiona' (last expression, so it renders
# on its own).
conn.getEdgesDataframe('Person', 'Fiona')

## PageRank exploration on larger graphs

In [None]:
# Switch the notebook to the Customer360 graph by rebinding graphName, the
# credentials and `conn`. Host, ports, username and password are reused from
# the first configuration cell.
#
# The secret and token can be overridden from the environment; the literals are
# only the fallbacks that were already in this file.
# NOTE(review): credentials committed to a notebook should be rotated, and the
# literal fallbacks removed once the environment variables are in place.
graphName = "Customer360"
mysecret = os.environ.get("TG_SECRET_CUSTOMER360", "q6g6qa738ufhnm4fqmu7pl60ik6jcrld")
token = os.environ.get("TG_TOKEN_CUSTOMER360", "7vib70gv06eaesnocp5u3tvgmgmg8e1a")
# token = "9icvamj2undegsfc72e5uph8udj2f3nf"

# First establish a basic connection using a secret.  Do *not* do this if you already have a token
# conn = tg.TigerGraphConnection(host=host, restppPort=restppPort, gsPort=gsPort, graphname=graphName, password=password)
# token = conn.getToken(mysecret, setToken=True, lifetime=None)[0]

# Next use the new token to establish a full access connection for use with GSQL.
# `username` is passed explicitly instead of relying on the library default.
conn = tg.TigerGraphConnection(
    host=host,
    restppPort=restppPort,
    gsPort=gsPort,
    graphname=graphName,
    username=username,
    password=password,
    apiToken=token,
)
# token


In [None]:
# List all vertex types for the graph the current connection points at; the
# result is the cell's displayed output.
conn.getVertexTypes()


In [None]:
# Run the installed 'bookRecommendation' query on the current connection and
# display its raw result.
# NOTE(review): 11178 is presumably the id of a profile vertex in this graph —
# confirm, and consider giving it a named constant.
conn.runInstalledQuery('bookRecommendation', params={'inputProfile': 11178 })

# LaTeX examples (use a Markdown cell)


Euler's identity: $$ e^{i \pi} + 1 = 0 $$

$$
\frac{arg 1}{arg 2} \\
x^2\\
e^{i\pi}\\
A_i\\
B_{ij}\\
\sqrt[n]{arg}
$$


$$
\frac{arg 1}{arg 2} \
x^2
$$


Given : $\pi = 3.14$ , $\alpha = \frac{3\pi}{4}\, rad$
$$
\omega = 2\pi f 
$$
$$
f = \frac{c}{\lambda}\\
\lambda_0=\theta^2+\delta\\
\Delta\lambda = \frac{1}{\lambda^2}
$$

$\sum_{i=0}^n i^2 = \frac{(n^2+n)(2n+1)}{6}$


$$ 
\sum_{i=0}^n i^2 = \frac{(n^2+n)(2n+1)}{6}
$$
$$
1 + 2
$$


In [None]:
# from pygments import highlight
# from pygments.lexers import SqlLexer
# from pygments.formatters import HtmlFormatter
# from IPython.core.display import HTML
# from pygments.lexers import gsql
# # from pygments.formatters import gsql


# display(gsql, 'CREATE graph poodle()')

In [None]:
# Syntax-highlight a local GSQL schema file with pygmentize.
# NOTE(review): this is an absolute path under one user's home directory, so
# the cell fails wherever that path does not exist — consider a path relative
# to the notebook.
!pygmentize /home/gregj/projects/homelab/graph/tigergraph/solutions/patents/01-create-schema.gsql