# **Load Data**

## **Small test sample**

In [0]:
# Small test sample: a directed graph stored as an adjacency list.
# Each key maps to the list of nodes it links to ('a' has no outgoing links).
graph = {
    'a' : [],
    'b' : ['a', 'me'],
    'c' : ['b','d'],
    'd' : ['e'],
    'e' : ['c'],
    'me': ['b','you'],
    'you': ['d','e']
}

## **SNS data**

In [9]:
# Put nicknames.txt and links.txt somewhere this notebook can read them and
# change the two paths below to match your environment.

# Load nicknames.txt: the first tab-separated field of each line is used as the
# person id and the second as the person's name.
dic_person = {}
with open("/content/drive/My Drive/STEP/nicknames.txt") as f:
  for line in f:
    fields = line.rstrip('\n').split('\t')
    if len(fields) < 2:
      continue  # skip blank / incomplete lines instead of failing with IndexError
    dic_person[fields[0]] = fields[1]

# Load links.txt: each line holds two ids; the first person links to the second.
# The graph is keyed by name, so every known person starts with an empty list.
graph = {person_name: [] for person_name in dic_person.values()}
with open("/content/drive/My Drive/STEP/links.txt") as f:
  for line in f:
    fields = line.rstrip('\n').split('\t')
    if len(fields) < 2:
      continue  # skip blank / incomplete lines instead of failing with IndexError
    graph[dic_person[fields[0]]].append(dic_person[fields[1]])

graph['brent']  # 'brent' is my name :-)

['adrian',
 'lance',
 'cheryl',
 'barry',
 'austin',
 'jacqueline',
 'bruce',
 'duane',
 'alexander']

In [0]:
# Number of keys (people) in the graph dictionary
len(graph)

54

# **How many people are following each other?**  (e.g. A follows B and B follows A)

In [0]:
# Collect every unordered pair (A, B) where A links to B and B links back to A.
# Sorting the two names makes (A, B) and (B, A) collapse into one set entry.
pairs = set()
for person_1, linked_people in graph.items():
  for person_2 in linked_people:
    if person_1 in graph[person_2]:
      pairs.add(tuple(sorted((person_1, person_2))))

# The set already de-duplicates, so its size is the number of mutual pairs.
count_pair = len(pairs)

print('pair number: %d' % count_pair)
for pair in pairs:
  print(pair)

pair number: 219
('jamie', 'jeremy')
('cecil', 'jeremy')
('duane', 'gene')
('brett', 'darryl')
('brett', 'judith')
('frances', 'francis')
('jacqueline', 'kathleen')
('brett', 'jacqueline')
('barry', 'hugh')
('frances', 'judith')
('barry', 'jamie')
('brent', 'jacqueline')
('brett', 'herman')
('cecil', 'cynthia')
('diane', 'francis')
('francis', 'kathleen')
('darryl', 'herman')
('duane', 'lance')
('hugh', 'jacqueline')
('danielle', 'emma')
('barry', 'janice')
('frederick', 'jeremy')
('barry', 'kathleen')
('duane', 'howard')
('aaron', 'darryl')
('aaron', 'judith')
('diane', 'howard')
('hugh', 'johnnie')
('jamie', 'johnnie')
('emma', 'howard')
('eugene', 'janice')
('brett', 'johnnie')
('aaron', 'jon')
('darryl', 'jay')
('austin', 'brenda')
('alexander', 'cecil')
('debra', 'janice')
('jeremy', 'joan')
('brenda', 'frederick')
('herman', 'jamie')
('frances', 'johnnie')
('herman', 'jay')
('austin', 'jamie')
('frederick', 'johnnie')
('janice', 'jared')
('daniel', 'debra')
('emma', 'herman')
('f

# **Can “adrian” reach me by following the link?**

In [0]:
#Find if a path exists from current_node (adrian) to end_node (brent)
def dfs(visited_node, graph, current_node, end_node):
  """Return True if end_node can be reached from current_node by following links.

  visited_node: list of nodes that must not be explored again. Every newly
                discovered node is appended to it in place. The starting node
                itself is not appended unless a link leads back to it.
  graph:        dict mapping each node to the list of nodes it links to.
  current_node: node where the search starts.
  end_node:     node we are trying to reach.

  The search is depth-first but uses an explicit stack instead of recursion,
  so long chains of links cannot exceed Python's recursion limit.
  Raises KeyError if an explored node is not a key of graph.
  """
  # Successfully reached
  if current_node == end_node:
    return True
  # Set mirror of visited_node: constant-time membership tests while the
  # caller's list still receives every discovered node in discovery order.
  seen = set(visited_node)
  # Each stack entry is the iterator over the neighbours still to be tried
  # for one node on the current depth-first branch.
  branch = [iter(graph[current_node])]
  while branch:
    for neighbour_node in branch[-1]:
      if neighbour_node in seen:
        continue
      seen.add(neighbour_node)
      visited_node.append(neighbour_node)
      if neighbour_node == end_node:
        return True
      # Go one level deeper; the partially consumed iterator stays on the
      # stack and resumes where it stopped once this branch is exhausted.
      branch.append(iter(graph[neighbour_node]))
      break
    else:
      # Every neighbour of the deepest node was tried: step back.
      branch.pop()
  return False

# Check whether person_1 can reach person_2 by following links.
person_1, person_2 = 'adrian', 'brent'
Linked = dfs([], graph, person_1, person_2)
print('Linked :-)' if Linked else 'Not Linked :-(')

Linked :-)


# **Which people are isolated, i.e. cannot be reached from someone by following any chain of links?**

In [0]:
#Find all linked people of someone (current_node)
def dfs(graph, current_node):
  """Add every node reachable from current_node to the module-level visited_node.

  graph:        dict mapping each node to the list of nodes it links to.
  current_node: node where the traversal starts.

  The set `visited_node` must already exist at module level before calling;
  nodes already in it are not explored again. Returns None.

  An explicit stack replaces recursion so that long chains of links cannot
  exceed Python's recursion limit. Raises KeyError if an explored node is
  not a key of graph.
  """
  pending = [current_node]
  while pending:
    node = pending.pop()
    if node in visited_node:
      continue
    visited_node.add(node)
    # Neighbours are checked against visited_node when they are popped.
    pending.extend(graph[node])
      
person = 'brent'
# dfs() records every person reachable from `person` in this module-level set.
visited_node = set()
dfs(graph, person)
#find all isolated people: everyone in the graph that the traversal never reached
isolated_node = {key for key in graph if key not in visited_node}
if not isolated_node:
  print('Wow! You are linked to all the people!')
  print(len(visited_node), 'People Linked.')
else:
  print('You are not linked with:')
  print(isolated_node)

You are not linked with:
{'betty', 'carolyn', 'lawrence'}


# **Find the shortest path from person_1 to person_2?**

In [0]:
from collections import deque

#Find shortest path from one node to another
def bfs(visited_node, graph, current_node, end_node):
  """Return a shortest path from current_node to end_node, or None.

  visited_node: iterable of nodes that must not be expanded (may be empty).
                It is only read; the caller's object is not modified.
  graph:        dict mapping each node to the list of nodes it links to.
  current_node: node where the search starts.
  end_node:     node we are trying to reach.

  Returns the path as a list of nodes starting with current_node and ending
  with end_node, or None when end_node cannot be reached. The path always
  contains at least one link: when current_node equals end_node the result is
  the shortest cycle back to it (or None if there is none).

  Each node is queued at most once, so the search also terminates on graphs
  that contain cycles when end_node is unreachable.
  Raises KeyError if an expanded node is not a key of graph.
  """
  seen = set(visited_node) if visited_node else set()
  if current_node in seen:
    # The start itself is excluded from expansion, so nothing can be explored.
    return None
  seen.add(current_node)
  #store one path per discovered node, in breadth-first order
  queue = deque([[current_node]])
  while queue:
    current_path = queue.popleft()
    for neighbour in graph[current_path[-1]]:
      if neighbour == end_node:
        return current_path + [neighbour]
      if neighbour not in seen:
        # The first path that reaches a node is a shortest one; later,
        # longer routes to the same node are never queued.
        seen.add(neighbour)
        queue.append(current_path + [neighbour])
  return None
    

person_1, person_2 = 'adrian', 'brent'
# Start with no excluded nodes; the name visited_node then holds the returned
# path (or None when person_2 cannot be reached).
visited_node = bfs([], graph, person_1, person_2)
print(visited_node)

['adrian', 'emma', 'barry', 'brent']


# **Who is the furthest person?**

In [0]:
#Find the furthest person
from collections import deque
def bfs_distance(visited_node, graph, start_node):
  """Return the number of links from start_node to every reachable node.

  visited_node: set of nodes to skip; discovered nodes are added in place.
                start_node is added here as well, so the caller does not
                need to pre-seed it.
  graph:        dict mapping each node to the list of nodes it links to.
  start_node:   node the distances are measured from.

  Returns a dict {node: distance} that includes start_node with distance 0.
  Raises KeyError if an explored node is not a key of graph.
  """
  # Mark the start as visited: otherwise a cycle leading back to it would
  # overwrite its distance of 0.
  visited_node.add(start_node)
  #A dictionary to store distance for each person from a certain person
  distance = {start_node: 0}
  queue = deque([start_node])
  while queue:
    current_node = queue.popleft()
    for neighbour in graph[current_node]:
      if neighbour not in visited_node:
        visited_node.add(neighbour)
        queue.append(neighbour)
        #the neighbour is one link further away than the node it was found from
        distance[neighbour] = distance[current_node] + 1
  return distance

# Seed the visited set with the start person before running the search.
visited_node = {'brent'}
distance = bfs_distance(visited_node, graph, 'brent')
# Turn the {name: distance} dict into (name, distance) pairs ordered by
# distance, so the last pair carries the largest distance.
distance = list(distance.items())
distance.sort(key=lambda entry: entry[1])
max_distance = [entry for entry in distance if entry[1] == distance[-1][1]]
print('======================================')
print('Max distance:')
print(max_distance[0][1])
print('======================================')
print('Page name:')
for person in max_distance:
  print(person[0])

Max distance:
3
Page name:
herman
luis
cody
danielle
jeremy
daniel
frederick
jimmie
joan


# **Find all possible paths from person_1 to person_2?**

In [0]:
#!!! This code was tested on small test sample and it worked, however, for the large SNS data, it needs quite a long time for execution. Will check it again later.

#Find all possible paths from start_node (person_1) to end_node (person_2)
def dfs_all_paths(visited_node, graph, start_node, end_node):
  """Count the paths from start_node to end_node that never repeat a node.

  visited_node: list holding the nodes of the path currently being explored;
                it is restored to its incoming content before returning.
  graph:        dict mapping each node to the list of nodes it links to.
  start_node:   node the current step starts from.
  end_node:     node at which a path is complete.

  The result is accumulated in the module-level integer `count`, which must be
  initialised before the first call. Returns None.
  """
  global count
  visited_node.append(start_node)

  if start_node != end_node:
    #Not at end_node yet: extend the current path through every neighbour that
    #is not already part of it
    for next_node in graph[start_node]:
      if next_node in visited_node:
        continue
      dfs_all_paths(visited_node, graph, next_node, end_node)
  else:
    #Reached end_node: visited_node now holds one complete path, count it
    count += 1

  #Step back so the caller can try other paths starting from the previous node
  visited_node.pop()

# Driver Code
person_1 = 'brent'
person_2 = 'austin'
# dfs_all_paths() increments this module-level counter once per complete path.
count = 0
dfs_all_paths([], graph, person_1, person_2)
# The paths themselves are not collected, only their number, so report `count`.
print('There are %d possible paths from \'%s\' to \'%s\':' %(count,person_1,person_2))
print(count)

# **Does the graph have cycles?**

In [10]:
#Find if the graph contains cycles
def cycle(visited_node, current_node, graph, stack):
  """Depth-first walk from current_node that prints every cycle it closes.

  visited_node: list of nodes already explored; extended in place.
  current_node: node to explore.
  graph:        dict mapping each node to the list of nodes it links to.
  stack:        list of nodes on the current depth-first branch; current_node
                is pushed on entry and removed again before returning.

  A link to a node that is still on `stack` closes a cycle, which is printed
  as 'x -> y -> ... -> x'. Returns None.
  """
  visited_node.append(current_node)
  stack.append(current_node)
  for neighbour_node in graph[current_node]:
    if neighbour_node in stack:
      #Back link to a node on the current branch: the cycle runs from that
      #node's position on the stack to its top, then back to the node
      print('======================================')
      print('cycle exist!')
      loop_start = stack.index(neighbour_node)
      print(' -> '.join(stack[loop_start:] + [neighbour_node]))
    elif neighbour_node not in visited_node:
      cycle(visited_node, neighbour_node, graph, stack)
  stack.pop()

# Start a search from every person not yet explored so that parts of the graph
# unreachable from earlier starting points are checked as well.
visited_node, stack = [], []
for key in graph:
  if key in visited_node:
    continue
  cycle(visited_node, key, graph, stack)


cycle exist!
aaron -> frances -> aaron
cycle exist!
frances -> herman -> jack -> frances
cycle exist!
aaron -> frances -> herman -> jack -> aaron
cycle exist!
herman -> jack -> herman
cycle exist!
frances -> herman -> jack -> dennis -> frances
cycle exist!
dennis -> lance -> dennis
cycle exist!
frances -> herman -> jack -> dennis -> lance -> francis -> frances
cycle exist!
francis -> edwin -> francis
cycle exist!
edwin -> cecil -> edwin
cycle exist!
edwin -> cecil -> debra -> edwin
cycle exist!
cecil -> debra -> cecil
cycle exist!
edwin -> cecil -> debra -> austin -> edwin
cycle exist!
debra -> austin -> debra
cycle exist!
frances -> herman -> jack -> dennis -> lance -> francis -> edwin -> cecil -> debra -> austin -> brent -> adrian -> emma -> frances
cycle exist!
debra -> austin -> brent -> adrian -> emma -> debra
cycle exist!
aaron -> frances -> herman -> jack -> dennis -> lance -> francis -> edwin -> cecil -> debra -> austin -> brent -> adrian -> emma -> aaron
cycle exist!
lance -> 

# **What is the longest chain of links that can be formed starting from someone?**

In [0]:
#!!! This code was tested on small test sample and it worked, however, for the large SNS data, it needs quite a long time for execution. Will check it again later.

#Find the longest link that we can form from someone (start_node)
def find_furtherst_person(visited_node, graph, start_node):
  """Search for the longest path from start_node that never repeats a node.

  visited_node: list holding the nodes of the path currently being explored;
                it is restored to its incoming content before returning.
  graph:        dict mapping each node to the list of nodes it links to.
  start_node:   node the current step starts from.

  Results go to two module-level names that must be initialised before the
  first call: `distance` (int, node count of the longest path found so far)
  and `path` (list; a copy of the current path is appended each time a new
  longest path is found, so path[-1] is the longest one). Returns None.
  """
  global distance
  visited_node.append(start_node)
  #Record the path at every node, not only at nodes without outgoing links:
  #a path can also end at a node whose neighbours are all already on it
  if len(visited_node) > distance:
    distance = len(visited_node)
    path.append(visited_node.copy())
  for neighbour_node in graph[start_node]:
    if neighbour_node not in visited_node:
      find_furtherst_person(visited_node, graph, neighbour_node)
  #Step back so the caller can try its remaining neighbours
  visited_node.pop()

# Driver Code
person = 'brent'
# Module-level results filled in by find_furtherst_person():
# `distance` counts the nodes of the longest path, `path` collects the paths.
distance, path = 0, []
find_furtherst_person([], graph, person)
print('The longest link from \'%s\' is:' % person)
print(path[-1])
# A path with n nodes has n - 1 links.
print('The distance is %d' % (distance - 1))

The longest link from 'me' is:
['me', 'you', 'd', 'e', 'c', 'b', 'a']
The distance is 6
