# **Load Data**

## **Small test sample**

In [0]:
# Small test sample
graph = {
    'a' : [],
    'b' : ['a', 'me'],
    'c' : ['b','d'],
    'd' : ['e'],
    'e' : ['c'],
    'me': ['b','you'],
    'you': ['d','e']
}

## **SNS data**

In [147]:
#put the nicknames.txt and links.txt in the same directory with this notebook
#change the path to your data path

#Load nicknames.txt
dic_person = {}
f = open("/content/drive/My Drive/STEP/nicknames.txt")
lines = f.readlines()
for line in lines:
  person_id = line.split('\t',2)[0]
  person_name = line.split('\t',2)[1].split('\n',2)[0]
  dic_person[person_id] = person_name
f.close()

#Load links.txt
graph = {}
f = open("/content/drive/My Drive/STEP/links.txt")
lines = f.readlines()
for line in lines:
  id_1 = line.split('\t',2)[0]
  id_2 = line.split('\t',2)[1].split('\n',2)[0]
  if dic_person[id_1] not in graph.keys():
    graph[dic_person[id_1]] = [dic_person[id_2]]
  else:
    graph[dic_person[id_1]].append(dic_person[id_2])
f.close()

graph['brent']  # 'brent' is my name :-)

['adrian',
 'lance',
 'cheryl',
 'barry',
 'austin',
 'jacqueline',
 'bruce',
 'duane',
 'alexander']

# **How many people are following each other?**  (e.g. A follows B and B follows A)

In [116]:
count_pair=0
pairs=set()
for person_1 in graph.keys():
  for person_2 in graph[person_1]:
    if(person_1 in graph[person_2]):
      pair = tuple(sorted((person_1,person_2)))
      if pair not in pairs:
        count_pair+=1
        pairs.add(pair)


print('pair number: %d' %(count_pair))
for pair in pairs:
  print(pair)

pair number: 219
('diane', 'francis')
('jeremy', 'jimmie')
('barry', 'jay')
('francis', 'janice')
('daniel', 'kevin')
('johnnie', 'judith')
('frances', 'francis')
('hugh', 'jacqueline')
('gene', 'jeremy')
('brett', 'herman')
('brenda', 'gene')
('bruce', 'darryl')
('diane', 'helen')
('cody', 'emma')
('duane', 'eugene')
('francis', 'jay')
('frederick', 'jeremy')
('cecil', 'herman')
('brett', 'johnnie')
('cecil', 'debra')
('cody', 'janice')
('francis', 'lance')
('cecil', 'eugene')
('cecil', 'cynthia')
('debra', 'emma')
('duane', 'gene')
('adrian', 'alan')
('alexander', 'kathleen')
('alexander', 'jacqueline')
('barry', 'hugh')
('alexander', 'jared')
('aaron', 'emma')
('cody', 'danielle')
('emma', 'luis')
('brenda', 'kevin')
('cody', 'jeremy')
('austin', 'jamie')
('brett', 'jay')
('francis', 'herman')
('edwin', 'jay')
('aaron', 'jon')
('jon', 'judith')
('jacqueline', 'joel')
('jaime', 'joel')
('aaron', 'judith')
('aaron', 'francis')
('cecil', 'janice')
('dennis', 'francis')
('danielle', 'fr

# **Can “adrian” reach me by following the link?**

In [135]:
#Find if a path exists from start_node (adrian) to end_node (brent)
def dfs(visited_node, graph, start_node, end_node):
  # Successfully reached
  if start_node == end_node:
    return True
  # Haven't reached yet, continue exploring all the neighbour nodes of current node
  for neighbour_node in graph[start_node]:
    if neighbour_node not in visited_node:
      visited_node.append(neighbour_node)
      result = dfs(visited_node, graph, neighbour_node, end_node)
      if result:
        return True
  return False

person_1 = 'adrian'
person_2 = 'brent'
Linked = dfs([], graph, person_1, person_2)
if Linked:
  print('Linked :-)')
else:
  print('Not Linked :-(')

Linked :-)


# **Find all isolated people that someone cannot reach through any link he/she (or his/her friend) has?**

In [134]:
#Find all linked people of someone (start_node)
def dfs(graph, start_node):
  if start_node not in visited_node:
    visited_node.add(start_node)
    for neighbour_node in graph[start_node]:
      dfs(graph, neighbour_node)
      
person = 'austin'
visited_node = set()
isolated_node = set()
dfs(graph, person)
#find all isolated people
for key in graph.keys():
  if key not in visited_node:
    isolated_node.add(key)
if isolated_node:
  print(isolated_node)
else:
  print('Wow! You are linked to all the people!')
  print(len(visited_node), 'People Linked.')

Wow! You are linked to all the people!
51 People Linked.


# **Find the shortest path from person_1 to person_2?**

In [125]:
from collections import deque

#Find shortest path from start_node to end_node
def bfs(visited_node, graph, start_node, end_node):
  #store all the paths
  queue = deque([[start_node]])
  while len(queue) > 0:
    current_path = queue.popleft()
    current_node = current_path[-1]
    if current_node not in visited_node:
      neighbours = graph[current_node]
      for neighbour in neighbours:
        #form new path
        temp = current_path.copy()
        temp.append(neighbour)
        if neighbour == end_node:
          return temp
        queue.append(temp)
  return None
    

person_1 = 'adrian'
person_2 = 'brent'
visited_node = []
visited_node = bfs(visited_node, graph, person_1, person_2)
print(visited_node)

['adrian', 'emma', 'barry', 'brent']


# **Who is the furthest person?**

In [131]:
person = 'brent'
distance = 0
furthest_path = []


for key in graph.keys():
  if key != person:
    path = bfs([], graph, person, key)
    if path and len(path) >= distance:
      distance = len(path)
      furthest_path.append(path)

for path in furthest_path:
  if len(path) == distance:
    print('The furthest person is \'%s\'' %(path[-1]))
    print('The furthest path is: ', path)
    print('The distance is: ', len(path))

The furthest person is 'cody'
The furthest path is:  ['brent', 'adrian', 'emma', 'cody']
The distance is:  4
The furthest person is 'daniel'
The furthest path is:  ['brent', 'adrian', 'jamie', 'daniel']
The distance is:  4
The furthest person is 'danielle'
The furthest path is:  ['brent', 'adrian', 'emma', 'danielle']
The distance is:  4
The furthest person is 'frederick'
The furthest path is:  ['brent', 'adrian', 'jamie', 'frederick']
The distance is:  4
The furthest person is 'herman'
The furthest path is:  ['brent', 'adrian', 'emma', 'herman']
The distance is:  4
The furthest person is 'jeremy'
The furthest path is:  ['brent', 'adrian', 'emma', 'jeremy']
The distance is:  4
The furthest person is 'jimmie'
The furthest path is:  ['brent', 'lance', 'dennis', 'jimmie']
The distance is:  4
The furthest person is 'joan'
The furthest path is:  ['brent', 'austin', 'joel', 'joan']
The distance is:  4
The furthest person is 'luis'
The furthest path is:  ['brent', 'adrian', 'emma', 'luis']
Th

# **Find all possible paths from person_1 to person_2?**

In [155]:
#!!! This code was tested on small test sample and it worked, however, for the large SNS data, it needs quite a long time for execution. Will check it again later.

#Find all possible paths from start_node (person_1) to end_node (person_2)
def dfs_all_paths(visited_node, graph, start_node, end_node):
  visited_node.append(start_node)

  #When reach the end_node:
  #1. save the possible path
  #2. pop one item from visited_node so that we can search if there are other possbile paths starting from the previous node
  if start_node == end_node:
    possible_path = visited_node.copy()
    if possible_path not in possible_paths:
      possible_paths.append(possible_path)
    visited_node.pop()
  #Not reach the end_node: continue exploring all the neighbour nodes of current node
  else:
    for neighbour_node in graph[start_node]:
      if neighbour_node not in visited_node:
        dfs_all_paths(visited_node, graph, neighbour_node, end_node)
    #when finish checking all the neighbour nodes of current node, get one step back to search if there are other possible paths starting from the previous node
    visited_node.pop()

# Driver Code
person_1 = 'you'
person_2 = 'me'
possible_paths = []
dfs_all_paths([], graph, person_1, person_2)
print('There are %d possible paths from \'%s\' to \'%s\':' %(len(possible_paths),person_1,person_2))
for path in possible_paths:
  print(path)

There are 2 possible paths from 'you' to 'me':
['you', 'd', 'e', 'c', 'b', 'me']
['you', 'e', 'c', 'b', 'me']


# **Find the longest link that we can form from someone?**

In [154]:
#!!! This code was tested on small test sample and it worked, however, for the large SNS data, it needs quite a long time for execution. Will check it again later.

#Find the longest link that we can form from someone (start_node)
def find_furtherst_person(visited_node, graph, start_node):
  global distance
  visited_node.append(start_node)
  #when reach a 'leaf node', count the current distance and update if it's larger than the previous stored distance
  if graph[start_node] == []:
    if len(visited_node) > distance:
      distance = len(visited_node)
      path.append(visited_node.copy())
    visited_node.pop()
  else:
    for neighbour_node in graph[start_node]:
      if neighbour_node not in visited_node:
        find_furtherst_person(visited_node, graph, neighbour_node)
    visited_node.pop()

# Driver Code
person = 'me'
distance = 0
path = []
find_furtherst_person([], graph, person)
print('The longest link from \'%s\' is:' %(person))
print(path[-1])
print('The distance is %d' %(distance-1))

The longest link from 'me' is:
['me', 'you', 'd', 'e', 'c', 'b', 'a']
The distance is 6


# **More contents will be updated later...**