# Small World Phenomenon
## Breadth-First Search algorithm
### Refers to the proposition that we are all linked by short chains of acquaintances. 'Six degrees of separation' refers to the idea that all pairs of people are at most six social connections away from each other.

### Import libraries

In [1]:
import itertools as itt
import operator as op
from typing import List, Dict, AnyStr, Tuple
import random

### Read in a space-separated edge-list text file (one pair per line) and return a list of pairs of people that know each other.

In [2]:
def loadGraph(edgeFilename):
    """Read an edge-list file and return the pairs of connected nodes.

    Every non-blank line is split on whitespace; its first and last tokens
    are converted to ``int`` and taken as the two endpoints of one edge.
    Blank lines are skipped instead of aborting the load.

    Args:
        edgeFilename: Path of the text file to read.

    Returns:
        A list of two-element lists ``[a, b]`` of ints, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If an endpoint token is not a valid integer.
    """
    print('Getting Graph.....')
    edges = []
    with open(edgeFilename, "r") as f:
        for row in f:
            # split() with no argument handles spaces, tabs and repeated
            # separators, and yields [] for an empty/whitespace-only line.
            fields = row.split()
            if not fields:
                continue
            edges.append([int(fields[0]), int(fields[-1])])
    return edges

### Convert list into dictionary where the key is a node and the values are a list of nodes that the key knows.

In [3]:
def to_dict(graph : List[Tuple[AnyStr,AnyStr]]) -> Dict[AnyStr,List[AnyStr]]:
    """Build an undirected adjacency mapping from a list of edges.

    Each edge contributes every one of its members as a neighbour of every
    *different* member, so ``[a, b]`` records ``b`` under ``a`` and ``a``
    under ``b``. Self-loops add nothing, and repeated edges produce repeated
    neighbours. For a given node, neighbours appear in the order the edges
    appear in ``graph``.

    The edges are processed in a single pass, so the cost is proportional to
    the number of edges rather than nodes x edges. Keys are inserted in order
    of first appearance in ``graph``.

    NOTE(review): the annotation says ``AnyStr`` but nodes only need to be
    hashable and mutually orderable; elsewhere in this file they are ints.

    Args:
        graph: Iterable of edges, each an iterable of node identifiers.

    Returns:
        A dict mapping each node that has at least one distinct neighbour to
        the list of its neighbours.
    """
    adjacency = {}
    for edge in graph:
        members = sorted(edge)
        # dict.fromkeys de-duplicates while preserving order, so a node that
        # occurs twice in one edge is only processed once as a key.
        for node in dict.fromkeys(members):
            for other in members:
                if other != node:
                    adjacency.setdefault(node, []).append(other)
    return adjacency

### Define MyQueue class

In [4]:
class MyQueue:
    """Minimal FIFO queue kept in a plain list.

    New items are placed at index 0 and the oldest item is taken from the
    end of the list, so ``items`` (and iteration / ``str``) run from newest
    to oldest.
    """

    def __init__(self):
        # Backing storage; index 0 is the most recently enqueued item.
        self.items = []

    def __str__(self):
        return str(self.items)

    def emptyQueue(self):
        """Return True when the queue holds no items."""
        return not self.items

    def enqueue(self, item):
        """Add ``item`` at the front of the backing list; returns None."""
        self.items[:0] = [item]

    def dequeue(self):
        """Remove and return the oldest item (the last list element)."""
        return self.items.pop(-1)

    def __iter__(self):
        return (entry for entry in self.items)

### Breadth-First Search algorithm

In [5]:
def bfs(graph, s):
    """Breadth-first search from ``s``, recording each node's hop distance.

    Args:
        graph: Mapping from a node to an iterable of its neighbours.
        s: The source node to start from.

    Returns:
        A list of single-entry dicts ``{node: distance}``, one per node
        reached from ``s``, in the order the nodes were discovered. The
        source itself is not included.

    Raises:
        KeyError: If ``s``, or any node reached during the search, is not a
            key of ``graph``.
    """
    # Function-scope import keeps the block self-contained.
    from collections import deque

    # A set gives O(1) membership tests; a list here makes the search quadratic.
    visited = {s}
    pending = deque([(s, 0)])
    discovered = []
    while pending:
        node, dist = pending.popleft()
        for neighbour in graph[node]:
            if neighbour not in visited:
                visited.add(neighbour)
                pending.append((neighbour, dist + 1))
                discovered.append({neighbour: dist + 1})
    return discovered

### Distance Distribution

In [6]:
def distanceDistribution(G):
    """Count how many nodes lie at each distance.

    Args:
        G: Iterable of dicts mapping a node to its distance, such as the
            single-entry ``{node: distance}`` dicts returned by ``bfs``.

    Returns:
        A dict mapping each distance to the number of times it occurs, with
        keys in order of first appearance in ``G``.
    """
    counts = {}
    for entry in G:
        # Read the distances directly rather than parsing the text form of
        # ``dict_values``, which breaks for anything but a single int.
        for distance in entry.values():
            counts[distance] = counts.get(distance, 0) + 1
    return counts

### The main function that runs all functions combined

In [7]:
def main(edgeFilename='edges.txt'):
    """Load a graph, run BFS from a random node and print the distance mix.

    The source node is picked by choosing a random edge from the loaded
    list and then a random endpoint of that edge. The printed dict maps
    each distance to the share of reached nodes at that distance, formatted
    as a percentage with one decimal place.

    Args:
        edgeFilename: Path of the edge-list file to analyse. Defaults to
            ``'edges.txt'``, the file name previously hard-coded here.
    """
    loaded = loadGraph(edgeFilename)
    graph = to_dict(loaded)
    random_number = random.choice(random.choice(loaded))
    print(f'Source Number:\t{random_number}')
    res = distanceDistribution(bfs(graph, random_number))
    paths = sum(res.values())

    # When nothing was reached, res is empty and no division takes place.
    result = {k: '{:.1%}'.format(v / paths) for k, v in res.items()}
    print(result)

### Call main( )

In [8]:
# Run the analysis only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()

Getting Graph.....
Source Number:	3545
{1: '2.1%', 2: '11.5%', 3: '3.8%', 4: '35.0%', 5: '46.3%', 6: '1.4%'}
