# Assignment #4: NumPy + A few tuples/lists




### In this assignment, we will practice NumPy, tuples, sets, and dicts using a social network use case.

**Please read the explanation in "H4.docx"**

# **Important**
- Do not delete or modify the first line in the given code cell.
- Your work must be added in the provided area only.
- You must not change the declaration of the provided functions.
- You are allowed to add your own functions.
- If you want to write your own program to test, add a new code cell at the very end of the file and add your code there.


In [30]:
import numpy as np

In [31]:
# DO NOTE DELETE THIS CELL / WRITE YOUR CODE HERE
# Task#1

def generate_adjacency_matrix(filename):
  """Build an adjacency matrix from a comma-separated edge-list file.

  Each non-blank line of ``filename`` is expected to read ``name1,name2`` and
  marks a connection from ``name1`` to ``name2``. Only the direction written
  on the line is recorded, so a mutual friendship needs both ``a,b`` and
  ``b,a`` lines in the file.

  Args:
    filename: Path of the text file holding one edge per line.

  Returns:
    A tuple ``(A, person_names)``. ``person_names`` is the sorted list of the
    distinct names found in the file. ``A`` is a square integer NumPy array
    with ``A[i, j] == 1`` when the file lists an edge from ``person_names[i]``
    to ``person_names[j]`` and 0 otherwise.

  Raises:
    IndexError: If a non-blank line holds fewer than two names.
  """
  # The context manager closes the file even if parsing fails part-way.
  with open(filename) as edge_file:
    edges = [
      [name.strip() for name in line.split(",")]
      for line in edge_file
      if line.strip()  # skip blank lines such as a trailing newline
    ]
  person_names = sorted({name for edge in edges for name in edge})
  # Resolve each name to its row/column once instead of rescanning the list
  # with list.index for every edge.
  position = {name: i for i, name in enumerate(person_names)}
  A = np.zeros((len(person_names), len(person_names)), int)
  for edge in edges:
    A[position[edge[0]], position[edge[1]]] = 1
  return A, person_names

In [32]:
"""
from google.colab import files
uploaded = files.upload()
print('uploaded = ', uploaded)
"""
# Build the adjacency matrix and the sorted name list from the edge file,
# then print both for inspection.
A, person_names = generate_adjacency_matrix("social_network.txt")
print(A)
print(person_names)

[[0 1 0 1 1]
 [1 0 0 1 0]
 [0 0 0 0 1]
 [1 1 0 0 1]
 [1 0 1 1 0]]
['A', 'B', 'C', 'D', 'E']


In [33]:
# DO NOTE DELETE THIS CELL / WRITE YOUR CODE HERE
# Task#2

def get_degree_matrix(A):
  """Return the diagonal degree matrix of adjacency matrix ``A``.

  Args:
    A: NumPy adjacency matrix.

  Returns:
    A square NumPy array whose ``[i, i]`` entry is the sum of column ``i`` of
    ``A`` and whose off-diagonal entries are all 0.
  """
  # Column totals; for a symmetric adjacency matrix these equal the row totals.
  column_totals = A.sum(axis=0)
  return np.diag(column_totals)

In [34]:
# Derive the diagonal degree matrix from the adjacency matrix A and print it.
D = get_degree_matrix(A)
print(D)

[[3 0 0 0 0]
 [0 2 0 0 0]
 [0 0 1 0 0]
 [0 0 0 3 0]
 [0 0 0 0 3]]


In [35]:
# DO NOTE DELETE THIS CELL / WRITE YOUR CODE HERE
# Task#3

def get_names_with_highest_number_of_friends(D, person_names):
  """List every person whose friend count equals the maximum.

  Args:
    D: NumPy degree matrix. The friend count of person ``i`` is taken as the
      sum of column ``i``, which for a diagonal matrix is ``D[i, i]``.
    person_names: Names indexed consistently with the columns of ``D``.

  Returns:
    A list of ``(name, index)`` tuples in increasing index order, one for each
    person tied for the highest friend count. Empty when ``D`` has no columns.
  """
  friend_counts = D.sum(axis=0)
  if friend_counts.size == 0:
    return []
  # Find the maximum once; recomputing it for every element would make the
  # scan quadratic.
  top_count = friend_counts.max()
  return [
    (person_names[index], index)
    for index, count in enumerate(friend_counts)
    if count == top_count
  ]

In [36]:
# Collect the (name, index) tuples of everyone tied for the most friends.
persons = get_names_with_highest_number_of_friends(D, person_names)
print(persons)

[('A', 0), ('D', 3), ('E', 4)]


In [37]:
# HELPER FUNCTION
def get_liked_page_matrix(infile):
  """Read each person's liked-page flags from a text file.

  Each non-blank line of ``infile`` is expected to read ``name,bits``, where
  ``bits`` is a string of digits with one character per page (for example
  ``A,0101110``).

  Args:
    infile: Path of the text file to read.

  Returns:
    A tuple ``(liked_pages_array, persons)``. ``persons`` lists the names in
    file order. ``liked_pages_array`` is a NumPy array with one row per person
    and one integer per character of that person's ``bits`` string.

  Raises:
    ValueError: If a non-blank line does not contain exactly one comma, or if
      ``bits`` contains a character that is not a digit.
  """
  liked_pages = []
  persons = []
  # The context manager closes the file even if a line fails to parse.
  with open(infile, 'r') as fr:
    for line in fr:
      line = line.strip()
      if not line:
        # Skip blank lines (e.g. a trailing newline); unpacking would fail.
        continue
      name, bits = line.split(',')
      liked_pages.append([int(c) for c in bits.strip()])
      persons.append(name.strip())
  liked_pages_array = np.array(liked_pages)
  return liked_pages_array, persons

In [38]:
"""
from google.colab import files
uploaded = files.upload()
print('uploaded = ', uploaded)
"""
# Load the liked-page matrix and the matching name list.
# Note: this rebinds `persons`, which previously held the Task 3 result.
liked_pages, persons = get_liked_page_matrix('liked_pages.txt')
print(liked_pages)
print(persons)


[[0 1 0 1 1 1 0]
 [1 0 0 1 0 1 1]
 [0 0 0 0 1 0 1]
 [1 1 0 0 1 0 0]
 [0 1 1 1 0 1 0]]
['A', 'B', 'C', 'D', 'E']


In [39]:
# DO NOTE DELETE THIS CELL / WRITE YOUR CODE HERE
# Task#4

def get_distance_matrix(Lp):
  """Compute the pairwise liked-page overlap score between people.

  For two different people ``x`` and ``y`` the score is
  ``shared / (likes_x + likes_y - shared)``: the number of pages both like
  divided by the number of pages at least one of them likes. Despite the
  function name, a larger value means the two people are more alike.

  Args:
    Lp: 2-D array-like with one row per person and one column per page. A
      cell equal to 1 means the person likes the page; any other value is
      treated as "not liked".

  Returns:
    A square float NumPy array with one row and column per person. The
    diagonal is 0, and so is the score of any pair in which neither person
    likes any page (that ratio would otherwise be 0/0).
  """
  liked = (np.asarray(Lp) == 1).astype(int)
  n = liked.shape[0]
  if n == 0:
    return np.zeros((0, 0), float)
  likes_per_person = liked.sum(axis=1)
  # Entry [x, y] of the product counts the pages liked by both x and y.
  shared = liked @ liked.T
  union = likes_per_person[:, None] + likes_per_person[None, :] - shared
  # Divide only where the union is non-empty; other cells keep the 0.0 from
  # `out`, avoiding a division by zero.
  scores = np.divide(shared, union, out=np.zeros((n, n), float), where=union > 0)
  # A person is never compared with themselves.
  np.fill_diagonal(scores, 0.0)
  return scores

# Bare expression: the notebook displays the returned array as the cell output.
get_distance_matrix(liked_pages)

array([[0.        , 0.33333333, 0.2       , 0.4       , 0.6       ],
       [0.33333333, 0.        , 0.2       , 0.16666667, 0.33333333],
       [0.2       , 0.2       , 0.        , 0.25      , 0.        ],
       [0.4       , 0.16666667, 0.25      , 0.        , 0.16666667],
       [0.6       , 0.33333333, 0.        , 0.16666667, 0.        ]])

In [40]:
# Keep the pairwise score matrix in Dt for the next task and print it.
Dt = get_distance_matrix(liked_pages)
print(Dt)

[[0.         0.33333333 0.2        0.4        0.6       ]
 [0.33333333 0.         0.2        0.16666667 0.33333333]
 [0.2        0.2        0.         0.25       0.        ]
 [0.4        0.16666667 0.25       0.         0.16666667]
 [0.6        0.33333333 0.         0.16666667 0.        ]]


In [41]:
# DO NOTE DELETE THIS CELL / WRITE YOUR CODE HERE
# Task#5

def get_all_most_similar_pairs(Dt, persons):
  """Return every pair of distinct people sharing the highest score in ``Dt``.

  Args:
    Dt: Square array-like of pairwise scores where a larger value means more
      similar; ``Dt[x, y]`` is the score between ``persons[x]`` and
      ``persons[y]``.
    persons: Names indexed consistently with the rows and columns of ``Dt``.

  Returns:
    A sorted list of ``(name1, name2)`` tuples, each with its two names in
    sorted order and each pair listed once. Diagonal entries are ignored, so
    nobody is paired with themselves. Empty when there are fewer than two
    people.
  """
  Dt = np.asarray(Dt)
  n = Dt.shape[0]
  # Only off-diagonal cells are candidates; otherwise an all-zero matrix
  # would report self-pairs such as ('A', 'A').
  off_diagonal = ~np.eye(n, dtype=bool)
  if not off_diagonal.any():
    return []
  best_score = Dt[off_diagonal].max()
  rows, cols = np.nonzero(off_diagonal & (Dt == best_score))
  # The set collapses (x, y) / (y, x) duplicates; sorting makes the output
  # order deterministic instead of depending on set hashing.
  return sorted({
    tuple(sorted((persons[x], persons[y])))
    for x, y in zip(rows, cols)
  })

In [42]:
# Find the pair(s) of people with the highest score in Dt and print them.
pairs = get_all_most_similar_pairs(Dt, persons)
print(pairs)

[('A', 'E')]


In [43]:
# End-to-end run of Tasks 1-5, followed by a dump of every intermediate result.
A, person_names = generate_adjacency_matrix("social_network.txt")
D = get_degree_matrix(A)
persons = get_names_with_highest_number_of_friends(D, person_names)
# NOTE(review): the next line rebinds `persons` to the liked-pages name list,
# discarding the Task 3 result assigned just above. Both print(persons) calls
# below therefore show the name list and the Task 3 tuples are never printed.
# Confirm whether that is intended.
liked_pages, persons = get_liked_page_matrix('liked_pages.txt')
Dt = get_distance_matrix(liked_pages)
pairs = get_all_most_similar_pairs(Dt, persons)
print(A)
print(person_names)
print(D)
print(persons)
print(liked_pages)
print(persons)
print(Dt)
print(pairs)

[[0 1 0 1 1]
 [1 0 0 1 0]
 [0 0 0 0 1]
 [1 1 0 0 1]
 [1 0 1 1 0]]
['A', 'B', 'C', 'D', 'E']
[[3 0 0 0 0]
 [0 2 0 0 0]
 [0 0 1 0 0]
 [0 0 0 3 0]
 [0 0 0 0 3]]
['A', 'B', 'C', 'D', 'E']
[[0 1 0 1 1 1 0]
 [1 0 0 1 0 1 1]
 [0 0 0 0 1 0 1]
 [1 1 0 0 1 0 0]
 [0 1 1 1 0 1 0]]
['A', 'B', 'C', 'D', 'E']
[[0.         0.33333333 0.2        0.4        0.6       ]
 [0.33333333 0.         0.2        0.16666667 0.33333333]
 [0.2        0.2        0.         0.25       0.        ]
 [0.4        0.16666667 0.25       0.         0.16666667]
 [0.6        0.33333333 0.         0.16666667 0.        ]]
[('A', 'E')]
