# Assignment #4: NumPy + Tuple, Set, Dict




### In this assignment, we will practice NumPy, tuples, sets, and dicts using a social network use case.

**Please read the explanation [here](https://drive.google.com/file/d/13h-wDYixvxn2er8zO13z4_hNgcT6S7nt/view?usp=share_link).**

# **Important**
- Do not delete or modify the first line in the given code cell.
- Your work must be added in the provided area only.
- You must not change the declaration of the provided functions.
- You are allowed to add your own functions.
- If you want to write your own program to test, add a new code cell at the very end of the file and add your code there.


In [None]:
import numpy as np

In [None]:
# DO NOTE DELETE THIS CELL / WRITE YOUR CODE HERE
# Function 1

def generate_followed_by_dict(infile):
    """Read a comma-separated follower file into a dict.

    Each non-empty line has the form ``person,follower1,follower2,...``.
    The first field becomes the key and the remaining fields, in file
    order, become its list of followers.  If a person appears on several
    lines, the last line wins.

    Args:
        infile: Path of the text file to read.

    Returns:
        dict mapping each person (str) to a list of follower names (str).
    """
    followed_by = {}
    # The context manager closes the file even if parsing raises.
    with open(infile) as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank/trailing line would otherwise create a '' person.
                continue
            person, *followers = line.split(',')
            followed_by[person] = followers

    return followed_by

In [None]:
# DO NOTE DELETE THIS CELL / WRITE YOUR CODE HERE
# Function 2

def generate_followed_by_matrix(followed_by_dict):
    """Build the followed-by adjacency matrix for everyone in the dict.

    Args:
        followed_by_dict: dict mapping a person to the list of people
            who follow that person.

    Returns:
        A tuple ``(m, names)`` where ``names`` is the sorted list of every
        distinct name appearing as a key or as a follower, and ``m`` is a
        square integer array with ``m[i, j] == 1`` when ``names[j]`` is
        listed as a follower of ``names[i]`` and 0 otherwise.
    """
    everyone = set(followed_by_dict)
    for followers in followed_by_dict.values():
        everyone.update(followers)
    names = sorted(everyone)

    # One dict lookup per name instead of a linear list.index() scan
    # for every matrix cell that gets filled.
    position = {name: idx for idx, name in enumerate(names)}

    m = np.zeros([len(names), len(names)], int)
    for person, followers in followed_by_dict.items():
        row = position[person]
        for follower in followers:
            m[row, position[follower]] = 1

    return m, names

In [None]:
# DO NOTE DELETE THIS CELL / WRITE YOUR CODE HERE
# Function 3

def generate_degree_matrix(A):
    """Return the diagonal matrix of row sums of ``A``.

    Entry ``[i, i]`` of the result is the sum of row ``i`` of ``A``;
    every off-diagonal entry is 0.
    """
    row_totals = A.sum(axis=1)
    size = A.shape[0]
    # Broadcasting scales column j of the identity by row_totals[j],
    # which leaves row_totals on the diagonal.
    return np.eye(size, dtype=int) * row_totals

In [None]:
# DO NOTE DELETE THIS CELL / WRITE YOUR CODE HERE
# Function 4

def get_top_influencer(M, person_names):
    """Return the names whose row sum in ``M`` is the largest.

    Args:
        M: 2-D array; row ``i`` belongs to ``person_names[i]``.
        person_names: Sequence of names, one per row of ``M``.

    Returns:
        List of plain Python values (not NumPy scalars) for every name
        tied for the maximum row sum, in ``person_names`` order.  An
        empty matrix yields an empty list.
    """
    totals = np.sum(M, axis=1)
    if totals.size == 0:
        # np.max raises ValueError on an empty array.
        return []
    top = np.max(totals)
    # tolist() converts np.str_ elements to built-in str so the result
    # prints as ['A', 'B'] regardless of the NumPy version.
    return np.array(person_names)[totals == top].tolist()

In [None]:
# DO NOTE DELETE THIS CELL / WRITE YOUR CODE HERE
# Function 5

def tanimoto_coefficient(a, b):
    """Return the Tanimoto coefficient of two 0/1 vectors, rounded to 2 dp.

    The coefficient is ``c / (sum(a) + sum(b) - c)`` where ``c`` is the
    number of positions at which both vectors equal 1.

    Args:
        a: 1-D array-like of 0/1 values.
        b: 1-D array-like of 0/1 values, same length as ``a``.

    Returns:
        The rounded coefficient, or 0.0 when the denominator is zero
        (for example, when both vectors are all zeros).
    """
    a, b = np.asarray(a), np.asarray(b)
    common = np.sum((a == 1) & (b == 1))
    denominator = np.sum(a) + np.sum(b) - common
    if denominator == 0:
        # Avoid a 0/0 division, which would yield nan and a RuntimeWarning.
        return 0.0
    return round(common / denominator, 2)


def generate_similarity_matrix_among_influencers(M, person_names):
    """Compute pairwise Tanimoto similarity between influencers.

    An influencer is a person whose row in ``M`` has a positive sum.

    Args:
        M: 2-D array; row ``i`` belongs to ``person_names[i]``.
        person_names: Sequence of names, one per row of ``M``.

    Returns:
        A tuple ``(out, names)`` where ``names`` is a NumPy array of the
        influencer names in their original order and ``out`` is a square
        float array with ``out[i, j]`` the rounded Tanimoto coefficient of
        influencers ``i`` and ``j``.  The diagonal is left at 0.
    """
    is_influencer = np.sum(M, axis=1) > 0
    rows = M[is_influencer]
    count = rows.shape[0]

    out = np.zeros((count, count), float)
    # The coefficient is symmetric, so compute each pair once and mirror it.
    for i in range(count):
        for j in range(i + 1, count):
            out[i, j] = out[j, i] = tanimoto_coefficient(rows[i], rows[j])

    return out, np.array(person_names)[is_influencer]

In [None]:
# DO NOTE DELETE THIS CELL / WRITE YOUR CODE HERE
# Function 6

def get_all_pairs_of_most_similar_influencers(S, only_influencers):
    """Return every pair of influencers that shares the highest similarity.

    ``S`` is treated as symmetric: only entries above the diagonal are
    inspected, so each pair is reported once as ``(earlier, later)`` in
    ``only_influencers`` order.

    Args:
        S: Square similarity matrix among the influencers.
        only_influencers: Names of the influencers, one per row of ``S``.

    Returns:
        List of ``(name_i, name_j)`` tuples of plain Python values whose
        similarity equals the maximum of ``S``.  Returns ``[]`` when ``S``
        is empty or its maximum is 0.
    """
    S = np.asarray(S)
    if S.size == 0:
        # np.max raises ValueError on an empty array.
        return []
    best = np.max(S)
    if best == 0:
        return []

    # tolist() turns np.str_ elements into built-in str so the tuples
    # print as ('A', 'C') regardless of the NumPy version.
    names = np.asarray(only_influencers).tolist()
    count = len(names)
    # Walking only j > i removes the need to dedupe mirrored (j, i) pairs.
    return [
        (names[i], names[j])
        for i in range(count)
        for j in range(i + 1, count)
        if S[i, j] == best
    ]

### --- Testcase 1 --- ###
#### You can get infile_1.txt from [here](https://drive.google.com/file/d/1-JO6bfUHy4G_Y1ouAYmap5SgBgZgds87/view?usp=share_link) ####

In [None]:
# Testcase 1
# Runs all six functions on infile_1.txt and prints each intermediate result.
# infile_1.txt is opened by relative path (download link is in the heading above).
followed_by_dict = generate_followed_by_dict("infile_1.txt")
print(followed_by_dict)
# Adjacency matrix plus the sorted list of names that labels its rows/columns.
M, person_names = generate_followed_by_matrix(followed_by_dict)
print("M = ", M)
print("person_names", person_names)
# Diagonal matrix of the row sums of M.
D = generate_degree_matrix(M)
print("D = ", D)
print("top influencers = ", get_top_influencer(M, person_names))
# Similarity is computed only among people whose row sum in M is positive.
S, all_influencers = generate_similarity_matrix_among_influencers(M, person_names)
print("similarity_matrix = ", S)
print(all_influencers)

# Pairs that share the maximum similarity; an empty list when that maximum is 0.
pairs = get_all_pairs_of_most_similar_influencers(S, all_influencers)
print(pairs)

#### Expected output of Testcase 1 ####

{'A': ['E'], 'B': ['A', 'D'], 'C': ['E', 'B'], 'D': ['A', 'E']}
M =  [[0 0 0 0 1]
 [1 0 0 1 0]
 [0 1 0 0 1]
 [1 0 0 0 1]
 [0 0 0 0 0]]
person_names ['A', 'B', 'C', 'D', 'E']
D =  [[1 0 0 0 0]
 [0 2 0 0 0]
 [0 0 2 0 0]
 [0 0 0 2 0]
 [0 0 0 0 0]]
top influencers =  ['B', 'C', 'D']
similarity_matrix =  [[0.   0.   0.5  0.5 ]
 [0.   0.   0.   0.33]
 [0.5  0.   0.   0.33]
 [0.5  0.33 0.33 0.  ]]
['A' 'B' 'C' 'D']
[('A', 'C'), ('A', 'D')]

### --- Testcase 2 --- ###
#### You can get infile_2.txt from [here](https://drive.google.com/file/d/1CPhOVlGJTbGTjmXYBSZ7ksnrAbt7YGyP/view?usp=share_link) ####

In [None]:
# Testcase 2
# Runs all six functions on infile_2.txt and prints each intermediate result.
followed_by_dict = generate_followed_by_dict("infile_2.txt")
print(followed_by_dict)
# Adjacency matrix plus the sorted list of names that labels its rows/columns.
M, person_names = generate_followed_by_matrix(followed_by_dict)
print("M = ", M)
print("person_names", person_names)
# Diagonal matrix of the row sums of M.
D = generate_degree_matrix(M)
print("D = ", D)
print("top influencers = ", get_top_influencer(M, person_names))
# Similarity is computed only among people whose row sum in M is positive.
SM, all_influencers = generate_similarity_matrix_among_influencers(M, person_names)
print("similarity_matrix = ", SM)
print(all_influencers)

# Pairs that share the maximum similarity; an empty list when that maximum is 0.
pairs = get_all_pairs_of_most_similar_influencers(SM, all_influencers)
print(pairs)

#### Expected output of Testcase 2 ####

{'A': ['B', 'C', 'D', 'E'], 'Q': ['F', 'H', 'I']}
M =  [[0 1 1 1 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 1 1 0]]
person_names ['A', 'B', 'C', 'D', 'E', 'F', 'H', 'I', 'Q']
D =  [[4 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 3]]
top influencers =  ['A']
similarity_matrix =  [[0. 0.]
 [0. 0.]]
['A' 'Q']
[]

### --- Testcase 3 --- ###
#### You can get infile_3.txt from [here](https://drive.google.com/file/d/1Xwe48zFV43s7nAkmtEqVWdXoAlTswLkI/view?usp=share_link) ####

In [None]:
# Testcase 3
# Runs all six functions on infile_3.txt and prints each intermediate result.
followed_by_dict = generate_followed_by_dict("infile_3.txt")
print(followed_by_dict)
# Adjacency matrix plus the sorted list of names that labels its rows/columns.
M, person_names = generate_followed_by_matrix(followed_by_dict)
print("M = ", M)
print("person_names", person_names)
# Diagonal matrix of the row sums of M.
D = generate_degree_matrix(M)
print("D = ", D)
print("top influencers = ", get_top_influencer(M, person_names))
# Similarity is computed only among people whose row sum in M is positive.
SM, all_influencers = generate_similarity_matrix_among_influencers(M, person_names)
print("similarity_matrix = ", SM)
print(all_influencers)

# Pairs that share the maximum similarity; an empty list when that maximum is 0.
pairs = get_all_pairs_of_most_similar_influencers(SM, all_influencers)
print(pairs)

#### Expected output of Testcase 3 ####

{'A': ['B', 'C', 'D', 'E', 'Q'], 'Q': ['F', 'H', 'I', 'A'], 'R': ['B', 'C', 'D', 'E', 'A']}
M =  [[0 1 1 1 1 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 1 1 1 0 0]
 [1 1 1 1 1 0 0 0 0 0]]
person_names ['A', 'B', 'C', 'D', 'E', 'F', 'H', 'I', 'Q', 'R']
D =  [[5 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 4 0]
 [0 0 0 0 0 0 0 0 0 5]]
top influencers =  ['A', 'R']
similarity_matrix =  [[0.   0.   0.67]
 [0.   0.   0.12]
 [0.67 0.12 0.  ]]
['A' 'Q' 'R']
[('A', 'R')]

### --- Testcase 4 --- ###
#### You can get infile_4.txt from [here](https://drive.google.com/file/d/1OZZO5Muq0iviRSPwEWIfvWm_G3c4ktf4/view?usp=share_link) ####

In [None]:
# Testcase 4
# Runs all six functions on infile_4.txt and prints each intermediate result.
followed_by_dict = generate_followed_by_dict("infile_4.txt")
print(followed_by_dict)
# Adjacency matrix plus the sorted list of names that labels its rows/columns.
M, person_names = generate_followed_by_matrix(followed_by_dict)
print("M = ", M)
print("person_names", person_names)
# Diagonal matrix of the row sums of M.
D = generate_degree_matrix(M)
print("D = ", D)
print("top influencers = ", get_top_influencer(M, person_names))
# Similarity is computed only among people whose row sum in M is positive.
SM, all_influencers = generate_similarity_matrix_among_influencers(M, person_names)
print("similarity_matrix = ", SM)
print(all_influencers)

# Pairs that share the maximum similarity; an empty list when that maximum is 0.
pairs = get_all_pairs_of_most_similar_influencers(SM, all_influencers)
print(pairs)

#### Expected output of Testcase 4 ####

{'A': ['B', 'C', 'D', 'E', 'Q', 'P'], 'B': ['A', 'C', 'D', 'E'], 'C': ['D', 'E'], 'D': ['A', 'B', 'C', 'E'], 'E': ['A', 'B', 'C', 'D']}
M =  [[0 1 1 1 1 1 1]
 [1 0 1 1 1 0 0]
 [0 0 0 1 1 0 0]
 [1 1 1 0 1 0 0]
 [1 1 1 1 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]]
person_names ['A', 'B', 'C', 'D', 'E', 'P', 'Q']
D =  [[6 0 0 0 0 0 0]
 [0 4 0 0 0 0 0]
 [0 0 2 0 0 0 0]
 [0 0 0 4 0 0 0]
 [0 0 0 0 4 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]]
top influencers =  ['A']
similarity_matrix =  [[0.   0.43 0.33 0.43 0.43]
 [0.43 0.   0.5  0.6  0.6 ]
 [0.33 0.5  0.   0.2  0.2 ]
 [0.43 0.6  0.2  0.   0.6 ]
 [0.43 0.6  0.2  0.6  0.  ]]
['A' 'B' 'C' 'D' 'E']
[('B', 'D'), ('B', 'E'), ('D', 'E')]

### --- Testcase 5 --- ###
#### You can get infile_5.txt from [here](https://drive.google.com/file/d/1DsHlLf3s5YEem4jNeQM_XMbefce67kfA/view?usp=share_link) ####

In [None]:
# Testcase 5
# Runs all six functions on infile_5.txt and prints each intermediate result.
followed_by_dict = generate_followed_by_dict("infile_5.txt")
print(followed_by_dict)
# Adjacency matrix plus the sorted list of names that labels its rows/columns.
M, person_names = generate_followed_by_matrix(followed_by_dict)
print("M = ", M)
print("person_names", person_names)
# Diagonal matrix of the row sums of M.
D = generate_degree_matrix(M)
print("D = ", D)
print("top influencers = ", get_top_influencer(M, person_names))
# Similarity is computed only among people whose row sum in M is positive.
SM, all_influencers = generate_similarity_matrix_among_influencers(M, person_names)
print("similarity_matrix = ", SM)
print(all_influencers)

# Pairs that share the maximum similarity; an empty list when that maximum is 0.
pairs = get_all_pairs_of_most_similar_influencers(SM, all_influencers)
print(pairs)

#### Expected output of Testcase 5 ####

{'A': ['Q'], 'Q': ['A']}
M =  [[0 1]
 [1 0]]
person_names ['A', 'Q']
D =  [[1 0]
 [0 1]]
top influencers =  ['A', 'Q']
similarity_matrix =  [[0. 0.]
 [0. 0.]]
['A' 'Q']
[]