### Amigos dos amigos na firma

In [1]:
# Each user is a dict with a numeric "id" and a display "name"; ids follow
# the position of the name in the list below.
users = [
    {"id": user_id, "name": user_name}
    for user_id, user_name in enumerate(
        ["Hero", "Dunn", "Sue", "Chi", "Thor",
         "Clive", "Hicks", "Devin", "Kate", "Klein"]
    )
]

# One (id, id) tuple per friendship between two users.
friendship_pairs = [
    (0, 1), (0, 2),
    (1, 2), (1, 3),
    (2, 3),
    (3, 4),
    (4, 5),
    (5, 6), (5, 7),
    (6, 8),
    (7, 8),
    (8, 9),
]

In [2]:
# Adjacency map: every user id starts with its own empty list of friend ids.
friendships = dict((user["id"], []) for user in users)

# Each pair is recorded in both directions, so both users list each other.
for left_id, right_id in friendship_pairs:
    friendships[left_id].append(right_id)   # right is a friend of left
    friendships[right_id].append(left_id)   # left is a friend of right

In [3]:
# Show the adjacency map (user id -> list of friend ids).
print(friendships)

{0: [1, 2], 1: [0, 2, 3], 2: [0, 1, 3], 3: [1, 2, 4], 4: [3, 5], 5: [4, 6, 7], 6: [5, 8], 7: [5, 8], 8: [6, 7, 9], 9: [8]}


#### Qual é o número médio de conexões?

1. Calcular o total de conexões, somando o tamanho de todas as listas de amigos
2. Dividir o total pelo número de usuários

In [4]:
def number_of_friends(user):
    """Return how many friends `user` has.

    Looks up the user's "id" in the module-level `friendships` dict and
    returns the length of the friend-id list stored under it.
    """
    return len(friendships[user["id"]])

# Average connections = sum of every user's friend count / number of users.
num_users = len(users)
total_connections = sum(map(number_of_friends, users))
avg_connections = total_connections / num_users

In [5]:
# Report the user count and the mean number of connections per user.
print(num_users)
print(avg_connections)

10
2.4


In [10]:
# Who has the most friends?
# Build (user_id, num_friends) pairs and order them from most to fewest
# friends; the sort is stable, so ties keep their original user order.
num_friends_by_id = sorted(
    ((user["id"], number_of_friends(user)) for user in users),
    key=lambda pair: pair[1],
    reverse=True,
)
print(num_friends_by_id)  # each pair is (user_id, num_friends)

[(1, 3), (2, 3), (3, 3), (5, 3), (8, 3), (0, 2), (4, 2), (6, 2), (7, 2), (9, 1)]


### Talvez você conheça...
Pessoas que você talvez conheça na firma, amigos dos amigos

In [15]:
def foaf_ids_bad(user):
    """Naively list the friend ids of each of `user`'s friends.

    Nothing is filtered: an id appears once per friend that lists it, so the
    result can repeat ids and can include the user's own id as well as ids
    of people who are already direct friends.
    """
    foaf_ids = []
    for friend_id in friendships[user["id"]]:
        # Append this friend's whole friend list, preserving its order.
        foaf_ids.extend(friendships[friend_id])
    return foaf_ids

In [17]:
# Direct friends of user 0.
friendships[0]

[1, 2]

In [18]:
# Unfiltered friend-of-friend ids for the first user in `users`.
foaf_ids_bad(users[0])

[0, 2, 3, 0, 1, 3]

In [20]:
# Contagem de amigos em comum

from collections import Counter

def friends_of_friends(user):
    """Count mutual friends between `user` and each friend-of-a-friend.

    Args:
        user: dict with an "id" key that is present in `friendships`.

    Returns:
        A Counter mapping each candidate id to the number of `user`'s
        friends who also list that candidate. The user's own id and the ids
        of existing direct friends are left out.

    Raises:
        KeyError: if the user's id, or the id of one of their friends, is
            not a key of `friendships`.
    """
    user_id = user["id"]
    direct_friends = friendships[user_id]
    # Built once up front: a set gives constant-time exclusion checks instead
    # of rescanning the friend list for every candidate id.
    excluded = set(direct_friends)
    excluded.add(user_id)
    return Counter(
        foaf_id
        for friend_id in direct_friends          # for each of my friends
        for foaf_id in friendships[friend_id]    # look at their friends
        if foaf_id not in excluded               # skip me and my own friends
    )

# Mutual-friend counts for the user at index 3 of `users`.
print(friends_of_friends(users[3]))

Counter({0: 2, 5: 1})


Chi (_id 3_) possui 2 amigos em comum com Hero (_id 0_) e apenas 1 com Clive (_id 5_).

### Vamos reunir por interesses em comum

In [21]:
# (user_id, interest) pairs; a user appears once per interest they have.
interests = [
    # user 0
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    # user 1
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"),
    # user 2
    (2, "Python"), (2, "scikit-learn"), (2, "scipy"), (2, "numpy"),
    (2, "statsmodels"), (2, "pandas"),
    # user 3
    (3, "R"), (3, "Python"), (3, "statistics"), (3, "regression"),
    (3, "probability"),
    # user 4
    (4, "machine learning"), (4, "regression"), (4, "decision trees"),
    (4, "libsvm"),
    # user 5
    (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"), (5, "Haskell"),
    (5, "programming languages"),
    # user 6
    (6, "statistics"), (6, "probability"), (6, "mathematics"), (6, "theory"),
    # user 7
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"),
    (7, "neural networks"),
    # user 8
    (8, "neural networks"), (8, "deep learning"), (8, "Big Data"),
    (8, "artificial intelligence"),
    # user 9
    (9, "Hadoop"), (9, "Java"), (9, "MapReduce"), (9, "Big Data"),
]

In [23]:
def ds_who_like(target_interest):
    """Return the ids of the users who list `target_interest`.

    Walks every (user_id, interest) pair in the module-level `interests`
    list and keeps the ids whose interest equals `target_interest` exactly,
    in the order the pairs appear.
    """
    matching_ids = []
    for user_id, user_interest in interests:
        if user_interest == target_interest:
            matching_ids.append(user_id)
    return matching_ids

# isso funciona bem para uma lista pequena

In [25]:
# Ids of users who list "numpy" as an interest.
ds_who_like("numpy")

[2]

In [26]:
# Ids of users who list "regression" as an interest.
ds_who_like("regression")

[3, 4]

In [27]:
# Ids of users who list "Big Data" as an interest.
ds_who_like("Big Data")

[0, 8, 9]

In [32]:
# For larger lists (more users or interests), it is better to build an index from interests to users

from collections import defaultdict

# Two lookup tables, filled in a single pass over the (user_id, interest) pairs:
#   user_ids_by_interest: interest -> list of user_ids with that interest
#   interests_by_user_id: user_id  -> list of that user's interests
user_ids_by_interest = defaultdict(list)
interests_by_user_id = defaultdict(list)

for user_id, interest in interests:
    user_ids_by_interest[interest].append(user_id)
    interests_by_user_id[user_id].append(interest)

def most_common_interests_with(user):
    """Count how many interests every other user shares with `user`.

    For each interest listed under the user's id in `interests_by_user_id`,
    looks up the ids stored for that interest in `user_ids_by_interest` and
    tallies each one, skipping the user's own id. Returns the tallies as a
    Counter keyed by user id.
    """
    user_id = user["id"]
    shared_counts = Counter()
    for interest in interests_by_user_id[user_id]:
        for interested_user_id in user_ids_by_interest[interest]:
            if interested_user_id != user_id:
                shared_counts[interested_user_id] += 1
    return shared_counts

In [34]:
# Shared-interest counts between the first user in `users` and every other user.
most_common_interests_with(users[0])

Counter({9: 3, 8: 1, 1: 2, 5: 1})