# VK Project
### Network Science, Amantur Amatov


### Get Friends from VK

In [None]:
!pip install vk_api



In [None]:
import vk_api

Accessing VK API

In [None]:
import os
from getpass import getpass

# Credentials are read from the environment when present and prompted for
# otherwise, so they never have to be typed into (and saved with) the notebook.
phone_number = os.environ.get('VK_LOGIN') or input('VK login (phone number): ')
password = os.environ.get('VK_PASSWORD') or getpass('VK password: ')
vk_session = vk_api.VkApi(phone_number, password)
vk_session.auth()

# API handle used by every later cell that talks to VK.
vk = vk_session.get_api()

I need to remove deleted/banned users from my friend list and keep only the attributes I am interested in.

In [None]:
def filter_friends(friend_list, important_fields):
  '''
  Drops deactivated accounts and trims every remaining record to the requested fields.

  Args:
    friend_list(List of dicts): Friend records to filter; any record that has
                                a 'deactivated' key is skipped
    important_fields(List of str): Keys to keep in each surviving record
  Returns:
    filtered_friend_list(List of dicts): One trimmed dict per active friend,
                                         in the same order as the input

  '''

  def keep_important(record):
    # Keys missing from a record are simply absent from the result.
    return {key: value for key, value in record.items() if key in important_fields}

  return [keep_important(record) for record in friend_list if 'deactivated' not in record]

In [None]:
def get_users_friend_list(user_id=None, 
                          fields_to_download=None, 
                          fields_to_filter=('id', 'first_name', 'last_name')):
  '''
  Downloads a friend list from VK and returns it both raw and filtered.

  Args:
    user_id: Forwarded as `user_id` to `vk.friends.get`. With the default None
             the API presumably falls back to the authorised user -- confirm
             against the VK documentation
    fields_to_download: Forwarded as `fields` to `vk.friends.get`
    fields_to_filter(Sequence of str): Keys kept in every filtered record
                                       (an immutable tuple by default, so the
                                       default can never be modified by accident)
  Returns:
    (raw_friend_list, filtered_friend_list): The unmodified API response and the
    result of running `filter_friends` over the response's 'items' entry

  '''
  raw_friend_list = vk.friends.get(user_id=user_id, fields=fields_to_download)
  filtered_friend_list = filter_friends(raw_friend_list['items'], fields_to_filter)

  return (raw_friend_list, filtered_friend_list)

In [None]:
# Download my own friend list (no user_id given) with a few extra profile fields,
# then keep only the attributes that will become node attributes later on.
raw_friend_list, filtered_friend_list = get_users_friend_list(
    fields_to_download=['sex', 'country','education','city'],
    fields_to_filter=[
        'id', 'first_name', 'last_name',
        'country', 'city', 'sex', 'university_name',
    ],
)
# Every record dropped by the filter carried a 'deactivated' key, so the
# difference between the reported count and the kept records is the number removed.
print(
    'There are {} friends in my VK friend list, which are not deleted/banned, and {} that are deleted/banned.'.format(
        len(filtered_friend_list),
        raw_friend_list['count'] - len(filtered_friend_list),
    )
)

There are 256 friends in my VK friend list, which are not deleted/banned, and 9 that are deleted/banned.


Let us now find out which of my friends are connected to each other (i.e., build the list of edges of the future graph).  

In [None]:
def find_friend_friends(friend_list):
  '''
  Pairs every friend with that friend's own friend list.

  One `vk.friends.get` request is made per entry of `friend_list`.

  Args:
    friend_list(List of dicts): Friend records; each must have an 'id' key
  Returns:
    ff_list(List of tuples): (friend record, 'items' entry of the API response
                             for that friend), in input order

  '''
  ff_list = []
  for friend in friend_list:
    response = vk.friends.get(user_id = friend['id'])
    ff_list.append((friend, response['items']))

  return ff_list
  
def find_friend_connections(friends_friend_list):
  '''
  Builds the set of undirected edges between the people in `friends_friend_list`.

  Two people are connected when the ID of one of them appears in the friend
  list of the other. Each edge is stored once, as a tuple with the smaller ID first.

  Args:
    friends_friend_list(List of tuples): (friend record with an 'id' key,
                                         list of that friend's friend IDs)
  Returns:
    connection_set(Set of tuples): Sorted (id, id) pairs

  '''
  # Checking each listed ID against a set of the known IDs avoids comparing
  # every pair of friends against every friend list.
  known_ids = {friend['id'] for friend, _ in friends_friend_list}
  connection_set = set()
  for friend, friend_list in friends_friend_list:
    for other_id in friend_list:
      if other_id in known_ids:
        connection_set.add(tuple(sorted((other_id, friend['id']))))
  return connection_set

In [None]:
# do not run
# friends_friends = find_friend_friends(filtered_friend_list)
# friend_edges = find_friend_connections(friends_friends)

In [None]:
from google.colab import files

def graph_edges_output(edges, filename = 'friend_connections.txt'):
  '''
  Saves the edges to a text file and passes the file to Colab's download helper.

  The file holds one edge per line as "id1 id2", with no newline after the last line.

  Args:
    edges(Iterable of 2-tuples): Edges to write
    filename(str): Path of the file to create

  '''
  with open(filename, 'w') as out_file:
    lines = ['%s %s' % edge for edge in edges]
    out_file.write('\n'.join(lines))
  files.download(filename)
  print('Done!')

In [None]:
# graph_edges_output(friend_edges)

In [None]:
def graph_edges_input(filename='friend_connections.txt'):
  '''
  Reads an edge list written as one whitespace-separated pair of integers per line.

  Blank lines are skipped, and any run of whitespace between the numbers is accepted.

  Args:
    filename(str): Path of the file to read
  Returns:
    content(List of tuples): One tuple of ints per non-blank line, in file order

  '''
  with open(filename) as f:
    # split() without an argument splits on any whitespace run and drops the
    # trailing newline; a blank line yields an empty list and is filtered out.
    rows = (line.split() for line in f)
    content = [tuple(int(s) for s in row) for row in rows if row]
  return content

In [None]:
# Load the edge list from the default file ('friend_connections.txt').
friend_edges = graph_edges_input()
# Preview the first ten edges.
friend_edges[:10]

[(39138844, 336523067),
 (200201791, 212233817),
 (134651972, 139486120),
 (195776078, 267603225),
 (193651936, 342433080),
 (200201791, 236859310),
 (49205592, 303401274),
 (192862475, 426012154),
 (212827934, 392847871),
 (21894584, 316551627)]

Let us add a "number of mutual friends" attribute to each edge.

In [None]:
# Map every edge to an attribute dict holding its number of mutual friends.
edge_atrs = {}
for edge in friend_edges:
    if 226163965 not in edge:
        # One API request per edge; the length of the response is the count.
        mutuals = len(vk.friends.getMutual(source_uid = edge[0], target_uid = edge[1]))
        edge_atrs[edge] = {'mutuals':mutuals}
    else:
        # NOTE(review): edges touching user 226163965 get 0 without an API call;
        # the reason for special-casing this ID is not stated here -- confirm.
        edge_atrs[edge] = {'mutuals':0}

In [None]:
import networkx as nx

# Build an undirected graph: one node per active friend, one edge per stored pair.
G = nx.Graph()
G_edges = friend_edges
G_nodes = [friend['id'] for friend in filtered_friend_list]
G.add_nodes_from(G_nodes)
G.add_edges_from(G_edges)
# Attach the per-edge attribute dicts ({'mutuals': ...}) collected in edge_atrs.
nx.set_edge_attributes(G, edge_atrs)
# NOTE(review): the same user ID that is special-cased when edge_atrs is built is
# removed from the graph here; the reason is not stated in the notebook -- confirm.
G.remove_node(226163965)

In [None]:
def nodes_attributes(G, friend_list_with_atrs):
  '''
  Copies friend attributes onto the graph nodes and merges the name fields.

  Every record is attached to the node whose ID equals the record's 'id'. On each
  node that has both 'first_name' and 'last_name', the two are replaced by a
  single 'name' attribute ("first last"). Nodes lacking either field (for example
  IDs that only occur in the edge list) are left as they are instead of raising
  a KeyError.

  Args:
    G(networkx graph): Graph whose nodes are user IDs; it is modified in place
    friend_list_with_atrs(List of dicts): Friend records, each with an 'id' key
  Returns:
    G: The same graph object that was passed in

  '''
  atrs_dict = {friend['id']: friend for friend in friend_list_with_atrs}
  nx.set_node_attributes(G, atrs_dict)
  for _, d in G.nodes(data=True):
    if 'first_name' not in d or 'last_name' not in d:
      continue
    d['name'] = d.pop('first_name') + ' ' + d.pop('last_name')
  return G
# {friend['id']:friend for friend in filtered_friend_list}

In [None]:
# nodes_attributes returns the graph it was given, so G_atr and G are the same object.
G_atr = nodes_attributes(G, filtered_friend_list)
# Print node attribute dicts until the first node with fewer than four attributes.
for (n,d) in G_atr.nodes(data=True):
  print(d)
  if len(d)<4:
    break

{'id': 368567, 'sex': 2, 'city': {'id': 2, 'title': 'Санкт-Петербург'}, 'country': {'id': 1, 'title': 'Россия'}, 'name': 'Павел Неделько'}
{'id': 2153814, 'sex': 2, 'city': {'id': 1, 'title': 'Москва'}, 'country': {'id': 1, 'title': 'Россия'}, 'university_name': 'МФТИ (Физтех)', 'name': 'Евгений Молчанов'}
{'id': 2352276, 'sex': 1, 'city': {'id': 99, 'title': 'Новосибирск'}, 'country': {'id': 1, 'title': 'Россия'}, 'university_name': 'НГПУ', 'name': 'Александра Жданович'}
{'id': 4166562, 'sex': 1, 'city': {'id': 14620, 'title': 'Lüneburg'}, 'country': {'id': 65, 'title': 'Германия'}, 'university_name': 'РГПУ им. А. И. Герцена', 'name': 'Ольга Кузнецова'}
{'id': 6132074, 'sex': 1, 'country': {'id': 1, 'title': 'Россия'}, 'university_name': 'СПбПУ Петра Великого (Политех)', 'name': 'Александра Галкина'}
{'id': 8469731, 'sex': 2, 'city': {'id': 1, 'title': 'Москва'}, 'country': {'id': 1, 'title': 'Россия'}, 'university_name': 'МГТУ им. Н. Э. Баумана', 'name': 'Александр Хижик'}
{'id': 849

Let us add a popularity feature to our nodes, i.e., the number of friends each person has.

In [None]:
def popularity(friend_list):
  '''
  Looks up how many friends each person in `friend_list` has.

  One `vk.friends.get` request is made per entry of `friend_list`.

  Args:
    friend_list(List of dicts): Friend records; each must have an 'id' key
  Returns:
    (Dict): Maps every friend ID to {'nfriends': 'count' value of the API response}

  '''
  nfriends_by_id = {}
  for friend in friend_list:
    friend_id = friend['id']
    nfriends_by_id[friend_id] = {'nfriends':vk.friends.get(user_id=friend_id)['count']}
  return nfriends_by_id
# Issues one API request per active friend.
popularity_atr = popularity(filtered_friend_list)

In [None]:
  # Attach the 'nfriends' attribute to every node, then inspect one node as a check.
  # NOTE(review): this cell is uniformly indented; IPython appears to tolerate that,
  # but plain Python would not -- consider dedenting.
  nx.set_node_attributes(G_atr, popularity_atr)
  G_atr.nodes()[281865476]

{'city': {'id': 2, 'title': 'Санкт-Петербург'},
 'country': {'id': 1, 'title': 'Россия'},
 'id': 281865476,
 'name': 'Евгений Шкалев',
 'nfriends': 66,
 'sex': 2,
 'university_name': 'СПбПУ Петра Великого (Политех)'}

Let us export all the prepared data for the graph.

In [None]:
# Write the attributed graph to a GEXF file.
# NOTE(review): some node attributes ('city', 'country') are nested dicts in the
# outputs shown earlier; confirm the GEXF writer accepts them or flatten them first.
nx.write_gexf(G_atr, "friend_list.gexf")