In [None]:
# Dependencias

import os
import io
import re
import json
import time
import numpy
import unidecode
from glob import glob
from operator import itemgetter
from functools import lru_cache

In [None]:
# Funciones generales

def get_config(key):
    """Return the value stored under ``key`` in ``config.json``.

    The file is resolved relative to the current working directory and is
    read again on every call.

    Args:
        key: Top-level key to look up in the parsed configuration.

    Returns:
        The value associated with ``key``, or ``None`` when the key is absent.

    Raises:
        OSError: If ``config.json`` cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # The context manager closes the handle even when parsing fails.
    # NOTE(review): the platform default encoding is kept on purpose; switch to
    # encoding='utf8' if the config may contain non-ASCII text.
    with open('config.json') as jsonfile:
        config = json.load(jsonfile)
    if key in config:
        return config[key]
    return None

def read_json(filepath):
    """Parse the UTF-8 encoded JSON file at ``filepath``.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # ``with`` guarantees the handle is released, including on a parse error.
    with io.open(filepath, encoding='utf8') as jsonfile:
        return json.load(jsonfile)

def save_json(filepath, content):
    """Serialise ``content`` as JSON and write it to ``filepath`` in UTF-8.

    Missing parent directories are created. Non-ASCII characters are written
    verbatim (``ensure_ascii=False``) rather than as ``\\uXXXX`` escapes.

    Args:
        filepath: Destination path of the JSON file.
        content: JSON-serialisable object to store.

    Raises:
        TypeError: If ``content`` is not JSON-serialisable.
        OSError: If the directory or file cannot be created or written.
    """
    directory = os.path.dirname(filepath)
    if directory:
        os.makedirs(directory, exist_ok=True)
    # Serialise before opening the file: opening in 'w' mode truncates it, so
    # a serialisation error must happen first to leave an existing file intact.
    jsoncontent = json.dumps(content, ensure_ascii=False)
    with io.open(filepath, 'w', encoding='utf8') as jsonfile:
        jsonfile.write(jsoncontent)

In [None]:
# Funciones para recuperar los intereses obtenidos

def list_users_obtained():
    """List the user nicks that have a file in ``data/interests``.

    A file counts when its name has the form ``<digits>@<nick>.json``; the
    ``<nick>`` part is what gets returned. Files with any other name are
    skipped.

    Returns:
        A list of nicks, in the order ``glob`` yields the files.
    """
    users = []
    # os.path.join keeps the pattern valid on every platform instead of
    # relying on the Windows-only backslash separator.
    for filepath in glob(os.path.join('data', 'interests', '*')):
        # Match against the bare file name so the expression does not depend
        # on which path separator the platform uses.
        match = re.match(r'\d*@(.*)\.json', os.path.basename(filepath))
        if match:
            users.append(match.group(1))
    return users

def path_interests(user_nick):
    """Locate the interests file of ``user_nick`` inside ``data/interests``.

    Args:
        user_nick: Nick to look for; the file name must end in
            ``@<user_nick>.json``.

    Returns:
        The first matching path reported by ``glob``, or ``None`` when no
        file matches.
    """
    # Build the pattern with os.path.join so it also works where the path
    # separator is not a backslash.
    pattern = os.path.join('data', 'interests', '*@%s.json' % user_nick)
    matches = glob(pattern)
    return matches[0] if matches else None

def load_interests(user):
    """Load the stored interests of ``user``.

    Args:
        user: Nick whose interests file should be read.

    Returns:
        The decoded JSON content of the user's interests file.

    Raises:
        FileNotFoundError: If no interests file exists for ``user``.
    """
    filepath = path_interests(user)
    if filepath is None:
        # Fail with a clear message instead of handing None to the file reader.
        raise FileNotFoundError("No interests file found for user %r" % (user,))
    return read_json(filepath)

In [None]:
# Funciones para trabajar con las relaciones

@lru_cache(maxsize=None)
def load_relations():
    """Load ``data/relations/wikidata_relevant.json`` and cache the result.

    The file is parsed on the first call only; later calls return the very
    same object from the cache, so callers must not mutate it.

    Returns:
        The decoded JSON content of the relations file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # os.path.join keeps the path valid on platforms without a backslash
    # separator.
    filepath = os.path.join('data', 'relations', 'wikidata_relevant.json')
    # NOTE(review): the platform default encoding is kept as in the original;
    # confirm how the file was written before forcing encoding='utf8'.
    with open(filepath, 'r') as ficheroentrada:
        return json.load(ficheroentrada)

def indirect_relation1(interest1, interest2):
    """Tell whether either interest lists the other among its relations.

    Args:
        interest1: First interest identifier.
        interest2: Second interest identifier.

    Returns:
        ``True`` if ``interest2`` appears in the relations of ``interest1``
        or ``interest1`` appears in the relations of ``interest2``;
        ``False`` otherwise.
    """
    graph = load_relations()
    # Check the link in both directions, first interest1 -> interest2.
    for source, target in ((interest1, interest2), (interest2, interest1)):
        if source in graph and target in graph[source]:
            return True
    return False

def indirect_relation2(interest1, interest2):
    """Tell whether two interests share at least one related entry.

    Args:
        interest1: First interest identifier.
        interest2: Second interest identifier.

    Returns:
        ``True`` when both interests are present in the relations mapping and
        their relations have a common element; ``False`` otherwise.
    """
    graph = load_relations()
    # Without relations for both interests there is nothing to intersect.
    if interest1 not in graph or interest2 not in graph:
        return False
    shared = set(graph[interest1]) & set(graph[interest2])
    return bool(shared)

In [None]:
# Funciones para trabajar con los intereses

def summatory_property(interests, key):
    """Add up the ``key`` entry of every item in ``interests``.

    Args:
        interests: Iterable of subscriptable items (e.g. dicts).
        key: Key read from each item.

    Returns:
        The accumulated total, starting from ``0`` (so ``0`` for an empty
        iterable).
    """
    total = 0
    for value in map(itemgetter(key), interests):
        total += value
    return total

#@lru_cache(maxsize=1000000) # Como la caché crece indefinidamente hay que fijar un límite de entradas, pero el valor adecuado depende del hardware...
def interests_proximity(interest1, interest2):
    """Score how close two interests are.

    Args:
        interest1: First interest identifier.
        interest2: Second interest identifier.

    Returns:
        ``1.00`` when both interests are equal, ``0.75`` when
        ``indirect_relation1`` holds, ``0.50`` when ``indirect_relation2``
        holds, and ``0.00`` otherwise. The first matching rule wins.
    """
    if interest1 == interest2:
        return 1.00
    # Remaining rules, strongest first; each one is evaluated only if every
    # previous rule failed.
    ranked_checks = ((indirect_relation1, 0.75), (indirect_relation2, 0.50))
    for is_related, score in ranked_checks:
        if is_related(interest1, interest2):
            return score
    return 0.00

In [None]:
# Funciones para trabajar con las afinidades obtenidas de las relaciones

def path_affinities(user):
    """Derive the affinities file path of ``user`` from its interests path.

    The file name is kept as is and every ``interests`` in the directory part
    is replaced by ``affinities``.

    Args:
        user: Nick whose affinities path is wanted.

    Returns:
        The path of the user's affinities file.

    Raises:
        FileNotFoundError: If no interests file exists for ``user``.
    """
    filepath = path_interests(user)
    if filepath is None:
        # Fail with a clear message instead of an attribute error on None.
        raise FileNotFoundError("No interests file found for user %r" % (user,))
    directory, filename = os.path.split(filepath)
    # Only rewrite the directory part: a nick that itself contains
    # "interests" must keep its file name untouched.
    return os.path.join(directory.replace("interests", "affinities"), filename)

def save_affinities(affinities, user):
    """Store ``affinities`` for ``user``, ordered by descending ``'weight'``.

    Args:
        affinities: Iterable of mappings that each carry a ``'weight'`` entry.
        user: Nick whose affinities file is written.
    """
    ranked = list(affinities)
    # Stable sort: entries with equal weight keep their incoming order.
    ranked.sort(key=lambda entry: entry['weight'], reverse=True)
    save_json(path_affinities(user), ranked)

def load_affinities(user):
    """Read back the affinities stored for ``user``.

    Args:
        user: Nick whose affinities file is read.

    Returns:
        The decoded JSON content of the user's affinities file.
    """
    return read_json(path_affinities(user))

In [None]:
# Funciones de alto nivel para procesar las cuentas de los usuarios

def calculate_affinity(user_of_interest, user_to_compare):
    """Measure how well the interests of one user are covered by another.

    For each interest of ``user_of_interest`` the best match among the
    interests of ``user_to_compare`` is kept as its ``'proximity'``: the
    smaller of the two weights multiplied by ``interests_proximity`` of the
    two entities. The relative affinity is the sum of those proximities
    divided by the sum of the first user's weights.

    Args:
        user_of_interest: Nick whose interests are being scored.
        user_to_compare: Nick whose interests are matched against.

    Returns:
        A tuple ``(relative_affinity, interests)`` where ``interests`` are the
        first user's interests, each extended with a ``'proximity'`` entry and
        sorted by descending proximity. ``relative_affinity`` is ``0.0`` when
        the first user's weights add up to zero.
    """
    interests_user1 = load_interests(user_of_interest)
    interests_user2 = load_interests(user_to_compare)
    for interest_user1 in interests_user1:
        # default=0.0 covers a compared user with no interests, where max()
        # over an empty sequence would otherwise raise.
        interest_user1['proximity'] = max(
            (
                min(interest_user1['weight'], interest_user2['weight'])
                * interests_proximity(interest_user2['entity'], interest_user1['entity'])
                for interest_user2 in interests_user2
            ),
            default=0.0,
        )
    interests_user1 = sorted(interests_user1, key=itemgetter('proximity'), reverse=True)
    affinity = summatory_property(interests_user1, 'proximity')
    self_affinity = summatory_property(interests_user1, 'weight')
    # Guard the division: no interests (or only zero weights) means there is
    # nothing to be affine to.
    relative_affinity = affinity / self_affinity if self_affinity else 0.0
    return relative_affinity, interests_user1

def cross_all_users():
    """Compute and store the affinity of every user towards every user.

    For each ordered pair of users the relative affinity is printed. Each
    user's full list of affinities (unrounded) is written through
    ``save_affinities``.

    Returns:
        A square ``numpy`` array where entry ``[i][j]`` is the affinity of
        user ``i`` towards user ``j`` rounded to two decimals, with users
        indexed in the order returned by ``list_users_obtained``.
    """
    users = list_users_obtained()
    size = len(users)
    matrix = numpy.zeros((size, size))
    for row, source in enumerate(users):
        ranking = []
        for column, target in enumerate(users):
            score = calculate_affinity(source, target)[0]
            print('@'+source, '-->', '@'+target, score)
            # The matrix holds the rounded value; the saved list keeps full
            # precision.
            matrix[row, column] = round(score, 2)
            ranking.append({'user': target, 'weight': score})
        save_affinities(ranking, source)
    return matrix

In [None]:
# Try computing the affinity between any two users

# Wall-clock start, used to report how long the call takes
t0 = time.time()
affinity, interests = calculate_affinity("BarackObama", "realDonaldTrump")
print(time.time()-t0, 'seconds') # The first call takes longer because it loads the relations dictionary into memory

print(affinity)
# Bare last expression so the notebook displays the scored interests
interests

In [None]:
# Wall-clock start, used to report how long the full crossing takes
t0 = time.time()

print(cross_all_users()) # Try computing the affinity between all users
# The result should be a matrix with ones on the main diagonal, because a user's affinity with themselves is maximal
# If the matrix tends to be symmetric about the main diagonal, the affinity is reciprocated

print(time.time()-t0, 'seconds')