In [None]:
# Dependencias

import os
import io
import re
import json
import time
import numpy
import unidecode
from glob import glob
from operator import itemgetter
from functools import lru_cache
from py2neo import Graph, Node, Relationship

In [None]:
# Funciones generales

def get_config(key):
    """Return the value stored under ``key`` in ``config.json``.

    The file is resolved relative to the current working directory and is
    re-read on every call.

    Args:
        key: Top-level key to look up in the configuration object.

    Returns:
        The value associated with ``key``, or ``None`` when the key is absent.
    """
    # The context manager closes the handle deterministically; decoding as
    # UTF-8 matches read_json instead of depending on the platform locale.
    with open('config.json', encoding='utf8') as jsonfile:
        config = json.load(jsonfile)
    return config.get(key)

def read_json(filepath):
    """Read a UTF-8 encoded JSON file and return the decoded value.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The Python object produced by decoding the file's JSON content.
    """
    # ``with`` guarantees the handle is closed even if decoding fails.
    with io.open(filepath, encoding='utf8') as jsonfile:
        return json.load(jsonfile)

def save_json(filepath, content):
    """Serialize ``content`` as JSON and write it to ``filepath`` in UTF-8.

    The parent directory is created when ``filepath`` has a directory part.
    Non-ASCII characters are written as-is (``ensure_ascii=False``).

    Args:
        filepath: Destination path of the JSON file.
        content: JSON-serializable object to store.

    Raises:
        TypeError: If ``content`` cannot be serialized; in that case any
            existing file at ``filepath`` is left untouched.
    """
    path = os.path.dirname(filepath)
    if path:
        os.makedirs(path, exist_ok=True)
    # Serialize before opening: opening in 'w' mode truncates the target, so
    # a serialization error must surface before the old content is lost.
    jsoncontent = json.dumps(content, ensure_ascii=False)
    with io.open(filepath, 'w', encoding='utf8') as jsonfile:
        jsonfile.write(jsoncontent)

In [None]:
# Funciones para trabajar con los usuarios

def list_users_obtained():
    """List the nicks of every account file found under ``data/accounts``.

    Account files are expected to be named ``<digits>@<nick>.json`` (the
    digits are presumably the numeric account id -- TODO confirm). Entries
    that do not follow that pattern are skipped.

    Returns:
        A list with one nick per matching file, in the order ``glob`` yields
        the files.
    """
    users = []
    # os.path.join keeps the lookup working on every platform instead of
    # hard-coding the Windows separator.
    for file in glob(os.path.join("data", "accounts", "*")):
        # Match against the bare file name so the pattern does not depend on
        # the path separator.
        match = re.match(r'\d*@(.*)\.json', os.path.basename(file))
        if match:
            users.append(match.group(1))
    return users

def path_users(user_nick):
    """Locate the account file of ``user_nick`` under ``data/accounts``.

    Args:
        user_nick: Nick that follows the ``@`` in the file name.

    Returns:
        The first path matching ``*@<user_nick>.json`` as returned by
        ``glob``, or ``None`` when no file matches.
    """
    # Build the pattern with os.path.join so it is not tied to the Windows
    # path separator.
    pattern = os.path.join("data", "accounts", "*@%s.json" % user_nick)
    folders = glob(pattern)
    return folders[0] if folders else None

def load_user_info(user):
    """Load the stored account information of ``user``.

    The file is located with ``path_users`` and decoded with ``read_json``.
    When no account file exists ``path_users`` yields ``None`` and that value
    is handed to ``read_json`` unchanged.

    Args:
        user: Nick of the user whose account file should be read.

    Returns:
        The decoded JSON content of the account file.
    """
    return read_json(path_users(user))

In [None]:
# Funciones para trabajar con los intereses

def path_interests(user_nick):
    """Locate the interests file of ``user_nick`` under ``data/interests``.

    Args:
        user_nick: Nick that follows the ``@`` in the file name.

    Returns:
        The first path matching ``*@<user_nick>.json`` as returned by
        ``glob``, or ``None`` when no file matches.
    """
    # Build the pattern with os.path.join so it is not tied to the Windows
    # path separator.
    pattern = os.path.join("data", "interests", "*@%s.json" % user_nick)
    folders = glob(pattern)
    return folders[0] if folders else None

def load_interests(user):
    """Load the stored interests of ``user``.

    The file is located with ``path_interests`` and decoded with
    ``read_json``. When no interests file exists ``path_interests`` yields
    ``None`` and that value is handed to ``read_json`` unchanged.

    Args:
        user: Nick of the user whose interests file should be read.

    Returns:
        The decoded JSON content of the interests file.
    """
    return read_json(path_interests(user))

def parse_interests(interests):
    """Round each interest's weight and classify it into an interest level.

    Weights are rounded to two decimals before being compared against the
    thresholds: ``>= 0.50`` -> ``INTEREST_3``, ``>= 0.20`` -> ``INTEREST_2``,
    ``>= 0.10`` -> ``INTEREST_1``, anything lower -> ``INTEREST_0``.

    Args:
        interests: Iterable of mappings with the keys ``'entity'``,
            ``'count'`` and ``'weight'``.

    Returns:
        A list with one dict per input interest, holding ``'entity'``,
        ``'count'``, the rounded ``'weight'`` and the assigned ``'tag'``.
    """
    parsed = []
    for interest in interests:
        weight = round(interest['weight'], 2)
        if weight >= 0.50:
            tag = 'INTEREST_3'
        elif weight >= 0.20:
            tag = 'INTEREST_2'
        elif weight >= 0.10:
            tag = 'INTEREST_1'
        else:
            tag = 'INTEREST_0'
        # Every branch assigns a tag, so no interest is ever discarded here;
        # the former ``if tag:`` guard could never be false.
        parsed.append({
            'entity': interest['entity'],
            'count': interest['count'],
            'weight': weight,
            'tag': tag,
        })
    return parsed

def get_interests(user):
    """Return the parsed interests of ``user`` whose count is greater than 2.

    Args:
        user: Nick of the user whose interests are loaded.

    Returns:
        The dicts produced by ``parse_interests`` for this user, restricted
        to those with ``'count' > 2``.
    """
    candidates = parse_interests(load_interests(user))
    return [candidate for candidate in candidates if candidate['count'] > 2]

In [None]:
# Funciones para trabajar con las afinidades

def path_affinities(user):
    """Locate the affinities file of ``user`` under ``data/affinities``.

    Args:
        user: Nick that follows the ``@`` in the file name.

    Returns:
        The first path matching ``*@<user>.json`` as returned by ``glob``,
        or ``None`` when no file matches.
    """
    # Build the pattern with os.path.join so it is not tied to the Windows
    # path separator.
    pattern = os.path.join("data", "affinities", "*@%s.json" % user)
    folders = glob(pattern)
    return folders[0] if folders else None

def load_affinities(user):
    """Load the stored affinities of ``user``.

    The file is located with ``path_affinities`` and decoded with
    ``read_json``. When no affinities file exists ``path_affinities`` yields
    ``None`` and that value is handed to ``read_json`` unchanged.

    Args:
        user: Nick of the user whose affinities file should be read.

    Returns:
        The decoded JSON content of the affinities file.
    """
    return read_json(path_affinities(user))

def parse_affinities(affinities):
    """Round each affinity's weight and keep those that reach a level.

    Weights are rounded to two decimals and compared against the thresholds
    from highest to lowest: ``>= 0.30`` -> ``AFFINITY_3``, ``>= 0.20`` ->
    ``AFFINITY_2``, ``>= 0.10`` -> ``AFFINITY_1``. Affinities below every
    threshold are dropped.

    Args:
        affinities: Iterable of mappings with the keys ``'user'`` and
            ``'weight'``.

    Returns:
        A list of dicts with ``'user'``, the rounded ``'weight'`` and the
        assigned ``'tag'``, in input order.
    """
    # Ordered from the most to the least demanding level; first hit wins.
    levels = (
        (0.30, 'AFFINITY_3'),
        (0.20, 'AFFINITY_2'),
        (0.10, 'AFFINITY_1'),
    )
    kept = []
    for entry in affinities:
        rounded = round(entry['weight'], 2)
        label = next((name for floor, name in levels if rounded >= floor), None)
        if label is None:
            continue
        kept.append({'user': entry['user'], 'weight': rounded, 'tag': label})
    return kept

def get_affinities(user):
    """Return the parsed affinities of ``user``, excluding self-affinity.

    Args:
        user: Nick of the user whose affinities are loaded.

    Returns:
        The dicts produced by ``parse_affinities`` for this user, without
        any entry whose ``'user'`` equals ``user`` itself.
    """
    candidates = parse_affinities(load_affinities(user))
    return [candidate for candidate in candidates if candidate['user'] != user]

In [None]:
# Funciones para trabajar con las entidades y sus relaciones

@lru_cache(maxsize=None)
def load_relations():
    """Load ``data/relations/wikidata_relevant.json`` and cache the result.

    The file is read only on the first call; ``lru_cache`` hands the same
    decoded object back on every later call, so callers must not mutate it.

    Returns:
        The decoded JSON content of the relations file.
    """
    # os.path.join avoids the hard-coded Windows separator; decoding as UTF-8
    # matches read_json instead of depending on the platform locale.
    path = os.path.join('data', 'relations', 'wikidata_relevant.json')
    with open(path, 'r', encoding='utf8') as ficheroentrada:
        return json.load(ficheroentrada)

def get_relations_entities():
    """Collect every entity mentioned in the relations data.

    Both the keys of the relations mapping and the items listed under each
    key are included.

    Returns:
        A list of the distinct entities, in no particular order.
    """
    relations = load_relations()
    unique = set()
    for source in relations:
        unique.add(source)
        unique.update(relations[source])
    return list(unique)

def get_interests_entities():
    """Collect the entities that appear in any obtained user's interests.

    Only the interests returned by ``get_interests`` are considered.

    Returns:
        A list of the distinct entities, in no particular order.
    """
    unique = set()
    for nick in list_users_obtained():
        unique.update(item['entity'] for item in get_interests(nick))
    return list(unique)

def get_all_entities():
    """Return the union of the relation entities and the interest entities.

    Returns:
        A list of the distinct entities from ``get_relations_entities`` and
        ``get_interests_entities``, in no particular order.
    """
    combined = set(get_relations_entities())
    combined.update(get_interests_entities())
    return list(combined)

def indirect_entities(entity1, entity2):
    """Return the entities related to both ``entity1`` and ``entity2``.

    Args:
        entity1: First entity; looked up as a key of the relations data.
        entity2: Second entity; looked up as a key of the relations data.

    Returns:
        A list with the items that appear under both keys, or an empty list
        when either entity is not a key of the relations data.
    """
    relations = load_relations()
    if entity1 not in relations or entity2 not in relations:
        return []
    shared = set(relations[entity1]).intersection(relations[entity2])
    return list(shared)

def get_indirect_relations():
    """Map each interest entity to the entities it shares with other interests.

    For every pair of distinct interest entities, the entities returned by
    ``indirect_entities`` for that pair are recorded under both members of
    the pair.

    Returns:
        A dict whose keys are the interest entities that share at least one
        related entity with another interest entity, and whose values are
        lists of the distinct shared entities (in no particular order).
    """
    shared = {}
    entities = get_interests_entities()
    # indirect_entities is a symmetric set intersection, so each unordered
    # pair only needs to be evaluated once.
    for index, entity1 in enumerate(entities):
        for entity2 in entities[index + 1:]:
            if entity1 == entity2:
                continue
            coincidences = indirect_entities(entity1, entity2)
            if not coincidences:
                continue
            # Each entity gets its own set. Storing the same list object
            # under both keys would let a later in-place extend on one entity
            # leak unrelated entries into the other.
            shared.setdefault(entity1, set()).update(coincidences)
            shared.setdefault(entity2, set()).update(coincidences)
    return {entity: list(found) for entity, found in shared.items()}

In [None]:
# Funciones para definir los elementos de neo4j

@lru_cache(maxsize=None)
def define_user(user):
    """Build the ``User`` node for ``user`` from the stored account info.

    The result is memoized per nick by ``lru_cache``, so repeated calls with
    the same nick return the same ``Node`` object.

    Args:
        user: Nick of the user; stored as the node's ``nick`` property.

    Returns:
        A ``Node`` labelled ``"User"`` carrying the nick and the profile
        fields copied from the account file.
    """
    profile = load_user_info(user)
    user_node = Node("User", nick=user)
    copied_fields = (
        'name',
        'created',
        'language',
        'verified',
        'followers',
        'following',
        'tweets',
        'favourites',
    )
    for field in copied_fields:
        user_node[field] = profile[field]
    return user_node

@lru_cache(maxsize=None)
def define_entity(entity):
    """Build the ``Entity`` node whose ``label`` property is ``entity``.

    The result is memoized per label by ``lru_cache``, so repeated calls with
    the same label return the same ``Node`` object.

    Args:
        entity: Value stored in the node's ``label`` property.

    Returns:
        A ``Node`` labelled ``"Entity"``.
    """
    entity_node = Node("Entity", label=entity)
    return entity_node

def define_affinity(user, affinity):
    """Build the relationship that links ``user`` to an affine user.

    Args:
        user: Nick of the user the relationship starts from.
        affinity: Mapping with the keys ``'user'`` (nick of the other user),
            ``'tag'`` (relationship type) and ``'weight'``.

    Returns:
        A ``Relationship`` from the first user's node to the second one,
        typed with the affinity tag and carrying a ``weight`` property.
    """
    source = define_user(user)
    target = define_user(affinity['user'])
    link = Relationship(source, affinity['tag'], target, weight=affinity['weight'])
    return link

def define_interest(user, interest):
    """Build the relationship that links ``user`` to an entity of interest.

    Args:
        user: Nick of the user the relationship starts from.
        interest: Mapping with the keys ``'entity'``, ``'tag'``
            (relationship type), ``'weight'`` and ``'count'``.

    Returns:
        A ``Relationship`` from the user's node to the entity's node, typed
        with the interest tag and carrying ``weight`` and ``count``
        properties.
    """
    user_node = define_user(user)
    entity_node = define_entity(interest['entity'])
    link = Relationship(
        user_node,
        interest['tag'],
        entity_node,
        weight=interest['weight'],
        count=interest['count'],
    )
    return link

def define_relation(entity1, entity2):
    """Build a ``RELATION`` relationship between two entities.

    Args:
        entity1: Label of the entity the relationship starts from.
        entity2: Label of the entity the relationship points to.

    Returns:
        A ``Relationship`` of type ``'RELATION'`` from the first entity's
        node to the second entity's node.
    """
    source = define_entity(entity1)
    target = define_entity(entity2)
    link = Relationship(source, 'RELATION', target)
    return link

In [None]:
# Funciones para interactuar con el servidor de neo4j

def neo4j_server():
    """Build a py2neo ``Graph`` from the ``"neo4j"`` section of the config.

    The section must provide the keys ``'host'``, ``'port'``, ``'user'`` and
    ``'password'``.

    Returns:
        The ``Graph`` object created with those settings.
    """
    settings = get_config("neo4j")
    connection = {
        'host': settings['host'],
        'port': settings['port'],
        'user': settings['user'],
        'password': settings['password'],
    }
    return Graph(**connection)

def delete_all():
    """Remove the relationships and nodes this notebook manages.

    Relationships (affinities, interests, relations) are deleted before the
    ``User`` and ``Entity`` nodes -- presumably because Neo4j will not DELETE
    a node that still has relationships.
    """
    graph = neo4j_server()
    statements = (
        "MATCH ()-[r:AFFINITY_1|AFFINITY_2|AFFINITY_3]->() DELETE r",
        "MATCH ()-[r:INTEREST_0|INTEREST_1|INTEREST_2|INTEREST_3]->() DELETE r",
        "MATCH ()-[r:RELATION]->() DELETE r",
        "MATCH (n:User) DELETE n",
        "MATCH (n:Entity) DELETE n",
    )
    for statement in statements:
        graph.run(statement)

def insert_data():
    """Merge every affinity, interest and indirect relation into the graph.

    For each obtained user, the affinity relationships are merged first and
    the interest relationships next. Once all users are processed, one
    ``RELATION`` relationship is merged for every entry returned by
    ``get_indirect_relations``.
    """
    graph = neo4j_server()
    for nick in list_users_obtained():
        for affinity in get_affinities(nick):
            graph.merge(define_affinity(nick, affinity))
        for interest in get_interests(nick):
            graph.merge(define_interest(nick, interest))
    indirect = get_indirect_relations()
    for source, targets in indirect.items():
        for target in targets:
            graph.merge(define_relation(source, target))
        

In [None]:
t0 = time.time()

# Delete everything before inserting so the new values replace the old ones.
delete_all()
# Insert the users, their affinities and interests, and the entities that relate them.
insert_data()

elapsed = time.time() - t0
print(elapsed, 'seconds')