In [1]:
import json
import sqlite3


In [4]:
data_cache_prefix = 'data_cache/'

database_file = data_cache_prefix+"author.db"

In [5]:
def create_author_table(db_file):
    sql = """
        CREATE TABLE IF NOT EXISTS authors (
            id integer PRIMARY KEY,
            n text NOT NULL,
            a text,
            t text
        );
        """
    with sqlite3.connect(db_file) as conn:
        conn.cursor().execute(sql)


def create_paper_table(db_file):
    sql = """
        CREATE TABLE IF NOT EXISTS papers (
            id integer PRIMARY KEY,
            title text NOT NULL,
            tag text
        );
        """
    with sqlite3.connect(db_file) as conn:
        conn.cursor().execute(sql)

def create_author_papers_table(db_file):
    sql = """
        CREATE TABLE IF NOT EXISTS author_papers (
            id integer PRIMARY KEY,
            author_id text,
            paper_id text,
            FOREIGN KEY(author_id) REFERENCES authors(id),
            FOREIGN KEY(paper_id) REFERENCES papers(id)
        );
        """
    with sqlite3.connect(db_file) as conn:
        conn.cursor().execute(sql)

def create_coauths_table(db_file):
    sql = """
        CREATE TABLE IF NOT EXISTS coauths (
            id integer PRIMARY KEY,
            author1_id text,
            author2_id text,
            count integer,
            FOREIGN KEY(author1_id) REFERENCES authors(id),
            FOREIGN KEY(author2_id) REFERENCES authors(id)
        );
        """
    with sqlite3.connect(db_file) as conn:
        conn.cursor().execute(sql)

create_author_table(database_file)
create_paper_table(database_file)
create_author_papers_table(database_file)
create_coauths_table(database_file)

In [6]:
def insert_author(conn, author):
    sql = """
    INSERT INTO authors (id,n,a,t) VALUES (?,?,?,?)
    """
    cur = conn.cursor()
    cur.execute(sql, author)
    return cur.lastrowid

def insert_paper(conn, paper):
    sql = """
    INSERT INTO papers (id,title,tag) VALUES (?,?,?)
    """
    cur = conn.cursor()
    cur.execute(sql, paper)
    return cur.lastrowid

def insert_author_paper(conn, author_paper):
    sql = """
    INSERT INTO author_papers (author_id,paper_id) VALUES (?,?)
    """
    cur = conn.cursor()
    cur.execute(sql, author_paper)
    return cur.lastrowid


def insert_coauth(conn, coauth):
    sql = """
    INSERT INTO coauths (author1_id,author2_id,count) VALUES (?,?,?)
    """
    cur = conn.cursor()
    cur.execute(sql, coauth)
    return cur.lastrowid

In [7]:
def concat_author(conn, id, a, t):
    sql = """
    SELECT a,t FROM authors WHERE id=?
    """
    cur = conn.cursor()
    a_,t_ = cur.execute(sql, [id]).fetchone()
    if a is not None:
        if a_ is not None:
            a_ = set(a_)
            a_.add(a)
        else:
            a_ = set(a)
    t_ = set(t_)
    t_.update(t)
    sql = """
    UPDATE authors SET
    a=?,
    t=?
    WHERE id=?
    """
    print(a_, t_)
    cur.execute(sql, [a_,t_,id])
    return cur.lastrowid

with sqlite3.connect(database_file) as conn:
    concat_author(conn, 285492075,'123', ['285492075'])

{'3', '1', '2'} {'A', 'V', 'h', 'l', 'm', 'r', 'x', 'C', ']', 'o', 'e', 'c', 'y', 'p', 'f', '[', ')', 'g', 'b', 'v', 'P', '285492075', 'u', ',', 'a', 't', 'n', '(', 'i', '"', ' ', 's'}


InterfaceError: Error binding parameter 0 - probably unsupported type.

In [6]:
authors = {}
papers = {}
coauths = {}
author_papers = []

steps = 0
with sqlite3.connect(database_file) as conn:
    with open('data/mathPaper.txt') as file:
        for line in file:
            if steps == 10000:
                break
            if steps % 10000 == 0:
                print(steps)
            steps += 1
            paper = json.loads(line)
            insert_paper(conn, (paper['id'], paper['title'], json.dumps(paper['tag'])))
            paper['authors'].sort(key=lambda it: it['id'])
            for k, author in enumerate(paper['authors']):
                if not (author['id'] in coauths):
                    insert_author(conn, (author['id'], author['name'], (json.dumps([author['org']]) if 'org' in author else None), json.dumps([list(paper['tag'])])))
                    coauths[author['id']] = {}
                else:
                    concat_author(conn, author['id'], (author['org'] if 'org' in author else None), list(paper['tag']))
                insert_author_paper(conn, (author['id'], paper['id']))
                for co in paper['authors'][k+1:]:
                    if co['id'] in coauths[author['id']]:
                        coauths[author['id']][co['id']] += 1
                    else:
                        coauths[author['id']][co['id']] = 1

0


In [7]:
import networkx as nx
from lib.cache import *

In [8]:
G = nx.Graph()
for author_id1 in coauths:
    for author_id2 in coauths[author_id1]:
        G.add_edge(author_id1, author_id2, weight=coauths[author_id1][author_id2])

save_graph(G, 'whole_graph')

In [9]:
# if G.number_of_nodes() < 100:
# nx.draw(G)

In [10]:
# G.number_of_nodes()

In [11]:
# from pyvis.network import Network
#
# g = Network(height=800, width=800, notebook=True)
# g.toggle_hide_edges_on_drag(True)
# g.barnes_hut()
# g.from_nx(G)
# g.show('tmp.html')