# Creating graphs

Graphs are created from a csv file with at least three columns: two with words and one with a frequency.

In [1]:
import os
import sqlite3
from contextlib import closing

import networkx as nx
import pandas as pd

import dhlab.graph_networkx_louvain as gnl

# Functions 

In [2]:
def query(db, sql, param=()):
    """Run one SQL statement against a SQLite database and return all rows.

    Parameters
    ----------
    db : str
        Path of the SQLite database, passed unchanged to ``sqlite3.connect``.
    sql : str
        The SQL statement to execute; may contain ``?`` placeholders.
    param : sequence, optional
        Values bound to the placeholders in ``sql`` (default: no parameters).

    Returns
    -------
    list of tuple
        Everything ``fetchall()`` yields for the executed statement.
    """
    # Using the connection itself as a context manager only commits or rolls
    # back the transaction; it does NOT close the connection. ``closing``
    # makes sure the connection is released after every call.
    with closing(sqlite3.connect(db)) as con:
        with con:  # commit on success, roll back on error
            return con.execute(sql, param).fetchall()

In [3]:
def check_graph_first(db, x, top=20):
    """Return rows of ``word_graph`` whose ``first`` column equals ``x``.

    The rows are ordered by descending ``pmi`` and limited to ``top`` rows.
    """
    sql = "select * from word_graph where first = ? order by pmi desc limit ?"
    bindings = (x, top)
    return query(db, sql, bindings)
def check_graph_second(db, x, top=20):
    """Return rows of ``word_graph`` whose ``second`` column equals ``x``.

    The rows are ordered by descending ``pmi`` and limited to ``top`` rows.
    """
    sql = "select * from word_graph where second = ? order by pmi desc limit ?"
    bindings = (x, top)
    return query(db, sql, bindings)

# Querying word graphs

The table of word pairs is computed from the ngram table and is named `word_graph`. It is indexed and ready to use.

The columns of the `word_graph` table are `first`, `second`, `freq` and `pmi`, where `freq` contains the original frequencies and `pmi` is the computed association value, given by the following formula:

$$\frac{\textrm{freq}(x,y)^2}{\textrm{freq}(x)*\textrm{freq}(y)}$$
 
The co-occurrence frequency is squared to give it a higher weight. The marginals freq(x) are aggregated from the word positions. Each word $w$ is associated with two frequencies: one from its occurrences in the first column, and one from its occurrences in the second column.



In [7]:
# Location of the SQLite database that holds the word_graph table.
# The default path is machine-specific; set the WORD_GRAPH_DB environment
# variable to point at another copy instead of editing the notebook.
word_graphs = os.environ.get(
    "WORD_GRAPH_DB",
    "c://Users/yoons/Documents/word_graph_database/book_news_word_pairs.db",
)

In [35]:
def expand(db, x, top = 30):
    """Collect the word-pair rows around ``x`` and around each of its neighbours.

    First the rows where ``x`` is the first word and the rows where ``x`` is
    the second word are fetched (at most ``top`` each, see
    ``check_graph_first`` / ``check_graph_second``). The same two lookups are
    then repeated for every neighbouring word found in those rows.

    Parameters
    ----------
    db : path of the SQLite database.
    x : the word to expand from.
    top : maximum number of rows fetched per lookup.

    Returns
    -------
    list
        All fetched rows, concatenated. The list may contain the same row more
        than once (e.g. a pair of two neighbours is found from both sides).
    """
    as_first = check_graph_first(db, x, top)
    as_second = check_graph_second(db, x, top)
    # row[1] / row[0] are taken to be the `second` / `first` columns of
    # word_graph -- assumes `select *` returns first, second, ... in that order.
    # The parentheses matter: `-` binds tighter than `|`, so without them `x`
    # would only be removed from the second set and could be looked up again.
    neighbours = ({row[1] for row in as_first} | {row[0] for row in as_second}) - {x}
    rows = as_first + as_second
    for word in neighbours:
        rows += check_graph_first(db, word, top) + check_graph_second(db, word, top)
    return rows

def expandall(db, xs, top=30):
    """Expand every word in ``xs`` and return the distinct rows as a list.

    Each word is passed to ``expand`` with the given ``db`` and ``top``; rows
    returned for several words (or several times for one word) are kept once.
    """
    distinct_rows = set()
    for seed_word in xs:
        for row in expand(db, seed_word, top):
            distinct_rows.add(row)
    return list(distinct_rows)
            
            


In [59]:
w = ['sorg']
# Keep only the first three fields of each row and use them as
# (source, target, weight) triples for the directed graph.
edges = [
    tuple(row[:3])
    for row in expandall(word_graphs, w, top=20)
]
g = nx.DiGraph()
g.add_weighted_edges_from(edges)
# Restrict the graph to nodes that have at least one outgoing edge.
g = g.subgraph([node for node, out_deg in g.out_degree() if out_deg > 0])

gnl.show_graph(g, spread=0.8)

In [60]:
# Print the communities of g: each output line is labelled with two member
# words followed by the full member list.
# NOTE(review): presumably Louvain communities, going by the module name -- confirm in dhlab.
gnl.show_communities(g)

savn-sakn :  savn, sakn, nød, sut, sorgreaksjoner, Sorg, slit, naud, elendighet, armod, tomrom, harm, trengsel, længsel, fattigdom, sult, svolt, tomhet, einsemd, krise, fortviling, hunger, tap, sorger, lengsler, kjærleik, lideiser, lidelser, otte, strev, krise-, forsakelse, kriser, Tapsopplevelser, krig, gleda, sorg-, traumer, våde, Kriser, togn, sedløyse, lidingar, svevnløyse, Sult, sæla, frygd, misvækst, barn, Hyst, taps-, gjerdene, Syt, Myst, lyst, forsaking, arbeidsløshet, Lengsel, misvekst, Tap, stiene, Krise-, nod, tap-, Personbeundring, Krise

sorg-skuffelse :  sorg, skuffelse, fortvilelse, smerte, tristhet, sinne, vemod, sjokk, Glede, utopi, sorgarbeid, Sjokk, angst, raseri, redsel, irritasjon, bitterhet, frustrasjon, frykt, lengsel, vantro, ensomhet, oppgitthet, ergrelse, melankoli, håp, undring, mismot, vantru, håpløshet, tungsinn, virkelighet, nedtrykthet, blodtap, nedstemthet, übehag, avmakt, gråhet, desperasjon, ærgrelse, hevelse, besøkstjeneste, misnøye, vanntro, Fryd, ny

In [61]:
# Print the clique groupings of g; each output line starts with a pair of
# integers such as (3, 1) followed by the member words.
# NOTE(review): the meaning of the integer pair is not visible here -- confirm in dhlab.
gnl.show_cliques(g)

(3, 1) sykdom, ergrelse, angst, forstemthet, forundring, fornedrelse, forbauselse, opsikt, engstelse, sut, sorger, takksemd, bestyrtelse, irritasjon, vemod, kvide, forargelse, oppgitthet, tristhet, sult, raseri, lengsel, frustrasjon, forferdelse, slit, Sorg, bedrøvelse, bitterhet, gremmelse, håpløshet, forbitrelse, motgang, sinne, synd, redsel, længsel, naud, ulykke, bekymring, nød, indignasjon, frykt, død, undring, melankoli, sakn, overraskelse, skam, sorg, smerte, mismot, jammer, saknad, lettelse, vantro, glede, harme, gagn, oppsikt, gavn, humor, ærgrelse, beklagelse, avsky, sjokk, lidelse, fortvilelse, tomhet, Glede, skuffelse, argskap, forbløffelse, harm, savn, elendighet, ensomhet, Sjokk, armod, tungsinn, takknemlighet

(3, 2) krise-, sorgreaksjoner, krise, sorg, Sorg, død, kriser, ,, sorgarbeid

(4, 1) sykdom, ulykke, angst, bekymring, forundring, forbauselse, nød, indignasjon, frykt, undring, forargelse, sakn, oppgitthet, vemod, overraskelse, sorg, smerte, tristhet, mismot, rase