#### Load data then sort it by source and target subreddits

In [None]:
import pandas as pd
data = pd.read_csv("title_tab_separated.csv")
data.head()

data_sorted = data.sort_values(by=['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT'], inplace=False, ascending=False)
data_sorted.head()

#### Make positive and negative sentiment dataframes

In [None]:
neg_matrix = data_sorted['LINK_SENTIMENT'] < 0
pos_matrix = data_sorted['LINK_SENTIMENT'] > 0
data_sorted[neg_matrix]
data_sorted[pos_matrix]

matrix1_neg = data_sorted[neg_matrix].groupby(['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT']).LINK_SENTIMENT.sum()
matrix1_pos = data_sorted[pos_matrix].groupby(['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT']).LINK_SENTIMENT.sum()

#### the_donald -> target # of link sentiments positive 

In [None]:
print(matrix1_pos['the_donald'].to_string())

#### the_donald -> target # of link sentiments negative 

In [None]:
print(matrix1_neg['the_donald'].to_string())

#### Make dataframe for total sentiment: positive plus sentiment

In [None]:
matrix_total = data_sorted.groupby(['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT']).LINK_SENTIMENT.sum()

#### the_donald -> target # of link sentiments total - negative + postive

In [None]:
print(matrix_total['the_donald'].to_string())

#### Save positive and negatives CSVs

In [None]:
matrix1_pos.to_csv('matrix1_pos.csv')
matrix1_neg.to_csv('matrix1_neg.csv')

#### Load neg CSV that was just saved

In [None]:
neg_data = pd.read_csv("matrix1_neg.csv")
neg_data.head()

pos_data = pd.read_csv("matrix1_pos.csv")
pos_data.head()

In [None]:
print(neg_data.columns)
print(pos_data.columns)

In [None]:
neg_data.rename(columns={'1200isjerky':'source',
                          '1200isplenty':'target',
                          '-1':'weight'}, 
                 inplace=True)

pos_data.rename(columns={'07thexpansion':'source',
                          'visualnovels':'target',
                          '1':'weight'}, 
                 inplace=True)

In [None]:
pos_data.to_csv('matrix1_pos.csv')
neg_data.to_csv('matrix1_neg.csv')

#### display graph - jgraph

In [1]:
import jgraph
from py2neo import Graph, Node, Relationship
import sys
graph = Graph(password="password")

In [4]:
data = graph.run("MATCH (s:Subreddit)-[l:LINK]->(t:Subreddit) \
        WHERE l.weight < -40 \
        RETURN s.id, t.id LIMIT 20")
data = [tuple(x) for x in data]

fig = jgraph.draw(data , shader="lambert", default_node_color=0x383294, show_save=True)


In [None]:
data = graph.run("MATCH allShortestPaths((u:Subreddit {id:'mapporn'})-[*]-(me:Subreddit {id:'alpharetta'})) RETURN u.id, me.id")

data = [tuple(x) for x in data]

jgraph.draw(data , shader="lambert", default_node_color=0x383294)

#### Graph - python-igraph

In [5]:
from igraph import Graph as IGraph

query = '''
MATCH (c1:Subreddit)-[r:LINK]->(c2:Subreddit)
RETURN c1.id, c2.id, r.weight AS weight
'''

ig = IGraph.TupleList(graph.run(query), weights=True)

In [9]:
pg = ig.pagerank()
pgvs = []
for p in zip(ig.vs, pg):
    print(p)
    pgvs.append({"id": p[0]["name"], "pg": p[1]})
pgvs

write_clusters_query = '''
UNWIND {nodes} AS n
MATCH (c:Subreddit) WHERE c.id = n.id
SET c.pagerank = n.pg
'''

graph.run(write_clusters_query, nodes=pgvs)

(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 0, {'name': 'botsrights'}), 0.005629379427749086)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 1, {'name': '1200isjerky'}), 5.13627052488518e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 2, {'name': 'botrights'}), 0.000427717798411102)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 3, {'name': 'mylittleandysonic1'}), 0.00043314911202638586)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 4, {'name': '195'}), 7.457310872918094e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 5, {'name': 'ooer'}), 0.0002354988060110195)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 6, {'name': 'newscape'}), 4.254336239913031e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 7, {'name': '2007scape'}), 0.0005276823135513022)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 8, {'name': 'games'}), 0.0010504362623916625)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 9, {'name': 'shitredditsay

(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 1496, {'name': 'isrconservativeracist'}), 7.021877900991349e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 1497, {'name': 'twominutehate'}), 2.6889403728983885e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 1498, {'name': 'bestofconservative'}), 6.394549207598274e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 1499, {'name': 'conservativemeta'}), 3.775869557081049e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 1500, {'name': 'cuckservatism'}), 2.6889403728983885e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 1501, {'name': 'seattleparody'}), 4.132202629127921e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 1502, {'name': 'exseattlemoderators'}), 3.0426430102467316e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 1503, {'name': 'sips'}), 0.00019145837714410827)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 1504, {'name': 'citiesskylines'}), 6.6075099750714

(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 2392, {'name': 'upvotedforstarwars'}), 4.083730230085281e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 2393, {'name': 'fuckindave'}), 2.8882944597387053e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 2394, {'name': 'rapevan'}), 2.8882944597387053e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 2395, {'name': 'randomshitposts'}), 2.8882944597387053e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 2396, {'name': 'snowboarding'}), 6.557647251956904e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 2397, {'name': 'subredditsashashtags'}), 8.670213307180567e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 2398, {'name': 'madisonwi'}), 8.43270688488169e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 2399, {'name': 'advertising'}), 2.8882944597387053e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 2400, {'name': 'tworedditorsonecup'}), 7.74459671114223e-05)
(igra

(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 3778, {'name': 'gloriouscrtmasterrace'}), 4.3884294689994055e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 3779, {'name': 'rwby'}), 0.00015314222487476634)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 3780, {'name': 'sonyvegas'}), 2.901120476989831e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 3781, {'name': 'bulletbarry'}), 2.901120476989831e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 3782, {'name': 'memeeconomy'}), 9.98024636699493e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 3783, {'name': 'burritomasterrace'}), 2.901120476989831e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 3784, {'name': 'narcolepticnarwhal'}), 2.901120476989831e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 3785, {'name': 'scamslayers'}), 2.901120476989831e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 3786, {'name': 'redditscrabble'}), 2.901120476989831e-05)
(igraph.Ve

(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 5438, {'name': 'thatdudewithstories'}), 3.213364169839095e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 5439, {'name': 'imchrishansen_'}), 3.213364169839095e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 5440, {'name': 'ilokit'}), 3.213364169839095e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 5441, {'name': 'bestofwritingprompts'}), 3.213364169839095e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 5442, {'name': 'somethingmagical'}), 3.213364169839095e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 5443, {'name': 'lostinwriting'}), 3.213364169839095e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 5444, {'name': 'serhm'}), 3.213364169839095e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 5445, {'name': 'thekommanderkronicles'}), 3.213364169839095e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 5446, {'name': 'leoduhvinci'}), 3.213364169839095e-05)
(i

(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 6623, {'name': 'northkoreanews'}), 5.8997090047401655e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 6624, {'name': '52book'}), 3.4649423741371e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 6625, {'name': 'borderlands2'}), 5.3417383405157964e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 6626, {'name': 'cannabiscultivation'}), 8.4311544469286e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 6627, {'name': 'spacebuckets'}), 0.0001597481895206021)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 6628, {'name': 'bostontrees'}), 8.431154446915998e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 6629, {'name': 'wackytictacs'}), 5.3921406438737146e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 6630, {'name': 'copypasta'}), 0.0001085489155405267)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 6631, {'name': 'gentlemangabers'}), 2.8537234877987314e-05)
(igraph.Vertex(<i

(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 8417, {'name': 'islamicstate'}), 2.7213873763228764e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 8418, {'name': 'ripplers'}), 0.00010945709281972676)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 8419, {'name': 'cryptomarkets'}), 0.00010945709281964314)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 8420, {'name': 'roacirclejerk'}), 0.00010945709281972676)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 8421, {'name': 'rivalsofaether'}), 0.00010945709281964314)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 8422, {'name': 'robowars'}), 0.00010945709281972676)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 8423, {'name': 'shittyengineering'}), 0.00010945709281964314)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 8424, {'name': 'dyinglight'}), 3.129611947851571e-05)
(igraph.Vertex(<igraph.Graph object at 0x10b39b228>, 8425, {'name': 'indiedev'}), 3.129611947851571e-05)
(igraph.Vertex(<i

<py2neo.database.Cursor at 0x10a7f2630>

In [10]:
pageRank = '''
    MATCH (n:Subreddit)RETURN n.id AS name, n.pagerank AS pagerank ORDER BY pagerank DESC LIMIT 10
'''

x = graph.run(pageRank)

print(x.to_data_frame())

             name  pagerank
0  subredditdrama  0.027142
1          bestof  0.017377
2       askreddit  0.013125
3           drama  0.010053
4           funny  0.006496
5   todayilearned  0.005688
6      the_donald  0.005686
7      botsrights  0.005629
8            pics  0.005583
9       worldnews  0.005298


In [12]:
clusters = IGraph.community_walktrap(ig, weights="weight").as_clustering()

nodes = [{"id": node["name"]} for node in ig.vs]
for node in nodes:
    idx = ig.vs.find(name=node["id"]).index
    node["community"] = clusters.membership[idx]

write_clusters_query = '''
UNWIND {nodes} AS n
MATCH (c:Subreddit) WHERE c.id = n.id
SET c.community = toInt(n.community)
'''

graph.run(write_clusters_query, nodes=nodes)


<py2neo.database.Cursor at 0x11b6032b0>

In [17]:
community = '''
    MATCH (c:Subreddit) \
    WITH c.community AS cluster, collect(c.id) AS  members \
    RETURN cluster, members ORDER BY cluster ASC
'''

x2 = graph.run(community)

print(x2.to_data_frame())

      cluster                                            members
0           0  [18bfriendzonest, 1990boys, 19thworldproblems,...
1           1                                      [1200isjerky]
2           2                                        [botrights]
3           3                               [mylittleandysonic1]
4           4                                              [195]
...       ...                                                ...
6254     6254                                      [yellowstone]
6255     6255                            [wildernessbackpacking]
6256     6256                                [songwriterscircle]
6257     6257                                          [gunporn]
6258     6258                                            [motog]

[6259 rows x 2 columns]
