In [0]:
! pip install networkx
! pip install plotly
! pip install colorlover



# Introduction to NetworkX

In [0]:
import networkx as nx

## Creating a graph

In [0]:
G = nx.Graph()

In [0]:
G.add_node(234)
G.add_node("hello")
G.add_edge(234,"hello")

In [0]:
print("Nodes:", G.nodes())
print("Edges:", G.edges())

Nodes: [234, 'hello']
Edges: [(234, 'hello')]


In [0]:
G[234]

AtlasView({'hello': {}})

In [0]:
G['hello']

AtlasView({234: {}})

## Properties on edges

In [0]:
# Option 1: create the bare edge, then write both properties into the
# attribute dict that G[u][v] exposes for that edge.
G.add_edge('Alice', 'Bob')
G['Alice']['Bob'].update(know=10, friends=5)

In [0]:
# Option 2
G.add_edges_from([('Alice', 'Bob', {'know': 10, 'friends': 5})])

In [0]:
print("Nodes:", G.nodes())
print("Edges:", G.edges())

Nodes: [234, 'hello', 'Alice', 'Bob']
Edges: [(234, 'hello'), ('Alice', 'Bob')]


In [0]:
G['Bob']

AtlasView({'Alice': {'know': 10, 'friends': 5}})

In [0]:
G['Alice']

AtlasView({'Bob': {'know': 10, 'friends': 5}})

In [0]:
G['Bob']['Alice']['know'] += 1

In [0]:
G['Alice']

AtlasView({'Bob': {'know': 11, 'friends': 5}})

## Analyzing graphs

In [0]:
# Extend the Alice/Bob component with a few more people so the analysis
# cells below have something to traverse.
G.add_edges_from([
    ('Alice', 'Carlos'),
    ('Carlos', 'Dave'),
    ('Dave', 'Bob'),
    ('Alice', 'Eve'),
])

In [0]:
components = nx.connected_components(G)
list(components)

[{234, 'hello'}, {'Alice', 'Bob', 'Carlos', 'Dave', 'Eve'}]

In [0]:
nx.degree(G)

DegreeView({234: 1, 'hello': 1, 'Alice': 3, 'Bob': 2, 'Carlos': 2, 'Dave': 2, 'Eve': 1})

In [0]:
nx.degree(G,'Bob')

2

In [0]:
nx.has_path(G, 'Alice', 'Dave')

True

In [0]:
nx.has_path(G, 'Alice', 'hello')

False

In [0]:
nx.shortest_path(G, 'Alice', 'Dave')

['Alice', 'Carlos', 'Dave']

## Centrality

In [0]:
nx.degree_centrality(G)

{234: 0.16666666666666666,
 'Alice': 0.5,
 'Bob': 0.3333333333333333,
 'Carlos': 0.3333333333333333,
 'Dave': 0.3333333333333333,
 'Eve': 0.16666666666666666,
 'hello': 0.16666666666666666}

In [0]:
nx.betweenness_centrality(G)

{234: 0.0,
 'Alice': 0.23333333333333334,
 'Bob': 0.06666666666666667,
 'Carlos': 0.06666666666666667,
 'Dave': 0.03333333333333333,
 'Eve': 0.0,
 'hello': 0.0}

# Loading Twitter Data

In [0]:
import pandas as pd

In [0]:
# from google.colab import files
# uploaded = files.upload()

In [0]:
df = pd.read_csv("tweets2009-06-0115.csv.zip", sep='\t', compression='zip')

In [0]:
print("Num of rows:", df.shape[0])

Num of rows: 3437690


In [0]:
df.head()

Unnamed: 0,date,user,tweet
0,2009-06-01 21:43:59,burtonator,No Post Title
1,2009-06-01 21:47:23,burtonator,No Post Title
2,2009-06-02 01:15:44,burtonator,No Post Title
3,2009-06-02 05:17:52,burtonator,No Post Title
4,2009-06-02 23:58:25,burtonator,No Post Title


## Finding the most common hashtags

In [0]:
from collections import Counter

In [0]:
# Join every tweet into one string, normalise each whitespace-separated token
# (trim surrounding punctuation, lowercase), keep the tokens that start with
# "#", and count how often each one occurs.
allTweets = df["tweet"].str.cat(sep=' ')
tweetWords = list(map(lambda token: token.strip(""" ,.:'\";""").lower(), allTweets.split()))
hashTags = list(filter(lambda token: token.startswith("#"), tweetWords))
hashTagsCounter = Counter()
hashTagsCounter.update(hashTags)

In [0]:
hashTagsCounter.most_common(100)

[('#iranelection', 26853),
 ('#followfriday', 16400),
 ('#jobs', 13322),
 ('#iremember', 11057),
 ('#spymaster', 10587),
 ('#ff', 10446),
 ('#squarespace', 9198),
 ('#tcot', 7691),
 ('#fb', 6107),
 ('#cnnfail', 4451),
 ('#11thcommandment', 3429),
 ('#jtv', 3317),
 ('#140mafia', 3144),
 ('#iran', 2935),
 ('#', 2895),
 ('#news', 2837),
 ('#quote', 2750),
 ('#vampirebite', 2634),
 ('#1', 2587),
 ('#bsb', 2433),
 ('#tweetmyjobs', 2086),
 ('#iphone', 1697),
 ('#lastfm', 1599),
 ('#mp2', 1589),
 ('#niley', 1528),
 ('#music', 1489),
 ('#p2', 1439),
 ('#follow', 1390),
 ('#pawpawty', 1305),
 ('#hhrs', 1256),
 ('#fail', 1246),
 ('#twitter', 1216),
 ('#tlot', 1214),
 ('#facebook', 1177),
 ('#sgp', 1151),
 ('#mashchat', 1143),
 ('#tinychat', 1111),
 ('#2', 1107),
 ('#digg', 1102),
 ('#gop', 1009),
 ('#phish', 1001),
 ('#mlb', 962),
 ('#travel', 932),
 ('#bonnaroo', 887),
 ('#twitpocalypse', 879),
 ('#iranelections', 857),
 ('#rt', 856),
 ('#zensursula', 811),
 ('#jamlegend', 790),
 ('#quotes', 75

In [0]:
redsoxTag = df[df["tweet"].str.lower().str.contains("#redsox", na=False)].copy()

In [0]:
def addMentionedColumn(df):
    """Add a "mentioned" column listing the users @-mentioned in each tweet.

    The frame is modified in place and nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a "tweet" column. Each string value is split on whitespace;
        tokens are stripped of surrounding punctuation and lowercased, and
        those starting with "@" are treated as mentions.

    Notes
    -----
    Each tweet's list contains every mentioned user once, in order of first
    appearance. Non-string tweets (e.g. NaN) and a bare "@" yield no mention.
    """

    def mentionsList(txt):
        # Missing tweets come through as NaN/None and have no text to scan.
        if not isinstance(txt, str):
            return []
        allWords = [word.strip(""" ,.:'\";""").lower() for word in txt.split()]
        allNames = [word.strip("@") for word in allWords if word.startswith("@")]
        # dict.fromkeys de-duplicates while keeping first-appearance order
        # (a set would make the order arbitrary); the `if name` filter drops
        # the empty string left behind by a token that was only "@".
        return [name for name in dict.fromkeys(allNames) if name]

    df["mentioned"] = df["tweet"].apply(mentionsList)

In [0]:
addMentionedColumn(redsoxTag)

In [0]:
redsoxTag.head(10)

Unnamed: 0,date,user,tweet,mentioned
39,2009-06-07 02:07:42,redsoxtweets,"#redsox Extra Bases: Sox win, 8-1: The Rangers...",[]
20460,2009-06-11 17:25:28,jlopezdr,Tenemos los Yankees Pisao!!!!!!! #redsox jajaj...,[]
34514,2009-06-11 17:50:42,jeffespo,Love the chant for A-Rod last night ~ http://b...,[]
40110,2009-06-11 17:59:18,ram0na,On the way to Boston to see the Sox/Yanks! #re...,[]
51121,2009-06-11 18:15:23,strully,RT @GlobeSox: Extra Bases blog: Sox draft Mich...,[globesox]
70355,2009-06-11 18:47:22,timmah1,#redsox draft grandson of Carl Yastrzemski htt...,[]
95221,2009-06-11 19:26:41,suzieprof,Yaz is terrific even if he 's a sox- RT @hash_...,"[hash_mlb, wbz]"
112251,2009-06-11 19:52:51,beantowntwits,RT @GlobeSoxExtra Bases blog: Tonight's lineup...,[globesoxextra]
122744,2009-06-11 20:13:52,phineaspoe,RT @LilPecan: Michael Yastrzemski (#1098) draf...,[lilpecan]
154427,2009-06-11 21:08:20,iandrinkwater,Can you imagine how glorious it will be if Pau...,[]


In [0]:
def mentionGraph(df):
    """Build an undirected graph of who mentions whom.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a "user" column (the tweet author) and a "mentioned" column
        holding an iterable of mentioned user names for each row.

    Returns
    -------
    networkx.Graph
        One edge per (author, mentioned user) pair. The edge attribute
        "numberMentions" counts how many times that pair was seen.
    """
    g = nx.Graph()

    # Select the two needed columns by name rather than unpacking every field
    # of itertuples(): the positional unpack breaks as soon as the frame has
    # an extra column or a different column order.
    for user, mentionedUsers in zip(df["user"], df["mentioned"]):
        for mentionedUser in mentionedUsers:
            if g.has_edge(user, mentionedUser):
                g[user][mentionedUser]["numberMentions"] += 1
            else:
                g.add_edge(user, mentionedUser, numberMentions=1)

    return g

In [0]:
redsoxGraph = mentionGraph(redsoxTag)

In [0]:
print("# nodes:", len(redsoxGraph.nodes()))
print("# edges:", len(redsoxGraph.edges()))

# nodes: 346
# edges: 339


In [0]:
redsoxGraph['shelley1005']

AtlasView({'geekwearsprada': {'numberMentions': 2}, 'fenway': {'numberMentions': 1}, 'm_weber': {'numberMentions': 1}, 'hackswithhaggs': {'numberMentions': 1}, 'globesox': {'numberMentions': 1}, 'sweettweet74': {'numberMentions': 1}, 'soxjetsfan': {'numberMentions': 2}})

# Visualize Mention Graph

In [0]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import *
import plotly.graph_objects as go
init_notebook_mode(connected=True)

In [0]:
def configure_plotly_browser_state():
  """Display an HTML snippet that loads require.js and configures its paths.

  The snippet maps the `plotly` module name to the Plotly CDN bundle and
  `base` to '/static/base'. The notebook calls this right before each plot.
  """
  import IPython
  requirejsSetup = '''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-latest.min.js?noext',
            },
          });
        </script>
        '''
  htmlPayload = IPython.core.display.HTML(requirejsSetup)
  display(htmlPayload)

### Generate random positions for nodes and store them in the node attribute "pos"

In [0]:
import random
def addRandomPositions(graph, seed=None):
    """Give every node a random 2-D position, stored in its "pos" attribute.

    Each coordinate is drawn independently from a Gaussian with mean 0 and
    standard deviation 10. The graph is modified in place.

    Parameters
    ----------
    graph : networkx.Graph
        Graph whose nodes receive a "pos" attribute holding an (x, y) tuple.
    seed : optional
        When given, positions come from a private `random.Random(seed)`, so
        the layout is reproducible and the global random state is untouched.
        When None (the default), the module-level `random` generator is used.
    """
    rng = random if seed is None else random.Random(seed)
    posDict = {node: (rng.gauss(0, 10), rng.gauss(0, 10)) for node in graph.nodes()}
    nx.set_node_attributes(graph, name="pos", values=posDict)

In [0]:
addRandomPositions(redsoxGraph)

In [0]:
nx.get_node_attributes(redsoxGraph, 'pos')['shelley1005']

(12.827224229542004, 6.203333763380204)

### Visualize using Plot.ly scatter plots

In [0]:
def plotNetwork(graph):
    """Plot `graph` inline with uniform grey edges and fixed-size grey nodes.

    Parameters
    ----------
    graph : networkx.Graph
        Every node must have a "pos" attribute holding an (x, y) pair.
        No edge attributes are required.

    Notes
    -----
    All edges share one style, so they are drawn as a single line trace, and
    all nodes as a single marker trace. This avoids creating one trace per
    edge and per node.
    """
    # Edge segments are concatenated into one polyline; a None entry after
    # each segment breaks the line so unrelated edges are not joined.
    edgeX, edgeY = [], []
    for (node1, node2) in graph.edges():
        x0, y0 = graph.nodes[node1]['pos']
        x1, y1 = graph.nodes[node2]['pos']
        edgeX += [x0, x1, None]
        edgeY += [y0, y1, None]

    edgeTrace = Scatter(
            x=edgeX,
            y=edgeY,
            hoverinfo='none',
            mode='lines',
            line=scatter.Line(width=1, color='#888'))

    nodeX, nodeY = [], []
    for node in graph.nodes():
        xPos, yPos = graph.nodes[node]['pos']
        nodeX.append(xPos)
        nodeY.append(yPos)

    nodeTrace = Scatter(
            x=nodeX,
            y=nodeY,
            hoverinfo='none',
            mode='markers',
            marker=dict(
                color="#888",
                size=10,
                line=dict(width=2)))

    # Edges first, so the node markers are drawn on top of the lines.
    layout = Layout(showlegend=False)
    fig = Figure(data=[edgeTrace, nodeTrace], layout=layout)
    iplot(fig, show_link=False)

In [0]:
redsoxGraph.nodes['strully']['pos']

(20.263762529075183, 8.807043003010266)

In [0]:
configure_plotly_browser_state()
plotNetwork(redsoxGraph)

### Visualize using node size and edge width

In [0]:
def plotNetworkSize(graph):
    """Plot `graph` inline, encoding mention counts and degree visually.

    Each edge is drawn with a line width equal to its "numberMentions"
    attribute, and each node with a marker size of twice its degree. Every
    node must have a "pos" attribute holding an (x, y) pair.
    """
    edgeTraces = []
    for source, target, attrs in graph.edges(data=True):
        xStart, yStart = graph.nodes[source]['pos']
        xEnd, yEnd = graph.nodes[target]['pos']
        # Line width is a per-trace setting, so each edge gets its own trace.
        edgeStyle = scatter.Line(width=attrs['numberMentions'], color='#888')
        edgeTraces.append(Scatter(
            x=[xStart, xEnd],
            y=[yStart, yEnd],
            hoverinfo='none',
            mode='lines',
            line=edgeStyle))

    nodeTraces = []
    for name in graph.nodes():
        xPos, yPos = graph.nodes[name]['pos']
        markerStyle = dict(
            color="#888",
            size=2 * nx.degree(graph, name),
            line=dict(width=2))
        nodeTraces.append(Scatter(
            x=[xPos],
            y=[yPos],
            hoverinfo='none',
            mode='markers',
            marker=markerStyle))

    # Edges precede nodes so markers are drawn over the lines.
    fig = Figure(data=edgeTraces + nodeTraces, layout=Layout(showlegend=False))
    iplot(fig, show_link=False)

In [0]:
configure_plotly_browser_state()
plotNetworkSize(redsoxGraph)

## Using *Colorlover* for colors

In [0]:
import colorlover as cl
from IPython.display import HTML

In [0]:
HTML(cl.to_html( cl.scales['9'] ))

In [0]:
# map purd color scale to 300 cells
purd = cl.scales['9']['seq']['PuRd']
purd300 = cl.interp(purd, 300)
HTML(cl.to_html(purd300))

## Adding color and text based on centrality

In [0]:
def plotNetworkSizeColor(graph):
    """Plot `graph` inline with nodes coloured by closeness centrality.

    Edge width is the edge's "numberMentions" attribute and node size is
    twice the node's degree. Node colour is taken from the notebook-level
    `purd300` palette, scaled linearly between the lowest and highest
    closeness centrality in `graph`. Hovering a node shows its name and
    centrality. Every node must have a "pos" attribute holding an (x, y) pair.
    """
    # Centrality must come from the graph being plotted, not from the
    # notebook-level redsoxGraph, or other graphs get wrong or missing values.
    closenessCentr = nx.closeness_centrality(graph)
    maxCentr = max(closenessCentr.values(), default=0.0)
    minCentr = min(closenessCentr.values(), default=0.0)
    centrRange = maxCentr - minCentr
    # Index of the last palette entry; avoids hard-coding the palette length.
    lastColor = len(purd300) - 1

    scatters=[]

    for (node1, node2) in graph.edges():
        x0, y0 = graph.nodes[node1]['pos']
        x1, y1 = graph.nodes[node2]['pos']
        edgeWidth = graph[node1][node2]['numberMentions']
        s = Scatter(
                x=[x0, x1],
                y=[y0, y1],
                hoverinfo='none',
                mode='lines',
                line=scatter.Line(width=edgeWidth ,color='#888'))
        scatters.append(s)

    for node in graph.nodes():
        nodeCentr = closenessCentr[node]
        if centrRange > 0:
            nodeColor = int(lastColor*(nodeCentr-minCentr)/centrRange)
        else:
            # All nodes share one centrality value: nothing to scale, and
            # dividing by the zero range would raise ZeroDivisionError.
            nodeColor = 0
        xPos, yPos = graph.nodes[node]['pos']
        s = Scatter(
                x=[xPos],
                y=[yPos],
                # Plotly hover text breaks lines on <br>, not on "\n".
                text="User: %s<br>Closeness: %.3f" % (node, nodeCentr),
                hoverinfo='text',
                mode='markers',
                marker=dict(
                    color=purd300[nodeColor],
                    size=nx.degree(graph,node)*2,
                    line=dict(width=2)))
        scatters.append(s)

    layout = Layout(showlegend=False)
    fig = Figure(data=scatters, layout=layout)
    iplot(fig, show_link=False)

In [0]:
configure_plotly_browser_state()
plotNetworkSizeColor(redsoxGraph)

## Using NetworkX layouts

In [0]:
def applyLayout(graph, layoutFunc):
    """Overwrite each node's "pos" attribute with a computed layout.

    `layoutFunc` is called with `graph` and must return a mapping from node
    to position (as the `nx.*_layout` functions used in this notebook do).
    The graph is modified in place.
    """
    nx.set_node_attributes(graph, values=layoutFunc(graph), name="pos")

### Spring layout

In [0]:
# Copy the mention graph, replace the copy's "pos" values with the
# coordinates returned by nx.spring_layout, and plot the copy.
redsoxGraphSpring = redsoxGraph.copy()
applyLayout(redsoxGraphSpring, nx.spring_layout)
configure_plotly_browser_state()
plotNetworkSizeColor(redsoxGraphSpring)

### Random layout

In [0]:
# Copy the mention graph, replace the copy's "pos" values with the
# coordinates returned by nx.random_layout, and plot the copy.
redsoxGraphRandom = redsoxGraph.copy()
applyLayout(redsoxGraphRandom, nx.random_layout)
configure_plotly_browser_state()
plotNetworkSizeColor(redsoxGraphRandom)

### Circular layout

In [0]:
# Copy the mention graph, replace the copy's "pos" values with the
# coordinates returned by nx.circular_layout, and plot the copy.
redsoxGraphCircular = redsoxGraph.copy()
applyLayout(redsoxGraphCircular, nx.circular_layout)
configure_plotly_browser_state()
plotNetworkSizeColor(redsoxGraphCircular)

### Spectral layout

In [0]:
# Copy the mention graph, replace the copy's "pos" values with the
# coordinates returned by nx.spectral_layout, and plot the copy.
redsoxGraphSpectral = redsoxGraph.copy()
applyLayout(redsoxGraphSpectral, nx.spectral_layout)
configure_plotly_browser_state()
plotNetworkSizeColor(redsoxGraphSpectral)

In [0]:
import plotly
plotly.__version__

'4.1.1'