In [1]:
'''libraries list with help showing the version of the libraries being used in this nodebook'''
libraries = []

'''Datasets, arrays and files '''
import pandas as pd
import numpy as np
import pickle
import json
libraries.append('pandas')
libraries.append('numpy')
libraries.append('pickle')
libraries.append('json')

'''Following progress'''
from tqdm.notebook import tqdm
libraries.append('tqdm')

'''Plots'''
%matplotlib notebook

import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpldatacursor import datacursor
from mpldatacursor import HighlightingDataCursor
import seaborn as sns
plt.style.use('seaborn-paper')
plt.rcParams["figure.facecolor"] = "w"


import dash
import dash_core_components as dcc
import dash_html_components as html

import plotly.graph_objs as go
from colour import Color
from datetime import datetime
from textwrap import dedent as d
from pyvis.network import Network
libraries.append('dash')
libraries.append('plotly')
libraries.append('colour')
libraries.append('textwrap')
libraries.append('pyvis')
libraries.append('matplotlib')
libraries.append('mpldatacursor')
libraries.append('seaborn')
                 
'''Networks and community detection'''
import networkx as nx
import igraph as ig
from sklearn import cluster
libraries.append('networkx')
libraries.append('igraph')


'''Pthon and library version'''
import types
import pkg_resources
import sys
from platform import python_version

# Build the Dash application used for the interactive "Transaction Network" page,
# styled with a single externally hosted stylesheet.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Title assigned to the Dash app object.
app.title = "Transaction Network"

# NOTE(review): inside a notebook kernel __name__ is '__main__', so this branch runs
# and starts the server from the setup cell. The recorded output of this cell ends
# with "OSError: [Errno 48] Address already in use", i.e. port 8050 was already taken
# when the cell was executed. Consider moving the server start to its own cell.
if __name__ == '__main__':
    #app.run_server(debug=True)
    app.run_server(debug=True, host = '127.0.0.1')


The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html


Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


OSError: [Errno 48] Address already in use

In [9]:
# Diagnostic cell: look up the address information this machine's hostname resolves to.
# NOTE(review): this import lives outside the notebook's import cell.
import socket
# The return value is discarded here (it is not the cell's last expression).
socket.gethostname()

# Port 0 is passed through to getaddrinfo; the recorded output shows it echoed back
# in the resulting (address, port) tuple.
port = 0
# Restrict the lookup to IPv4 (AF_INET) stream sockets (SOCK_STREAM).
socket.getaddrinfo(socket.gethostname(), port, socket.AF_INET, socket.SOCK_STREAM)

[(<AddressFamily.AF_INET: 2>,
  <SocketKind.SOCK_STREAM: 1>,
  6,
  '',
  ('10.0.0.182', 0))]

In [10]:
'''To display version of Software being used'''
# Interpreter details first: the full build string, then the bare version number.
print('Version of python installed: {}'.format(sys.version))
print('Version of python being used: {}'.format(python_version()))
print('\nNon-built in libraries being used:')

# Walk the installed distributions and report only those whose lower-cased
# project name was registered in `libraries`.
for dist in pkg_resources.working_set:
    name = dist.project_name
    if name.lower() not in libraries:
        continue
    print('{}, version {}'.format(name, dist.version))

Version of python installed: 3.8.10 (default, May 19 2021, 11:01:55) 
[Clang 10.0.0 ]
Version of python being used: 3.8.10

Non-built in libraries being used:
tqdm, version 4.62.3
seaborn, version 0.11.2
pyvis, version 0.1.9
plotly, version 5.1.0
pandas, version 1.3.3
numpy, version 1.19.2
networkx, version 2.6.3
mpldatacursor, version 0.7.1
matplotlib, version 3.4.3
dash, version 2.0.0
colour, version 0.1.5


In [11]:
def add_edges(G, nt, val):
    '''Copy every edge of ``G`` into the pyvis network ``nt`` with a scaled ``value``.

    Args:
        G: graph whose ``edges(data=True)`` yields ``(source, target, attrs)``.
        nt: object exposing ``add_edge(source, target, value=...)``.
        val: edge measure to plot; one of ``'weight'``, ``'correlation'`` or
            ``'similarity'``.

    Raises:
        ValueError: if ``val`` is not one of the supported measures.
        KeyError: if an edge does not carry the requested measure.

    Only the requested measure is read from each edge, so edges that lack the
    other two attributes are accepted. A measure stored as a nested mapping
    (e.g. ``{'correlation': -0.21}``) is unwrapped before scaling.
    '''
    # Divisor applied to each measure before it is handed over as the edge
    # 'value': weight / 10, correlation / (1/10), similarity / 5.
    divisors = {'weight': 10, 'correlation': 1/10, 'similarity': 5}
    if val not in divisors:
        raise ValueError(
            "val must be one of {}, got {!r}".format(sorted(divisors), val)
        )
    n = divisors[val]

    for e1, e2, dic in G.edges(data=True):
        v = dic[val]
        # Some graphs store the measure wrapped in a dict keyed by its own name.
        if isinstance(v, dict):
            v = v[val]
        nt.add_edge(e1, e2, value=(v / n))

In [20]:
'''To retrieve the networks information'''
# Load the three pickled networkx graphs from the generated-data folder.
# NOTE(review): nx.read_gpickle is pickle-based, so only load trusted files. It is
# also reported as deprecated in networkx 2.6 and removed in 3.0 (confirm against
# the migration guide before upgrading networkx).
G_01 = nx.read_gpickle("../_generated_data/Net_1v3.gpickle")
G_02 = nx.read_gpickle("../_generated_data/Net_2v3.gpickle")
G_03 = nx.read_gpickle("../_generated_data/Net_3v3.gpickle")

In [3]:
# Inspect the attribute dict stored on one edge. In the recorded output 'weight' and
# 'similarity' are numbers while 'correlation' is itself a dict ({'correlation': ...}).
G_01['ig_street']['covid19'] #To check the measures of an edge

{'weight': 0.003265655453962751,
 'similarity': 9,
 'correlation': {'correlation': -0.21981545969429406}}

In [4]:
# Show each node of G_01 together with its 'group' attribute.
# NOTE(review): this prints the whole node view; the recorded output is very long.
G_01.nodes(data='group')

NodeDataView({'corona': 0, 'canonphotographer': 0, 'emptystreets': 1, 'coronaqueensnyc': 0, 'canon77d': 0, 'unedited': 0, 'emptynyc': 1, 'nycshutdown': 1, 'coronaqueens': 0, 'quarantine': 2, 'visitLosAngeles': 2, 'foodbesat': 2, 'eatLA': 2, 'feastagrem': 2, 'lovefood': 2, 'onthetable': 2, 'seriouseats': 2, 'stayhomesafer': 2, 'yumYum': 2, 'Delicious': 2, 'Tasty': 2, 'qurantinelife': 11, 'coronavirus': 4, 'covid19': 5, 'viralvideos': 4, 'coronaviruspandemic': 6, 'lilbaby': 4, 'lilwayne': 4, 'kjmg': 4, 'growinsilence': 4, 'mindonamillion': 4, 'yourpage': 4, 'sequester': 6, 'abandoned': 6, 'covid_19': 5, 'bw': 5, 'bnw': 5, 'cityscape': 5, 'lensculture': 5, 'walkingthedog': 7, 'streetphotographer': 5, 'urbanstreetphotogallery': 5, 'acros': 5, 'photodocumentary': 5, 'ig_street': 5, 'urbanstreetphotography': 5, 'spicollective': 5, 'classicnegative': 5, 'w8time': 5, 'stayhome': 1, 'thisis51': 8, 'db365': 8, '30daysofmovement': 8, 'sweatyselfie': 8, 'PAUSElifeNYC': 8, 'hamont': 1, 'crackilton'

In [24]:
# Show every node of G_03 with its full attribute dict. The recorded output includes
# a 'color' entry next to 'group'; generate_html writes that attribute onto the graph.
G_03.nodes(data=True)

NodeDataView({'covid_19': {'group': 1, 'color': '#f7755a'}, 'urbanstreetphotography': {'group': 1, 'color': '#f7755a'}, 'Coronavirus': {'group': 6, 'color': '#91a531'}, 'MayThe4thBeWithYou': {'group': 6, 'color': '#91a531'}, 'faceshield': {'group': 6, 'color': '#91a531'}, 'SaveTheWorld': {'group': 6, 'color': '#91a531'}, 'BillionShieldsChallenge': {'group': 6, 'color': '#91a531'}, 'ExOWorldNow': {'group': 6, 'color': '#91a531'}, 'BillionShields': {'group': 6, 'color': '#91a531'}, 'Bottles2Shields': {'group': 6, 'color': '#91a531'}, 'Masks4All': {'group': 6, 'color': '#91a531'}, 'TogetherAtHome': {'group': 6, 'color': '#91a531'}, 'mit': {'group': 6, 'color': '#91a531'}, 'LOCKDOWN2020': {'group': 6, 'color': '#91a531'}, 'trending': {'group': 20, 'color': '#f560e1'}, 'lilbaby': {'group': 20, 'color': '#f560e1'}, 'lilwayne': {'group': 20, 'color': '#f560e1'}, 'kjmg': {'group': 20, 'color': '#f560e1'}, 'growinsilence': {'group': 20, 'color': '#f560e1'}, 'mindonamillion': {'group': 20, 'colo

In [5]:
# Gather the distinct 'group' labels carried by the nodes of G_01 and
# display the largest one.
n_groups = {label for _node, label in set(G_01.nodes(data='group'))}
max(n_groups)

37

In [6]:
# Try out a 6-colour "husl" palette converted to hex strings.
palette = sns.color_palette("husl", 6).as_hex()
# This bare expression is not the last statement of the cell, so it displays nothing.
palette
# Display the first colour of the palette.
palette[0]

'#f77189'

In [12]:
def generate_html(val, G, id_):
    '''Write an interactive pyvis HTML rendering of network ``G``.

    Args:
        val: edge measure used as the plotted edge value; one of
            ``'weight'``, ``'similarity'`` or ``'correlation'``.
        G: networkx graph whose nodes carry an integer ``'group'`` attribute.
        id_: suffix used to name the output file.

    Side effects:
        * Every node of ``G`` gets a ``'color'`` attribute (hex string) chosen
          from a "husl" palette indexed by the node's group.
        * The file ``../_generated_data/Net_<val><id_>.html`` is written.

    Raises:
        ValueError: if ``val`` is not a supported measure, or if some node has
            no ``'group'`` attribute.
    '''
    # Validate first so that a bad argument fails before G is modified.
    measures = ('weight', 'similarity', 'correlation')
    if val not in measures:
        raise ValueError(
            "val must be one of {}, got {!r}".format(list(measures), val)
        )

    groups = dict(G.nodes(data='group'))
    ungrouped = [node for node, group in groups.items() if group is None]
    if ungrouped:
        raise ValueError(
            "nodes without a 'group' attribute: {!r}".format(ungrouped)
        )

    # One palette entry per group id in 0..max_group; an empty graph needs none.
    n = max(groups.values(), default=-1) + 1
    palette = sns.color_palette("husl", n).as_hex() if n else []

    for node, node_group in groups.items():
        G.nodes[node]['color'] = palette[node_group]

    # Node-only copy: the edges are added separately below so that each one
    # carries the selected measure as its 'value'.
    nx_graph = nx.create_empty_copy(G)

    nt = Network('700px', '700px', notebook=False)

    # Layout solver (nt.barnes_hut() is the alternative tried previously).
    nt.force_atlas_2based()
    nt.from_nx(nx_graph)
    add_edges(G, nt, val)  # add edges with value selected as property
    nt.show_buttons(filter_=['physics'])
    nt.show('../_generated_data/Net_'+val+id_+'.html')


In [21]:
# Render G_01 with edge 'weight' as the plotted value -> ../_generated_data/Net_weightt01v3.html
generate_html('weight', G_01, 't01v3')

In [22]:
# Render G_02 with edge 'weight' as the plotted value -> ../_generated_data/Net_weightt02v3.html
generate_html('weight', G_02, 't02v3')

In [23]:
# Render G_03 with edge 'weight' as the plotted value -> ../_generated_data/Net_weightt03v3.html
generate_html('weight', G_03, 't03v3')

In [16]:
def dic_to_partition(dic):
    '''Turn a node -> group mapping into a partition.

    The result is a list holding one set of nodes per distinct group value
    found in ``dic``; the list follows the iteration order of the set of
    group values.'''
    return [
        {member for member, label in dic.items() if label == wanted}
        for wanted in set(dic.values())
    ]

def find_hashtag_community(hashtag, algorithm):
    '''Return the community (set of nodes) that contains ``hashtag``.

    The partitions are read from ``Partitions.pickle`` in the current working
    directory. The file holds a dict keyed by algorithm name, each value being
    a node -> community-label mapping.

    Args:
        hashtag: node whose community is wanted.
        algorithm: key of the saved partition to use.

    Returns:
        The set of all nodes sharing ``hashtag``'s community label.

    Raises:
        KeyError: if ``algorithm`` or ``hashtag`` is not present in the file.

    Nodes are selected by comparing labels directly. Using the label as a
    position in a list of communities is only correct when the labels are
    exactly 0..k-1 and happen to be listed in that order.
    '''
    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # with a Partitions.pickle file produced by a trusted source.
    with open('Partitions.pickle', 'rb') as handle:
        partitions = pickle.load(handle)
    labels = partitions[algorithm]
    target = labels[hashtag]
    return {node for node, label in labels.items() if label == target}

In [17]:
def draw_graph3(networkx_graph,notebook=True,output_filename='graph.html',show_buttons=True,only_physics_buttons=True):
    """
    This function accepts a networkx graph object,
    converts it to a pyvis network object preserving its node and edge attributes,
    and both returns and saves a dynamic network visualization.

    The input graph is left untouched: when an edge's 'weight' is reused as its
    'value', that is done on a copy of the edge attributes.

    Valid node attributes include:
        "size", "value", "title", "x", "y", "label", "color".

        (For more info: https://pyvis.readthedocs.io/en/latest/documentation.html#pyvis.network.Network.add_node)

    Valid edge attributes include:
        "arrowStrikethrough", "hidden", "physics", "title", "value", "width"

        (For more info: https://pyvis.readthedocs.io/en/latest/documentation.html#pyvis.network.Network.add_edge)


    Args:
        networkx_graph: The graph to convert and display
        notebook: Display in Jupyter?
        output_filename: Where to save the converted network
        show_buttons: Show buttons in saved version of network?
        only_physics_buttons: Show only buttons controlling physics of network?

    Returns:
        Whatever ``pyvis_graph.show(output_filename)`` returns.
    """

    # import
    from pyvis import network as net

    # make a pyvis network
    pyvis_graph = net.Network(notebook=notebook)
    pyvis_graph.width = '1000px'

    # for each node and its attributes in the networkx graph
    for node, node_attrs in networkx_graph.nodes(data=True):
        pyvis_graph.add_node(node, **node_attrs)

    pyvis_graph.force_atlas_2based()

    # for each edge and its attributes in the networkx graph
    for source, target, edge_attrs in networkx_graph.edges(data=True):
        # Work on a copy: edges(data=True) hands out the graph's own attribute
        # dicts, and writing 'value' into them would modify the caller's graph.
        attrs = dict(edge_attrs)
        # if value/width not specified directly, and weight is specified, set 'value' to 'weight'
        if 'value' not in attrs and 'width' not in attrs and 'weight' in attrs:
            attrs['value'] = attrs['weight']
        # add the edge
        pyvis_graph.add_edge(source, target, **attrs)

    # turn buttons on
    if show_buttons:
        if only_physics_buttons:
            pyvis_graph.show_buttons(filter_=['physics'])
        else:
            pyvis_graph.show_buttons()

    # return and also save
    return pyvis_graph.show(output_filename)

In [42]:
# Render graph `M` to ../_generated_data/Net.html with only the physics controls shown.
# NOTE(review): `M` is not assigned in any of the cells visible here, so on a fresh
# kernel this call presumably fails with NameError -- confirm where M is defined.
draw_graph3(M,output_filename='../_generated_data/Net.html', notebook=False, only_physics_buttons=True)