## **Compare the two Networks**

The purpose of this notebook is to **compare our Market and Storytelling Networks** in order to assess the impact of Storytelling on the partitioning and on the connections between brands.

In [None]:
# Import all needed libraries
import tweepy                   # Python wrapper around Twitter API
from google.colab import drive  # to mount Drive to Colab notebook

import pandas as pd
pd.set_option('display.max_colwidth', None) #to see more text
import json 
import csv
from datetime import date
from datetime import datetime
import time
import numpy as np
import re
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import seaborn as sns
sns.set()

from textblob import TextBlob
from wordcloud import WordCloud
import string
import itertools
from collections import Counter
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...




In [None]:
# import essential libraries for network

%%capture
import networkx as nx  #for the manipulation of networks 
import numpy as np  #for useful maths functions
import pandas as pd  #for the manipulation of dataframes 
import seaborn as sns  #for visualization
import matplotlib.pyplot as plt  #for visualization
from scipy import sparse  #for high-level functions
import community.community_louvain as community_louvain  #community detection inside networks
from sklearn.metrics.pairwise import cosine_distances  #cosine distance between two variables
sns.set(color_codes=True, rc={'figure.figsize':(10,8)})  #set seaborn
sns.set()

# install datashader
!pip install -qq datashader

# import the network visualization libraries and backend
import holoviews as hv
from holoviews import opts
hv.extension('bokeh')
from bokeh.plotting import show
kwargs = dict(width=800, height=800, xaxis=None, yaxis=None)
opts.defaults(opts.Nodes(**kwargs), opts.Graph(**kwargs))
from holoviews.operation.datashader import datashade, bundle_graph

In [None]:
# Read the Market Network similarities table from Drive and preview its first rows
market_csv = '/content/drive/MyDrive/Final Project/1. Market Network Analysis/Market Similarities.csv'
market_net = pd.read_csv(market_csv)
market_net.head()

Unnamed: 0.1,Unnamed: 0,Brand,Brand Owner,Twitter,Apparel,Shoes,Watch,Jewelry,Leather,Accessories/Others,Eyewear,Fragrances,Beauty,Partition,Centrality
0,0,Cartier,Richemont SA,Cartier,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0,0.538462
1,1,Van Cleef & Arpels,Richemont SA,vancleefarpels,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1,0.615385
2,2,Burberry,Richemont SA,Burberry,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.487179
3,3,Baume & Mercier,Richemont SA,baumeetmercier,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.487179
4,4,IWC,Richemont SA,IWC,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.487179


In [None]:
# Read the Storytelling Network (NLP similarities) table from Drive and preview its first rows
story_csv = '/content/drive/MyDrive/Final Project/3. Storytelling Network Analysis/NLP Similarities.csv'
story_net = pd.read_csv(story_csv)
story_net.head()

Unnamed: 0.1,Unnamed: 0,brand,subjectivity,polarity,nb_words,text_len,negative,neutral,positive,compound,Partition,Centrality
0,0,BALENCIAGA,0.0,0.0,8.785714,68.071429,0.0,1.0,0.0,0.0,0,0.051282
1,1,BenefitBeauty,0.410326,0.232698,8.416538,48.190682,0.026861,0.616965,0.258678,0.310477,3,0.076923
2,2,Boucheron,0.365159,0.179372,13.162429,83.83809,0.013025,0.826264,0.158117,0.281332,2,0.692308
3,3,Bulgariofficial,0.37389,0.203347,13.314198,85.194136,0.00908,0.788926,0.201688,0.371378,3,0.615385
4,4,Burberry,0.276546,0.107133,13.772938,89.389249,0.006502,0.912383,0.081116,0.158115,4,0.282051


### **Set the Market Network**

In [None]:
# Keep columns 3 to 12 of the loaded table: the first kept column is used
# below as the node label, the remaining ones as the feature vector.
# NOTE(review): this overwrites market_net, so the cell cannot be re-run
# without reloading the CSV first.
market_net = market_net.iloc[:, 3:13]

# Pairwise cosine distances between all rows (square matrix)
market_dist = cosine_distances(market_net.iloc[:, 1:], market_net.iloc[:, 1:])

# The network is built on similarities, i.e. 1 - distance
market_sim = 1 - market_dist

# 60th percentile of the similarities, used as cutoff (for a less crowded network)
market_perc = np.percentile(market_sim, 60)

# Weighted graph with one node per row of the table
market_G = nx.from_numpy_array(market_sim)

# Attach the label column to the nodes, keyed by the DataFrame index
market_attributes_dict = market_net.iloc[:, [0]].to_dict('index')
nx.set_node_attributes(market_G, market_attributes_dict)

# Keep only the edges whose weight is strictly above the cutoff
strong_edges = [
    (source, target)
    for source, target, weight in market_G.edges(data='weight')
    if weight > market_perc
]
market_G = nx.edge_subgraph(market_G, strong_edges)

# Store the degree centrality of every remaining node under 'centrality_dgr'
centrality_dgr = {
    node: {'centrality_dgr': score}
    for node, score in nx.degree_centrality(market_G).items()
}
nx.set_node_attributes(market_G, centrality_dgr)

In [None]:
# identify communities with community_louvain
# (random_state is fixed so that the partition is identical on every run)
partition = community_louvain.best_partition(market_G, random_state=42)
nx.set_node_attributes(market_G, partition, 'partition')

# plot network
# (seed is fixed so that the spring layout is identical on every run)
position = nx.spring_layout(market_G, seed=42)
graph_options = dict(
    tools=['hover'],
    edge_alpha=0.15,
    node_size=13,
    node_color='partition', cmap='Set1',  # one colour per Louvain community
    legend_position='right',
)
graph = hv.Graph.from_networkx(market_G, position).opts(**graph_options)

# text layer anchored on the node coordinates, overlaid on the graph
labels = hv.Labels(graph.nodes, ['x', 'y'])
label_options = dict(text_font_size='0pt', text_color='black', xoffset=-0.01,
                     yoffset=-0.04, bgcolor='white', padding=0.2)

show(hv.render(graph * labels.opts(**label_options)))

In [None]:
# Let's use bundle_graph for a better visual
# (bundle_graph is already imported in the network setup cell at the top of
#  the notebook, so it is not imported again here)

bundled = bundle_graph(graph)
show(hv.render(bundled))

### **Set the Storytelling Network**

In [None]:
# Keep columns 1 to 9 of the loaded table: the first kept column is used
# below as the node label, the remaining ones as the feature vector.
# NOTE(review): this overwrites story_net, so the cell cannot be re-run
# without reloading the CSV first.
story_net = story_net.iloc[:, 1:10]

# Min-max scale the feature columns so they are on a comparable range
from sklearn.preprocessing import MinMaxScaler
scl = MinMaxScaler()
data_num = scl.fit_transform(story_net.iloc[:, 1:])

# Pairwise cosine distances between all rows (square matrix)
story_dist = cosine_distances(data_num, data_num)

# The network is built on similarities, i.e. 1 - distance
story_sim = 1 - story_dist

# 60th percentile of the similarities, used as cutoff (for a less crowded network)
story_perc = np.percentile(story_sim, 60)

# Weighted graph with one node per row of the table
story_G = nx.from_numpy_array(story_sim)

# Attach the label column to the nodes, keyed by the DataFrame index
story_attributes_dict = story_net.iloc[:, [0]].to_dict('index')
nx.set_node_attributes(story_G, story_attributes_dict)

# Keep only the edges whose weight is strictly above the cutoff
strong_edges = [
    (source, target)
    for source, target, weight in story_G.edges(data='weight')
    if weight > story_perc
]
story_G = nx.edge_subgraph(story_G, strong_edges)

# Store the degree centrality of every remaining node under 'centrality_dgr'
centrality_dgr = {
    node: {'centrality_dgr': score}
    for node, score in nx.degree_centrality(story_G).items()
}
nx.set_node_attributes(story_G, centrality_dgr)

In [None]:
# identify communities with community_louvain
# (random_state is fixed so that the partition is identical on every run)
partition = community_louvain.best_partition(story_G, random_state=42)
nx.set_node_attributes(story_G, partition, 'partition')

# plot network
# We keep the same layout as the Market Network plot for comparison.
# `position` is the layout that was computed when the Market Network was drawn;
# calling nx.spring_layout(market_G) again without a seed would produce a
# different random layout. Node ids are row numbers of each table, so the same
# id does not necessarily designate the same brand in both graphs: positions
# are therefore matched on the node label instead of on the node id.
# NOTE(review): assumes the label column of both tables holds the same brand
# identifier (compared case-insensitively) - confirm against the two CSV files.
def _brand_label(attributes):
    """Return the normalised value of a one-entry {column: label} dict."""
    return str(next(iter(attributes.values()))).strip().lower()

market_pos_by_brand = {
    _brand_label(market_attributes_dict[node]): coordinates
    for node, coordinates in position.items()
}

story_position = {}
for node in story_G.nodes:
    brand = _brand_label(story_attributes_dict[node])
    if brand in market_pos_by_brand:
        story_position[node] = market_pos_by_brand[brand]

# Brands without a counterpart in the Market layout are placed by the spring
# layout, while the matched brands stay pinned to their Market coordinates.
if len(story_position) < story_G.number_of_nodes():
    if story_position:
        story_position = nx.spring_layout(story_G, pos=story_position,
                                          fixed=list(story_position), seed=42)
    else:
        story_position = nx.spring_layout(story_G, seed=42)

graph = hv.Graph.from_networkx(story_G, story_position).opts(
                                                                        tools=['hover'],
                                                                        edge_alpha=0.15,
                                                                        node_size=13,
                                                                        node_color='partition', cmap='Set1',
                                                                        legend_position='right'
                                                                        )

# text layer anchored on the node coordinates, overlaid on the graph
labels = hv.Labels(graph.nodes, ['x', 'y'])

show(hv.render((graph * labels.opts(text_font_size='0pt', text_color='black', xoffset=-0.01, 
                                    yoffset=-0.04, bgcolor='white', padding=0.2))))

In [None]:
# Let's use bundle_graph for a better visual
# (bundle_graph is already imported in the network setup cell at the top of
#  the notebook, so it is not imported again here)

bundled = bundle_graph(graph)
show(hv.render(bundled))