In [1]:
import pandas as pd
import networkx as nx

In [2]:
crystal_df = pd.read_csv("For Gephi/crystal_industry_social_network.csv")

In [3]:
crystal_df

Unnamed: 0,Source,Target
0,gwengwenn,jam.stones
1,huimin_c,jam.stones
2,agent_g07,jam.stones
3,agnestankimpheng,jam.stones
4,umareumm,jam.stones
...,...,...
74043,eugene.tyz,tamzacrystals
74044,xenias_art,tamzacrystals
74045,frankieyaw,tamzacrystals
74046,kh3yton,tamzacrystals


# Part 1 - Simple Analysis

### Identify hubs in the network

- They are mainly the retailers/communities

In [4]:
hubs = crystal_df.groupby("Target").count().sort_values(by="Source", ascending=False)

In [5]:
hubs

Unnamed: 0_level_0,Source
Target,Unnamed: 1_level_1
jam.stones,13404
ohmou.sg,11287
glacecrystals,9774
soulalign,8031
crystalmojo,7255
covetcrystalsjewelry,7000
illa_nocte,6063
fated.itis,3141
tamzacrystals,2476
gemstorysg,2284


In [6]:
hubs_list = list(hubs.to_dict()["Source"].keys())
hubs_list

['jam.stones',
 'ohmou.sg',
 'glacecrystals',
 'soulalign',
 'crystalmojo',
 'covetcrystalsjewelry',
 'illa_nocte',
 'fated.itis',
 'tamzacrystals',
 'gemstorysg',
 'kayla.crystals',
 'loveforcrystals']

In [7]:
for index, row in crystal_df.iterrows():
    if row["Target"] not in hubs_list:
        print(row)

print("Completed")

Completed


### Identify people who follow majority of the hubs

In [8]:
potential_influencers = crystal_df.groupby("Source").count().sort_values(by="Target", ascending=False)

In [9]:
potential_influencers

Unnamed: 0_level_0,Target
Source,Unnamed: 1_level_1
oromii,11
del_felsingher,10
milko7,10
ohmnification,10
jiahui11111,10
...,...
hope_through_thyca,1
hopeandbliss1,1
hopegirldancing,1
hopelovefortune,1


These are the people who follow at least 9 out of the 12 hubs

In [10]:
potential_influencers[potential_influencers["Target"] > 8]

Unnamed: 0_level_0,Target
Source,Unnamed: 1_level_1
oromii,11
del_felsingher,10
milko7,10
ohmnification,10
jiahui11111,10
goldhoarder,9
faithdababie,9
quirky_adventurer,9
iqinyu,9
eleanor_neo,9


# Part 2 - Network Analysis

In [11]:
G = nx.Graph()

In [12]:
G = nx.from_pandas_edgelist(crystal_df, 'Source', 'Target')

### Generate insights

- Number of nodes
- Number of edges
- Degree centrality of nodes
- Betweenness centrality of nodes
- Closeness centrality of nodes
- Eigenvector centrality of nodes

Number of nodes

In [21]:
nx.number_of_nodes(G)

53499

Number of edges

In [22]:
nx.number_of_edges(G)

74047

Number of strongly connected components

In [23]:
nx.number_strongly_connected_components(G)

NetworkXNotImplemented: not implemented for undirected type

Degree centrality of nodes

In [15]:
nx.degree_centrality(G)

{'gwengwenn': 1.8692287562151855e-05,
 'jam.stones': 0.2505514224830835,
 'huimin_c': 1.8692287562151855e-05,
 'agent_g07': 1.8692287562151855e-05,
 'agnestankimpheng': 1.8692287562151855e-05,
 'umareumm': 1.8692287562151855e-05,
 'starlightsglaxing': 1.8692287562151855e-05,
 'shangkariselvarajah': 1.8692287562151855e-05,
 'gazellemum': 1.8692287562151855e-05,
 'zima_loves_bunnies': 1.8692287562151855e-05,
 'wolf_katazyna': 5.6076862686455566e-05,
 '_cara_tan_': 1.8692287562151855e-05,
 'ekitty27': 3.738457512430371e-05,
 'pohjaslin': 1.8692287562151855e-05,
 'bloatofhippos': 1.8692287562151855e-05,
 'simplyprincessy': 0.000130846012935063,
 'inez_xb': 7.476915024860742e-05,
 'blushwax': 1.8692287562151855e-05,
 'eileen.symrise': 1.8692287562151855e-05,
 'yemafrnndz': 1.8692287562151855e-05,
 'unconsciousss': 3.738457512430371e-05,
 '_cassandra_ong': 1.8692287562151855e-05,
 'rebeccasayz': 1.8692287562151855e-05,
 'jacelynye': 1.8692287562151855e-05,
 'purplexedly': 1.8692287562151855e

Betweenness centrality of nodes

In [16]:
nx.betweenness_centrality(G)

KeyboardInterrupt: 

Closeness centrality of nodes

In [17]:
nx.closeness_centrality(G)

{'gwengwenn': 0.29502139674416555,
 'jam.stones': 0.4184794937382176,
 'huimin_c': 0.29502139674416555,
 'agent_g07': 0.29502139674416555,
 'agnestankimpheng': 0.29502139674416555,
 'umareumm': 0.29502139674416555,
 'starlightsglaxing': 0.29502139674416555,
 'shangkariselvarajah': 0.29502139674416555,
 'gazellemum': 0.29502139674416555,
 'zima_loves_bunnies': 0.29502139674416555,
 'wolf_katazyna': 0.3278606141947504,
 '_cara_tan_': 0.29502139674416555,
 'ekitty27': 0.3228781285646173,
 'pohjaslin': 0.29502139674416555,
 'bloatofhippos': 0.29502139674416555,
 'simplyprincessy': 0.3763912927237677,
 'inez_xb': 0.35010176235381885,
 'blushwax': 0.29502139674416555,
 'eileen.symrise': 0.29502139674416555,
 'yemafrnndz': 0.29502139674416555,
 'unconsciousss': 0.32741916728378817,
 '_cassandra_ong': 0.29502139674416555,
 'rebeccasayz': 0.29502139674416555,
 'jacelynye': 0.29502139674416555,
 'purplexedly': 0.29502139674416555,
 'daisywailing': 0.29502139674416555,
 'chingyouuu': 0.2950213967

Eigenvector centrality of nodes

In [20]:
nx.eigenvector_centrality(G)

PowerIterationFailedConvergence: (PowerIterationFailedConvergence(...), 'power iteration failed to converge within 100 iterations')