In [1]:
# User credentials for the Twitter API.
# They are read from environment variables so that secrets never have to be
# typed into (and saved with) the notebook. Each value falls back to the empty
# string, which is the placeholder this cell used before.
import os

access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "")
access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "")
consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "")

import os
import sys
import json
import time
import math
from tweepy import Cursor
import tweepy
from tweepy import OAuthHandler
import networkx as nx
import pprint 
pp = pprint.PrettyPrinter(indent=3)
from operator import itemgetter

# authentication
# Build the OAuth handler from the consumer key pair, then attach the
# user-level access token pair.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# wait_on_rate_limit=True makes tweepy pause until the rate-limit window resets
# instead of raising part-way through a scrape; the fixed sleeps in the
# scraping cells alone do not guarantee staying under the limit.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Scrape Friends of OnePlus

In [2]:
# Cap on the number of friends requested; the scraping code below converts it
# into a page count.
MAX_FRIENDS = 15000


def paginate(items, n):
    """Yield consecutive slices of ``items``, each holding at most ``n`` elements."""
    yield from (items[start:start + n] for start in range(0, len(items), n))

if __name__ == '__main__':
    # Fetch the profile of every account the target user follows and store the
    # profiles as JSON Lines (one profile per line) under output/users/<name>/.
    screen_name = "oneplus"  # screen name of the account to inspect
    print("collecting data for " + screen_name)
    dirname = "output/users/{}".format(screen_name)
    # The loop below treats a page of 5000 IDs as "full", so the friend cap is
    # converted into a page cap using that page size.
    max_pages = math.ceil(MAX_FRIENDS / 5000)
    try:
        # exist_ok=True already tolerates a pre-existing directory, so anything
        # raised here (permission denied, a file in the way, ...) is a real
        # failure and must not be reported as "already exists".
        os.makedirs(dirname, mode=0o755, exist_ok=True)
    except Exception as e:
        print("Error while creating directory {}".format(dirname))
        print(e)
        sys.exit(1)

    # get friends for a given user
    fname = "{}/friends.json".format(dirname)
    with open(fname, 'w') as f:
        for friends in Cursor(api.friends_ids, screen_name=screen_name).pages(max_pages):
            # Resolve the raw IDs to full profiles, 100 IDs per lookup call.
            for chunk in paginate(friends, 100):
                users = api.lookup_users(user_ids=chunk)
                for user in users:
                    f.write(json.dumps(user._json)+"\n")
            # A full page suggests more pages follow; pause before the next request.
            if len(friends) == 5000:
                print("More results available. Sleeping for 60 seconds to avoid rate limit")
                time.sleep(60)
    print("task completed")

collecting data for oneplus
task completed


# Scrape Friends of Friends 

In [12]:
import sys
import json
from random import sample

def paginate(items, n):
    """Split ``items`` into successive slices of length ``n``; the last one may be shorter."""
    for offset in range(0, len(items), n):
        upper = offset + n
        yield items[offset:upper]


# Cap on the number of friends requested per account in this cell.
MAX_FRIENDS = 150000

if __name__ == '__main__':
    screen_name = "oneplus"
    friends_file = 'output/users/{}/friends.json'.format(screen_name)
    # Screen names of the root account's friends; the file holds one JSON
    # profile per line.
    with open(friends_file) as f2:
        friends = [json.loads(line)['screen_name'] for line in f2]

    #Manually scrape the friends of friends since OnePlus has less friends
    screen_name = "OnePlus_NL" #enter screen name on who you would like to check on it.
    print("collecting data for " + screen_name)
    dirname = "output/users/oneplus/{}".format(screen_name)
    # The loop below treats a page of 5000 IDs as "full", so the friend cap is
    # converted into a page cap using that page size.
    max_pages = math.ceil(MAX_FRIENDS / 5000)
    try:
        # exist_ok=True already tolerates a pre-existing directory, so anything
        # raised here is a real failure and must not be reported as
        # "already exists".
        os.makedirs(dirname, mode=0o755, exist_ok=True)
    except Exception as e:
        print("Error while creating directory {}".format(dirname))
        print(e)
        sys.exit(1)

    # get friends for a given user
    fname = "{}/friends.json".format(dirname)
    with open(fname, 'w') as f:
        # The page variable is deliberately not called `friends`: that name
        # holds the screen-name list loaded above and must not be overwritten.
        for friend_ids in Cursor(api.friends_ids, screen_name=screen_name).pages(max_pages):
            # Resolve the raw IDs to full profiles, 100 IDs per lookup call.
            for chunk in paginate(friend_ids, 100):
                users = api.lookup_users(user_ids=chunk)
                for user in users:
                    f.write(json.dumps(user._json)+"\n")
            # A full page suggests more pages follow; pause before the next request.
            if len(friend_ids) == 5000:
                print("More results available. Sleeping for 60 seconds to avoid rate limit")
                time.sleep(60)
    print("task completed")

collecting data for OnePlus_NL
task completed


# Generate Node and Edge Lists

In [29]:
import sys
import json
from random import sample

if __name__ == '__main__':
    # Build the node list and edge list of the two-level friendship network:
    # root account -> its friends -> each friend's friends (capped per friend).
    from itertools import islice

    screen_name = "oneplus"

    # The previous condition `count <= 100` kept the first 101 profiles of each
    # friend's file; the same cap is kept so results stay comparable.
    # NOTE(review): if exactly 100 was intended, change this to 100.
    max_fof_per_friend = 101

    master_nodelist = []
    master_edgelist = []

    friends_file = 'output/users/{}/friends.json'.format(screen_name)
    with open(friends_file) as f2:
        # One JSON profile per line; only the screen name is needed.
        friends = [json.loads(line)['screen_name'] for line in f2]

    #Add to master edge and node list
    master_nodelist.append(screen_name)
    for friend in friends:
        master_nodelist.append(friend)
        master_edgelist.append((screen_name, friend))

    # friend screen name -> screen names of (up to the cap of) that friend's friends
    oneplus_fof = {}
    for fr in friends:
        fof_file = 'output/users/{}/{}/friends.json'.format(screen_name, fr)
        with open(fof_file) as f2:
            # islice stops reading at the cap instead of scanning the rest of the file.
            oneplus_fof[fr] = [
                json.loads(line)['screen_name']
                for line in islice(f2, max_fof_per_friend)
            ]

    # A set gives O(1) "already listed?" checks; master_nodelist still records
    # nodes in first-seen order.
    seen_nodes = set(master_nodelist)
    for fr in friends:
        for fr2 in oneplus_fof[fr]:
            if fr2 not in seen_nodes:
                seen_nodes.add(fr2)
                master_nodelist.append(fr2)

    for fr in friends:
        for fr2 in oneplus_fof[fr]:
            master_edgelist.append((fr, fr2))

In [30]:
# Undirected graph of the friendship network built from the master lists.
G = nx.Graph()

# Nodes are added before edges, so nodes enter the graph in master_nodelist order.
G.add_nodes_from(master_nodelist)
G.add_edges_from(master_edgelist)

In [31]:
import community
import matplotlib.pyplot as plt

In [32]:
%matplotlib qt

In [33]:
#Based on Louvain heuristics
#https://perso.uclouvain.be/vincent.blondel/research/louvain.html
# The Louvain method is randomised. A fixed seed keeps the community ids stable
# from run to run, which matters because later cells pick communities by
# hard-coded id.
# NOTE(review): the ids used in later cells (0, 1, 4) came from an unseeded
# run; re-check them after re-running with this seed.
LOUVAIN_SEED = 42
part = community.best_partition(G, random_state=LOUVAIN_SEED)


# Color mapping using community structure
values = [part.get(node) for node in G.nodes()]
d = dict(nx.degree(G))
#https://networkx.github.io/documentation/networkx-1.10/reference/generated/networkx.drawing.nx_pylab.draw_networkx.html
#https://matplotlib.org/examples/color/colormaps_reference.html
# Node size is the node's degree, listed in G.nodes() order so that sizes and
# colors refer to the same node at each position.
nx.draw_spring(G, cmap=plt.get_cmap('Pastel1'), node_color=values,
               node_size=[d[node] for node in G.nodes()], with_labels=True,
               edge_color='grey')
plt.show()

  if cb.is_numlike(alpha):


# Number of nodes / edges

In [34]:
# Report how many nodes and edges the graph holds.
node_total = G.number_of_nodes()
edge_total = G.number_of_edges()
print('Nodes: ', node_total)
print('Edges: ', edge_total)

Nodes:  804
Edges:  925


# Density

In [35]:
# Density of the graph as computed by networkx, printed for reference.
density = nx.density(G)
print("Network density:", density)

Network density: 0.0028654981629833397


# Degree Centralities

In [36]:
# Map every node to its degree and store the value on the graph as the
# 'degree' node attribute.
degree_dict = {node: deg for node, deg in G.degree(G.nodes())}
nx.set_node_attributes(G, values=degree_dict, name='degree')

# Highest-degree nodes first; the last expression displays the top five.
sorted_degree = sorted(degree_dict.items(), key=lambda pair: pair[1], reverse=True)
sorted_degree[:5]

[('OnePlus_ITA', 108),
 ('getpeid', 108),
 ('petelau2007', 107),
 ('LinusTech', 104),
 ('MKBHD', 104)]

# Computing Centralities

In [37]:
# Centrality scores for every node in the graph.
betweenness_dict = nx.betweenness_centrality(G)
eigenvector_dict = nx.eigenvector_centrality_numpy(G)

# Store each score table on the graph under a node attribute of the same name.
for attr_name, scores in (('betweenness', betweenness_dict),
                          ('eigenvector', eigenvector_dict)):
    nx.set_node_attributes(G, values=scores, name=attr_name)

# Eigenvector Centrality

In [38]:
# Tag every node with its community id under the 'modularity' attribute.
nx.set_node_attributes(G, part, 'modularity')
# First get a list of just the nodes in that class.
# G.nodes[n] is used instead of G.node[n]: the `node` alias was removed in
# networkx 2.4, while `nodes[...]` works on every 2.x release.
class0 = [n for n in G.nodes() if G.nodes[n]['modularity'] == 0]

# Then create a dictionary of the eigenvector centralities of those nodes
class0_eigenvector = {n: G.nodes[n]['eigenvector'] for n in class0}

# Then sort that dictionary (highest first) and print every member of the class
class0_sorted_by_eigenvector = sorted(class0_eigenvector.items(), key=itemgetter(1), reverse=True)

print("Modularity Class 0 Sorted by Eigenvector Centrality:")
for node in class0_sorted_by_eigenvector:
    print("Name:", node[0], "| Eigenvector Centrality:", node[1])

Modularity Class 0 Sorted by Eigenvector Centrality:
Name: oneplus_fr | Eigenvector Centrality: 0.29151524126356676
Name: OnePlus_USA | Eigenvector Centrality: 0.24230134184923727
Name: oneplus | Eigenvector Centrality: 0.23284093770136374
Name: OnePlus_UK | Eigenvector Centrality: 0.18831318449734108
Name: OnePlus_Support | Eigenvector Centrality: 0.18439103296367723
Name: OnePlus_ES | Eigenvector Centrality: 0.17642096285867132
Name: OnePlus_IN | Eigenvector Centrality: 0.17473827969755368
Name: OnePlus_DE | Eigenvector Centrality: 0.16414068233547782
Name: OnePlus_NL | Eigenvector Centrality: 0.1434131099272123
Name: OnePlus_BE | Eigenvector Centrality: 0.13503054845355233
Name: OnePlus_FI | Eigenvector Centrality: 0.10632102369739269
Name: FNATIC | Eigenvector Centrality: 0.06651158456165573
Name: ElisaOyj | Eigenvector Centrality: 0.03049276422111289
Name: nousparisstore | Eigenvector Centrality: 0.020236216054761
Name: AndroMagOUATCH | Eigenvector Centrality: 0.020236216054761
Na

In [39]:
# Tag every node with its community id under the 'modularity' attribute.
nx.set_node_attributes(G, part, 'modularity')
# First get a list of just the nodes in that class.
# G.nodes[n] is used instead of G.node[n]: the `node` alias was removed in
# networkx 2.4, while `nodes[...]` works on every 2.x release.
class1 = [n for n in G.nodes() if G.nodes[n]['modularity'] == 1]

# Then create a dictionary of the eigenvector centralities of those nodes
class1_eigenvector = {n: G.nodes[n]['eigenvector'] for n in class1}

# Then sort that dictionary (highest first) and print the first 5 results
class1_sorted_by_eigenvector = sorted(class1_eigenvector.items(), key=itemgetter(1), reverse=True)

print("Modularity Class 1 Sorted by Eigenvector Centrality:")
for node in class1_sorted_by_eigenvector[:5]:
    print("Name:", node[0], "| Eigenvector Centrality:", node[1])

Modularity Class 1 Sorted by Eigenvector Centrality:
Name: McLarenF1 | Eigenvector Centrality: 0.11860165863479342
Name: TechnicalGuruji | Eigenvector Centrality: 0.021736555649505072
Name: AaronGillard_17 | Eigenvector Centrality: 0.008233013060256237
Name: McLarenFanChina | Eigenvector Centrality: 0.008233013060256237
Name: 24hoursoflemans | Eigenvector Centrality: 0.008233013060256235


In [40]:
# Rank every node by eigenvector centrality, highest first.
sorted_eigen = sorted(
    eigenvector_dict.items(),
    key=lambda item: item[1],
    reverse=True,
)

# Number of leading entries to show.
n = 5
print("Top {} nodes by eigenvector centrality:".format(n))
for b in sorted_eigen[:n]:
    print(b)

Top 5 nodes by eigenvector centrality:
('petelau2007', 0.3329461542501089)
('OnePlus_ITA', 0.3141715130476302)
('oneplus_fr', 0.29151524126356676)
('getpeid', 0.2604352195519159)
('OnePlus_USA', 0.24230134184923727)


# Betweenness centrality

In [41]:
# Rank every node by betweenness centrality, highest first.
sorted_betweenness = list(betweenness_dict.items())
sorted_betweenness.sort(key=itemgetter(1), reverse=True)

# Number of leading entries to show.
n = 5
header = "Top {} nodes by betweenness centrality:".format(n)
print(header)
for b in sorted_betweenness[:n]:
    print(b)

Top 5 nodes by betweenness centrality:
('petelau2007', 0.24804129138486578)
('OnePlus_ITA', 0.23854653649941543)
('getpeid', 0.23112584630533883)
('McLarenF1', 0.2299011660469444)
('LinusTech', 0.21904889570164612)


In [42]:
# Tag every node with its community id under the 'modularity' attribute.
nx.set_node_attributes(G, part, 'modularity')
# First get a list of just the nodes in that class.
# This cell inspects community 4, so the names and the printed label say so
# (they previously said "class 0" / "eigenvector" while filtering on 4 and
# reading betweenness).
# G.nodes[n] is used instead of G.node[n]: the `node` alias was removed in
# networkx 2.4, while `nodes[...]` works on every 2.x release.
class4 = [n for n in G.nodes() if G.nodes[n]['modularity'] == 4]

# Then create a dictionary of the betweenness centralities of those nodes
class4_betweenness = {n: G.nodes[n]['betweenness'] for n in class4}

# Then sort that dictionary (highest first) and print every member of the class
class4_sorted_by_betweenness = sorted(class4_betweenness.items(), key=itemgetter(1), reverse=True)

print("Modularity Class 4 Sorted by Betweenness Centrality:")
for node in class4_sorted_by_betweenness:
    print("Name:", node[0], "| Betweenness Centrality:", node[1])

Modularity Class 0 Sorted by Betweenness Centrality:
Name: MKBHD | Betweenness Centrality: 0.2128401883692019
Name: JeffBezos | Betweenness Centrality: 0.004913065996129168
Name: ArdCB | Betweenness Centrality: 0.004512371323828688
Name: Waymo | Betweenness Centrality: 0.004512371323828688
Name: Razer | Betweenness Centrality: 0.00417028041052507
Name: geekyranjit | Betweenness Centrality: 0.0024933469996180494
Name: rodemics | Betweenness Centrality: 0.0024933469996180494
Name: Eminem | Betweenness Centrality: 0.0
Name: LikeTeslaKim | Betweenness Centrality: 0.0
Name: TickTickTeam | Betweenness Centrality: 0.0
Name: betablock3r | Betweenness Centrality: 0.0
Name: BillGates | Betweenness Centrality: 0.0
Name: rtingsdotcom | Betweenness Centrality: 0.0
Name: willduwilldu | Betweenness Centrality: 0.0
Name: gracemburke | Betweenness Centrality: 0.0
Name: NBT88YT | Betweenness Centrality: 0.0
Name: PixelFilmStudio | Betweenness Centrality: 0.0
Name: LumaForge | Betweenness Centrality: 0.0

# Generate Subgraph

In [45]:
# Community ids to keep in the subgraph.
targetcom = [0,1] #Specify community

# Nodes whose community id is one of the targets, in the partition's own order.
subgra = [member for member, com_id in part.items() if com_id in targetcom]

subgraph = G.subgraph(subgra)

# Community id of each subgraph node, in the subgraph's iteration order
# (used as the node colors when drawing).
values2 = list(map(part.get, subgraph))

In [46]:
# Draw the community subgraph with a spring layout, colored by community id.
nx.draw_spring(
    subgraph,
    with_labels=True,
    node_color=values2,
    cmap=plt.get_cmap('Pastel1'),
)

  if cb.is_numlike(alpha):


# Testing Area (Own Reference)

In [43]:
# Look up the three stored scores for a single account.
target = 'OnePlus_ITA'
for label, table in (('eigenvector: ', eigenvector_dict),
                     ('betweeness: ', betweenness_dict),
                     ('degree: ', degree_dict)):
    print(label, table[target])

eigenvector:  0.3141715130476302
betweeness:  0.23854653649941543
degree:  108


In [44]:
# List every recorded edge that has the chosen account at either end.
nodetarget = 'oneplus'

for edge in master_edgelist:
    if nodetarget not in edge[:2]:
        continue
    print('Connected Nodes are ', edge)

Connected Nodes are  ('oneplus', 'OnePlus_FI')
Connected Nodes are  ('oneplus', 'McLarenF1')
Connected Nodes are  ('oneplus', 'OnePlus_USA')
Connected Nodes are  ('oneplus', 'linusgsebastian')
Connected Nodes are  ('oneplus', 'LinusTech')
Connected Nodes are  ('oneplus', 'MKBHD')
Connected Nodes are  ('oneplus', 'UnboxTherapy')
Connected Nodes are  ('oneplus', 'OnePlus_ES')
Connected Nodes are  ('oneplus', 'OnePlus_BE')
Connected Nodes are  ('oneplus', 'OnePlus_NL')
Connected Nodes are  ('oneplus', 'OnePlus_ITA')
Connected Nodes are  ('oneplus', 'OnePlus_DE')
Connected Nodes are  ('oneplus', 'getpeid')
Connected Nodes are  ('oneplus', 'petelau2007')
Connected Nodes are  ('oneplus', 'OnePlus_UK')
Connected Nodes are  ('oneplus', 'oneplus_fr')
Connected Nodes are  ('oneplus', 'OnePlus_Support')
Connected Nodes are  ('oneplus', 'OnePlus_IN')
Connected Nodes are  ('OnePlus_FI', 'oneplus')
Connected Nodes are  ('McLarenF1', 'oneplus')
Connected Nodes are  ('OnePlus_USA', 'oneplus')
Connecte

In [27]:
# List every recorded edge that has the chosen account at either end.
nodetarget = 'petelau2007'

touching = [pair for pair in master_edgelist
            if pair[0] == nodetarget or pair[1] == nodetarget]
for pair in touching:
    print('Connected Nodes are ', pair)

Connected Nodes are  ('oneplus', 'petelau2007')
Connected Nodes are  ('OnePlus_FI', 'petelau2007')
Connected Nodes are  ('McLarenF1', 'petelau2007')
Connected Nodes are  ('OnePlus_USA', 'petelau2007')
Connected Nodes are  ('UnboxTherapy', 'petelau2007')
Connected Nodes are  ('OnePlus_ES', 'petelau2007')
Connected Nodes are  ('OnePlus_DE', 'petelau2007')
Connected Nodes are  ('petelau2007', 'McLarenF1')
Connected Nodes are  ('petelau2007', 'ZBrownCEO')
Connected Nodes are  ('petelau2007', 'JanetBrown980')
Connected Nodes are  ('petelau2007', 'saschasegan')
Connected Nodes are  ('petelau2007', 'Android')
Connected Nodes are  ('petelau2007', 'ZacksJerryRig')
Connected Nodes are  ('petelau2007', 'linusgsebastian')
Connected Nodes are  ('petelau2007', 'jimmyfallon')
Connected Nodes are  ('petelau2007', 'OnePlus_USA')
Connected Nodes are  ('petelau2007', 'JeffBezos')
Connected Nodes are  ('petelau2007', 'DanStrumpf')
Connected Nodes are  ('petelau2007', 'poltronafrau')
Connected Nodes are  (

In [14]:
# Print the name of every node assigned to one of the target communities.
targetcom = [1]

for member, com_id in part.items():
    if com_id in targetcom:
        print(member)

McLarenF1
NickNoodlenick
HuskiChocolate
fotis78man
F1Tricky
Ashman95
DestroyerxXF1
chattysian
gamsmits
1gorFraga
rafredarrows
MarkPryde
hringner
FulbrookPE
Bahrain_Merida
TechnicalGuruji
philiphornby
FernsF1Tweets
MiggsAubrey
TeamHendrick
SportByFort
Gulen_Meredith
sergiosettecama
IainWitts
Fran_Lorenzo95
SimonHendy
BBCTheOneShow
RMFNE
alasdairmulhern
andrewmarkwhite
BonzoKEN
Mattrocchi
Nintyworldchamp
basakdalgic
Shane84104524
kazsmithF1
Carlossainz55
georette100
Ste_mclarenfan
AaronGillard_17
debarooo
GrahamDxon
MclarenF1Adam
OlliPahkala
KarenMcelhatton
edvaark
D2BDofficial
kpotts81
NicciF1fan
F1FansEyeView
F1lippos
McLarenCollectr
RaceOfChampions
McLarenShadow
F1Good1
timholmesdesign
TimWatsonUK
GdeFerran
NickyByrne
24hoursoflemans
24heuresdumans
MigueluVe
danway60
htcesports
htc
htcvive
JBStoyou
StaMcLaren
RickettJonny
thibautcourtois
JimmieJohnson
SuzanneArmstr20
F1Media
CaseyNeistat
osbornejoe
DoningtonParkUK
CliveWoodward
_SNFLWR27
timkefford
micahgianneli
airline_B773787
DylanF