# Retrieving and Analyzing Audience of a Twitter Account

In [1]:
from LibOM.Tools import *
from omterms.interface import *
from functools import reduce
import re, json
import pandas as pd

In [2]:
class Tweets:
    re_rt = re.compile(r"RT @.*?:")
    re_url = re.compile("(?P<url>https?://[^\s]+)")
    re_ht_valid = re.compile(r"^[a-zA-Z]+[a-zA-Z0-9]*")
    
    def __init__(self, tweets = []):
        print('The tweet extractor class is initiated with {} tweets'.format(len(tweets)))
    
    def extract_info(self,tweets, kw = True):
        return [{'rt': self.is_rt(t),
                 'rt_from': self.rt_from(t),
                 'mentions': self.extract_mentions(t),
                 'hashtags': self.extract_hashtags(t),
                 'keywords': self.extract_keywords(t, kw),
                 'text': self.clean_text(t)} for t in tweets
               ]
            
    def is_rt(self, tweet):
        tweet = tweet['text']
        search = re.search(self.re_rt, tweet)
        return True if search else False
    
    def rt_from(self, tweet):
        matched = re.match(self.re_rt, tweet['text'])
        if not matched: return set()
        tweet['text'] = matched.group()
        return self.extract_mentions(tweet, no_rt=False)
    
    def remove_rt(self, tweet):
        tweet = tweet['text']
        return re.sub(self.re_rt, r' ', tweet)
    
    def extract_urls(self, tweet):
        tweet = tweet['text']
        search = re.search(self.re_url, tweet)
        if search: return search.group("url")
        
    def remove_urls(self, tweet):
        tweet = tweet['text']
        return re.sub(self.re_url, r' ', tweet)
    
    def extract_mentions(self, tweet, no_rt = True):
        tweet = tweet['text']
        if no_rt: tweet = re.sub(self.re_rt, r' ', tweet)
        mentions = set(t[1:] for t in tweet.split() if t.startswith('@'))
        return set([m[:-1] if m.endswith(':') else m for m in mentions])
    
    def extract_hashtags(self, tweet):
        tweet = tweet['text']
        hts = [t[1:] for t in tweet.split() if t.startswith('#')]
        valid_hts = set()
        for ht in hts:
            matched = re.match(self.re_ht_valid, ht)
            if not matched: continue
            valid_hts.add(matched.group())  
        return valid_hts
    
    def remove_mentions(self, tweet):
        tweet = tweet['text']
        tokens = tweet.split()
        mentions = set(t for t in tokens if t.startswith('@'))
        return ' '.join([t for t in tokens if t not in mentions])
    
    def clean_text(self, tweet):
        tweet['text'] = self.remove_urls(tweet)
        tweet['text'] = self.remove_rt(tweet)
        return self.remove_mentions(tweet)
    
    def extract_keywords(self, tweet, kw = True):
        if not kw: return set()
        if tweet['lang'] != 'en': return set()
        text = self.clean_text(tweet)
        return set(extract_terms(text, min_termlength=2, min_tf=0).Term)


### 1. Connecting to the Twitter API

#### Credentials
In order to get credentials to be able to connect to the Twitter:
- Login to https://apps.twitter.com 
 - If you already have twitter application you may use its credentials.
 - Or generate a new set of keys and tokens via "Create a New App" button.
- Click "Keys and Access Tokens" tab and copy "API key", API secret".
- Click "Create my Access Token" and copy "Access token" and "Access token secret".

In [3]:
Credentials = {}
Credentials['Consumer_Key'] = "your key" 
Credentials['Consumer_Secret'] = "your secret"
Credentials['Access_Token'] = "your token" 
Credentials['Access_Token_Secret'] = "your token secret"

#### Instantiating the custom Twitter interface
 The interface is used to drive Twitter API. It was originally developed for the Community Spirals API project.

In [4]:
ClientTwitter = Twitter()

#### Setting the account.

In [5]:
account = 'SimPolProject'

### 2. Retrieving followers of the account

Uncomment the cell below, when updating and re-harvesting twitters of the followers is needed. It will take time and will be subject to limitations.

In [6]:
#followers = ClientTwitter.retrieve_followers([auser])['theset']
# The account's own tweets are also to be harvested.
#followers.add(account)

#### Saving the harvested data on the disk for re-use and to avoid repeated harvesting.

In [7]:
fname = './output/lists/' + account +'_followers.pickle'

In [8]:
#with open(fname, 'wb') as handle: pickle.dump(followers, handle, protocol=pickle.HIGHEST_PROTOCOL)

#### Reloading the harvested tweets of the followers

In [9]:
with open(fname, 'rb') as handle:
    followers = pickle.load(handle)

In [10]:
len(followers)

683

In [11]:
pp.pprint(followers)

{'0ttamatt',
 '2dipicchee',
 '4A50CqwsQqeahlq',
 '7softley',
 '7wData',
 '85Fifteen',
 'AEBIOM',
 'APRC_CF',
 'ARNAOYTHSD',
 'AdaFerko',
 'AdamPC26',
 'Adeline_story',
 'Adellibya1979',
 'AdolfSanchez5',
 'Adoufrancois',
 'Agent_Jase',
 'Agn_ssa',
 'AlanGIsaac',
 'AlbertoMunoz',
 'AldeiDiogo',
 'AlessandroLaced',
 'Alex_GuerreroR',
 'AlineCosette',
 'Amanuel40Rui',
 'AndreaBindo',
 'AndreaRoventini',
 'Andreas_Chile',
 'Andrewbrum',
 'AngelFe23124123',
 'Angela07261976',
 'Anna_Hadorn',
 'AnneMur16630871',
 'AppDeveloperHue',
 'ArcmorRib',
 'ArnautzPgvila',
 'Arturo311311311',
 'AsaBentley',
 'Asmmar1999',
 'AugustusSouto',
 'BIntendente',
 'BUPardeeCenter',
 'Barone54924381',
 'BenitoMma',
 'Benmccreton1',
 'Better_Finance',
 'Beyond_Ratings',
 'Bifrostonline',
 'BigDataStackEU',
 'BingoLittle1',
 'BlacMambaMMA',
 'Blinis',
 'Blueland1',
 'BluewaterGlobe',
 'BoekeKatrin',
 'BradZarnett',
 'BrainConceptual',
 'BsjgMMZetBPIHca',
 'BusinessPartnrs',
 'ByGeorgePerr',
 'CASAPUEBLO8',
 'CCL

### 3. Retrieving tweets of the followers of the account
Uncomment the cell below, when updating and re-harvesting twitters of the followers is needed. It will take time and will be subject to limitations.

In [12]:
def harvest_tweets(followers):
    done = set()
    notdone = set(followers).difference(done)
    for auser in notdone:
        debates = ClientTwitter.retrieve_tweets([auser], nlatest = 200, merge = False)
        tweets = debates[auser]['content']
        fname = './output/tweets/' + auser +'.pickle'
        with open(fname, 'wb') as handle:
            pickle.dump(tweets, handle, protocol=pickle.HIGHEST_PROTOCOL)
        done.add(auser)
        print('Done: {}'.format(auser))
    return done, notdone

In [13]:
#harvest_tweets(followers)

#### A quick examination of harvested data.

In [14]:
for auser in followers:
    fname = './output/tweets/' + auser +'.pickle'
    with open(fname, 'rb') as handle:
        tweets = pickle.load(handle)
    print('{}: {}'.format(auser,len(tweets)))
    if len(tweets) < 5: pp.pprint(tweets)

Marji84: 200
feducaydesarrol: 200
vectracrome111: 200
SwissCognitive: 200
greencareersorg: 199
ArnautzPgvila: 200
dlacalle_IA: 200
StieberHarald: 37
SiQuizas: 199
hsissom: 69
adilson_motter: 143
VlPetrovV: 193
aurorab96124853: 1
[{'lang': 'en',
  'text': 'aurora borealis demonstration 1. move the crowd '
          'https://t.co/O8Wa2fAW8Y via @YouTube'}]
burrolahola: 200
marlin_meier: 37
Seavangelesse: 200
7wData: 200
danilushkinaed1: 25
daniel_goldsmth: 200
SusFinLab: 199
filevaig41: 34
EnviroNews: 199
Xilbshi: 185
AndreaBindo: 200
PPFEnergia: 199
germanacanzi: 200
MichellLiberty: 192
bassindales: 200
fet_eu: 200
PolicyCompassEU: 199
anaishannah: 200
Maxknew: 199
egbr916: 200
shakgohir: 199
TSERETELI_0102: 22
BrainConceptual: 52
bb_pedromartins: 200
DinoFiorita: 199
OVESCO: 200
Saracris95: 199
WOETUK: 23
agabardo: 199
MariaBejaCosta1: 157
aymeric_vie: 200
2dipicchee: 192
KimMarln: 200
brjmoez: 200
kurganskayapos: 27
ale_marichela: 200
Transitionway: 200
LyndaMansson: 199
amsterdam_bil

''
the_young_fdn: 200
Thierry_Kame: 199
ey0758: 0
''
MikeOConnor3: 200
avogli: 0
''
edzaizvobwo: 184
DELPHIBG: 200
hamzaz: 190
sees_lab: 200
RebuildMacro: 200
flaskn6: 200
LucieKirstein: 15
KoljaKleineberg: 200
Paorazio: 200
gary6115: 200
MichaelRFerrari: 200
DeboyJames21: 185
NickJackson81: 73
LetUsShineNow: 200
multiplex_fet: 11
ferminserrano: 200
complex_warsaw: 113
ricardo11701135: 23
bergautier: 199
LostInMath: 48
JBPKGSolutions: 83
kostasdimou5: 0
[]
veronika_st5: 68
AnneMur16630871: 200
emacampiglio: 199
Easyimpresa1: 17
Better_Finance: 200
LeanneUssher: 43
renesugar: 200
AsaBentley: 200
enzomanzo1: 41
yuliay: 199
ayazarka88: 199
MartinaOccelli: 117
goodhartlaw: 199
panchulzz: 0
[]
netsci2017: 200
bethscleft: 69
resume_unilu: 200
TagederSonne: 200
JonathanEcon: 6
marcoderrico: 0
''
TMWFTE: 200
CigeuInfo: 0
''
Alex_GuerreroR: 200
GiachiniDaniele: 110
FETFX_EU: 200
LuisaDeAmicis: 200
brianla1: 196
FabriceSabatier: 99
__jasmy: 4
[{'lang': 'en',
  'text': 'RT @SimPolProject: How can

bastidas_d: 195
DougThorpe_com: 200
TiberiuPetre: 0
''
flipparini: 200
dataforpolicy: 200
giacomolivan: 153
mennoliauw: 4
[{'lang': 'nl',
  'text': 'RT @EricHolterhues: In Amsterdam creëerden @pjotrdejong &amp; '
          '@mennoliauw   @vandejong Springhouse gefinancierd door Triodos '
          'Cultuurfonds  https://t.co/DPPrlswkPC'},
 {'lang': 'en',
  'text': 'RT @DSMeu: Want to make the most out of online engagement? Check '
          'out the EU-supported project @mymeedia http://t.co/n3CGzilCqF '
          '#h2020 #socialmedia'},
 {'lang': 'nl',
  'text': '@rvermin @mennoliauw De aanhouder wint! Vanavond was relaunch WR!! '
          'Nu meer bottom up benadering.'},
 {'lang': 'nl',
  'text': 'RT @VanVelzenMusic: Kan de kracht van muziek bijdragen aan vrede? '
          'Kijk morgen naar #MusicAboveFighting onze reis voor Masterpeace '
          'naar Congo! Om 21u op Veronica'}]
helloplanetmars: 200
aebiondo: 106
Monithon: 200
CherryEnergy1: 199
PEP4UM: 200
SimPolProject: 200


### 4. Extracting and curating information from each follower's tweets

In [15]:
def has_info(data):
    for k,v in data.items():
        if v: return True
    return False
    
def curate_user(auser, curator, kw = True):
    fname = './output/tweets/' + auser +'.pickle'
    with open(fname, 'rb') as handle:
        tweets = pickle.load(handle)
    n = len(tweets)
    print('{}: {}'.format(auser,n))
    curated = curator.extract_info(tweets, kw)
    curated = [c for c in curated if has_info(c)]
    return n, curated

def curate_followers(followers, kw = True):
    """It proccesses and analyzes each follower's each tweets one by one.
    
    Note:
        Examining tweets one by one specifically to clean, tokenize and standardize and
        extract terms is relatively costly in terms of time.
    
    Args:
        followers (:obj:`set` of  :obj:`str`): A set of twitter account's screen names.
        kw (:obj:`bool`): When True extract terms of each tweets (default True).
    
    Returns:
        (:obj:`dict`): A JSON like collection of curated data.
            Each entry in the collection represents a curated twitter data.
    
    """
    curated = dict()
    mT = Tweets()
    for f in followers:
        n, data = curate_user(f, mT, kw)
        curated[f] = {'ntweets':n,'data':data}
    ofnamebase = './output/simpolproject_curated_new'
    fpickle = ofnamebase + '.pickle'
    fjson = ofnamebase + '.json'
    with open(fpickle, 'wb') as handle:
        pickle.dump(curated, handle, protocol=pickle.HIGHEST_PROTOCOL)
    #with open(fjson, 'w') as handle: json.dump(curated, handle)
    return curated

#### 4.1 Demo of curating a specific twitter account
Note that only 'mentions' and 'rt_from' fields are listed below.

In [16]:
mT = Tweets()
acurated_account = curate_user('zbattiz', mT, kw=True)

The tweet extractor class is initiated with 0 tweets
zbattiz: 200
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 0
Cleaning process: Initial size of tokens = 0
Reduction due to punctuations and stopwords = 0.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 0
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 0
Cleaning process: Initial size of tokens = 0
Reduction due to punctuations and stopwords = 0.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed sym

Tokenizing the input text ..
Done. Number of terms: 0
Cleaning process: Initial size of tokens = 0
Reduction due to punctuations and stopwords = 0.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 0
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 0
Cleaning process: Initial size of tokens = 0
Reduction due to punctuations and stopwords = 0.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 0
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extract

Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 0
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 16
Cleaning process: Initial size of tokens = 16
Reduction due to punctuations and stopwords = 7.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 7
Percentage = 44%
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 0
Cleaning process: Initial size of tokens = 0
Reduction due to punctuations and stopwords = 0.
Re

Percentage = 33%
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 33
Cleaning process: Initial size of tokens = 33
Reduction due to punctuations and stopwords = 16.
Reduction due to all numeral terms = 1
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 17
Percentage = 52%
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 27
Cleaning process: Initial size of tokens = 27
Reduction due to punctuations and stopwords = 6.
Reduction due to all numeral terms = 1
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The to

Done. Number of terms: 20
Cleaning process: Initial size of tokens = 20
Reduction due to punctuations and stopwords = 7.
Reduction due to all numeral terms = 1
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 8
Percentage = 40%
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 0
Cleaning process: Initial size of tokens = 0
Reduction due to punctuations and stopwords = 0.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 0
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the te

Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 0
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 0
Cleaning process: Initial size of tokens = 0
Reduction due to punctuations and stopwords = 0.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 0
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 17
Cleaning process: Initial size of tokens = 17
Reduction due to punctuations and stopwords = 5.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partiall

Done. Number of terms: 0
Cleaning process: Initial size of tokens = 0
Reduction due to punctuations and stopwords = 0.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 0
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 0
Cleaning process: Initial size of tokens = 0
Reduction due to punctuations and stopwords = 0.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 0
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing 

Percentage = 65%
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 10
Cleaning process: Initial size of tokens = 10
Reduction due to punctuations and stopwords = 3.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 1
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 4
Percentage = 40%
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 15
Cleaning process: Initial size of tokens = 15
Reduction due to punctuations and stopwords = 4.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 1
Reduction due to terms with not allowed symbols = 0
The tota

Tokenizing the input text ..
Done. Number of terms: 0
Cleaning process: Initial size of tokens = 0
Reduction due to punctuations and stopwords = 0.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 0
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 0
Cleaning process: Initial size of tokens = 0
Reduction due to punctuations and stopwords = 0.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 0
COMPLETED.
Configuring the text cleaner ...
A single text is provided.
Extract

Extracting the terms ...
Tokenizing the input text ..
Done. Number of terms: 0
Cleaning process: Initial size of tokens = 0
Reduction due to punctuations and stopwords = 0.
Reduction due to all numeral terms = 0
Reduction due to short terms = 0
Reduction due to rare terms = 0
Reduction due to partially numeral terms = 0
Reduction due to terms with not allowed symbols = 0
The total term count reduction during this cleaning process = 0
COMPLETED.


In [17]:
type(acurated_account)

tuple

##### Number of tweets found and curated:

In [18]:
acurated_account[0]

200

In [19]:
len(acurated_account[1])

200

##### Fields of curated data

In [20]:
pp.pprint(acurated_account[1][42])

{'hashtags': {'networks'},
 'keywords': {'dont',
              'double',
              'economics',
              'everything',
              'friday',
              'miss',
              'networks',
              'talk',
              'venice'},
 'mentions': {'CaFoscari', 'GuidoCaldarelli'},
 'rt': False,
 'rt_from': set(),
 'text': 'This Friday don‘t miss this double talk on „why #networks in '
         'economics and everything“ at Venice'}


In [21]:
pp.pprint([{'Mentions':d['mentions'], 'Retweeted from':d['rt_from']} for d in acurated_account[1]])

[{'Mentions': set(), 'Retweeted from': {'vcolizza'}},
 {'Mentions': set(), 'Retweeted from': {'jkbren'}},
 {'Mentions': {'alainbarrat', 'netsci2018', 'NetSciPhDs', 'vcolizza', 'jkbren'},
  'Retweeted from': set()},
 {'Mentions': set(), 'Retweeted from': {'forfinancewatch'}},
 {'Mentions': set(), 'Retweeted from': {'ISIgrowth'}},
 {'Mentions': set(), 'Retweeted from': {'Transitionway'}},
 {'Mentions': set(), 'Retweeted from': {'Transitionway'}},
 {'Mentions': set(), 'Retweeted from': {'NetSciPhDs'}},
 {'Mentions': {'CEP',
               'Frank_vanlerven',
               'NEF',
               'Pierre_Monnin',
               'PositiveMoneyEU',
               'PositiveMoneyUK',
               'Sharpe_Actuary',
               'Transitionway',
               'forfinancewatch',
               'jryancollins',
               'lesliehook'},
  'Retweeted from': set()},
 {'Mentions': {'CEP',
               'Frank_vanlerven',
               'NEF',
               'Pierre_Monnin',
               'Pos

#### 4.2 Curating the tweets of each follower

In [22]:
# if keyword extraction is needed set kw to True
#curated = curate_followers(followers, kw = False)

#### Load from preveiously curated data.

In [23]:
fname = './output/simpolproject_curated_new.pickle'
with open(fname, 'rb') as handle:
        curated = pickle.load(handle)

In [24]:
#pp.pprint(curated)

In [25]:
curated['bulentozel'].keys()

dict_keys(['ntweets', 'data'])

In [26]:
curated['bulentozel']['ntweets'], len(curated['bulentozel']['data'])

(81, 80)

In [27]:
pp.pprint(curated['bulentozel']['data'])

[{'hashtags': set(),
  'keywords': set(),
  'mentions': set(),
  'rt': False,
  'rt_from': set(),
  'text': 'make your own smart phone.'},
 {'hashtags': {'S'},
  'keywords': set(),
  'mentions': set(),
  'rt': False,
  'rt_from': set(),
  'text': '#SözBarışın'},
 {'hashtags': {'paradisepapers'},
  'keywords': set(),
  'mentions': set(),
  'rt': False,
  'rt_from': set(),
  'text': 'After #paradisepapers, thousands are signing this open letter to '
          'shut down these insane tax havens:'},
 {'hashtags': set(),
  'keywords': set(),
  'mentions': set(),
  'rt': False,
  'rt_from': set(),
  'text': "Until we manage to avoid those tera-scale thefts there won't be any "
          'political stability anywhere on this planet:'},
 {'hashtags': set(),
  'keywords': set(),
  'mentions': set(),
  'rt': False,
  'rt_from': set(),
  'text': 'Zizek a true zeitgeist discovers machine learning and free software '
          'movement.'},
 {'hashtags': set(),
  'keywords': set(),
  'mentions': {'

### 5. Analysis

In [28]:
def count_links(connections):
    co = dict()
    for alist in connections:
        for a in alist:
            if a in co.keys():
                co[a] += 1
            else:
                co[a] = 1
    return co

def get_popularity(net):
    popularity = dict()
    for nodes in net.values():
        for node,weight in nodes.items():
            if node in popularity.keys():
                popularity[node] += weight
            else:
                popularity[node] = weight
    return popularity

def get_rt_ratios(curated):
    rt_ratios = dict()
    for a in curated.keys():
        nt = curated[a]['ntweets']
        nrt = sum([1 for d in curated[a]['data'] if d['rt']])
        ratio = nrt/nt if  nt else 0
        rt_ratios[a] = {'nTweet':nt,
                        'nRTweet':nrt,
                        'rDiffusion':ratio}
    return rt_ratios


def extract_relations(curated, linktype='mentions'):
    relations = dict()
    for a in curated.keys():
        links = [d[linktype] for d in curated[a]['data'] if d[linktype]]
        relations[a] = count_links(links)
    return relations

def select_subnet(net, nodes, isin = True):
    if isin:
        return {n:{m:w for m,w in links.items() if m in nodes} for n,links in net.items() if n in nodes}
    else:
        return {n:{m:w for m,w in links.items() if m not in nodes} for n,links in net.items() if n in nodes}
    
def drop_isolates(net):
    net = {n:{m:w for m,w in links.items() if m != n} for n,links in net.items()}
    isolates = [n for n,w in net.items() if not w]
    nonisolates = {n:w for n,w in net.items() if w}
    return nonisolates, isolates

def get_cohesion_ratios(net, nodes):
    cohesion = dict()
    for node,links in net.items():
        internal = sum([w for n,w in links.items() if n in nodes])
        external = sum([w for n,w in links.items() if n  not in nodes])
        total = internal + external
        cohesion[node] = {'in': internal,
                          'out': external,
                          'ratio': internal / total if total else 0
                         }
    return cohesion

#### 5.1 Analyzing retweet stats
 - **nTweet** : Number of tweet from the follower were available (up to latest 200 was possible.)
 - **nRTweet** : Number of retweets of those retrieved tweets.
 - **RTweetDiffusion** : A simple information propagation measure. Here it is the ratio of the retweets to the tweets. *(1 - RTweetDiffusion)* could be interpreted as a information sourcing/originating behavioral measure.

In [29]:
RT_Stats = get_rt_ratios(curated)
pp.pprint(RT_Stats)

{'0ttamatt': {'nRTweet': 68, 'nTweet': 73, 'rDiffusion': 0.9315068493150684},
 '2dipicchee': {'nRTweet': 156, 'nTweet': 192, 'rDiffusion': 0.8125},
 '4A50CqwsQqeahlq': {'nRTweet': 0, 'nTweet': 0, 'rDiffusion': 0},
 '7softley': {'nRTweet': 161, 'nTweet': 200, 'rDiffusion': 0.805},
 '7wData': {'nRTweet': 132, 'nTweet': 200, 'rDiffusion': 0.66},
 '85Fifteen': {'nRTweet': 24, 'nTweet': 200, 'rDiffusion': 0.12},
 'AEBIOM': {'nRTweet': 119, 'nTweet': 200, 'rDiffusion': 0.595},
 'APRC_CF': {'nRTweet': 100, 'nTweet': 200, 'rDiffusion': 0.5},
 'ARNAOYTHSD': {'nRTweet': 0, 'nTweet': 200, 'rDiffusion': 0.0},
 'AdaFerko': {'nRTweet': 56, 'nTweet': 200, 'rDiffusion': 0.28},
 'AdamPC26': {'nRTweet': 107, 'nTweet': 199, 'rDiffusion': 0.5376884422110553},
 'Adeline_story': {'nRTweet': 0, 'nTweet': 0, 'rDiffusion': 0},
 'Adellibya1979': {'nRTweet': 123,
                   'nTweet': 142,
                   'rDiffusion': 0.8661971830985915},
 'AdolfSanchez5': {'nRTweet': 153, 'nTweet': 200, 'rDiffusion':

 'bplisse': {'nRTweet': 71, 'nTweet': 81, 'rDiffusion': 0.8765432098765432},
 'brianehenyo': {'nRTweet': 55, 'nTweet': 200, 'rDiffusion': 0.275},
 'brianla1': {'nRTweet': 112, 'nTweet': 196, 'rDiffusion': 0.5714285714285714},
 'brjmoez': {'nRTweet': 145, 'nTweet': 200, 'rDiffusion': 0.725},
 'bulentozel': {'nRTweet': 8, 'nTweet': 81, 'rDiffusion': 0.09876543209876543},
 'burrolahola': {'nRTweet': 89, 'nTweet': 200, 'rDiffusion': 0.445},
 'camilleoudinot': {'nRTweet': 30, 'nTweet': 200, 'rDiffusion': 0.15},
 'carinacarinas': {'nRTweet': 149, 'nTweet': 200, 'rDiffusion': 0.745},
 'carnadani': {'nRTweet': 82, 'nTweet': 82, 'rDiffusion': 1.0},
 'cbbagsic': {'nRTweet': 178, 'nTweet': 200, 'rDiffusion': 0.89},
 'chrizau': {'nRTweet': 42, 'nTweet': 200, 'rDiffusion': 0.21},
 'ciro': {'nRTweet': 172, 'nTweet': 200, 'rDiffusion': 0.86},
 'claudia_mihai': {'nRTweet': 39,
                   'nTweet': 197,
                   'rDiffusion': 0.19796954314720813},
 'complex_warsaw': {'nRTweet': 27,
  

#### 5.2 Constructing and analyzing: who-retweets-whom network

- **RTPopularity** : Of all harvested tweets how often the tweets from the follower is retweeted by the other followers.
- **RTInternal** : Of all the retweets by the follower, the number of times the follower is retweeting the tweet of another followers from the community. 
- **RTExternal** : Of all the retweets by the follower, the number of times the follower is retweeting a tweet from a non-follower user.
- **RTCohesion** : The ratio of the internal tweets to the total retweets. The measure can be used as a proxy on 'to what extend the community under investigation is within the focus of the user?'. A cumulative statistics over all the followers may give an idea on the cohesion between followers.


In [30]:
RT_Network = extract_relations(curated, linktype = 'rt_from')
RT_Network_Cohesion = get_cohesion_ratios(RT_Network, followers)
RT_Network_Internal = select_subnet(RT_Network, followers)
RT_Network_Internal_Connected, RT_Network_Internal_Isolates = drop_isolates(RT_Network_Internal)

In [31]:
pp.pprint(RT_Network_Cohesion)

{'0ttamatt': {'in': 31, 'out': 37, 'ratio': 0.45588235294117646},
 '2dipicchee': {'in': 82, 'out': 74, 'ratio': 0.5256410256410257},
 '4A50CqwsQqeahlq': {'in': 0, 'out': 0, 'ratio': 0},
 '7softley': {'in': 0, 'out': 161, 'ratio': 0.0},
 '7wData': {'in': 0, 'out': 132, 'ratio': 0.0},
 '85Fifteen': {'in': 0, 'out': 24, 'ratio': 0.0},
 'AEBIOM': {'in': 1, 'out': 118, 'ratio': 0.008403361344537815},
 'APRC_CF': {'in': 4, 'out': 96, 'ratio': 0.04},
 'ARNAOYTHSD': {'in': 0, 'out': 0, 'ratio': 0},
 'AdaFerko': {'in': 0, 'out': 56, 'ratio': 0.0},
 'AdamPC26': {'in': 0, 'out': 107, 'ratio': 0.0},
 'Adeline_story': {'in': 0, 'out': 0, 'ratio': 0},
 'Adellibya1979': {'in': 0, 'out': 123, 'ratio': 0.0},
 'AdolfSanchez5': {'in': 0, 'out': 153, 'ratio': 0.0},
 'Adoufrancois': {'in': 0, 'out': 0, 'ratio': 0},
 'Agent_Jase': {'in': 2, 'out': 43, 'ratio': 0.044444444444444446},
 'Agn_ssa': {'in': 77, 'out': 30, 'ratio': 0.719626168224299},
 'AlanGIsaac': {'in': 1, 'out': 45, 'ratio': 0.0217391304347826

 'nachoenblanco': {'in': 2, 'out': 59, 'ratio': 0.03278688524590164},
 'nalsrarekli': {'in': 0, 'out': 161, 'ratio': 0.0},
 'nanumyan': {'in': 3, 'out': 79, 'ratio': 0.036585365853658534},
 'netizait': {'in': 0, 'out': 0, 'ratio': 0},
 'netsci15': {'in': 26, 'out': 174, 'ratio': 0.13},
 'netsci2017': {'in': 9, 'out': 95, 'ratio': 0.08653846153846154},
 'ngominhthanh4': {'in': 0, 'out': 137, 'ratio': 0.0},
 'nicolerycroft1': {'in': 0, 'out': 37, 'ratio': 0.0},
 'nyaghouti': {'in': 0, 'out': 0, 'ratio': 0},
 'oddsku007': {'in': 31, 'out': 0, 'ratio': 1.0},
 'oikonomopoulo14': {'in': 6, 'out': 11, 'ratio': 0.35294117647058826},
 'oil_products': {'in': 0, 'out': 12, 'ratio': 0.0},
 'olivierhoedeman': {'in': 2, 'out': 197, 'ratio': 0.010050251256281407},
 'openmaker': {'in': 16, 'out': 107, 'ratio': 0.13008130081300814},
 'openp2pdesign': {'in': 4, 'out': 172, 'ratio': 0.022727272727272728},
 'orbassano2000': {'in': 0, 'out': 171, 'ratio': 0.0},
 'oskayipek': {'in': 0, 'out': 159, 'ratio': 

In [32]:
pp.pprint(RT_Network_Internal_Connected)

{'0ttamatt': {'AndreaRoventini': 3,
              'GuidoCaldarelli': 6,
              'ISIgrowth': 1,
              'Pierre_Monnin': 1,
              'SimPolProject': 10,
              'VlPetrovV': 1,
              'ciro': 1,
              'cosnet_bifi': 1,
              'fabiofxvanni': 2,
              'forfinancewatch': 1,
              'mnapoletano1': 1,
              'zbattiz': 3},
 '2dipicchee': {'Agn_ssa': 3,
                'Beyond_Ratings': 1,
                'FinexusCenter': 1,
                'SimPolProject': 60,
                'Transitionway': 1,
                'VlPetrovV': 4,
                'cosnet_bifi': 1,
                'fabiofxvanni': 1,
                'fet_eu': 1,
                'lilitpopoyan': 1,
                'vtchakarova': 1,
                'zbattiz': 7},
 'APRC_CF': {'aparpau': 4},
 'Agn_ssa': {'SimPolProject': 72,
             'VlPetrovV': 1,
             'lilitpopoyan': 1,
             'zbattiz': 3},
 'AlanGIsaac': {'cosnet_bifi': 1},
 'AlessandroLaced':

 'Pierre_Monnin': {'EcologicalEcon': 1,
                   'Lsuttors': 2,
                   'MacroPru': 1,
                   'Transitionway': 5,
                   'emacampiglio': 2,
                   'forfinancewatch': 3,
                   'joostmulder': 1},
 'PolicyCompassEU': {'7wData': 1, 'fet_eu': 1},
 'RFIFoundation': {'Climate_Friend': 1, 'UNEP_FI': 1},
 'RebuildMacro': {'GECoalition': 2,
                  'MacroPru': 1,
                  'gbracci_des': 1,
                  'stephenkinsella': 5},
 'Registry_Blocks': {'RogervEijk': 1},
 'RobPasqualino': {'Transitionway': 1, 'zbattiz': 2},
 'RockJimford': {'MazzucatoM': 1},
 'RogerCremades': {'MazzucatoM': 1,
                   'Pierre_Monnin': 3,
                   'Transitionway': 7,
                   'adilson_motter': 1,
                   'firth_john': 1,
                   'zbattiz': 2},
 'RogervEijk': {'Registry_Blocks': 10},
 'RothcoPepp': {'MazzucatoM': 1, 'zbattiz': 1},
 'SICommunity_EU': {'Gianlucamisu': 3, 'monican

In [33]:
print(len(RT_Network_Internal), len(RT_Network_Internal_Connected), len(RT_Network_Internal_Isolates))

683 246 437


In [34]:
RT_Popularity = get_popularity(RT_Network)
RT_Popularity['zbattiz'], RT_Popularity['TheEconomist'], RT_Popularity['SimPolProject'], 

(142, 94, 1262)

#### 5.3 Constructing and analyzing: who-mentions-whom network

- **MentionPopularity** :  Of the all harvested tweets how often the follower is mentioned by the other followers.
- **MentionInternal** : Of all the mentions by the follower the number of times the follower is mentioning the other followers of the community members. 
- **MentionExternal** : Of all the mentions by the follower the number of times the follower is mentioning the non-followers of the the community members.
- **MentionCohesion** : The ratio of the internal mentions to the total mentions. The measure can be used as a proxy on 'to what extend the community under investigation is within the focus of the user. A cumulative statistics over all the followers may give an idea on the cohesion between followers.

In [35]:
Mention_Network = extract_relations(curated, linktype = 'mentions')
Mention_Network_Cohesion = get_cohesion_ratios(Mention_Network, followers)
Mention_Network_Internal = select_subnet(Mention_Network, followers)
Mention_Network_Internal_Connected, Mention_Network_Internal_Isolates = drop_isolates(Mention_Network_Internal)

In [36]:
pp.pprint(Mention_Network_Cohesion)

{'0ttamatt': {'in': 7, 'out': 6, 'ratio': 0.5384615384615384},
 '2dipicchee': {'in': 16, 'out': 37, 'ratio': 0.3018867924528302},
 '4A50CqwsQqeahlq': {'in': 0, 'out': 0, 'ratio': 0},
 '7softley': {'in': 1, 'out': 19, 'ratio': 0.05},
 '7wData': {'in': 0, 'out': 18, 'ratio': 0.0},
 '85Fifteen': {'in': 0, 'out': 83, 'ratio': 0.0},
 'AEBIOM': {'in': 7, 'out': 66, 'ratio': 0.0958904109589041},
 'APRC_CF': {'in': 8, 'out': 42, 'ratio': 0.16},
 'ARNAOYTHSD': {'in': 0, 'out': 4, 'ratio': 0.0},
 'AdaFerko': {'in': 0, 'out': 94, 'ratio': 0.0},
 'AdamPC26': {'in': 0, 'out': 52, 'ratio': 0.0},
 'Adeline_story': {'in': 0, 'out': 0, 'ratio': 0},
 'Adellibya1979': {'in': 0, 'out': 16, 'ratio': 0.0},
 'AdolfSanchez5': {'in': 0, 'out': 72, 'ratio': 0.0},
 'Adoufrancois': {'in': 0, 'out': 0, 'ratio': 0},
 'Agent_Jase': {'in': 0, 'out': 120, 'ratio': 0.0},
 'Agn_ssa': {'in': 19, 'out': 34, 'ratio': 0.3584905660377358},
 'AlanGIsaac': {'in': 0, 'out': 117, 'ratio': 0.0},
 'AlbertoMunoz': {'in': 0, 'out': 

 'SunilMalhotra': {'in': 1, 'out': 284, 'ratio': 0.0035087719298245615},
 'SusFinLab': {'in': 1, 'out': 27, 'ratio': 0.03571428571428571},
 'SusieMod88': {'in': 0, 'out': 655, 'ratio': 0.0},
 'SvyatoslavHrync': {'in': 0, 'out': 23, 'ratio': 0.0},
 'SwissCognitive': {'in': 0, 'out': 36, 'ratio': 0.0},
 'TMWFTE': {'in': 1, 'out': 198, 'ratio': 0.005025125628140704},
 'TSERETELI_0102': {'in': 0, 'out': 4, 'ratio': 0.0},
 'TagederSonne': {'in': 0, 'out': 22, 'ratio': 0.0},
 'TehRaio': {'in': 0, 'out': 118, 'ratio': 0.0},
 'TheRealShankar': {'in': 0, 'out': 17, 'ratio': 0.0},
 'Thierry_Kame': {'in': 0, 'out': 0, 'ratio': 0},
 'TiberiuPetre': {'in': 0, 'out': 0, 'ratio': 0},
 'TipGhosh': {'in': 0, 'out': 11, 'ratio': 0.0},
 'TomasBalint': {'in': 0, 'out': 33, 'ratio': 0.0},
 'TorreyShine': {'in': 0, 'out': 19, 'ratio': 0.0},
 'Transitionway': {'in': 197, 'out': 140, 'ratio': 0.5845697329376854},
 'UB_ICS': {'in': 17, 'out': 121, 'ratio': 0.12318840579710146},
 'UNEP_FI': {'in': 86, 'out': 14

In [37]:
pp.pprint(Mention_Network_Internal_Connected)

{'0ttamatt': {'FinexusCenter': 1,
              'Gius_C': 1,
              'GuidoCaldarelli': 3,
              'SimPolProject': 1,
              'fabiofxvanni': 1},
 '2dipicchee': {'FinexusCenter': 1,
                'GuidoCaldarelli': 1,
                'SimPolProject': 5,
                'fet_eu': 1,
                'forfinancewatch': 3,
                'marcobardoscia': 1,
                'marcoderrico': 1,
                'zbattiz': 3},
 '7softley': {'anxosan': 1},
 'Agn_ssa': {'2dipicchee': 1,
             'SimPolProject': 13,
             '_ChiaraPerillo': 2,
             'forfinancewatch': 1,
             'veronika_st5': 1,
             'zbattiz': 1},
 'AlessandroLaced': {'gbracci_des': 2},
 'AndreaRoventini': {'ISIgrowth': 19,
                     'Mattia_Guerini': 1,
                     'SimPolProject': 3,
                     'Transitionway': 1,
                     'arthurturrell': 1,
                     'mnapoletano1': 1,
                     'zbattiz': 1},
 'AsaBentley':

                   'FETFX_EU': 1,
                   'FETFlagships': 2,
                   'FinexusCenter': 4,
                   'GiachiniDaniele': 1,
                   'GuidoCaldarelli': 4,
                   'IASS_DOLFINS': 3,
                   'MacroPru': 1,
                   'Mattia_Guerini': 1,
                   'NickBeglinger': 3,
                   'PaoloBarucca': 2,
                   'Transitionway': 6,
                   '_ChiaraPerillo': 1,
                   'bitlumens': 2,
                   'bplisse': 1,
                   'cosnet_bifi': 2,
                   'fet_eu': 2,
                   'filippoaddarii': 2,
                   'forfinancewatch': 2,
                   'hamzaz': 3,
                   'lilitpopoyan': 1,
                   'mnapoletano1': 5,
                   'plusvaluetweets': 2,
                   'the_young_fdn': 2,
                   'uzh_bf': 2,
                   'veronika_st5': 1,
                   'vtchakarova': 1,
                   'zbatti

In [38]:
print(len(Mention_Network_Internal), len(Mention_Network_Internal_Connected), len(Mention_Network_Internal_Isolates))

683 159 524


In [39]:
Mention_Popularity = get_popularity(Mention_Network)
Mention_Popularity['zbattiz'], Mention_Popularity['TheEconomist'], Mention_Popularity['SimPolProject'], 

(195, 59, 176)

#### 5.4 Identifying important external nodes


##### Cehcking the source of retweets

In [40]:
RT_Network_External = select_subnet(RT_Network, followers, isin = False)

In [41]:
pp.pprint(RT_Network_External['zbattiz'])

{'BenLallemand': 4,
 'CEPweb': 2,
 'CampanaleMark': 1,
 'Frank_vanlerven': 1,
 'GaelGiraud_AFD': 1,
 'IngridHolmes7': 1,
 'KR_MachineLearn': 1,
 'LiveSpinoza': 1,
 'NVJRobins1': 3,
 'NetSciPhDs': 2,
 'SPRU': 1,
 'SriEvent': 3,
 'TheEconomist': 1,
 'VDombrovskis': 1,
 'alexvespi': 2,
 'andytuit': 1,
 'bbuchner13': 1,
 'eaepe': 3,
 'ecfin': 1,
 'ivcubic': 1,
 'jkbren': 1,
 'jryancollins': 1,
 'netsci2018': 1,
 'vcolizza': 1}


In [42]:
pp.pprint(RT_Network_Internal['zbattiz'])

{'EcologicalEcon': 1,
 'FinexusCenter': 1,
 'GuidoCaldarelli': 3,
 'ISIgrowth': 4,
 'LindaBach14': 1,
 'Pierre_Monnin': 3,
 'RobPasqualino': 1,
 'SimPolProject': 10,
 'Transitionway': 32,
 'UNEP_FI': 1,
 'adilson_motter': 1,
 'bitlumens': 1,
 'bplisse': 1,
 'cosnet_bifi': 3,
 'emacampiglio': 2,
 'fabiofxvanni': 1,
 'filippoaddarii': 2,
 'forfinancewatch': 4,
 'mnapoletano1': 7,
 'monicanagore': 3,
 'plusvaluetweets': 3,
 'uzh_bf': 2,
 'veronika_st5': 1,
 'zbattiz': 6}


In [43]:
RT_Popularity_External = get_popularity(RT_Network_External)

In [44]:
df_external_RT = pd.DataFrame.from_dict(RT_Popularity_External, orient='index')
df_external_RT.columns = ['Count']
df_external_RT.head(10)

Unnamed: 0,Count
OnTheHub,4
CBSNews,4
mcspocky,2
Gokuss7Gamer,1
review_wall,1
vexxhost,1
aaronvick,1
ElixiumCrypto,1
IsaiahGmusic,1
Digijoe78,1


In [45]:
df_external_RT = df_external_RT.sort_values(by = 'Count', ascending = False)
df_external_RT.head(10)

Unnamed: 0,Count
alexvespi,281
wef,125
zerohedge,105
TheEconomist,94
Team4Nature300,88
LaNet_2017,85
DSMeu,84
stevenstrogatz,83
net_science,83
EU_Commission,79


##### Cehcking the mentions of the community members

In [46]:
M_Network_External = select_subnet(Mention_Network, followers, isin = False)
M_Popularity_External = get_popularity(M_Network_External)
df_external_M = pd.DataFrame.from_dict(M_Popularity_External, orient='index')
df_external_M.columns = ['Count']
df_external_M.head()
df_external_M = df_external_M.sort_values(by = 'Count', ascending = False)
df_external_M.head(11)

Unnamed: 0,Count
YouTube,1048
_ERAFP_,173
,166
EU_Commission,142
Frances_Coppola,139
FT,133
rightrelevance,129
jeuasommenulle,128
GrkStav,128
elchaposing,128


### 6. Exporting the summary of results

In [47]:
Summary = dict()
for n in followers:
    PopM,PopRT,nT,nRT,rDiff = 0,0,0,0,0
    if n in Mention_Popularity.keys():
        PopM = Mention_Popularity[n]
    if n in RT_Popularity.keys():
        PopRT = RT_Popularity[n]
    if n in RT_Stats.keys():
        nT = RT_Stats[n]['nTweet']
        nRT = RT_Stats[n]['nRTweet']
        rDiff = RT_Stats[n]['rDiffusion']
    inM, outM, rM, inRT, outRT, rRT = 0,0,0,0,0,0 
    if n in Mention_Network_Cohesion.keys():
        inM = Mention_Network_Cohesion[n]['in']
        outM = Mention_Network_Cohesion[n]['out']
        rM = Mention_Network_Cohesion[n]['ratio']
    if n in RT_Network_Cohesion.keys():
        inRT = RT_Network_Cohesion[n]['in']
        outRT = RT_Network_Cohesion[n]['out']
        rRT = RT_Network_Cohesion[n]['ratio']
    Summary[n] = {
        'MentionPopularity':PopM,
        'RTPopularity':PopRT,
        'RTweetDiffusion':rDiff,
        'nTweet':nT,
        'nRTweet':nRT,
        'MentionInternal': inM,
        'MentionExternal': outM,
        'MentionCohesion': rM,
        'RTInternal': inRT,
        'RTExternal': outRT,
        'RTCohesion': rRT}
   

In [48]:
pp.pprint(Summary)

{'0ttamatt': {'MentionCohesion': 0.5384615384615384,
              'MentionExternal': 6,
              'MentionInternal': 7,
              'MentionPopularity': 0,
              'RTCohesion': 0.45588235294117646,
              'RTExternal': 37,
              'RTInternal': 31,
              'RTPopularity': 1,
              'RTweetDiffusion': 0.9315068493150684,
              'nRTweet': 68,
              'nTweet': 73},
 '2dipicchee': {'MentionCohesion': 0.3018867924528302,
                'MentionExternal': 37,
                'MentionInternal': 16,
                'MentionPopularity': 3,
                'RTCohesion': 0.5256410256410257,
                'RTExternal': 74,
                'RTInternal': 82,
                'RTPopularity': 4,
                'RTweetDiffusion': 0.8125,
                'nRTweet': 156,
                'nTweet': 192},
 '4A50CqwsQqeahlq': {'MentionCohesion': 0,
                     'MentionExternal': 0,
                     'MentionInternal': 0,
                  

                   'nRTweet': 88,
                   'nTweet': 199},
 'ChrisGGeorgas': {'MentionCohesion': 0.0,
                   'MentionExternal': 200,
                   'MentionInternal': 0,
                   'MentionPopularity': 0,
                   'RTCohesion': 0,
                   'RTExternal': 0,
                   'RTInternal': 0,
                   'RTPopularity': 0,
                   'RTweetDiffusion': 0.0,
                   'nRTweet': 0,
                   'nTweet': 200},
 'ChrisHudgins5': {'MentionCohesion': 0.0,
                   'MentionExternal': 24,
                   'MentionInternal': 0,
                   'MentionPopularity': 0,
                   'RTCohesion': 0.005747126436781609,
                   'RTExternal': 173,
                   'RTInternal': 1,
                   'RTPopularity': 1,
                   'RTweetDiffusion': 0.90625,
                   'nRTweet': 174,
                   'nTweet': 192},
 'ChristosEllina1': {'MentionCohesion': 0.056818181

                   'RTExternal': 0,
                   'RTInternal': 0,
                   'RTPopularity': 0,
                   'RTweetDiffusion': 0.0,
                   'nRTweet': 0,
                   'nTweet': 1},
 'KoljaKleineberg': {'MentionCohesion': 0.0,
                     'MentionExternal': 24,
                     'MentionInternal': 0,
                     'MentionPopularity': 4,
                     'RTCohesion': 0.0379746835443038,
                     'RTExternal': 152,
                     'RTInternal': 6,
                     'RTPopularity': 7,
                     'RTweetDiffusion': 0.79,
                     'nRTweet': 158,
                     'nTweet': 200},
 'KushyKush': {'MentionCohesion': 0.0,
               'MentionExternal': 120,
               'MentionInternal': 0,
               'MentionPopularity': 2,
               'RTCohesion': 0.030303030303030304,
               'RTExternal': 32,
               'RTInternal': 1,
               'RTPopularity': 6,
       

                 'nTweet': 200},
 'RogerCremades': {'MentionCohesion': 0.0,
                   'MentionExternal': 39,
                   'MentionInternal': 0,
                   'MentionPopularity': 1,
                   'RTCohesion': 0.08695652173913043,
                   'RTExternal': 168,
                   'RTInternal': 16,
                   'RTPopularity': 3,
                   'RTweetDiffusion': 0.92,
                   'nRTweet': 184,
                   'nTweet': 200},
 'RogervEijk': {'MentionCohesion': 0.0,
                'MentionExternal': 82,
                'MentionInternal': 0,
                'MentionPopularity': 0,
                'RTCohesion': 0.6666666666666666,
                'RTExternal': 5,
                'RTInternal': 10,
                'RTPopularity': 1,
                'RTweetDiffusion': 0.12605042016806722,
                'nRTweet': 15,
                'nTweet': 119},
 'RothcoPepp': {'MentionCohesion': 0.06832298136645963,
                'MentionExternal'

              'nTweet': 199},
 'ailatan1898': {'MentionCohesion': 0.0,
                 'MentionExternal': 11,
                 'MentionInternal': 0,
                 'MentionPopularity': 0,
                 'RTCohesion': 0.0,
                 'RTExternal': 50,
                 'RTInternal': 0,
                 'RTPopularity': 0,
                 'RTweetDiffusion': 0.7246376811594203,
                 'nRTweet': 50,
                 'nTweet': 69},
 'ale_marichela': {'MentionCohesion': 0.0,
                   'MentionExternal': 156,
                   'MentionInternal': 0,
                   'MentionPopularity': 0,
                   'RTCohesion': 0.0,
                   'RTExternal': 114,
                   'RTInternal': 0,
                   'RTPopularity': 0,
                   'RTweetDiffusion': 0.57,
                   'nRTweet': 114,
                   'nTweet': 200},
 'aleaf_h2020': {'MentionCohesion': 0.1411042944785276,
                 'MentionExternal': 140,
                 

             'RTPopularity': 5,
             'RTweetDiffusion': 0.975,
             'nRTweet': 195,
             'nTweet': 200},
 'eleni_choidas': {'MentionCohesion': 0.07142857142857142,
                   'MentionExternal': 52,
                   'MentionInternal': 4,
                   'MentionPopularity': 0,
                   'RTCohesion': 0.046153846153846156,
                   'RTExternal': 124,
                   'RTInternal': 6,
                   'RTPopularity': 0,
                   'RTweetDiffusion': 0.65,
                   'nRTweet': 130,
                   'nTweet': 200},
 'elifgunduzyeli': {'MentionCohesion': 0.0,
                    'MentionExternal': 32,
                    'MentionInternal': 0,
                    'MentionPopularity': 0,
                    'RTCohesion': 0.0,
                    'RTExternal': 149,
                    'RTInternal': 0,
                    'RTPopularity': 0,
                    'RTweetDiffusion': 0.745,
                    'nRTweet': 1

               'RTPopularity': 0,
               'RTweetDiffusion': 0.045,
               'nRTweet': 9,
               'nTweet': 200},
 'imsumitchouhan': {'MentionCohesion': 0.0,
                    'MentionExternal': 61,
                    'MentionInternal': 0,
                    'MentionPopularity': 0,
                    'RTCohesion': 0.006289308176100629,
                    'RTExternal': 158,
                    'RTInternal': 1,
                    'RTPopularity': 0,
                    'RTweetDiffusion': 0.795,
                    'nRTweet': 159,
                    'nTweet': 200},
 'indy_johar': {'MentionCohesion': 0.10526315789473684,
                'MentionExternal': 17,
                'MentionInternal': 2,
                'MentionPopularity': 76,
                'RTCohesion': 0.0,
                'RTExternal': 180,
                'RTInternal': 0,
                'RTPopularity': 33,
                'RTweetDiffusion': 0.9,
                'nRTweet': 180,
                'n

              'RTPopularity': 0,
              'RTweetDiffusion': 0.0,
              'nRTweet': 0,
              'nTweet': 10},
 'nachoenblanco': {'MentionCohesion': 0.04,
                   'MentionExternal': 24,
                   'MentionInternal': 1,
                   'MentionPopularity': 0,
                   'RTCohesion': 0.03278688524590164,
                   'RTExternal': 59,
                   'RTInternal': 2,
                   'RTPopularity': 1,
                   'RTweetDiffusion': 0.8472222222222222,
                   'nRTweet': 61,
                   'nTweet': 72},
 'nalsrarekli': {'MentionCohesion': 0.0,
                 'MentionExternal': 19,
                 'MentionInternal': 0,
                 'MentionPopularity': 0,
                 'RTCohesion': 0.0,
                 'RTExternal': 161,
                 'RTInternal': 0,
                 'RTPopularity': 0,
                 'RTweetDiffusion': 0.8090452261306532,
                 'nRTweet': 161,
                 'n

                    'nRTweet': 1,
                    'nTweet': 1},
 'yuliay': {'MentionCohesion': 0.0,
            'MentionExternal': 56,
            'MentionInternal': 0,
            'MentionPopularity': 0,
            'RTCohesion': 0.008264462809917356,
            'RTExternal': 120,
            'RTInternal': 1,
            'RTPopularity': 1,
            'RTweetDiffusion': 0.6080402010050251,
            'nRTweet': 121,
            'nTweet': 199},
 'yuridayana10': {'MentionCohesion': 0.05,
                  'MentionExternal': 19,
                  'MentionInternal': 1,
                  'MentionPopularity': 1,
                  'RTCohesion': 0.0,
                  'RTExternal': 77,
                  'RTInternal': 0,
                  'RTPopularity': 0,
                  'RTweetDiffusion': 0.7938144329896907,
                  'nRTweet': 77,
                  'nTweet': 97},
 'zbattiz': {'MentionCohesion': 0.4888888888888889,
             'MentionExternal': 92,
             'MentionIn

In [49]:
df_Summary = pd.DataFrame.from_dict(Summary, orient='index')

In [50]:
df_Summary = df_Summary.sort_values(by = ['MentionPopularity','RTPopularity','nTweet','MentionCohesion','RTCohesion', 'nRTweet'], ascending=False)

In [51]:
df_Summary.head(10)

Unnamed: 0,MentionPopularity,RTPopularity,RTweetDiffusion,nTweet,nRTweet,MentionInternal,MentionExternal,MentionCohesion,RTInternal,RTExternal,RTCohesion
zbattiz,195,142,0.65,200,130,88,92,0.488889,94,36,0.723077
fet_eu,193,111,0.45,200,90,36,183,0.164384,26,64,0.288889
SimPolProject,176,1262,0.825,200,165,77,47,0.620968,139,26,0.842424
filippoaddarii,110,39,0.595,200,119,53,96,0.355705,26,93,0.218487
forfinancewatch,97,49,0.525,200,105,4,84,0.045455,12,93,0.114286
UNEP_FI,85,23,0.345,200,69,86,143,0.375546,6,63,0.086957
AndreaRoventini,82,93,0.675,200,135,27,42,0.391304,84,51,0.622222
plusvaluetweets,77,85,0.37,200,74,154,204,0.430168,38,36,0.513514
indy_johar,76,33,0.9,200,180,2,17,0.105263,0,180,0.0
FETFlagships,74,17,0.87,200,174,0,27,0.0,10,164,0.057471


In [52]:
df_external_M.head(42).to_csv('top_external_mentions.csv')

In [53]:
df_external_RT.head(42).to_csv('top_external_RT.csv')

In [54]:
df_Summary.head(42).to_csv('top_SimPolProject_followers.csv')

In [55]:
df_Summary.to_csv('SimPolProject_followers.csv')