In [1]:
# Libraries
from collections import Counter
import pandas as pd
import snscrape.modules.twitter as sntwitter
import itertools
import math

# Functions
def get_mentioned_users(x):
    """Return the usernames found in a tweet's mentioned-users value.

    Parameters
    ----------
    x : sequence of mappings, or a missing value
        Each element is subscripted with ``'username'``. Anything that cannot
        be iterated (e.g. ``None`` or a float NaN) is treated as "no mentions".

    Returns
    -------
    list
        The usernames in their original order. Extraction is best-effort: it
        stops at the first malformed entry and returns what was collected up
        to that point.
    """
    users = []
    try:
        for mention in x:
            users.append(mention['username'])
    except (TypeError, KeyError, IndexError):
        # Missing value or malformed entry: keep what we have. Only the
        # data-shape errors are caught, so KeyboardInterrupt / SystemExit
        # still propagate instead of being silently swallowed.
        pass
    return users

# Selected locations
# Source: https://en.wikipedia.org/wiki/List_of_countries_by_English-speaking_population (>1MM native speakers)
# Each value is a 'latitude,longitude,radius' string that gets interpolated into the
# geocode:"..." filter of the search query.
# NOTE(review): key names are kept as-is so existing lookups keep working, but
# 'trinidad_tobago_loc' still carries a '_loc' suffix the other keys lack and
# 'new_zeland' looks like a typo for 'new_zealand' - confirm before renaming.
# NOTE(review): a few values contain a space after a comma (australia, south_africa,
# sri_lanka); verify the geocode filter tolerates that.
locations = {
    'east_us': '35.672964,-79.039292,1200km',
    'central_us': '38.273120,-98.582187,1200km',
    'west_us': '39.515882,-116.853723,1100km',
    'ireland_uk': '54.521992,-3.984398,550km',
    'east_canada': '53.685485,-79.787644,1150km',
    'west_canada': '58.276278,-115.796203,1150km',
    'australia': '-25.610118, 134.354805,2100km',
    'south_africa': '-28.816624, 24.991639,1000km',
    'ireland': '52.812761,-8.703998,200km',  # trailing comma was missing here (SyntaxError)
    'new_zeland': '-41.500083,172.834408,830km',
    'sri_lanka': '7.555494,80.713785, 265km',
    'singapore': '1.271988,103.823620,22km',
    'trinidad_tobago_loc': '10.695211,-61.168652,100km'
}

In [5]:
# Search window and keyword filter for the tweet query.
since = '2020-01-01'
until = '2021-03-01'
keywords = '(wuhan OR ncov OR coronavirus OR covid OR sars-cov-2 OR pandemic)'
# The locations dict defines this entry under 'singapore'; looking up
# 'singapore_loc' raises KeyError on a fresh kernel.
loc = locations['singapore']
max_tweets = 1000  # upper bound on the number of scraped tweets
query = '{} since:{} until:{} geocode:"{}"'.format(keywords, since, until, loc)
# islice stops the (lazy) scraper generator after max_tweets items.
df = pd.DataFrame(itertools.islice(sntwitter.TwitterSearchScraper(query).get_items(), max_tweets))
# Each `user` cell is subscriptable by 'username'; flatten the handle into its own column.
df['username'] = df.user.apply(lambda x: x['username'])
print('Initial referential timestamp:', df.date.min())

# Flatten the per-tweet mention lists and count how often each account is mentioned.
mention_lists = df.mentionedUsers.apply(lambda x: get_mentioned_users(x))
mention_counts = Counter(list(itertools.chain.from_iterable(mention_lists)))
users = pd.DataFrame(mention_counts.items(), columns=['account', 'count'])

# Keep accounts mentioned more often than ceil(number_of_tweets / 1000), most mentioned first.
min_mentions = math.ceil(df.shape[0]/1000)
frequent_mask = users['count']>min_mentions
users = users[frequent_mask].sort_values('count', ascending=False).reset_index(drop=True)
print('Number of selected users:', users.shape[0])

# Fetch the profile of every frequently mentioned account. Rows are collected in a
# plain list and the DataFrame is built once, rather than growing it row by row.
profile_columns = [
    'username', 'cnt', 'description', 'verified', 'followersCount', 'statusesCount', 'mediaCount']
profile_rows = []
for i, (account, mention_count) in enumerate(zip(users['account'], users['count'])):
    try:
        ent = sntwitter.TwitterUserScraper(account).entity
        profile_rows.append([
            ent.username, mention_count, ent.description, ent.verified,
            ent.followersCount, ent.statusesCount, ent.mediaCount])
        print('Got info from user:', i)
    except Exception as exc:
        # Best-effort: a failed lookup skips that account only. Catching Exception
        # (not a bare except) keeps Ctrl-C working and the reason visible.
        print('Got an error from user:', i, '-', repr(exc))
selected_users = pd.DataFrame(profile_rows, columns=profile_columns)

# Keep only verified accounts whose description contains 'news' or 'News'.
# na=False treats a missing description as "no match" instead of raising.
is_news = selected_users['description'].str.contains('news|News', na=False)
selected_users = selected_users[is_news & selected_users['verified'].astype(bool)]

Initial referential timestamp: 2020-12-07 10:52:47+00:00


In [9]:
# Display the current selected_users frame (bare last expression -> rich table output).
selected_users

Unnamed: 0,username,cnt,description,verified,followersCount,statusesCount,mediaCount
1,googlenews,5,Google News helps you learn more about the sto...,True,291557,332,91
23,ABSCBNNews,2,"Stories, video, and multimedia for Filipinos w...",True,7359676,878670,193765
25,nytimes,2,News tips? Share them here: nyti.ms/2FVHq9v,True,49830099,428799,52569
26,SkyNews,2,We take you to the heart of the stories that s...,True,7080771,455410,127096


In [10]:
# Display the scraped tweets frame; the rendered output elides the middle rows and columns ('...').
df

Unnamed: 0,url,date,content,renderedContent,id,user,outlinks,tcooutlinks,replyCount,retweetCount,...,source,sourceUrl,sourceLabel,media,retweetedTweet,quotedTweet,mentionedUsers,coordinates,place,username
0,https://twitter.com/GY_Singapore/status/136596...,2021-02-28 09:49:48+00:00,"Picturing the Pandemic, A Visual Record of Cov...","Picturing the Pandemic, A Visual Record of Cov...",1365962396196425731,"{'username': 'GY_Singapore', 'displayname': 'G...",[],[],0,0,...,"<a href=""http://twitter.com/download/android"" ...",http://twitter.com/download/android,Twitter for Android,[{'previewUrl': 'https://pbs.twimg.com/media/E...,,,,"{'longitude': 103.84846203917137, 'latitude': ...",National Museum of Singapore,GY_Singapore
1,https://twitter.com/natmuseum_sg/status/136591...,2021-02-28 06:44:58+00:00,#PicturingthePandemic: A Visual Record of COVI...,#PicturingthePandemic: A Visual Record of COVI...,1365915883424522244,"{'username': 'natmuseum_sg', 'displayname': 'N...",[http://go.gov.Sg/PicturingthePandemic],[https://t.co/xme4bK2wtN],1,3,...,"<a href=""http://twitter.com/download/iphone"" r...",http://twitter.com/download/iphone,Twitter for iPhone,[{'previewUrl': 'https://pbs.twimg.com/media/E...,,,,"{'longitude': 103.84846203917137, 'latitude': ...",National Museum of Singapore,natmuseum_sg
2,https://twitter.com/LilHuzzyVert/status/136591...,2021-02-28 06:35:56+00:00,Covid-19 vaccination MAY CAUSE PROLONG WHAT???...,Covid-19 vaccination MAY CAUSE PROLONG WHAT???...,1365913607549067273,"{'username': 'LilHuzzyVert', 'displayname': '𝑇...",[],[],0,0,...,"<a href=""http://twitter.com/download/iphone"" r...",http://twitter.com/download/iphone,Twitter for iPhone,[{'previewUrl': 'https://pbs.twimg.com/media/E...,,,,"{'longitude': 103.696411999967, 'latitude': 1....","北区, 新加坡",LilHuzzyVert
3,https://twitter.com/Erindrayanti_06/status/136...,2021-02-28 05:57:12+00:00,Semenjak covid gaji karyawan gak ada lagi yang...,Semenjak covid gaji karyawan gak ada lagi yang...,1365903859508408322,"{'username': 'Erindrayanti_06', 'displayname':...",[],[],0,0,...,"<a href=""http://twitter.com/download/android"" ...",http://twitter.com/download/android,Twitter for Android,,,,,"{'longitude': 103.9866335, 'latitude': 1.107063}","Lubuk Baja, Indonesia",Erindrayanti_06
4,https://twitter.com/KaneBlack_/status/13658989...,2021-02-28 05:37:48+00:00,"She was Demoted, Doubted and Rejected But Now ...","She was Demoted, Doubted and Rejected But Now ...",1365898978252922884,"{'username': 'KaneBlack_', 'displayname': 'Kan...",[https://www.goodnewsnetwork.org/katalin-karik...,[https://t.co/3ezCoSthPC],0,1,...,"<a href=""http://twitter.com/download/iphone"" r...",http://twitter.com/download/iphone,Twitter for iPhone,,,,,"{'longitude': 103.749959507073, 'latitude': 1....","中区, 新加坡",KaneBlack_
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,https://twitter.com/mfosgp/status/133593498757...,2020-12-07 13:11:36+00:00,@GeraldoRivera @realDonaldTrump @SenSchumer He...,@GeraldoRivera @realDonaldTrump @SenSchumer He...,1335934987573415937,"{'username': 'mfosgp', 'displayname': 'Microph...",[],[],0,0,...,"<a href=""http://twitter.com/download/iphone"" r...",http://twitter.com/download/iphone,Twitter for iPhone,,,,"[{'username': 'GeraldoRivera', 'displayname': ...","{'longitude': 103.749959507073, 'latitude': 1....","中区, 新加坡",mfosgp
996,https://twitter.com/mfosgp/status/133593098350...,2020-12-07 12:55:41+00:00,@AndrewHGiuliani @RudyGiuliani He’s had a toug...,@AndrewHGiuliani @RudyGiuliani He’s had a toug...,1335930983506345987,"{'username': 'mfosgp', 'displayname': 'Microph...",[],[],0,0,...,"<a href=""http://twitter.com/download/iphone"" r...",http://twitter.com/download/iphone,Twitter for iPhone,,,,"[{'username': 'AndrewHGiuliani', 'displayname'...","{'longitude': 103.749959507073, 'latitude': 1....","中区, 新加坡",mfosgp
997,https://twitter.com/mfosgp/status/133591876980...,2020-12-07 12:07:09+00:00,"@briantylercohen So within a month, Rudy gets ...","@briantylercohen So within a month, Rudy gets ...",1335918769802104833,"{'username': 'mfosgp', 'displayname': 'Microph...",[],[],0,0,...,"<a href=""http://twitter.com/download/iphone"" r...",http://twitter.com/download/iphone,Twitter for iPhone,[{'thumbnailUrl': 'https://pbs.twimg.com/tweet...,,,"[{'username': 'briantylercohen', 'displayname'...","{'longitude': 103.749959507073, 'latitude': 1....","中区, 新加坡",mfosgp
998,https://twitter.com/basilyeo/status/1335907581...,2020-12-07 11:22:42+00:00,Mixed feelings about having a new stadium. Loo...,Mixed feelings about having a new stadium. Loo...,1335907581835931650,"{'username': 'basilyeo', 'displayname': 'Basil...",[https://twitter.com/MilanReports/status/13358...,[https://t.co/vOghXFnfkz],1,0,...,"<a href=""http://twitter.com/download/iphone"" r...",http://twitter.com/download/iphone,Twitter for iPhone,,,{'url': 'https://twitter.com/MilanReports/stat...,,"{'longitude': 103.749959507073, 'latitude': 1....","中区, 新加坡",basilyeo
