In [133]:
import plotly.express as px
import plotly.graph_objects as go

import pandas as pd
import json
import os

from data_utils import DATA_PATH

In [112]:
# (name, two-letter code) pairs for U.S. states and territories, listed in the
# order of the original three-column table read row by row, left to right.
_STATE_NAME_CODE_PAIRS = [
    ('Alabama', 'AL'), ('Kentucky', 'KY'), ('Ohio', 'OH'),
    ('Alaska', 'AK'), ('Louisiana', 'LA'), ('Oklahoma', 'OK'),
    ('Arizona', 'AZ'), ('Maine', 'ME'), ('Oregon', 'OR'),
    ('Arkansas', 'AR'), ('Maryland', 'MD'), ('Pennsylvania', 'PA'),
    ('American Samoa', 'AS'), ('Massachusetts', 'MA'), ('Puerto Rico', 'PR'),
    ('California', 'CA'), ('Michigan', 'MI'), ('Rhode Island', 'RI'),
    ('Colorado', 'CO'), ('Minnesota', 'MN'), ('South Carolina', 'SC'),
    ('Connecticut', 'CT'), ('Mississippi', 'MS'), ('South Dakota', 'SD'),
    ('Delaware', 'DE'), ('Missouri', 'MO'), ('Tennessee', 'TN'),
    ('District of Columbia', 'DC'), ('Montana', 'MT'), ('Texas', 'TX'),
    ('Florida', 'FL'), ('Nebraska', 'NE'), ('Trust Territories', 'TT'),
    ('Georgia', 'GA'), ('Nevada', 'NV'), ('Utah', 'UT'),
    ('Guam', 'GU'), ('New Hampshire', 'NH'), ('Vermont', 'VT'),
    ('Hawaii', 'HI'), ('New Jersey', 'NJ'), ('Virginia', 'VA'),
    ('Idaho', 'ID'), ('New Mexico', 'NM'), ('Virgin Islands', 'VI'),
    ('Illinois', 'IL'), ('New York', 'NY'), ('Washington', 'WA'),
    ('Indiana', 'IN'), ('North Carolina', 'NC'), ('West Virginia', 'WV'),
    ('Iowa', 'IA'), ('North Dakota', 'ND'), ('Wisconsin', 'WI'),
    ('Kansas', 'KS'), ('Northern Mariana Islands', 'MP'), ('Wyoming', 'WY'),
]

# Lookup that normalises a state label to its two-letter code. Accepted keys:
# the full name as spelled above, the upper-case code itself, the lower-case
# code, and the extra alias added on the last line.
state_abbreviations = dict(_STATE_NAME_CODE_PAIRS)
_STATE_CODES = [code for _, code in _STATE_NAME_CODE_PAIRS]
state_abbreviations.update(zip(_STATE_CODES, _STATE_CODES))
state_abbreviations.update((code.lower(), code) for code in _STATE_CODES)
state_abbreviations['U.S. Virgin Islands'] = 'VI'

In [144]:
# Two-letter -> three-letter country code lookup (the entries look like
# ISO 3166-1 alpha-2 -> alpha-3). map_world passes it to Series.map to turn
# `countrycode` values into the location codes given to go.Choropleth.
# NOTE(review): a code that is missing from this table becomes NaN when mapped,
# so such countries would not be drawn — confirm the table covers the data.
# NOTE(review): "XK" -> "XKX" is a user-assigned code rather than an official
# ISO entry — verify that the plotting backend recognises it.
iso3 = {"BD": "BGD", "BE": "BEL", "BF": "BFA", "BG": "BGR", "BA": "BIH", "BB": "BRB", "WF": "WLF", "BL": "BLM", "BM": "BMU", "BN": "BRN", "BO": "BOL", "BH": "BHR", "BI": "BDI", "BJ": "BEN", "BT": "BTN", "JM": "JAM", "BV": "BVT", "BW": "BWA", "WS": "WSM", "BQ": "BES", "BR": "BRA", "BS": "BHS", "JE": "JEY", "BY": "BLR", "BZ": "BLZ", "RU": "RUS", "RW": "RWA", "RS": "SRB", "TL": "TLS", "RE": "REU", "TM": "TKM", "TJ": "TJK", "RO": "ROU", "TK": "TKL", "GW": "GNB", "GU": "GUM", "GT": "GTM", "GS": "SGS", "GR": "GRC", "GQ": "GNQ", "GP": "GLP", "JP": "JPN", "GY": "GUY", "GG": "GGY", "GF": "GUF", "GE": "GEO", "GD": "GRD", "GB": "GBR", "GA": "GAB", "SV": "SLV", "GN": "GIN", "GM": "GMB", "GL": "GRL", "GI": "GIB", "GH": "GHA", "OM": "OMN", "TN": "TUN", "JO": "JOR", "HR": "HRV", "HT": "HTI", "HU": "HUN", "HK": "HKG", "HN": "HND", "HM": "HMD", "VE": "VEN", "PR": "PRI", "PS": "PSE", "PW": "PLW", "PT": "PRT", "SJ": "SJM", "PY": "PRY", "IQ": "IRQ", "PA": "PAN", "PF": "PYF", "PG": "PNG", "PE": "PER", "PK": "PAK", "PH": "PHL", "PN": "PCN", "PL": "POL", "PM": "SPM", "ZM": "ZMB", "EH": "ESH", "EE": "EST", "EG": "EGY", "ZA": "ZAF", "EC": "ECU", "IT": "ITA", "VN": "VNM", "SB": "SLB", "ET": "ETH", "SO": "SOM", "ZW": "ZWE", "SA": "SAU", "ES": "ESP", "ER": "ERI", "ME": "MNE", "MD": "MDA", "MG": "MDG", "MF": "MAF", "MA": "MAR", "MC": "MCO", "UZ": "UZB", "MM": "MMR", "ML": "MLI", "MO": "MAC", "MN": "MNG", "MH": "MHL", "MK": "MKD", "MU": "MUS", "MT": "MLT", "MW": "MWI", "MV": "MDV", "MQ": "MTQ", "MP": "MNP", "MS": "MSR", "MR": "MRT", "IM": "IMN", "UG": "UGA", "TZ": "TZA", "MY": "MYS", "MX": "MEX", "IL": "ISR", "FR": "FRA", "IO": "IOT", "SH": "SHN", "FI": "FIN", "FJ": "FJI", "FK": "FLK", "FM": "FSM", "FO": "FRO", "NI": "NIC", "NL": "NLD", "NO": "NOR", "NA": "NAM", "VU": "VUT", "NC": "NCL", "NE": "NER", "NF": "NFK", "NG": "NGA", "NZ": "NZL", "NP": "NPL", "NR": "NRU", "NU": "NIU", "CK": "COK", "XK": "XKX", "CI": "CIV", "CH": "CHE", "CO": "COL", "CN": "CHN", "CM": "CMR", "CL": "CHL", "CC": "CCK", 
"CA": "CAN", "CG": "COG", "CF": "CAF", "CD": "COD", "CZ": "CZE", "CY": "CYP", "CX": "CXR", "CR": "CRI", "CW": "CUW", "CV": "CPV", "CU": "CUB", "SZ": "SWZ", "SY": "SYR", "SX": "SXM", "KG": "KGZ", "KE": "KEN", "SS": "SSD", "SR": "SUR", "KI": "KIR", "KH": "KHM", "KN": "KNA", "KM": "COM", "ST": "STP", "SK": "SVK", "KR": "KOR", "SI": "SVN", "KP": "PRK", "KW": "KWT", "SN": "SEN", "SM": "SMR", "SL": "SLE", "SC": "SYC", "KZ": "KAZ", "KY": "CYM", "SG": "SGP", "SE": "SWE", "SD": "SDN", "DO": "DOM", "DM": "DMA", "DJ": "DJI", "DK": "DNK", "VG": "VGB", "DE": "DEU", "YE": "YEM", "DZ": "DZA", "US": "USA", "UY": "URY", "YT": "MYT", "UM": "UMI", "LB": "LBN", "LC": "LCA", "LA": "LAO", "TV": "TUV", "TW": "TWN", "TT": "TTO", "TR": "TUR", "LK": "LKA", "LI": "LIE", "LV": "LVA", "TO": "TON", "LT": "LTU", "LU": "LUX", "LR": "LBR", "LS": "LSO", "TH": "THA", "TF": "ATF", "TG": "TGO", "TD": "TCD", "TC": "TCA", "LY": "LBY", "VA": "VAT", "VC": "VCT", "AE": "ARE", "AD": "AND", "AG": "ATG", "AF": "AFG", "AI": "AIA", "VI": "VIR", "IS": "ISL", "IR": "IRN", "AM": "ARM", "AL": "ALB", "AO": "AGO", "AQ": "ATA", "AS": "ASM", "AR": "ARG", "AU": "AUS", "AT": "AUT", "AW": "ABW", "IN": "IND", "AX": "ALA", "AZ": "AZE", "IE": "IRL", "ID": "IDN", "UA": "UKR", "QA": "QAT", "MZ": "MOZ"}

In [124]:
def map(locations, year, user_type='followers'):
    """Print per-state entry counts for the U.S. and show them as a choropleth.

    Rows of ``locations`` whose ``countrycode`` equals ``'US'`` are kept, their
    ``state`` label is normalised to a two-letter code via the module-level
    ``state_abbreviations`` lookup, and the number of rows per code is printed
    and plotted with ``px.choropleth``.

    NOTE: this function shadows the builtin ``map`` for the rest of the
    notebook. The name is kept because later cells call ``map(locations, year)``.

    Parameters
    ----------
    locations : pandas.DataFrame
        Must provide the columns ``countrycode`` and ``state``.
    year
        Label interpolated into the figure title (the notebook passes strings
        such as ``'2016'``).
    user_type : str, default 'followers'
        Label interpolated into the figure title.

    Returns
    -------
    None
        The function only prints summaries and displays the figure.
    """
    us_locations = locations[locations['countrycode'] == 'US'].copy()
    print(f'number of entries with a location in the US: {len(us_locations)}')

    raw_states = us_locations['state']
    # Series.map with a dict yields NaN for labels that are not keys of the
    # dict, so one unexpected spelling no longer aborts the whole plot with a
    # KeyError. Missing states stay missing, exactly as before.
    us_locations['state'] = raw_states.map(state_abbreviations)

    # Surface labels that were present but could not be normalised, so the
    # lookup table can be extended instead of silently losing those rows.
    unmapped = raw_states[raw_states.notna() & us_locations['state'].isna()]
    if len(unmapped) > 0:
        print('state labels without a known abbreviation (left out of the counts):',
              unmapped.value_counts().to_dict())

    # groupby drops NaN keys, so only rows with a recognised state are counted.
    us_location_counts = us_locations.groupby('state').size().to_frame('size')
    print(us_location_counts.sort_values('size', ascending=False))
    print("total entries with a state:", us_location_counts['size'].sum())

    fig = px.choropleth(us_location_counts.reset_index(),
                        locations='state',
                        locationmode="USA-states",
                        scope="usa",
                        color='size',
                        color_continuous_scale="Blues",
                        title=f'{user_type} by U.S. state in {year}'
                        )

    fig.show()

In [151]:
def map_world(year, locations, user_type):
    """Print per-country entry counts and show them as a world choropleth.

    Parameters
    ----------
    year
        Label interpolated into the figure title.
    locations : pandas.DataFrame
        Must provide a ``countrycode`` column; rows are counted per code.
    user_type : str
        Label used in the figure title and, lower-cased, in the colour-bar
        title.

    Returns
    -------
    None
        The function only prints the counts and displays the figure.
    """
    per_country = locations.groupby('countrycode').size()
    country_counts = per_country.to_frame('size').reset_index().dropna()
    print(country_counts.sort_values('size', ascending=False))

    # The two-letter codes are translated through the module-level ``iso3``
    # table before being handed to the trace.
    trace = go.Choropleth(
        locations=country_counts['countrycode'].map(iso3),
        z=country_counts['size'],
        colorscale='Blues',
        autocolorscale=False,
        marker_line_color='darkgray',
        marker_line_width=0.5,
        colorbar_title=f'# {user_type.lower()}',
    )
    geo_options = dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular',
    )

    fig = go.Figure(data=trace)
    fig.update_layout(
        title_text=f'{user_type} by country in {year}',
        geo=geo_options,
    )

    fig.show()

In [3]:
# Directory that holds the location metadata files read below.
out_dir = os.path.join(DATA_PATH, 'paper_data', 'meta')


def _read_location_file(file_name):
    """Parse the UTF-8 JSON file ``file_name`` inside ``out_dir``."""
    with open(os.path.join(out_dir, file_name), encoding='utf8') as handle:
        return json.load(handle)


twitter_follower_locations = _read_location_file('twitter_follower_locations.json')
twitter_author_locations = _read_location_file('twitter_author_locations.json')

In [129]:
# U.S. state map for the 2016 follower locations.
year = '2016'
# The transpose turns the outer JSON keys into rows, so the inner keys
# (e.g. countrycode, state) become columns.
locations = pd.DataFrame(twitter_follower_locations[year]).transpose()
map(locations, year)

number of entries with a location in the US: 334
       size
state      
CA       51
NY       20
TX       20
OH       16
FL       14
MN       12
IN        9
TN        8
NV        8
VA        7
KS        7
PA        7
OK        6
AR        6
NE        6
IL        6
MD        5
AZ        5
OR        4
IA        4
MO        4
UT        3
AL        3
NC        3
DC        3
CO        3
WA        3
MI        2
KY        2
SC        2
ID        2
GA        2
NJ        1
NM        1
NH        1
MA        1
LA        1
SD        1
VT        1
total entries with a state: 260


In [130]:
# U.S. state map for the 2020 follower locations.
year = '2020'
# The transpose turns the outer JSON keys into rows, so the inner keys
# (e.g. countrycode, state) become columns.
locations = pd.DataFrame(twitter_follower_locations[year]).transpose()
map(locations, year)

number of entries with a location in the US: 5805
       size
state      
CA      658
NY      358
TX      323
FL      260
OH      200
IL      193
PA      178
WA      146
MN      145
IN      130
MA      128
AZ      123
NC      123
OR      122
GA      111
CO      107
TN      100
NJ       94
DC       88
MD       87
VA       85
MI       76
AR       74
WI       73
NV       68
MO       67
OK       64
SC       54
KY       51
NM       45
AL       39
KS       39
IA       30
LA       26
NH       23
AK       21
HI       20
CT       20
UT       20
WV       19
NE       17
MT       15
MS       15
ME       12
WY       11
ID       10
RI        9
VT        7
SD        6
ND        6
PR        5
DE        2
VI        1
total entries with a state: 4704


In [152]:
# World map for the 2016 follower locations.
year = '2016'
user_type = 'followers'
# The transpose turns the outer JSON keys into rows, so the inner keys
# (e.g. countrycode) become columns.
locations = pd.DataFrame(twitter_follower_locations[year]).transpose()
map_world(year=year, locations=locations, user_type=user_type)

   countrycode  size
71          US   334
23          GB    64
10          CA    54
35          IN    27
14          DE    26
..         ...   ...
42          LK     1
43          LR     1
45          MC     1
46          MW     1
0           AE     1

[76 rows x 2 columns]


In [153]:
# World map for the 2020 follower locations.
year = '2020'
user_type = 'followers'
# The transpose turns the outer JSON keys into rows, so the inner keys
# (e.g. countrycode) become columns.
locations = pd.DataFrame(twitter_follower_locations[year]).transpose()
map_world(year=year, locations=locations, user_type=user_type)

    countrycode  size
131          US  5805
19           CA   364
44           GB   283
125          TR   188
6            AU   145
..          ...   ...
86           MN     1
93           MZ     1
100          OM     1
107          PS     1
140          ZW     1

[141 rows x 2 columns]
