In [1]:
%pylab inline
import pandas as pd

Populating the interactive namespace from numpy and matplotlib


First, we'll start with a list of country names and their ISO 3166-1 codes (two-letter, three-letter, and numeric).

In [2]:
# Country reference table: name plus the three code columns. The two
# coordinate columns are not used anywhere below, so they are dropped on load.
countries = pd.read_csv(
    'shared/data/country-codes/countries_codes_and_coordinates.csv'
).drop(columns=['Latitude (average)', 'Longitude (average)'])

# The code fields arrive wrapped in literal double quotes; keep only the text
# between the first pair of quotes.
for code_column in ('Alpha-2 code', 'Alpha-3 code', 'Numeric code'):
    countries[code_column] = countries[code_column].apply(
        lambda raw_code: raw_code.split('"')[1]
    )

In [3]:
# Preview the first rows to confirm the quotes were stripped from the code columns.
countries.head()

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code
0,Afghanistan,AF,AFG,4
1,Albania,AL,ALB,8
2,Algeria,DZ,DZA,12
3,American Samoa,AS,ASM,16
4,Andorra,AD,AND,20


In [4]:
# Baseline row count; every left merge below is checked against this number.
len(countries)

257

## Merge layer4 (Alexa rankings) in

In [5]:
# Layer 4 metric: Levenshtein distance from the global ranking, one row per country.
home_bias_raw = pd.read_csv(
    'home-bias/analysis/levenshtein_from_global_ranking-2019-11-04.csv'
)

# Rename to the column names used in the merged table and discard the
# 'Unnamed: 0' column (presumably an index saved alongside the data).
home_bias = home_bias_raw.rename(
    columns={
        'country': 'Country',
        'levenshtein_from_global_ranking': 'layer4-alexa-rankings',
    }
).drop(columns=['Unnamed: 0'])

home_bias.columns

Index(['Country', 'layer4-alexa-rankings'], dtype='object')

In [6]:
# Manually fix country names
home_bias.loc[home_bias['Country']=='Laos', 'Country'] = "Lao People's Democratic Republic"
home_bias.iloc[86]

Country                  Lao People's Democratic Republic
layer4-alexa-rankings                                  47
Name: 86, dtype: object

In [7]:
# Attach the layer 4 score to every country. `how='left'` keeps countries that
# have no score (they get NaN). The join key is named explicitly: without `on`,
# pandas joins on every column name the two frames share, so an extra shared
# column in either input would silently change the join.
layer4_merged = countries.merge(home_bias, how='left', on='Country')

layer4_merged.head()

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code,layer4-alexa-rankings
0,Afghanistan,AF,AFG,4,47.0
1,Albania,AL,ALB,8,46.0
2,Algeria,DZ,DZA,12,48.0
3,American Samoa,AS,ASM,16,
4,Andorra,AD,AND,20,


In [8]:
# Row-count check: a left merge keeps every row of `countries` and only adds
# rows when the right-hand table repeats a key, so this should equal len(countries).
len(layer4_merged)

257

## Merge layer5 (data laws) in

In [9]:
# Layer 5 metric: number of discrete data-law categories per country.
data_laws = (
    pd.read_csv('data-locality-laws/analysis/categories-of-discrete-data-laws-on-books-2019-11-07.csv')
    .rename(columns={'Discrete categories of data laws': 'layer5-discrete-categories-data-laws'})
)

# The later merge matches `Country` by exact string, so the names here must use
# the same spelling as `countries`. `str.capitalize()` on its own lowercases
# everything after the first character ("SOUTH AFRICA" -> "South africa"), which
# can never equal a multi-word name in `countries`; those rows would silently
# fail to merge. Look the name up case-insensitively instead and fall back to
# the previous capitalisation when `countries` has no such name.
canonical_name_by_lower = dict(
    zip(countries['Country'].str.lower(), countries['Country'])
)
data_laws['Country'] = data_laws['Country'].apply(
    lambda name: canonical_name_by_lower.get(name.lower(), name.capitalize())
)
data_laws.head()

Unnamed: 0,Country,layer5-discrete-categories-data-laws
0,Argentina,21
1,Australia,21
2,Austria,12
3,Belgium,12
4,Brazil,27


In [10]:
# Left-join the data-law categories onto the running table by country name.
# The key is explicit so the join cannot widen to other shared columns.
layer5_merged = layer4_merged.merge(data_laws, how='left', on='Country')

# Countries absent from `data_laws` come out of the join as NaN; they are
# recorded as 0 categories.
layer5_merged['layer5-discrete-categories-data-laws'] = (
    layer5_merged['layer5-discrete-categories-data-laws'].fillna(value=0)
)

len(layer5_merged)

257

In [11]:
# Preview the first rows: layer 4 stays NaN where missing, layer 5 was filled with 0.
layer5_merged.head()

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code,layer4-alexa-rankings,layer5-discrete-categories-data-laws
0,Afghanistan,AF,AFG,4,47.0,0.0
1,Albania,AL,ALB,8,46.0,0.0
2,Algeria,DZ,DZA,12,48.0,0.0
3,American Samoa,AS,ASM,16,,0.0
4,Andorra,AD,AND,20,,0.0


## Merge layer2 (IPv6 adoption percentages) in

In [12]:
# Layer 2 metric: IPv6 adoption percentage, keyed by alpha-2 country code.
# NOTE(review): pandas treats the literal text "NA" as missing by default, so a
# row for Namibia (alpha-2 code "NA") would load with a NaN key and never
# merge -- confirm against the CSV.
ipv6_adoption_raw = pd.read_csv('ipv4-to-ipv6-transition/data/ipv6-adoption.csv')

# Two of the header names used below begin with a space; they must be matched
# exactly as written.
ipv6_adoption = ipv6_adoption_raw.rename(
    columns={
        'ISO-3166-1 Alpha2 code': 'Alpha-2 code',
        ' IPv6 adoption percent': 'layer2-ipv6-adoption',
    }
).drop(columns=[' Country name'])

In [13]:
# Left-join IPv6 adoption onto the running table by alpha-2 code. The key is
# named explicitly so the join cannot silently widen to other shared columns.
layer2_merged = layer5_merged.merge(ipv6_adoption, how='left', on='Alpha-2 code')

In [14]:
# Row-count check: a left merge only adds rows when the right-hand table
# repeats a key, so the count should be unchanged from the previous step.
len(layer2_merged)

257

In [15]:
# Spot-check one country (alpha-2 code 'DE') to see all merged layers side by side.
layer2_merged[layer2_merged['Alpha-2 code']=='DE']

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code,layer4-alexa-rankings,layer5-discrete-categories-data-laws,layer2-ipv6-adoption
83,Germany,DE,DEU,276,47.0,22.0,45.38


## Merge layer3 (network interference events) in

In [31]:
# Layer 3 metric: network interference rate per alpha-2 country code.
# Only `strict_rate` is carried forward; the raw counts and `loose_rate` are dropped.
# NOTE(review): pandas treats the literal text "NA" as missing by default, so a
# row for Namibia (country_code "NA") would load with a NaN key and never
# merge -- confirm against the CSV.
layer3_unused_columns = [
    'num_anomaly',
    'num_confirmed_interference',
    'num_no_confirmed_interference',
    'loose_rate',
]

layer3 = (
    pd.read_csv('network-interference-events/data/interference-measurements-by-country-2019-sanity.csv')
    .drop(columns=layer3_unused_columns)
    .rename(columns={
        'country_code': "Alpha-2 code",
        'strict_rate': 'layer3-network-interference-rate',
    })
)

layer3

Unnamed: 0,Alpha-2 code,layer3-network-interference-rate
0,AD,0.000000
1,AE,0.143348
2,AF,0.000000
3,AG,0.000000
4,AI,0.000000
5,AL,0.000000
6,AM,0.000000
7,AO,0.000000
8,AP,0.000000
9,AR,0.000000


In [26]:
# Left-join the interference rate onto the running table by alpha-2 code. The
# key is named explicitly so the join cannot silently widen to other shared columns.
layer3_merged = layer2_merged.merge(layer3, how='left', on='Alpha-2 code')

In [27]:
# Final row-count check: a left merge only adds rows when the right-hand table
# repeats a key, so the count should be unchanged from the previous step.
len(layer3_merged)

257

# Write the final CSV

This CSV file should contain every layer's metric for each country, alongside the country name and its country codes.

In [28]:
# Spot-check one row (alpha-2 code "VA") of the fully merged table before it is written out.
layer3_merged[layer3_merged['Alpha-2 code']=="VA"]

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code,layer4-alexa-rankings,layer5-discrete-categories-data-laws,layer2-ipv6-adoption,layer3-network-interference-rate
98,Holy See (Vatican City State),VA,VAT,336,,0.0,0.0,0.333333


In [29]:
# Write the combined per-country table to disk.
# NOTE(review): with the default index=True, to_csv also writes the row index as
# an unnamed first column -- the kind of 'Unnamed: 0' column that had to be
# dropped when loading the home-bias CSV above. Confirm downstream readers expect it.
layer3_merged.to_csv('output/2019-layer-scores.csv')