In [52]:
%pylab inline
import pandas as pd

Populating the interactive namespace from numpy and matplotlib


First, we'll start with a list of country names, ISO-3166 2- and 3- letter codes.

In [53]:
countries =\
pd.read_csv('shared/data/country-codes/countries_codes_and_coordinates.csv')\
    .drop(columns=['Latitude (average)', 'Longitude (average)'])
# remove extraneous quotes
countries['Alpha-2 code'] = countries['Alpha-2 code'].apply(lambda x: x.split("\"")[1])
countries['Alpha-3 code'] = countries['Alpha-3 code'].apply(lambda x: x.split("\"")[1])
countries['Numeric code'] = countries['Numeric code'].apply(lambda x: x.split("\"")[1])

In [54]:
countries.head()

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code
0,Afghanistan,AF,AFG,4
1,Albania,AL,ALB,8
2,Algeria,DZ,DZA,12
3,American Samoa,AS,ASM,16
4,Andorra,AD,AND,20


In [55]:
len(countries)

257

## Merge layer4 (alexa rankings) in

In [56]:
home_bias = pd\
    .read_csv('home-bias/analysis/levenshtein_from_global_ranking-2019-11-04.csv')\
    .rename(columns={
        'country': 'Country',
        'levenshtein_from_global_ranking': 'layer4-alexa-rankings',
    })\
    .drop(columns=['Unnamed: 0'])

home_bias.columns

Index(['Country', 'layer4-alexa-rankings'], dtype='object')

In [57]:
# Manually fix country names
home_bias.loc[home_bias['Country']=='Laos', 'Country'] = "Lao People's Democratic Republic"
home_bias.iloc[86]

Country                  Lao People's Democratic Republic
layer4-alexa-rankings                                  47
Name: 86, dtype: object

In [58]:
home_bias
#.loc[home_bias['Country']=='Laos', 'Country'] = "Lao People's Democratic Republic"

Unnamed: 0,Country,layer4-alexa-rankings
0,Afghanistan,47
1,Albania,46
2,Algeria,48
3,Angola,46
4,Argentina,47
5,Armenia,50
6,Australia,45
7,Austria,46
8,Azerbaijan,48
9,Bahamas,47


In [59]:
layer4_merged =\
    countries.merge(home_bias, how='left')

layer4_merged.head()

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code,layer4-alexa-rankings
0,Afghanistan,AF,AFG,4,47.0
1,Albania,AL,ALB,8,46.0
2,Algeria,DZ,DZA,12,48.0
3,American Samoa,AS,ASM,16,
4,Andorra,AD,AND,20,


In [60]:
len(layer4_merged) #.groupby('Alpha-2 code').sum()

257

## Merge layer5 (has cross-border data flow laws) in

In [73]:
cross_border = pd\
    .read_csv('data-locality-laws/analysis/restrictions-on-cross-border-data-flows-2019-11-05.csv')\
    .drop(columns='Unnamed: 0')\
    .rename(columns={'n_laws': 'layer5-has-cross-border-laws?'})

cross_border.Country = cross_border.Country.apply(lambda x: x.capitalize())

cross_border.head()

Unnamed: 0,Country,layer5-has-cross-border-laws?
0,China,1
1,Russia,1
2,India,1
3,Canada,1
4,Korea,1


In [74]:
layer5_merged =\
    layer4_merged\
        .merge(cross_border, how='left')\

layer5_merged['layer5-has-cross-border-laws?'] = \
    layer5_merged['layer5-has-cross-border-laws?'].fillna(value=0)

len(layer5_merged)

257

In [75]:
layer5_merged.head()

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code,layer4-alexa-rankings,layer5-has-cross-border-laws?
0,Afghanistan,AF,AFG,4,47.0,0.0
1,Albania,AL,ALB,8,46.0,0.0
2,Algeria,DZ,DZA,12,48.0,0.0
3,American Samoa,AS,ASM,16,,0.0
4,Andorra,AD,AND,20,,0.0


## Merge layer2 (IPv6 adoption percentages) in

In [64]:
ipv6_adoption = pd\
    .read_csv('ipv4-to-ipv6-transition/data/ipv6-adoption.csv')\
    .rename(columns={
        'ISO-3166-1 Alpha2 code': 'Alpha-2 code',
        ' IPv6 adoption percent': 'layer2-ipv6-adoption'
    })\
    .drop(columns=[' Country name'])

In [65]:
layer2_merged =\
    layer5_merged\
        .merge(ipv6_adoption, how='left')

In [66]:
len(layer2_merged)

257

In [67]:
layer2_merged[layer2_merged['Alpha-2 code']=='DE']

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code,layer4-alexa-rankings,layer5-has-cross-border-laws?,layer2-ipv6-adoption
83,Germany,DE,DEU,276,47.0,1.0,45.38


## Merge layer3 (network interference events) in

In [68]:
layer3 = pd\
    .read_csv('network-interference-events/analysis/interference-measurements-by-country-2019-2019-11-05.csv')\
    .drop(columns={
        'Unnamed: 0',
        'total',
        'confirmed_count',
        'anomaly_count',
#         'loose_rate'
        'strict_rate'
    })\
    .rename(columns={
        'probe_cc': "Alpha-2 code",
        'loose_rate': 'layer3-network-interference-rate'
    })
    

In [69]:
layer3_merged = layer2_merged.merge(layer3, how='left')

In [70]:
len(layer3_merged)

257

# Write the final CSV
This CSV file should contain all layer metrics associated with countries and country codes

In [71]:
layer3_merged[layer3_merged['Alpha-2 code']=="VA"]

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code,layer4-alexa-rankings,layer5-has-cross-border-laws?,layer2-ipv6-adoption,layer3-network-interference-rate
98,Holy See (Vatican City State),VA,VAT,336,,0.0,0.0,0.333333


In [72]:
layer3_merged.to_csv('output/2019-layer-scores.csv')