## Scraping countries data and boundaries

Link to the data from Natural Earth: [countries data](https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_0_countries.zip)



In [206]:
import os

import geopandas as gpd
import requests

In [207]:
# Download URL of the Natural Earth 50m admin-0 countries archive (.zip).
# NOTE(review): the repeated "http//www.naturalearthdata.com" segment looks odd
# but matches the link given in the notebook header — verify before "fixing" it.
countries_link = 'https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_0_countries.zip'

In [208]:
# Request the countries archive as a streamed response so the body can be
# written to disk chunk by chunk in a later cell.
# A custom User-Agent header is sent (presumably the server rejects the default
# client User-Agent — confirm).
r = requests.get(
    countries_link,
    stream=True,
    headers={"User-Agent": "XY"},
    timeout=30,  # seconds; without a timeout a stalled connection hangs the cell forever
)
# Fail here on an HTTP error instead of letting an error page be saved as a .zip.
r.raise_for_status()
r.status_code

200

In [209]:
# Save the streamed response body to disk so it can be opened as a zip archive.
countries_zip_path = '../temp/country/ne_50m_admin_0_countries.zip'
# Create the target folder if it is missing; open() would otherwise raise
# FileNotFoundError on a fresh checkout.
os.makedirs(os.path.dirname(countries_zip_path), exist_ok=True)
with open(countries_zip_path, 'wb') as fd:
    # 64 KiB chunks: the previous 128-byte chunks meant thousands of tiny writes.
    for chunk in r.iter_content(chunk_size=64 * 1024):
        fd.write(chunk)

In [212]:
# Path in "zip://<archive>!<member>" form: addresses the .shp member inside the
# archive written to ../temp/country/ by the download cell.
zip_file = "zip://../temp/country/ne_50m_admin_0_countries.zip!ne_50m_admin_0_countries.shp"

In [213]:
# Read the countries shapefile straight out of the zip archive into a GeoDataFrame.
layer_name = 'ne_50m_admin_0_countries'
countries_shp = gpd.read_file(zip_file, layer=layer_name)

In [214]:
# Summary of the loaded frame: row count, column count, dtypes and memory usage.
countries_shp.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 242 entries, 0 to 241
Columns: 162 entries, featurecla to geometry
dtypes: float64(4), geometry(1), int64(25), object(132)
memory usage: 306.4+ KB


In [215]:
# List the available column names before picking the subset to keep.
countries_shp.columns

Index(['featurecla', 'scalerank', 'LABELRANK', 'SOVEREIGNT', 'SOV_A3',
       'ADM0_DIF', 'LEVEL', 'TYPE', 'ADMIN', 'ADM0_A3',
       ...
       'FCLASS_TR', 'FCLASS_ID', 'FCLASS_PL', 'FCLASS_GR', 'FCLASS_IT',
       'FCLASS_NL', 'FCLASS_SE', 'FCLASS_BD', 'FCLASS_UA', 'geometry'],
      dtype='object', length=162)

In [216]:
# Attribute columns to keep: display names, country-code columns, entity type,
# continent/subregion and the Wikidata identifier. The geometry column is not
# selected, so the result carries attributes only.
country_columns = [
    "NAME",
    "NAME_LONG",
    "FIPS_10",
    "ISO_A2",
    "ISO_A3",
    "POSTAL",
    "TYPE",
    "CONTINENT",
    "SUBREGION",
    "WIKIDATAID",
]
filter_countries = countries_shp[country_columns]

In [217]:
# Display the reduced table (the notebook truncates the middle rows).
filter_countries

Unnamed: 0,NAME,NAME_LONG,FIPS_10,ISO_A2,ISO_A3,POSTAL,TYPE,CONTINENT,SUBREGION,WIKIDATAID
0,Zimbabwe,Zimbabwe,ZI,ZW,ZWE,ZW,Sovereign country,Africa,Eastern Africa,Q954
1,Zambia,Zambia,ZA,ZM,ZMB,ZM,Sovereign country,Africa,Eastern Africa,Q953
2,Yemen,Yemen,YM,YE,YEM,YE,Sovereign country,Asia,Western Asia,Q805
3,Vietnam,Vietnam,VM,VN,VNM,VN,Sovereign country,Asia,South-Eastern Asia,Q881
4,Venezuela,Venezuela,VE,VE,VEN,VE,Sovereign country,South America,South America,Q717
...,...,...,...,...,...,...,...,...,...,...
237,Afghanistan,Afghanistan,AF,AF,AFG,AF,Sovereign country,Asia,Southern Asia,Q889
238,Siachen Glacier,Siachen Glacier,-99,-99,-99,SG,Indeterminate,Asia,Southern Asia,Q333946
239,Antarctica,Antarctica,AY,AQ,ATA,AQ,Indeterminate,Antarctica,Antarctica,Q51
240,Sint Maarten,Sint Maarten,NT,SX,SXM,SX,Country,North America,Caribbean,Q26273


In [218]:
# Distinct values found in the TYPE column.
types = set(filter_countries["TYPE"])

In [219]:
# Distinct values found in the CONTINENT column.
continents = set(filter_countries["CONTINENT"])

In [220]:
# Distinct values found in the SUBREGION column.
subregions = set(filter_countries["SUBREGION"])

In [221]:
# Show the distinct entity types.
types

{'Country',
 'Dependency',
 'Disputed',
 'Indeterminate',
 'Sovereign country',
 'Sovereignty'}

In [222]:
# Show the distinct continents.
continents

{'Africa',
 'Antarctica',
 'Asia',
 'Europe',
 'North America',
 'Oceania',
 'Seven seas (open ocean)',
 'South America'}

In [223]:
# Show the distinct subregions.
subregions

{'Antarctica',
 'Australia and New Zealand',
 'Caribbean',
 'Central America',
 'Central Asia',
 'Eastern Africa',
 'Eastern Asia',
 'Eastern Europe',
 'Melanesia',
 'Micronesia',
 'Middle Africa',
 'Northern Africa',
 'Northern America',
 'Northern Europe',
 'Polynesia',
 'Seven seas (open ocean)',
 'South America',
 'South-Eastern Asia',
 'Southern Africa',
 'Southern Asia',
 'Southern Europe',
 'Western Africa',
 'Western Asia',
 'Western Europe'}

In [224]:
# Boolean mask marking rows whose ISO_A2 value is the literal string '-99'
# (appears to be the dataset's placeholder for a missing code — confirm).
filter_errors = filter_countries['ISO_A2'].eq('-99')

In [225]:
# Inspect the mask: True marks a row with ISO_A2 == '-99'.
filter_errors

0      False
1      False
2      False
3      False
4      False
       ...  
237    False
238     True
239    False
240    False
241    False
Name: ISO_A2, Length: 242, dtype: bool

In [226]:
# Rows flagged by the mask, i.e. entries whose ISO_A2 value is '-99'.
filter_countries.loc[filter_errors]

Unnamed: 0,NAME,NAME_LONG,FIPS_10,ISO_A2,ISO_A3,POSTAL,TYPE,CONTINENT,SUBREGION,WIKIDATAID
58,Somaliland,Somaliland,-99,-99,-99,SL,Indeterminate,Africa,Eastern Africa,Q34754
88,Norway,Norway,-99,-99,-99,N,Sovereign country,Europe,Northern Europe,Q20
160,France,France,FR,-99,-99,F,Country,Europe,Western Europe,Q142
185,N. Cyprus,Northern Cyprus,-99,-99,-99,CN,Sovereign country,Asia,Western Asia,Q23681
226,Indian Ocean Ter.,Indian Ocean Territories,-99,-99,-99,IOT,Dependency,Asia,Seven seas (open ocean),Q4824275
229,Ashmore and Cartier Is.,Ashmore and Cartier Islands,AT,-99,-99,AU,Dependency,Oceania,Australia and New Zealand,Q133888
238,Siachen Glacier,Siachen Glacier,-99,-99,-99,SG,Indeterminate,Asia,Southern Asia,Q333946
