Notebook to fix 09_MakeMeta.py, CPT Feb 2021

In [8]:
# Dependencies
import pandas as pd
import numpy as np
import geopandas as gpd

# Load Files
DATA_PATH= '/home/cascade/projects/UrbanHeat/data/'

# Get GHS-UCDB Columns we want to use 
ghs_fn = DATA_PATH+'raw/GHS_UCDB/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_0.shp'
ghs = gpd.read_file(ghs_fn)

# GHS columns we want 
cols = ['ID_HDC_G0','CTR_MN_NM', 'UC_NM_MN','GCPNT_LAT','GCPNT_LON']
df_out = ghs[cols]

# Fix Ivory coast
df_out.CTR_MN_NM = df_out.CTR_MN_NM.replace('CÃ´te d\'Ivoire', 'Ivory Coast') 

# Get UN regions
regions_fn = DATA_PATH+'raw/countrylist.csv'
regions = pd.read_csv(regions_fn)

cols = ['name','region','sub-region','intermediate-region']
regions = regions[cols]
regions.rename(columns={'name': 'CTR_MN_NM'}, inplace = True)

# Merge
df_out = df_out.merge(regions, on = 'CTR_MN_NM', how = 'left')

# write out 
# fn_out = DATA_PATH+'interim/GHS-UCDB-IDS.csv'
# df_out.to_csv(fn_out, index = False)

In [9]:
df_out

Unnamed: 0,ID_HDC_G0,CTR_MN_NM,UC_NM_MN,GCPNT_LAT,GCPNT_LON,region,sub-region,intermediate-region
0,5782,Russia,Norilsk [RUS],69.333682,88.205172,Europe,Eastern Europe,Eastern Europe
1,3316,Russia,Murmansk [RUS],68.955354,33.078645,Europe,Eastern Europe,Eastern Europe
2,5645,Russia,Novy Urengoy [RUS],66.083799,76.646580,Europe,Eastern Europe,Eastern Europe
3,3185,Finland,Oulu [FIN],65.019378,25.482396,Europe,Northern Europe,Northern Europe
4,3539,Russia,Severodvinsk [RUS],64.572142,39.831477,Europe,Eastern Europe,Eastern Europe
...,...,...,...,...,...,...,...,...
13130,13046,New Zealand,Dunedin [NZL],-45.881060,170.498303,Oceania,Australia and New Zealand,Australia and New Zealand
13131,1116,Argentina,Rio Gallegos [ARG],-51.628216,-69.222429,Americas,Latin America and the Caribbean,South America
13132,1114,Chile,Punta Arenas [CHL],-53.148510,-70.909297,Americas,Latin America and the Caribbean,South America
13133,1161,Argentina,Rio Grande [ARG],-53.793962,-67.714669,Americas,Latin America and the Caribbean,South America


In [10]:
df_out[df_out['GCPNT_LAT'].isnull()]

Unnamed: 0,ID_HDC_G0,CTR_MN_NM,UC_NM_MN,GCPNT_LAT,GCPNT_LON,region,sub-region,intermediate-region


In [11]:
df_out[df_out['ID_HDC_G0'] == 1450]

Unnamed: 0,ID_HDC_G0,CTR_MN_NM,UC_NM_MN,GCPNT_LAT,GCPNT_LON,region,sub-region,intermediate-region
8488,1450,Cape Verde,Mindelo [CPV],16.890056,-24.984563,,,


In [12]:
ids = [ 1450,  1451,  1549,  1562,  1563,  1567,  1583,  1585,  1586,
        1587,  1589,  1590,  1591,  1601,  1603,  1606,  1610,  1611,
        1615,  1617,  1625,  1628,  1633,  1637,  1639,  1642,  1666,
        1672,  1675,  1683,  1684,  1689,  1693,  1704,  1706,  1707,
        1713,  1715,  1718,  1725,  1728,  1729,  1741,  1746,  1751,
        1752,  1753,  1759,  1760,  1761,  1764,  1767,  1770,  1771,
        1772,  1773,  1775,  1776,  1777,  1778,  1780,  1781,  1782,
        1786,  1790,  1792,  1793,  1794,  1795,  1796,  1800,  1803,
        1805,  1806,  1810,  1811,  1813,  1815,  1816,  1819,  1828,
        1831,  1833,  1834,  1835,  1836,  1837,  1838,  1839,  1840,
        1841,  1845,  1847,  1848,  1851,  1857,  1858,  1859,  1860,
        1861,  1862,  1864,  1867,  1869,  1874,  1877,  1880,  1887,
        1889,  1891,  1892,  1893,  1895,  1896,  1899,  1900,  1905,
        1906,  1912,  1914,  1915,  1919,  1920,  1923,  1924,  1929,
        1930,  1936,  1939,  1945,  1946,  1948,  1960,  1965,  1971,
        1973,  1978,  1979,  1980,  1981,  1983,  1984,  1993,  1995,
        2003,  2004,  2005,  2485,  2886,  2911,  2921,  2927,  2939,
        2951,  2989,  2992,  3036,  3063,  3120,  3132,  3468,  3498,
        3501,  3534,  3537, 12687, 12689, 12691, 12692, 12693, 12694,
       12695, 12696, 12697, 12698, 12699, 12700, 12701, 12702, 12703,
       12709, 12714, 12715, 12717, 12718, 12721, 12724]

In [13]:
bad_data = df_out[df_out['ID_HDC_G0'].isin(ids)]

In [14]:
bad_data

Unnamed: 0,ID_HDC_G0,CTR_MN_NM,UC_NM_MN,GCPNT_LAT,GCPNT_LON,region,sub-region,intermediate-region
142,1761,United Kingdom,Carlisle [GBR],54.882122,-2.929729,,,
143,1836,United Kingdom,Washington [GBR],54.890297,-1.543692,,,
163,1837,United Kingdom,Darlington [GBR],54.531060,-1.550412,,,
183,1764,United Kingdom,Morecambe [GBR],54.054076,-2.855242,,,
184,1840,United Kingdom,Harrogate [GBR],53.993119,-1.535719,,,
...,...,...,...,...,...,...,...,...
11187,1675,Côte d'Ivoire,Abidjan [CIV],5.349563,-4.002696,,,
11191,1689,Côte d'Ivoire,Grand-Bassam [CIV],5.211219,-3.742486,,,
11235,12715,Brunei,Bandar Seri Begawan [BRN],4.880750,114.924835,,,
11256,1583,Côte d'Ivoire,San-Pedro [CIV],4.759142,-6.652266,,,


In [17]:
bad_countries = list(bad_data['CTR_MN_NM'].unique())

In [27]:
sorted(bad_countries)

['Brunei',
 'Cape Verde',
 'Czech Republic',
 "Côte d'Ivoire",
 'Moldova',
 'São Tomé and Príncipe',
 'Taiwan',
 'United Kingdom']

In [22]:
# Get UN regions
countries_fn = DATA_PATH+'raw/countrylist.csv'
countries = pd.read_csv(regions_fn)

In [23]:
countries

Unnamed: 0,name,alpha-2,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code
0,Afghanistan,AF,AFG,4.0,ISO 3166-2:AF,Asia,Southern Asia,Southern Asia,142.0,34.0,
1,Åland Islands,AX,ALA,248.0,ISO 3166-2:AX,Europe,Northern Europe,Northern Europe,150.0,154.0,
2,Albania,AL,ALB,8.0,ISO 3166-2:AL,Europe,Southern Europe,Southern Europe,150.0,39.0,
3,Algeria,DZ,DZA,12.0,ISO 3166-2:DZ,Africa,Northern Africa,Northern Africa,2.0,15.0,
4,American Samoa,AS,ASM,16.0,ISO 3166-2:AS,Oceania,Polynesia,Polynesia,9.0,61.0,
...,...,...,...,...,...,...,...,...,...,...,...
246,Yemen,YE,YEM,887.0,ISO 3166-2:YE,Asia,Western Asia,Western Asia,142.0,145.0,
247,Zambia,ZM,ZMB,894.0,ISO 3166-2:ZM,Africa,Sub-Saharan Africa,Eastern Africa,2.0,202.0,14.0
248,Zimbabwe,ZW,ZWE,716.0,ISO 3166-2:ZW,Africa,Sub-Saharan Africa,Eastern Africa,2.0,202.0,14.0
249,Swaziland,,,,,Africa,Sub-Saharan Africa,Southern Africa,,,


In [25]:
for name in countries['name']:
    print(name)

Afghanistan
Åland Islands
Albania
Algeria
American Samoa
Andorra
Angola
Anguilla
Antarctica
Antigua and Barbuda
Argentina
Armenia
Aruba
Australia
Austria
Azerbaijan
Bahamas
Bahrain
Bangladesh
Barbados
Belarus
Belgium
Belize
Benin
Bermuda
Bhutan
Bolivia
Bonaire, Sint Eustatius and Saba
Bosnia and Herzegovina
Botswana
Bouvet Island
Brazil
British Indian Ocean Territory
Brunei Darussalam
Bulgaria
Burkina Faso
Burundi
Cabo Verde
Cambodia
Cameroon
Canada
Cayman Islands
Central African Republic
Chad
Chile
China
Christmas Island
Cocos (Keeling) Islands
Colombia
Comoros
Republic of Congo
Democratic Republic of the Congo
Cook Islands
Costa Rica
Ivory Coast
Croatia
Cuba
Curaçao
Cyprus
Czechia
Denmark
Djibouti
Dominica
Dominican Republic
Ecuador
Egypt
El Salvador
Equatorial Guinea
Eritrea
Estonia
Eswatini
Ethiopia
Falkland Islands (Malvinas)
Faroe Islands
Fiji
Finland
France
French Guiana
French Polynesia
French Southern Territories
Gabon
Gambia
Georgia
Germany
Ghana
Gibraltar
Greece
Greenland
Gr