## **Section 2:**
## Read in shapefile edited in last section and add population data

In [38]:
# Import a few packages
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
import os
# Raise the pandas display limit to 300 rows; the tables loaded below have fewer rows
# than that, so they can be shown in full without truncation
pd.options.display.max_rows = 300

In [39]:
# Move into the project folder so the relative data paths used below resolve.
# The working directory is printed before and after the switch as a sanity check.
project_root = 'G:/My Drive/Clark'
print(os.getcwd())
os.chdir(project_root)
print(os.getcwd())

G:\My Drive\Clark
G:\My Drive\Clark


In [40]:
# Bring in the edited country boundary shapefile from the last section.
# These are very detailed and precise polygons, so loading takes a few moments.
countriesMerged_path = "GIS Tutorials/Geog-312/geopandas_Files/checkpoint1/countriesMerged.shp"
countriesMerged = gpd.read_file(countriesMerged_path)
# Row count first, then a preview of the first ten rows
print(countriesMerged.shape[0])
countriesMerged.head(n=10)

249


Unnamed: 0,CNTRY_NAME,COUNTRY,ISO,COUNTRYAFF,geometry
0,Afghanistan,Afghanistan,AF,Afghanistan,"POLYGON ((74.88986 37.23409, 74.88962 37.23314..."
1,Akrotiri and Dhekelia,,,United Kingdom,"MULTIPOLYGON (((32.8388 34.70555, 32.84127 34...."
2,Albania,Albania,AL,Albania,"MULTIPOLYGON (((20.0789 42.5558, 20.07939 42.5..."
3,Algeria,Algeria,DZ,Algeria,"MULTIPOLYGON (((8.64188 36.94206, 8.64196 36.9..."
4,American Samoa,American Samoa,AS,United States,"MULTIPOLYGON (((-171.07753 -11.06622, -171.080..."
5,Andorra,Andorra,AD,Andorra,"POLYGON ((1.7258 42.5044, 1.71149 42.49224, 1...."
6,Angola,Angola,AO,Angola,"MULTIPOLYGON (((13.10288 -4.68421, 13.10173 -4..."
7,Anguilla,Anguilla,AI,United Kingdom,"MULTIPOLYGON (((-63.42216 18.59739, -63.42672 ..."
8,Antarctica,Antarctica,AQ,,"MULTIPOLYGON (((-46.15775 -60.51078, -46.1787 ..."
9,Antigua and Barbuda,Antigua and Barbuda,AG,Antigua and Barbuda,"MULTIPOLYGON (((-61.84592 17.72958, -61.83383 ..."


In [41]:
# Replace the old 'COUNTRY' column with 'CNTRY_NAME', renamed to 'COUNTRY'.
# The guard makes the cell safe to re-run: once 'CNTRY_NAME' has been renamed, 'COUNTRY'
# already holds the names we want, and dropping it a second time would throw them away.
if 'CNTRY_NAME' in countriesMerged.columns:
    countriesMerged = (
        countriesMerged
        .drop(columns=['COUNTRY'])                    # drop the old 'COUNTRY' column
        .rename(columns={'CNTRY_NAME': 'COUNTRY'})    # 'CNTRY_NAME' takes its place
    )

In [42]:
# Display the table to confirm that 'CNTRY_NAME' now appears as 'COUNTRY'
countriesMerged

Unnamed: 0,COUNTRY,ISO,COUNTRYAFF,geometry
0,Afghanistan,AF,Afghanistan,"POLYGON ((74.88986 37.23409, 74.88962 37.23314..."
1,Akrotiri and Dhekelia,,United Kingdom,"MULTIPOLYGON (((32.8388 34.70555, 32.84127 34...."
2,Albania,AL,Albania,"MULTIPOLYGON (((20.0789 42.5558, 20.07939 42.5..."
3,Algeria,DZ,Algeria,"MULTIPOLYGON (((8.64188 36.94206, 8.64196 36.9..."
4,American Samoa,AS,United States,"MULTIPOLYGON (((-171.07753 -11.06622, -171.080..."
5,Andorra,AD,Andorra,"POLYGON ((1.7258 42.5044, 1.71149 42.49224, 1...."
6,Angola,AO,Angola,"MULTIPOLYGON (((13.10288 -4.68421, 13.10173 -4..."
7,Anguilla,AI,United Kingdom,"MULTIPOLYGON (((-63.42216 18.59739, -63.42672 ..."
8,Antarctica,AQ,,"MULTIPOLYGON (((-46.15775 -60.51078, -46.1787 ..."
9,Antigua and Barbuda,AG,Antigua and Barbuda,"MULTIPOLYGON (((-61.84592 17.72958, -61.83383 ..."


In [44]:
# Now let's bring in country population
# Country population data is from wikipedia: https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population
# Downloaded as CSV using this website: https://wikitable2csv.ggor.de/
countryPop_path = "GIS Tutorials/Geog-312/Geog-312/1.Geopandas/inputData/countries_population.csv"
# NOTE(review): the CSV is read with gpd.read_file rather than pd.read_csv. The preview below
# shows garbled characters (e.g. 'â€“'), which looks like a text-encoding mismatch — TODO confirm.
# Later cells match on the garbled spellings (e.g. 'CuraÃ§ao') and use the .str accessor on
# 'Population', so changing the reader here means updating those cells as well.
countryPopTable = gpd.read_file(countryPop_path)
# Row count, then a preview of the first five rows
print(len(countryPopTable))
countryPopTable.head(5)

240


Unnamed: 0,field_1,Location,Population,% of\nworld,Date,Source (official or from\nthe United Nations),Notes
0,â€“,World,8119000000,100%,1 Jul 2024,UN projection,
1,1/2,China,1409670000,17.3%,31 Dec 2023,Official estimate,
2,1/2,India,1404910000,17.3%,1 Jul 2024,Official projection,
3,3,United States,335893238,4.1%,1 Jan 2024,Official projection,
4,4,Indonesia,282477584,3.5%,31 Jun 2024,National annual projection,


In [45]:
# Look at the last five rows of the population table
countryPopTable.tail(5)

Unnamed: 0,field_1,Location,Population,% of\nworld,Date,Source (official or from\nthe United Nations),Notes
235,â€“,Niue (New Zealand),1681,0%,11 Nov 2022,2022 Census,
236,â€“,Tokelau (New Zealand),1647,0%,1 Jan 2019,2019 Census,
237,195,Vatican City,764,0%,26 Jun 2023,Official figure,
238,â€“,Cocos (Keeling) Islands (Australia),593,0%,30 Jun 2020,2021 Census,
239,â€“,Pitcairn Islands (UK),35,0%,1 Jul 2023,Official estimate,


In [46]:
# countriesMerged has its own field (COUNTRYAFF) for the state a territory is affiliated with,
# so we can get rid of the country names in parentheses in countryPopTable.
# NOTE: the pattern is greedy — it removes everything from the first ' (' to the last ')',
# so 'Cocos (Keeling) Islands (Australia)' becomes 'Cocos'.
countryPopTable['Location'] = countryPopTable['Location'].str.replace(r' \(.+\)', '', regex=True)
# Also, let's convert the 'Population' column to numeric by removing commas.
# Only do this while the column is still text: after the first run it is already float,
# and the .str accessor would raise if this cell were executed again.
if not pd.api.types.is_numeric_dtype(countryPopTable['Population']):
    countryPopTable['Population'] = countryPopTable['Population'].str.replace(',', '').astype(float)
countryPopTable.tail(10)

Unnamed: 0,field_1,Location,Population,% of\nworld,Date,Source (official or from\nthe United Nations),Notes
230,â€“,"Saint Helena, Ascension and Tristan da Cunha",5651.0,0%,1 Jul 2021,2021 Census,
231,â€“,Montserrat,4386.0,0%,23 Sep 2023,2023 census result,
232,â€“,Falkland Islands,3662.0,0%,10 Oct 2021,2021 census result,
233,â€“,Norfolk Island,2188.0,0%,1 Jan 2021,2021 Census,
234,â€“,Christmas Island,1692.0,0%,1 Jan 2021,2021 Census,
235,â€“,Niue,1681.0,0%,11 Nov 2022,2022 Census,
236,â€“,Tokelau,1647.0,0%,1 Jan 2019,2019 Census,
237,195,Vatican City,764.0,0%,26 Jun 2023,Official figure,
238,â€“,Cocos,593.0,0%,30 Jun 2020,2021 Census,
239,â€“,Pitcairn Islands,35.0,0%,1 Jul 2023,Official estimate,


In [47]:
# Left-join the population figures onto the country polygons.
# Join keys: 'COUNTRY' in countriesMerged and 'Location' in countryPopTable.
# 'Location' stays in the result for now: a NaN there marks a country without a match.
pop_lookup = countryPopTable[['Location', 'Population']]  # only the columns the join needs
countries_w_pop = countriesMerged.merge(
    pop_lookup,
    how='left',
    left_on='COUNTRY',
    right_on='Location',
)

# Preview the joined table
countries_w_pop.head()

Unnamed: 0,COUNTRY,ISO,COUNTRYAFF,geometry,Location,Population
0,Afghanistan,AF,Afghanistan,"POLYGON ((74.88986 37.23409, 74.88962 37.23314...",Afghanistan,34262840.0
1,Akrotiri and Dhekelia,,United Kingdom,"MULTIPOLYGON (((32.8388 34.70555, 32.84127 34....",,
2,Albania,AL,Albania,"MULTIPOLYGON (((20.0789 42.5558, 20.07939 42.5...",Albania,2402113.0
3,Algeria,DZ,Algeria,"MULTIPOLYGON (((8.64188 36.94206, 8.64196 36.9...",Algeria,46700000.0
4,American Samoa,AS,United States,"MULTIPOLYGON (((-171.07753 -11.06622, -171.080...",American Samoa,49710.0


In [48]:
# The two datasets cannot line up perfectly: one has 249 rows and the other 240,
# and there are naming discrepancies as well.
# Identify the mismatches: a NaN in 'Location' means that row's COUNTRY value
# has no exact match in countryPopTable.
unmatched_mask = countries_w_pop['Location'].isna()
no_match = countries_w_pop.loc[unmatched_mask]
print(no_match.shape[0])
no_match

24


Unnamed: 0,COUNTRY,ISO,COUNTRYAFF,geometry,Location,Population
1,Akrotiri and Dhekelia,,United Kingdom,"MULTIPOLYGON (((32.8388 34.70555, 32.84127 34....",,
8,Antarctica,AQ,,"MULTIPOLYGON (((-46.15775 -60.51078, -46.1787 ...",,
29,Bouvet Island,BV,Norway,"MULTIPOLYGON (((3.41075 -54.40417, 3.41331 -54...",,
31,British Indian Ocean Territory,IO,United Kingdom,"MULTIPOLYGON (((71.82919 -5.24042, 71.81917 -5...",,
41,Caribbean Netherlands,,Netherlands,"MULTIPOLYGON (((-68.23875 12.09664, -68.23917 ...",,
48,Cocos Islands,CC,Australia,"MULTIPOLYGON (((96.61025 -11.81319, 96.61127 -...",,
56,Czechia,CZ,Czechia,"POLYGON ((14.82337 50.87056, 14.82642 50.87189...",,
75,French Guiana,GF,France,"MULTIPOLYGON (((-51.65434 4.05947, -51.65664 4...",,
77,French Southern and Antarctic Lands,TF,France,"MULTIPOLYGON (((47.37331 -11.51044, 47.3725 -1...",,
87,Guadeloupe,GP,France,"MULTIPOLYGON (((-61.45881 16.50658, -61.45758 ...",,


In [49]:
# Part of the mismatch comes from naming conventions; other places are simply not included.
# Print every distinct "Location" in countryPopTable, sorted, to compare the spellings by eye.
countryPopTable_sorted = countryPopTable.sort_values("Location")
uniqueCountries = countryPopTable_sorted["Location"].unique()
print(uniqueCountries)

['Abkhazia' 'Afghanistan' 'Albania' 'Algeria' 'American Samoa' 'Andorra'
 'Angola' 'Anguilla' 'Antigua and Barbuda' 'Argentina' 'Armenia' 'Aruba'
 'Australia' 'Austria' 'Azerbaijan' 'Bahamas' 'Bahrain' 'Bangladesh'
 'Barbados' 'Belarus' 'Belgium' 'Belize' 'Benin' 'Bermuda' 'Bhutan'
 'Bolivia' 'Bosnia and Herzegovina' 'Botswana' 'Brazil'
 'British Virgin Islands' 'Brunei' 'Bulgaria' 'Burkina Faso' 'Burundi'
 'Cambodia' 'Cameroon' 'Canada' 'Cape Verde' 'Cayman Islands'
 'Central African Republic' 'Chad' 'Chile' 'China' 'Christmas Island'
 'Cocos' 'Colombia' 'Comoros' 'Cook Islands' 'Costa Rica' 'Croatia' 'Cuba'
 'CuraÃ§ao' 'Cyprus' 'Czech Republic' 'Democratic Republic of the Congo'
 'Denmark' 'Djibouti' 'Dominica' 'Dominican Republic' 'East Timor'
 'Ecuador' 'Egypt' 'El Salvador' 'Equatorial Guinea' 'Eritrea' 'Estonia'
 'Eswatini' 'Ethiopia' 'Falkland Islands' 'Faroe Islands' 'Fiji' 'Finland'
 'France' 'French Polynesia' 'Gabon' 'Gambia' 'Georgia' 'Germany' 'Ghana'
 'Gibraltar' 'Greece'

##### It is a bit tedious, but we can sort this out. It is interesting to consider the lack of exact standards in many cases: for example, Turkey or Turkiye? Czech Republic or Czechia? I go with Turkey and Czechia. Subjective, I guess!
##### Cleaning all this up involves some deleting, changing, matching, and adding

In [50]:
# Modify the population table pre-merge, based on what did not line up post-merge.
# Uninhabited places are left with NaN for population and changed to 0 later;
# other changes to the population column also happen later.
# The whole cell is written so that running it twice gives the same table as running it once.

# Rows to delete *FROM POP TABLE* (too hard to find shapefile/pop)
pop_rows_to_delete = ['Abkhazia', 'South Ossetia']

# Spelling in the population table -> spelling used in countriesMerged.
# The garbled keys are intentional: they must equal the names exactly as they were read in.
pop_name_fixes = {
    'Cocos': 'Cocos Islands',
    'Czech Republic': 'Czechia',
    'CuraÃ§ao': 'Curacao',
    'SÃ£o TomÃ© and PrÃ\xadncipe': 'Sao Tome and Principe',
    'East Timor': 'Timor Leste',
    'U.S. Virgin Islands': 'US Virgin Islands',
}

# Filter into an explicit copy so the rename below never writes into a slice of the old
# frame (assigning into a filtered slice triggers pandas' SettingWithCopyWarning).
countryPopTable = countryPopTable[~countryPopTable['Location'].isin(pop_rows_to_delete)].copy()
countryPopTable['Location'] = countryPopTable['Location'].replace(pop_name_fixes)

# Add Akrotiri and Dhekelia — only when it is not present yet, so that re-running the cell
# does not append a duplicate row (which would duplicate the polygon in the merge).
AD = pd.DataFrame({'Location': ['Akrotiri and Dhekelia'], 'Population': [18195]})
if not countryPopTable['Location'].eq('Akrotiri and Dhekelia').any():
    countryPopTable = pd.concat([countryPopTable, AD], ignore_index=True)

In [51]:
# And a few edits to countriesMerged.
# The renames run before the row filter, and the filter result is copied, so no value is
# assigned into a filtered slice (which triggers pandas' SettingWithCopyWarning).
boundary_name_fixes = {
    # Match Northern Mariana Islands
    'Northern Mariana Island': 'Northern Mariana Islands',
    # Match Saint Helena, Ascension and Tristan da Cunha
    'Saint Helena': 'Saint Helena, Ascension and Tristan da Cunha',
}
countriesMerged['COUNTRY'] = countriesMerged['COUNTRY'].replace(boundary_name_fixes)
# Delete Saba, it is part of Caribbean Netherlands
countriesMerged = countriesMerged[countriesMerged['COUNTRY'] != 'Saba'].copy()

In [52]:
# Repeat the left join, now that both original tables have been modified.
# 'Location' is kept in the result so the mismatch check can be repeated on it.
countries_w_pop2 = countriesMerged.merge(
    countryPopTable.loc[:, ['Location', 'Population']],  # only the columns the join needs
    how='left',
    left_on='COUNTRY',
    right_on='Location',
)

# Row count of the joined table, then a preview
print(countries_w_pop2.shape[0])
countries_w_pop2.head()

248


Unnamed: 0,COUNTRY,ISO,COUNTRYAFF,geometry,Location,Population
0,Afghanistan,AF,Afghanistan,"POLYGON ((74.88986 37.23409, 74.88962 37.23314...",Afghanistan,34262840.0
1,Akrotiri and Dhekelia,,United Kingdom,"MULTIPOLYGON (((32.8388 34.70555, 32.84127 34....",Akrotiri and Dhekelia,18195.0
2,Albania,AL,Albania,"MULTIPOLYGON (((20.0789 42.5558, 20.07939 42.5...",Albania,2402113.0
3,Algeria,DZ,Algeria,"MULTIPOLYGON (((8.64188 36.94206, 8.64196 36.9...",Algeria,46700000.0
4,American Samoa,AS,United States,"MULTIPOLYGON (((-171.07753 -11.06622, -171.080...",American Samoa,49710.0


In [53]:
# Check again for mismatches.
# What remains should just be assorted territories whose population we add by hand.
still_unmatched = countries_w_pop2['Location'].isna()
no_match2 = countries_w_pop2.loc[still_unmatched]
print(no_match2.shape[0])
no_match2

14


Unnamed: 0,COUNTRY,ISO,COUNTRYAFF,geometry,Location,Population
8,Antarctica,AQ,,"MULTIPOLYGON (((-46.15775 -60.51078, -46.1787 ...",,
29,Bouvet Island,BV,Norway,"MULTIPOLYGON (((3.41075 -54.40417, 3.41331 -54...",,
31,British Indian Ocean Territory,IO,United Kingdom,"MULTIPOLYGON (((71.82919 -5.24042, 71.81917 -5...",,
41,Caribbean Netherlands,,Netherlands,"MULTIPOLYGON (((-68.23875 12.09664, -68.23917 ...",,
75,French Guiana,GF,France,"MULTIPOLYGON (((-51.65434 4.05947, -51.65664 4...",,
77,French Southern and Antarctic Lands,TF,France,"MULTIPOLYGON (((47.37331 -11.51044, 47.3725 -1...",,
87,Guadeloupe,GP,France,"MULTIPOLYGON (((-61.45881 16.50658, -61.45758 ...",,
95,Heard Island and McDonald Islands,HM,Australia,"MULTIPOLYGON (((73.58247 -52.91919, 73.57833 -...",,
137,Martinique,MQ,France,"MULTIPOLYGON (((-61.15028 14.87597, -61.14608 ...",,
140,Mayotte,YT,France,"MULTIPOLYGON (((45.02414 -12.63375, 45.02667 -...",,


In [54]:
# Double-check by testing 'Population' instead of 'Location'.
# Remember: once the tables are joined we don't actually need the 'Location' field,
# so a missing population is the condition that matters from here on.
missing_pop_mask = countries_w_pop2['Population'].isna()
no_match_pop = countries_w_pop2.loc[missing_pop_mask]
print(no_match_pop.shape[0])
no_match_pop
# Yup, same list

14


Unnamed: 0,COUNTRY,ISO,COUNTRYAFF,geometry,Location,Population
8,Antarctica,AQ,,"MULTIPOLYGON (((-46.15775 -60.51078, -46.1787 ...",,
29,Bouvet Island,BV,Norway,"MULTIPOLYGON (((3.41075 -54.40417, 3.41331 -54...",,
31,British Indian Ocean Territory,IO,United Kingdom,"MULTIPOLYGON (((71.82919 -5.24042, 71.81917 -5...",,
41,Caribbean Netherlands,,Netherlands,"MULTIPOLYGON (((-68.23875 12.09664, -68.23917 ...",,
75,French Guiana,GF,France,"MULTIPOLYGON (((-51.65434 4.05947, -51.65664 4...",,
77,French Southern and Antarctic Lands,TF,France,"MULTIPOLYGON (((47.37331 -11.51044, 47.3725 -1...",,
87,Guadeloupe,GP,France,"MULTIPOLYGON (((-61.45881 16.50658, -61.45758 ...",,
95,Heard Island and McDonald Islands,HM,Australia,"MULTIPOLYGON (((73.58247 -52.91919, 73.57833 -...",,
137,Martinique,MQ,France,"MULTIPOLYGON (((-61.15028 14.87597, -61.14608 ...",,
140,Mayotte,YT,France,"MULTIPOLYGON (((45.02414 -12.63375, 45.02667 -...",,


In [55]:
# Post-merge edits: add population figures by hand for territories that the
# population table does not list.
manual_populations = {
    # Spelled 'Caribbean' to match the COUNTRY column; the misspelling 'Caribean'
    # matched no row, which left this territory at 0 after the fillna below.
    'Caribbean Netherlands': 30000,
    'French Guiana': 295385,
    'Guadeloupe': 395726,
    'Martinique': 349925,
    'Mayotte': 320901,
    'Reunion': 885700,
    'Saint Barthelemy': 10967,
    'Svalbard and Jan Mayen': 2600,
}

names_without_row = []
for territory, population in manual_populations.items():
    row_mask = countries_w_pop2['COUNTRY'] == territory
    if not row_mask.any():
        # An assignment through an all-False mask does nothing, so record the name
        # instead of letting a misspelling pass silently.
        names_without_row.append(territory)
    countries_w_pop2.loc[row_mask, 'Population'] = population

if names_without_row:
    print("No COUNTRY row found for:", names_without_row)

# Change the remaining NaNs (places left without a population on purpose) to 0
countries_w_pop2['Population'] = countries_w_pop2['Population'].fillna(0)

In [56]:
# Right now the population of France includes its overseas departments and regions.
# We count those separately, so subtract their population from France's total.

# France's total is read from countryPopTable, which this cell never modifies, rather than
# from countries_w_pop2: that keeps the cell re-runnable. Reading the already-adjusted value
# back from countries_w_pop2 would subtract the territories again on every run.
total_france_pop = countryPopTable.loc[countryPopTable['Location'] == 'France', 'Population'].iloc[0]
print(total_france_pop)

# Total population of the overseas territories that are counted separately
french_overseas = ['French Guiana', 'Guadeloupe', 'Martinique', 'Mayotte', 'Reunion']
territories_population = countries_w_pop2.loc[
    countries_w_pop2['COUNTRY'].isin(french_overseas),
    'Population'
].sum()
print(territories_population)

# Subtract the territories' population from France's population
adjusted_france_population = total_france_pop - territories_population

# Display the result
print("Adjusted population for France:", adjusted_france_population)
# Write the adjusted figure back to France's row (selected by 'COUNTRY', like the territories)
countries_w_pop2.loc[countries_w_pop2['COUNTRY'] == 'France', 'Population'] = adjusted_france_population

68513000.0
2247637.0
Adjusted population for France: 66265363.0


In [57]:
# Check once more for rows without a population; after the fillna there should be none
pop_is_missing = countries_w_pop2['Population'].isna()
no_match_pop = countries_w_pop2.loc[pop_is_missing]
print(no_match_pop.shape[0])
no_match_pop

0


Unnamed: 0,COUNTRY,ISO,COUNTRYAFF,geometry,Location,Population


In [58]:
# Take a look at the merged table
# (head(300) asks for more rows than the 248 the table has, so every row is shown)
countries_w_pop2.head(300)

Unnamed: 0,COUNTRY,ISO,COUNTRYAFF,geometry,Location,Population
0,Afghanistan,AF,Afghanistan,"POLYGON ((74.88986 37.23409, 74.88962 37.23314...",Afghanistan,34262840.0
1,Akrotiri and Dhekelia,,United Kingdom,"MULTIPOLYGON (((32.8388 34.70555, 32.84127 34....",Akrotiri and Dhekelia,18195.0
2,Albania,AL,Albania,"MULTIPOLYGON (((20.0789 42.5558, 20.07939 42.5...",Albania,2402113.0
3,Algeria,DZ,Algeria,"MULTIPOLYGON (((8.64188 36.94206, 8.64196 36.9...",Algeria,46700000.0
4,American Samoa,AS,United States,"MULTIPOLYGON (((-171.07753 -11.06622, -171.080...",American Samoa,49710.0
5,Andorra,AD,Andorra,"POLYGON ((1.7258 42.5044, 1.71149 42.49224, 1....",Andorra,86398.0
6,Angola,AO,Angola,"MULTIPOLYGON (((13.10288 -4.68421, 13.10173 -4...",Angola,35121730.0
7,Anguilla,AI,United Kingdom,"MULTIPOLYGON (((-63.42216 18.59739, -63.42672 ...",Anguilla,15780.0
8,Antarctica,AQ,,"MULTIPOLYGON (((-46.15775 -60.51078, -46.1787 ...",,0.0
9,Antigua and Barbuda,AG,Antigua and Barbuda,"MULTIPOLYGON (((-61.84592 17.72958, -61.83383 ...",Antigua and Barbuda,103603.0


In [59]:
# # The country boundary shapefile is highly detailed. This map takes a minute or so to draw
# countries_w_pop2.explore('Population')

In [60]:
# It would also be nice to have a column with the official name of every sovereign state
# Source: https://en.wikipedia.org/wiki/List_of_sovereign_states
# Converted to CSV once again with https://wikitable2csv.ggor.de/
off_names_path = "GIS Tutorials/Geog-312/Geog-312/1.Geopandas/inputData/List_of_sovereign_states.csv"
off_names = pd.read_csv(off_names_path)
# Row count, then a preview (the table has fewer than 200 rows, so all of it is shown)
print(off_names.shape[0])
off_names.head(n=200)

195


Unnamed: 0,Common and formal names,Membership within the UN System,Sovereignty dispute,Further information on status and recognition of sovereignty
0,Afghanistan,A UN member state,A None,"The Islamic Emirate of Afghanistan, the de fac..."
1,Albania – Republic of Albania,A UN member state,A None,
2,Algeria – People's Democratic Republic of Algeria,A UN member state,A None,
3,Andorra – Principality of Andorra,A UN member state,A None,Andorra is a co-principality in which the offi...
4,Angola – Republic of Angola,A UN member state,A None,
5,Antigua and Barbuda,A UN member state,A None,Antigua and Barbuda is a Commonwealth realm wi...
6,Argentina – Argentine Republic,A UN member state,A None,Argentina is a federation of 23 provinces and ...
7,Armenia – Republic of Armenia,A UN member state,Not recognised by Pakistan.,Armenia is not recognised by Pakistan due to t...
8,Australia – Commonwealth of Australia,A UN member state,A None,Australia is a Commonwealth realm and a federa...
9,Austria – Republic of Austria,A UN member state,A None,Member of the European Union. Austria is a fed...


In [64]:
# The 'Common and formal names' column is a mess: most entries read
# "<common name> – <formal name>", while some hold a single name with no dash.
# Split it into separate 'commonName' and 'formalName' columns.
name_separator = '–'


def _common_part(entry):
    """Return the text before the first dash, trimmed."""
    return entry.partition(name_separator)[0].strip()


def _formal_part(entry):
    """Return the text after the first dash, trimmed; the whole entry if there is no dash."""
    pieces = entry.split(name_separator)
    chosen = pieces[1] if len(pieces) > 1 else entry
    return chosen.strip()


combined_names = off_names['Common and formal names']
off_names['commonName'] = combined_names.map(_common_part)
off_names['formalName'] = combined_names.map(_formal_part)

# Display the first few rows to verify
preview_columns = ['Common and formal names', 'commonName', 'formalName']
print(off_names[preview_columns].head())

                             Common and formal names   commonName  \
0                                        Afghanistan  Afghanistan   
1                      Albania – Republic of Albania      Albania   
2  Algeria – People's Democratic Republic of Algeria      Algeria   
3                  Andorra – Principality of Andorra      Andorra   
4                        Angola – Republic of Angola       Angola   

                                formalName  
0                              Afghanistan  
1                      Republic of Albania  
2  People's Democratic Republic of Algeria  
3                  Principality of Andorra  
4                       Republic of Angola  


In [65]:
# Left-join the formal names onto the table that already carries population.
# Join keys: 'COUNTRY' in countries_w_pop2 and 'commonName' in off_names.
formal_name_lookup = off_names[['commonName', 'formalName']]  # only the columns the join needs
countries_w_off_names = countries_w_pop2.merge(
    formal_name_lookup,
    how='left',
    left_on='COUNTRY',
    right_on='commonName',
)

# Row count of the joined table, then a preview
print(countries_w_off_names.shape[0])
countries_w_off_names.head()

248


Unnamed: 0,COUNTRY,ISO,COUNTRYAFF,geometry,Location,Population,commonName,formalName
0,Afghanistan,AF,Afghanistan,"POLYGON ((74.88986 37.23409, 74.88962 37.23314...",Afghanistan,34262840.0,Afghanistan,Afghanistan
1,Akrotiri and Dhekelia,,United Kingdom,"MULTIPOLYGON (((32.8388 34.70555, 32.84127 34....",Akrotiri and Dhekelia,18195.0,,
2,Albania,AL,Albania,"MULTIPOLYGON (((20.0789 42.5558, 20.07939 42.5...",Albania,2402113.0,Albania,Republic of Albania
3,Algeria,DZ,Algeria,"MULTIPOLYGON (((8.64188 36.94206, 8.64196 36.9...",Algeria,46700000.0,Algeria,People's Democratic Republic of Algeria
4,American Samoa,AS,United States,"MULTIPOLYGON (((-171.07753 -11.06622, -171.080...",American Samoa,49710.0,,


In [66]:
# Display the joined table; rows with no match in off_names have empty commonName/formalName
countries_w_off_names

Unnamed: 0,COUNTRY,ISO,COUNTRYAFF,geometry,Location,Population,commonName,formalName
0,Afghanistan,AF,Afghanistan,"POLYGON ((74.88986 37.23409, 74.88962 37.23314...",Afghanistan,34262840.0,Afghanistan,Afghanistan
1,Akrotiri and Dhekelia,,United Kingdom,"MULTIPOLYGON (((32.8388 34.70555, 32.84127 34....",Akrotiri and Dhekelia,18195.0,,
2,Albania,AL,Albania,"MULTIPOLYGON (((20.0789 42.5558, 20.07939 42.5...",Albania,2402113.0,Albania,Republic of Albania
3,Algeria,DZ,Algeria,"MULTIPOLYGON (((8.64188 36.94206, 8.64196 36.9...",Algeria,46700000.0,Algeria,People's Democratic Republic of Algeria
4,American Samoa,AS,United States,"MULTIPOLYGON (((-171.07753 -11.06622, -171.080...",American Samoa,49710.0,,
5,Andorra,AD,Andorra,"POLYGON ((1.7258 42.5044, 1.71149 42.49224, 1....",Andorra,86398.0,Andorra,Principality of Andorra
6,Angola,AO,Angola,"MULTIPOLYGON (((13.10288 -4.68421, 13.10173 -4...",Angola,35121730.0,Angola,Republic of Angola
7,Anguilla,AI,United Kingdom,"MULTIPOLYGON (((-63.42216 18.59739, -63.42672 ...",Anguilla,15780.0,,
8,Antarctica,AQ,,"MULTIPOLYGON (((-46.15775 -60.51078, -46.1787 ...",,0.0,,
9,Antigua and Barbuda,AG,Antigua and Barbuda,"MULTIPOLYGON (((-61.84592 17.72958, -61.83383 ...",Antigua and Barbuda,103603.0,Antigua and Barbuda,Antigua and Barbuda


In [67]:
# Combing through, it looks like the list misses a few. Here are some final edits.

# It really should be "The Gambia", right?! Rename the country itself first, so the
# overrides below can address it under its new name.
countries_w_off_names.loc[countries_w_off_names['COUNTRY'] == 'Gambia', 'COUNTRY'] = 'The Gambia'

# COUNTRYAFF values to overwrite, keyed by COUNTRY
affiliation_overrides = {
    'The Gambia': 'The Gambia',
    'Timor Leste': 'Timor Leste',
}

# formalName values to overwrite, keyed by COUNTRY
formal_name_overrides = {
    'Afghanistan': 'Islamic Emirate of Afghanistan',
    'Bahamas': 'Commonwealth of The Bahamas',
    'Czechia': 'Czech Republic',
    'Democratic Republic of the Congo': 'Congo, Democratic Republic of the',
    'The Gambia': 'Republic of The Gambia',
    'Hong Kong': "Hong Kong Special Administrative Region of the People's Republic of China",
    'Ireland': 'Republic of Ireland',
    'Ivory Coast': "Republic of Cote d'Ivoire",
    'Kosovo': 'Republic of Kosovo',
    'Macao': "Macao Special Administrative Region of the People's Republic of China",
    'Micronesia': 'Federated States of Micronesia',
    'Republic of the Congo': 'Congo, Republic of the',
    'Taiwan': 'Republic of China (ROC)',
    'Timor Leste': 'Democratic Republic of Timor-Leste',
    'Turkey': 'Republic of Turkiye',
}

for country, affiliation in affiliation_overrides.items():
    countries_w_off_names.loc[countries_w_off_names['COUNTRY'] == country, 'COUNTRYAFF'] = affiliation

for country, formal_name in formal_name_overrides.items():
    countries_w_off_names.loc[countries_w_off_names['COUNTRY'] == country, 'formalName'] = formal_name

In [68]:
# The join-helper columns have done their job; drop both superfluous columns in one call
countries_w_off_names = countries_w_off_names.drop(columns=['Location', 'commonName'])

In [69]:
# Inspect the column dtypes before saving
countries_w_off_names.dtypes


COUNTRY         object
ISO             object
COUNTRYAFF      object
geometry      geometry
Population     float64
formalName      object
dtype: object

In [70]:
# 'Population' is float64 at this point; store it as whole numbers instead.
# The NaNs were replaced with 0 in an earlier cell, which an integer cast requires.
countries_w_off_names['Population'] = countries_w_off_names['Population'].astype('int64')

In [71]:
# # Take one last look
# Take one last look at the final table before it is written to disk
countries_w_off_names

Unnamed: 0,COUNTRY,ISO,COUNTRYAFF,geometry,Population,formalName
0,Afghanistan,AF,Afghanistan,"POLYGON ((74.88986 37.23409, 74.88962 37.23314...",34262840,Islamic Emirate of Afghanistan
1,Akrotiri and Dhekelia,,United Kingdom,"MULTIPOLYGON (((32.8388 34.70555, 32.84127 34....",18195,
2,Albania,AL,Albania,"MULTIPOLYGON (((20.0789 42.5558, 20.07939 42.5...",2402113,Republic of Albania
3,Algeria,DZ,Algeria,"MULTIPOLYGON (((8.64188 36.94206, 8.64196 36.9...",46700000,People's Democratic Republic of Algeria
4,American Samoa,AS,United States,"MULTIPOLYGON (((-171.07753 -11.06622, -171.080...",49710,
5,Andorra,AD,Andorra,"POLYGON ((1.7258 42.5044, 1.71149 42.49224, 1....",86398,Principality of Andorra
6,Angola,AO,Angola,"MULTIPOLYGON (((13.10288 -4.68421, 13.10173 -4...",35121734,Republic of Angola
7,Anguilla,AI,United Kingdom,"MULTIPOLYGON (((-63.42216 18.59739, -63.42672 ...",15780,
8,Antarctica,AQ,,"MULTIPOLYGON (((-46.15775 -60.51078, -46.1787 ...",0,
9,Antigua and Barbuda,AG,Antigua and Barbuda,"MULTIPOLYGON (((-61.84592 17.72958, -61.83383 ...",103603,Antigua and Barbuda


In [72]:
# Write the final table to the 'checkpoint2' folder as an ESRI Shapefile.
checkpoint2_path = "GIS Tutorials/Geog-312/geopandas_Files/checkpoint2/countries_w_off_names.shp"
# Make sure the output folder exists first, so the write does not fail on a fresh setup
os.makedirs(os.path.dirname(checkpoint2_path), exist_ok=True)
countries_w_off_names.to_file(checkpoint2_path, driver='ESRI Shapefile')