# 5. Getting representative 1sqkm squares

In step #3, we computed 1sqkm squares that cover the entirety of each city (with some degree of overlap), as well as their level of tree coverage. In step #4, we computed how many people live in areas with different levels of tree coverage.

In this notebook, we will select 'representative' squares from the most populated tree coverage level in each city. We can then use them to visualize, with satellite imagery, how green such an area looks.

In [2]:
# Importing packages
import geopandas as gpd
import joypy
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
tqdm.pandas()

## 5.1 Selecting representative squares – where most people live

In [3]:
# Load the per-city table of population by tree coverage bin
# that was written to disk in the previous step.
hexbin_levels = pd.read_csv(filepath_or_buffer="../output/population-by-tree-coverage-bin.csv")

In [4]:
# The CSV round-trip turned each bin into a text label such as "[0.38, 0.4)".
# Pull the two numbers out of the label with a regular expression and store
# them as float columns holding the lower and upper bound of the bin.
hexbin_levels[['tr_pct_bin_lower', 'tr_pct_bin_upper']] = (
    hexbin_levels['tr_pct_bin']
    .str.extract(r'\[(.*),(.*)\)', expand=True)
    .astype(float)
)

In [5]:
# For every city, find the index label of the tree coverage bucket holding
# the largest population share, then keep just those rows (one per city).
top_bin_labels = hexbin_levels.groupby('city_id')['pop_ft_pct'].idxmax()
most_populated = hexbin_levels.loc[top_bin_labels]

In [6]:
# Taking a look at what we have, greenest buckets first.
# Sort on the parsed numeric lower bound rather than on the `tr_pct_bin` text
# label, so the ordering does not depend on how the label strings compare.
most_populated.sort_values(by='tr_pct_bin_lower', ascending=False)

Unnamed: 0,city_id,tr_pct_bin,pop_ft,pop_ft_pct,UC_NM_MN,CTR_MN_NM,CTR_MN_ISO,continent,INCM_CMI,tr_pct_bin_lower,tr_pct_bin_upper
3260,8835.0,"[0.38, 0.4)",2.327518e+05,0.051525,Colombo,Sri Lanka,LKA,Asia,LMIC,0.38,0.40
492,530.0,"[0.36, 0.38)",2.542809e+04,0.082163,Nashville,United States,USA,Americas,HIC,0.36,0.38
2614,3675.0,"[0.34, 0.36)",8.077239e+05,0.054239,Moscow,Russia,RUS,Europe,UMIC,0.34,0.36
788,855.0,"[0.3, 0.32)",1.845908e+05,0.050220,Washington D.C.,United States,USA,Americas,HIC,0.30,0.32
1853,2973.0,"[0.28, 0.3)",1.058625e+05,0.070918,Stockholm,Sweden,SWE,Europe,HIC,0.28,0.30
...,...,...,...,...,...,...,...,...,...,...,...
3034,5635.0,"[0.0, 0.02)",2.857980e+06,0.893468,Sana'a,Yemen,YEM,Asia,LMIC,0.00,0.02
1167,1502.0,"[0.0, 0.02)",1.254795e+06,0.503607,Conakry,Guinea,GIN,Africa,LIC,0.00,0.02
3053,5737.0,"[0.0, 0.02)",8.272488e+06,0.655313,Tehran,Iran,IRN,Asia,UMIC,0.00,0.02
3079,5915.0,"[0.0, 0.02)",3.523200e+06,0.919489,Dubai,United Arab Emirates,ARE,Asia,HIC,0.00,0.02


In [8]:
# Read the 1sqkm squares from the Earth Engine export folder.
squares = gpd.read_file(filename="../output/earth_engine_exports/TreeCoverAnalysis-Boxes-10-09-24-09_49/")

In [9]:
# Peek at the first five squares.
squares.head(n=5)

Unnamed: 0,tree_area,pop_ft,tree_pct,ft_area,lon,hexagon_n,lat,city_id,geometry
0,1089.635103,2752.569644,0.001088,1001255.0,-17.50108,0,14.75488,1452.0,"POLYGON ((-17.50569 14.75038, -17.50569 14.759..."
1,4387.9397,13226.590721,0.004382,1001255.0,-17.422321,1,14.744907,1452.0,"POLYGON ((-17.42694 14.74041, -17.42694 14.749..."
2,96.148115,15852.254603,9.6e-05,1001255.0,-17.465827,2,14.728914,1452.0,"POLYGON ((-17.47044 14.72442, -17.47044 14.733..."
3,161620.824962,2505.278298,0.161418,1001255.0,-17.339451,3,14.75589,1452.0,"POLYGON ((-17.34407 14.75139, -17.34407 14.760..."
4,0.0,0.0,0.0,1001255.0,-17.422019,4,14.717225,1452.0,"POLYGON ((-17.42663 14.71273, -17.42663 14.721..."


In [10]:
# Attach to every square the details of its city's most populated bucket,
# including that bucket's lower and upper tree coverage bounds.
# Squares whose city_id has no row in `most_populated` are dropped (inner join).
city_bin_columns = [
    'city_id', 'UC_NM_MN', 'CTR_MN_NM', 'CTR_MN_ISO',
    'tr_pct_bin', 'tr_pct_bin_lower', 'tr_pct_bin_upper', 'pop_ft_pct',
]
most_populated_squares = squares.merge(most_populated[city_bin_columns], how='inner', on='city_id')

In [14]:
# The bounds describe the city's bucket, not the square itself,
# so give the columns names that say so.
bound_column_names = {
    "tr_pct_bin_lower": "city_lower_bound",
    "tr_pct_bin_upper": "city_upper_bound",
}
most_populated_squares = most_populated_squares.rename(bound_column_names, axis='columns')

In [16]:
# Keep only the squares that are representative of their city's most populated
# bucket: tree_pct at or above the lower bound and strictly below the upper bound.
most_populated_squares = most_populated_squares.loc[
    lambda sq: (sq.tree_pct >= sq.city_lower_bound) & (sq.tree_pct < sq.city_upper_bound)
]

In [18]:
# Finally, among the remaining squares, keep the single one
# with the largest population (pop_ft) in each city.
top_square_labels = most_populated_squares.groupby('city_id')['pop_ft'].idxmax()
most_populated_squares = most_populated_squares.loc[top_square_labels]

In [19]:
# Show the selected squares, greenest first.
most_populated_squares.sort_values('tree_pct', ascending=False)

Unnamed: 0,tree_area,pop_ft,tree_pct,ft_area,lon,hexagon_n,lat,city_id,geometry,UC_NM_MN,CTR_MN_NM,CTR_MN_ISO,tr_pct_bin,city_lower_bound,city_upper_bound,pop_ft_pct
74420,384295.695035,6159.254380,0.383814,1.001256e+06,79.905287,12,6.757942,8835.0,"POLYGON ((79.90079 6.75344, 79.90079 6.76244, ...",Colombo,Sri Lanka,LKA,"[0.38, 0.4)",0.38,0.40,0.051525
130000,363718.119223,3635.774753,0.363262,1.001255e+06,-86.668807,88,36.042443,530.0,"POLYGON ((-86.67433 36.03795, -86.67433 36.046...",Nashville,United States,USA,"[0.36, 0.38)",0.36,0.38,0.082163
114427,342488.638464,30981.637349,0.342059,1.001255e+06,37.731962,2378,55.612282,3675.0,"POLYGON ((37.72406 55.60778, 37.72406 55.61678...",Moscow,Russia,RUS,"[0.34, 0.36)",0.34,0.36,0.054239
143888,318475.873162,7558.978396,0.318077,1.001255e+06,-76.976787,1257,39.005578,855.0,"POLYGON ((-76.98253 39.00108, -76.98253 39.010...",Washington D.C.,United States,USA,"[0.3, 0.32)",0.30,0.32,0.050220
106609,297527.184368,5006.468970,0.297154,1.001255e+06,18.099273,339,59.285554,2973.0,"POLYGON ((18.09054 59.28106, 18.09054 59.29005...",Stockholm,Sweden,SWE,"[0.28, 0.3)",0.28,0.30,0.070918
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22615,94.880913,33931.503111,0.000095,1.001255e+06,44.531034,821,33.439875,5314.0,"POLYGON ((44.52569 33.43538, 44.52569 33.44437...",Baghdad,Iraq,IRQ,"[0.0, 0.02)",0.00,0.02,0.818284
1762,34.894459,100249.583266,0.000035,1.001255e+06,-7.493542,356,33.589053,1575.0,"POLYGON ((-7.49890 33.58455, -7.49890 33.59355...",Casablanca,Morocco,MAR,"[0.0, 0.02)",0.00,0.02,0.863899
6864,0.000000,67521.060366,0.000000,1.001256e+06,3.335577,1291,6.466243,2125.0,"POLYGON ((3.33109 6.46175, 3.33109 6.47074, 3....",Lagos,Nigeria,NGA,"[0.0, 0.02)",0.00,0.02,0.868962
18098,0.000000,15382.548084,0.000000,1.001255e+06,32.425066,283,15.653556,4335.0,"POLYGON ((32.42043 15.64906, 32.42043 15.65806...",Khartoum,Sudan,SDN,"[0.0, 0.02)",0.00,0.02,0.963118


In [20]:
# With this information, we can later query Google Earth Engine to generate
# geotiff images showing the tree coverage in each of these areas.
# That happens at a later step; for now, save the selection as a basic CSV.
# Notice that hexagon_n refers to the hexagon the square was created from at step 3.
export_columns = [
    'city_id', 'hexagon_n', 'UC_NM_MN', 'CTR_MN_NM', 'CTR_MN_ISO',
    'tree_pct', 'pop_ft_pct', 'tr_pct_bin', 'geometry',
]
most_populated_squares = most_populated_squares[export_columns]
most_populated_squares.to_csv(
    "../output/representative-squares-most-populated.csv",
    index=False,
)

In [21]:
# Let's also save it as a GeoJSON that we can directly upload and visualize.
# `assign` builds the "City (Country)" label on a new frame instead of writing
# into the column subset selected earlier, which can trigger pandas'
# SettingWithCopyWarning; it also keeps this cell safe to re-run.
most_populated_squares = most_populated_squares.assign(
    name=most_populated_squares.UC_NM_MN + ' (' + most_populated_squares.CTR_MN_NM + ')'
)
# Name the driver explicitly so the output format does not depend on
# geopandas inferring it from the ".json" file extension.
most_populated_squares.to_file("../output/representative-squares-most-populated.json", driver="GeoJSON")

In [24]:
# A sneak peek at the results: the ten greenest representative squares.
most_populated_squares.sort_values('tree_pct', ascending=False).iloc[:10]

Unnamed: 0,city_id,hexagon_n,UC_NM_MN,CTR_MN_NM,CTR_MN_ISO,tree_pct,pop_ft_pct,tr_pct_bin,geometry,name
74420,8835.0,12,Colombo,Sri Lanka,LKA,0.383814,0.051525,"[0.38, 0.4)","POLYGON ((79.90079 6.75344, 79.90079 6.76244, ...",Colombo (Sri Lanka)
130000,530.0,88,Nashville,United States,USA,0.363262,0.082163,"[0.36, 0.38)","POLYGON ((-86.67433 36.03795, -86.67433 36.046...",Nashville (United States)
114427,3675.0,2378,Moscow,Russia,RUS,0.342059,0.054239,"[0.34, 0.36)","POLYGON ((37.72406 55.60778, 37.72406 55.61678...",Moscow (Russia)
143888,855.0,1257,Washington D.C.,United States,USA,0.318077,0.05022,"[0.3, 0.32)","POLYGON ((-76.98253 39.00108, -76.98253 39.010...",Washington D.C. (United States)
106609,2973.0,339,Stockholm,Sweden,SWE,0.297154,0.070918,"[0.28, 0.3)","POLYGON ((18.09054 59.28106, 18.09054 59.29005...",Stockholm (Sweden)
155303,1074.0,384,Asuncion,Paraguay,PRY,0.276011,0.108538,"[0.26, 0.28)","POLYGON ((-57.47996 -25.17201, -57.47996 -25.1...",Asuncion (Paraguay)
105339,2851.0,655,Berlin,Germany,DEU,0.255156,0.085781,"[0.24, 0.26)","POLYGON ((13.44281 52.48183, 13.44281 52.49083...",Berlin (Germany)
91473,1045.0,8,Portland,United States,USA,0.243641,0.095134,"[0.24, 0.26)","POLYGON ((-70.29398 43.67819, -70.29398 43.687...",Portland (United States)
114816,183.0,51,Anchorage,United States,USA,0.239431,0.132741,"[0.22, 0.24)","POLYGON ((-149.95900 61.18427, -149.95900 61.1...",Anchorage (United States)
137456,651.0,1381,Indianapolis,United States,USA,0.236695,0.065662,"[0.22, 0.24)","POLYGON ((-86.25929 39.81067, -86.25929 39.819...",Indianapolis (United States)
