In [56]:
import os

In [57]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import country_converter as coco

In [58]:
# Load the Global Forest Watch dataset
file_path = './data/treecover_gain.csv'
tree_gain = pd.read_csv(file_path, low_memory=False)

# Display the first 10 rows of the dataset
tree_gain.head(10)

Unnamed: 0,iso,umd_tree_cover_gain__ha,umd_tree_cover_extent_2000__ha
0,ABW,19.23964,18197.51
1,AFG,10741.02,64385720.0
2,AGO,1224126.0,124742300.0
3,AIA,69.88141,8332.248
4,ALA,2582.831,150643.3
5,ALB,16472.16,2873537.0
6,AND,120.0824,45196.41
7,ARE,595.953,7120448.0
8,ARG,1107735.0,278010100.0
9,ARM,12255.93,2969009.0


In [59]:
iso3_codes = tree_gain['iso']
country = coco.convert(names=iso3_codes, to='name_short')
print(country)

XAD not found in ISO3
XCA not found in ISO3
XCL not found in ISO3
XKO not found in ISO3
XNC not found in ISO3
XPI not found in ISO3
XSP not found in ISO3


['Aruba', 'Afghanistan', 'Angola', 'Anguilla', 'Aland Islands', 'Albania', 'Andorra', 'United Arab Emirates', 'Argentina', 'Armenia', 'French Southern Territories', 'Antigua and Barbuda', 'Australia', 'Austria', 'Azerbaijan', 'Burundi', 'Belgium', 'Benin', 'Bonaire, Saint Eustatius and Saba', 'Burkina Faso', 'Bangladesh', 'Bulgaria', 'Bahrain', 'Bahamas', 'Bosnia and Herzegovina', 'St. Barths', 'Belarus', 'Belize', 'Bermuda', 'Bolivia', 'Brazil', 'Barbados', 'Brunei Darussalam', 'Bhutan', 'Botswana', 'Central African Republic', 'Canada', 'Switzerland', 'Chile', 'China', "Cote d'Ivoire", 'Cameroon', 'DR Congo', 'Congo Republic', 'Colombia', 'Comoros', 'Cabo Verde', 'Costa Rica', 'Cuba', 'Curacao', 'Cayman Islands', 'Cyprus', 'Czechia', 'Germany', 'Djibouti', 'Dominica', 'Denmark', 'Dominican Republic', 'Algeria', 'Ecuador', 'Egypt', 'Eritrea', 'Western Sahara', 'Spain', 'Estonia', 'Ethiopia', 'Finland', 'Fiji', 'Falkland Islands', 'France', 'Faroe Islands', 'Micronesia, Fed. Sts.', 'Gab

In [67]:
# Add the new countries column to the tree_gain DataFrame
tree_gain['country'] = country

# Reorder columns to make 'countries' the first column
columns_order = ['country'] + [col for col in tree_gain.columns if col != 'country']
tree_gain = tree_gain[columns_order]
tree_gain

Unnamed: 0,country,iso,umd_tree_cover_gain__ha,umd_tree_cover_extent_2000__ha
0,Aruba,ABW,1.923964e+01,1.819751e+04
1,Afghanistan,AFG,1.074102e+04,6.438572e+07
2,Angola,AGO,1.224126e+06,1.247423e+08
3,Anguilla,AIA,6.988141e+01,8.332248e+03
4,Aland Islands,ALA,2.582831e+03,1.506433e+05
...,...,...,...,...
231,not found,XSP,0.000000e+00,1.881529e+02
232,Yemen,YEM,7.046677e+03,4.522090e+07
233,South Africa,ZAF,3.882409e+05,1.219710e+08
234,Zambia,ZMB,7.681568e+05,7.504920e+07


In [68]:
# Rename the column
tree_gain_edit = tree_gain.rename(columns={'umd_tree_cover_gain__ha': 'tree_cover_gain_1000_ha'})

# Divide the entire column by 1,000,000,000 to convert to billions
tree_gain_edit['tree_cover_gain_1000_ha'] /= 1000

# Convert the 'tree_cover_gain_b' column to standard decimal format (not scientific notation)
tree_gain_edit['tree_cover_gain_1000_ha'] = tree_gain_edit['tree_cover_gain_1000_ha'].map(lambda x: f"{x:.6f}").astype(float)

tree_gain_edit.head()

Unnamed: 0,country,iso,tree_cover_gain_1000_ha,umd_tree_cover_extent_2000__ha
0,Aruba,ABW,0.01924,18197.51
1,Afghanistan,AFG,10.741024,64385720.0
2,Angola,AGO,1224.126467,124742300.0
3,Anguilla,AIA,0.069881,8332.248
4,Aland Islands,ALA,2.582831,150643.3


In [69]:
tree_gain_edit.describe()

Unnamed: 0,tree_cover_gain_1000_ha,umd_tree_cover_extent_2000__ha
count,236.0,236.0
mean,554.468964,56248220.0
std,2893.601691,173230600.0
min,0.0,49.90083
25%,0.346805,313094.1
50%,19.157199,7666780.0
75%,209.625047,39290750.0
max,37220.539179,1689455000.0


In [70]:
output_dir = 'data'

# Export the DataFrame to a CSV file
tree_gain_edit.to_csv(os.path.join(output_dir, 'tree_gain_clean.csv'), index=False)