In [1]:
# SQL Master Database

In [2]:
### Import Dependencies

In [3]:
import os
import pandas as pd
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
from sqlalchemy import text

In [4]:
# Location of the SQLite database file, relative to the notebook's working directory
destination_folder = "data"
database_filename = "master.sqlite"
destination_path = os.path.join(destination_folder, database_filename)

# Build the SQLAlchemy engine for that file; the URL is "sqlite:///" followed by the path
engine = create_engine("sqlite:///" + destination_path)

In [5]:
# Read the cleaned tree-cover CSVs; low_memory=False makes pandas parse each
# file in one pass instead of in chunks.
tree_loss = pd.read_csv('data/tree_loss_clean.csv', low_memory=False)
tree_gain = pd.read_csv('data/tree_gain_clean.csv', low_memory=False)

# Read the cleaned FAOSTAT CSVs with the default parser settings.
qcl = pd.read_csv('data/faostat/QCL_clean.csv')
rl = pd.read_csv('data/faostat/RL_clean.csv')

# Preview the first rows of every DataFrame, in load order.
# NOTE(review): the captured tree_loss preview shows row 0 as Afghanistan with
# iso 'AND' -- confirm the iso column is populated correctly upstream.
for loaded_frame in (tree_loss, tree_gain, qcl, rl):
    print(loaded_frame.head())

             country_old  threshold      area  extent_2000  extent_2010  \
0            Afghanistan         30  64385715       205791        71797   
1            Afghanistan         50  64385715       148430        46242   
2            Afghanistan         75  64385715        75486        18270   
3  Akrotiri and Dhekelia         30     23448          456          383   
4  Akrotiri and Dhekelia         50     23448          336          238   

   gain_2000-2020  2001  2002  2003  2004  ...  emissions_2020  \
0           10741    88   179   244   201  ...         11387.0   
1           10741    78   135   200   159  ...         10299.0   
2           10741    47    61    96    61  ...          6438.0   
3              42     2     1     0     2  ...           321.0   
4              42     1     1     0     2  ...            81.0   

   emissions_2021  emissions_2022  emissions_2023  subnational1  iso  \
0          6746.0          1908.0          3296.0           NaN  AND   
1       

In [6]:
# Write each DataFrame to its own SQLite table. if_exists='replace' drops any
# table left over from a previous run, so this cell can be re-run safely;
# index=False keeps the pandas index out of the table.
frames_by_table = {
    'tree_loss': tree_loss,
    'tree_gain': tree_gain,
    'qcl': qcl,
    'rl': rl,
}
for table_name, frame in frames_by_table.items():
    frame.to_sql(table_name, con=engine, if_exists='replace', index=False)

# Read every table back from the database.
tree_loss_from_db = pd.read_sql('SELECT * FROM tree_loss', con=engine)
tree_gain_from_db = pd.read_sql('SELECT * FROM tree_gain', con=engine)
qcl_from_db = pd.read_sql('SELECT * FROM qcl', con=engine)
rl_from_db = pd.read_sql('SELECT * FROM rl', con=engine)

frames_from_db = {
    'tree_loss': tree_loss_from_db,
    'tree_gain': tree_gain_from_db,
    'qcl': qcl_from_db,
    'rl': rl_from_db,
}

# Verify the round trip: every table must come back with the same number of
# rows and columns that was written. Printing head() alone would not reveal a
# truncated or incomplete write.
for table_name, frame in frames_by_table.items():
    stored_shape = frames_from_db[table_name].shape
    assert stored_shape == frame.shape, (
        f"Table '{table_name}' has shape {stored_shape} in the database, "
        f"expected {frame.shape}"
    )

# Preview the data as stored in the database.
for frame_from_db in frames_from_db.values():
    print(frame_from_db.head())

             country_old  threshold      area  extent_2000  extent_2010  \
0            Afghanistan         30  64385715       205791        71797   
1            Afghanistan         50  64385715       148430        46242   
2            Afghanistan         75  64385715        75486        18270   
3  Akrotiri and Dhekelia         30     23448          456          383   
4  Akrotiri and Dhekelia         50     23448          336          238   

   gain_2000-2020  2001  2002  2003  2004  ...  emissions_2020  \
0           10741    88   179   244   201  ...         11387.0   
1           10741    78   135   200   159  ...         10299.0   
2           10741    47    61    96    61  ...          6438.0   
3              42     2     1     0     2  ...           321.0   
4              42     1     1     0     2  ...            81.0   

   emissions_2021  emissions_2022  emissions_2023  subnational1  iso  \
0          6746.0          1908.0          3296.0          None  AND   
1       