# Library and Setup

In [28]:
import datetime # Timestamps for the updated_at column
import os # Environment variables (database connection settings)
import re # Regex and other lookup tools

import numpy as np # Mathematical operations
import pandas as pd # Data management tools
import psycopg2 # Access to SQL

In [29]:
def fetch_table_to_df(conn, query):
    """Run a SQL query and return its complete result set as a DataFrame.

    Parameters
    ----------
    conn : connection object
        An open database connection exposing ``cursor()`` (created with
        ``psycopg2.connect`` in this notebook).
    query : str
        SQL statement to execute, e.g. ``"SELECT * FROM some_table"``.

    Returns
    -------
    pandas.DataFrame
        One row per fetched record. Column names are taken from the cursor
        description, and NaN values are replaced with None.

    Notes
    -----
    The cursor is always closed, including when the query raises.
    """
    cur = conn.cursor()
    try:
        cur.execute(query)
        rows = cur.fetchall()
        column_names = [desc[0] for desc in cur.description]
    finally:
        # Release the cursor even if execute()/fetchall() fails, so a bad
        # query does not leave an open cursor on the connection.
        cur.close()
    return pd.DataFrame(rows, columns=column_names).replace({np.nan}, None)

In [30]:
# Connection settings are read from the standard libpq environment variables
# (PGDATABASE, PGUSER, PGPASSWORD, PGHOST, PGPORT) so that real credentials
# never have to be written into the notebook. The fallbacks are the local
# development values, so the notebook still runs unchanged when nothing is set.
conn = psycopg2.connect(
    database=os.environ.get("PGDATABASE", "testing"),
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", "postgres"),
    host=os.environ.get("PGHOST", "localhost"),
    port=os.environ.get("PGPORT", "5432"),
)

In [31]:
# Tables this notebook works with. The names are fixed constants (never user
# input), which is why interpolating them into the SELECT statement is safe.
tables = "tree_monitorings", "measurement_informations", "biomass_formulas", "tree_biomasses", 'taxonomies'

# Fetch every table once, keyed by table name.
loaded_tables = {table: fetch_table_to_df(conn, f"SELECT * FROM {table}") for table in tables}

# Bind each DataFrame to an explicit name instead of injecting it through
# globals(): the variables used by the following cells are then visible to
# readers, linters and editors right where they are created.
tree_monitorings = loaded_tables["tree_monitorings"]
measurement_informations = loaded_tables["measurement_informations"]
biomass_formulas = loaded_tables["biomass_formulas"]
tree_biomasses = loaded_tables["tree_biomasses"]
taxonomies = loaded_tables["taxonomies"]

In [32]:
# The tables were fetched into DataFrames in the previous cell, so the
# database connection can be released here.
conn.close()

# Preparation

Python has a built-in `pow()` function, so the stored formulas only need their `$` signs removed to become valid Python expressions.

In [33]:
# Python has a built-in pow(), so the stored formula text only needs its
# literal '$' characters removed to become a Python expression.
# regex=False is explicit on purpose: as a regular expression '$' is the
# end-of-string anchor, and the default of `regex` differs between pandas
# versions, so relying on the default is not portable.
biomass_formulas['formula_python'] = biomass_formulas['formula'].str.replace('$', '', regex=False)

Casting columns to the numeric data types required by the operations that follow

In [34]:
# Tree measurements become floats, one column at a time.
for measurement_column in ('tree_height', 'tree_dbh'):
    tree_monitorings[measurement_column] = tree_monitorings[measurement_column].astype('float')

# Taxonomy keys become integers in both tables; wood density becomes a float.
taxonomies['taxonomy_id'] = taxonomies['taxonomy_id'].astype('int')
taxonomies['wood_density'] = taxonomies['wood_density'].astype('float')
tree_monitorings['taxonomy_id'] = tree_monitorings['taxonomy_id'].astype('int')

The current data contains three combinations: height without DBH, neither height nor DBH, and both height and DBH. There are no rows with DBH but no height.

In [35]:
# True/False per row for "value present", then a count of each combination.
height_dbh_presence = tree_monitorings[['tree_height', 'tree_dbh']].notna()
height_dbh_presence.value_counts()

tree_height  tree_dbh
True         False       6301
False        False       4193
True         True        3234
Name: count, dtype: int64

Standardizing tree_species names (unfinished, need to deal with NAs)

In [36]:
# Reference spellings of the species names (order kept as originally written).
tree_species = [
    'Suren', 'Kopi Liberika', 'Citrus', 'Casuarina', 'Other',
    'Meranti', 'Soursop', 'Gaharu', 'Mango', 'Rosewood',
    'Orange', 'Tengkurung', 'Durian', 'Cajuput', 'Jackfruit',
    'Rambutan', 'Clove', 'Coffee', 'Lamtoro', 'Meranti Bakau',
    'Meranti Bunga', 'Asam Gelugur', 'Avocado', 'Cempedak', 'Asam',
    'Tampui', 'Sirsak', 'Kuras', 'Bitterbean', 'Mentangor',
]

In [37]:

#x = [next(iter(x), np.nan) 
#          for x in map(lambda x: difflib.get_close_matches(x, tree_species, cutoff = 0.7), tree_monitorings['tree_species']) if x]

Adding a `biomass_formulas_id` column to `tree_monitorings` for simpler referencing in the upcoming operations

In [38]:
# Boolean flags per tree: is the measurement present?
dbh_exist = tree_monitorings['tree_dbh'].notna()
height_exist = tree_monitorings['tree_height'].notna()

# Wood density keyed by taxonomy_id, built once. setdefault keeps the FIRST
# row for a repeated taxonomy_id, which is what taking `.values[0]` of the
# filtered column returns.
wood_density_by_taxonomy = {}
for known_tax_id, density in zip(taxonomies['taxonomy_id'].to_numpy(),
                                 taxonomies['wood_density'].to_numpy()):
    wood_density_by_taxonomy.setdefault(known_tax_id, density)

# One wood density per tree, in tree_monitorings row order; None when the
# tree's taxonomy_id is not present in `taxonomies`. A dictionary lookup per
# tree replaces a full scan of `taxonomies` per tree.
trees_wood_density = [wood_density_by_taxonomy.get(tax_id)
                      for tax_id in tree_monitorings['taxonomy_id']]

In [39]:
# Formula ids, named after the situation in which each one is selected.
FORMULA_DBH_HEIGHT_WITH_DENSITY = 3     # DBH and height present, wood density known
FORMULA_HEIGHT_ONLY = 4                 # height present, DBH missing
FORMULA_DBH_ONLY = 5                    # DBH present, height missing
FORMULA_DBH_HEIGHT_WITHOUT_DENSITY = 7  # DBH and height present, wood density missing

# Work on plain positional arrays so the result does not depend on the labels
# of tree_monitorings' index (enumerate() positions are only valid `.loc`
# labels while the index is 0..n-1).
has_dbh = np.asarray(dbh_exist, dtype=bool)
has_height = np.asarray(height_exist, dtype=bool)
# Both None and NaN count as "no wood density".
wood_missing = pd.isna(pd.Series(list(trees_wood_density), dtype=object)).to_numpy()

# One vectorised assignment instead of one `.loc` write per row.
# Rows with neither DBH nor height match no condition and stay NaN.
tree_monitorings['biomass_formulas_id'] = np.select(
    [
        has_dbh & has_height & wood_missing,
        has_dbh & has_height & ~wood_missing,
        has_dbh & ~has_height,
        ~has_dbh & has_height,
    ],
    [
        FORMULA_DBH_HEIGHT_WITHOUT_DENSITY,
        FORMULA_DBH_HEIGHT_WITH_DENSITY,
        FORMULA_DBH_ONLY,
        FORMULA_HEIGHT_ONLY,
    ],
    default=np.nan,
)

In [40]:
# Number of trees per assigned formula id; NaN (no formula) is counted too.
tree_monitorings['biomass_formulas_id'].value_counts(dropna=False)

biomass_formulas_id
4.0    6301
NaN    4193
7.0    3178
3.0      56
Name: count, dtype: int64

Filtering `measurement_informations` down to the rows whose `monitoring_id` also exists in `tree_monitorings` and whose `monitoring_type` is 1

In [41]:
# Keep measurements that (a) refer to a row of tree_monitorings and
# (b) have monitoring_type 1.
# The two conditions are computed separately on purpose: `&` binds tighter
# than `==`, so writing `mask & column == 1` evaluates `(mask & column) == 1`
# instead of `mask & (column == 1)`.
in_tree_monitorings = measurement_informations['monitoring_id'].isin(tree_monitorings['id'])
is_type1 = measurement_informations['monitoring_type'] == 1
type1_measurement = measurement_informations[in_tree_monitorings & is_type1]

Calculation of biomass, still unoptimized

In [42]:
taxonomy_dict = taxonomies.set_index('taxonomy_id')['wood_density'].to_dict() # wood density keyed by taxonomy_id

# Row position of the FIRST tree_monitorings row for every id, built once so
# the loop does a dictionary lookup instead of scanning the whole table four
# times per measurement.
first_row_of = {}
for position, monitoring_id in enumerate(tree_monitorings['id'].to_numpy()):
    first_row_of.setdefault(monitoring_id, position)

dbh_values = tree_monitorings['tree_dbh'].to_numpy()
height_values = tree_monitorings['tree_height'].to_numpy()
formula_id_values = tree_monitorings['biomass_formulas_id'].to_numpy()
taxonomy_values = tree_monitorings['taxonomy_id'].to_numpy()

# Formula expression(s) per formula id, in table order.
formulas_by_id = {formula_id: expressions.tolist()
                  for formula_id, expressions in biomass_formulas.groupby('id')['formula_python']}

biomass_index = [] # monitoring ids for which a biomass value was computed
result = []        # computed biomass values, parallel to biomass_index
for index_used in type1_measurement['monitoring_id']:
    row = first_row_of[index_used]
    # These names are deliberately plain variables of this cell: eval() below
    # resolves whatever names a formula mentions against this namespace, so
    # they must not be renamed.
    tree_dbh     = dbh_values[row]
    tree_height  = height_values[row]
    biomass_id   = formula_id_values[row]
    tax_id       = taxonomy_values[row]
    wood_density = taxonomy_dict.get(tax_id)
    if pd.isna(biomass_id):
        # No formula applies to this tree (no usable measurements).
        continue
    for biomass in formulas_by_id.get(biomass_id, []):
        # NOTE(security): eval() executes text stored in the biomass_formulas
        # table as Python code. This is only acceptable while that table is
        # fully trusted; replace with a restricted expression evaluator
        # before running against data others can edit.
        biomass_result = round(eval(biomass), 3)
        result.append(biomass_result)
        biomass_index.append(index_used)

# Creating Biomass Table/Dataframe

In [43]:
# Keep only the measurements for which a biomass value was computed.
has_biomass = type1_measurement['monitoring_id'].isin(biomass_index)
tree_biomasses_python = type1_measurement[has_biomass]

In [44]:
# Columns of the measurement table that are not part of the biomass table.
unused_columns = [
    'id', 'timenow', 'start', 'end', 'username', 'notes',
    '_xform_id', '_xform_id_string', 'monitoring_order', 'month_monitoring', 'phase',
]
tree_biomasses_python = tree_biomasses_python.drop(columns=unused_columns)


In [45]:
# Assigning a plain list is positional: the i-th value of `result` goes to the
# i-th row. `result` must therefore hold exactly one value per row of
# tree_biomasses_python, in the same row order; a different length raises.
tree_biomasses_python['result'] = result

In [46]:
# Copy descriptive columns from tree_monitorings onto the biomass table,
# matched on monitoring_id.
# The lookup is indexed by id once and reused for every column, instead of
# scanning tree_monitorings once per row and per column. drop_duplicates
# keeps the first row of a repeated id. A monitoring_id that does not exist
# in tree_monitorings yields a missing value.
monitoring_lookup = tree_monitorings.drop_duplicates('id').set_index('id')
for looked_up_column in ('taxonomy_id', 'tree_species', 'tree_id', 'submission_time'):
    tree_biomasses_python[looked_up_column] = (
        tree_biomasses_python['monitoring_id'].map(monitoring_lookup[looked_up_column])
    )

In [50]:
# Stamp every row with the time this notebook produced the table.
# The timestamp is taken once, so all rows share the same value.
# (`datetime` is imported in the import cell at the top of the notebook.)
tree_biomasses_python['updated_at'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')


In [51]:
# carbon = biomass * 0.5 * 3.67
# NOTE(review): 0.5 is presumably the carbon fraction of biomass and 3.67 the
# CO2-to-carbon mass ratio (44/12) - confirm and move to named constants.
# A missing biomass result stays in the list as NaN rather than being
# skipped, so `carbon` always has one entry per entry of `result` and stays
# aligned with the rows of tree_biomasses_python.
carbon = [np.nan if pd.isna(res) else res * 0.5 * 3.67 for res in result]
tree_biomasses_python['carbon'] = carbon

In [52]:
# Fresh sequential ids starting at 1, in current row order.
row_count = len(tree_biomasses_python)
tree_biomasses_python['id'] = range(1, row_count + 1)

In [53]:
# Final column order of the generated biomass table.
biomass_table_columns = [
    'id', 'monitoring_id', 'tree_id', 'tree_species', 'taxonomy_id',
    'result', 'carbon',
    'date_monitoring', 'submission_time', 'created_at', 'updated_at',
]
tree_biomasses_python = tree_biomasses_python[biomass_table_columns]
tree_biomasses_python

Unnamed: 0,id,monitoring_id,tree_id,tree_species,taxonomy_id,result,carbon,date_monitoring,submission_time,created_at,updated_at
1653,1,62,AA01T0180,Meranti,5,0.000,0.000000,2018-04-23,2019-03-29 14:36:00,2021-08-01 20:19:02,2024-11-14 11:27:14
1686,2,1,AA01T0001,Gaharu,4,0.031,0.056885,2018-02-21,2019-03-29 14:02:38,2021-08-01 20:19:01,2024-11-14 11:27:14
1793,3,30838,BB-01-0-0024,Suren,64,0.295,0.541325,2024-08-15,2024-09-11 00:00:00,2024-09-11 18:13:55,2024-11-14 11:27:14
1794,4,30839,BB-01-0-0026,Suren,64,0.144,0.264240,2024-08-15,2024-09-11 00:00:00,2024-09-11 18:13:55,2024-11-14 11:27:14
1795,5,30840,BB-01-0-0027,Suren,64,0.329,0.603715,2024-08-15,2024-09-11 00:00:00,2024-09-11 18:13:55,2024-11-14 11:27:14
...,...,...,...,...,...,...,...,...,...,...,...
18791,10581,36980,CL-10-0-1843,suren,64,0.473,0.867955,2023-08-21,2024-09-11 00:00:00,2024-09-11 21:22:56,2024-11-14 11:27:14
18792,10582,36981,CL-10-0-1844,suren,64,0.577,1.058795,2023-08-21,2024-09-11 00:00:00,2024-09-11 21:22:56,2024-11-14 11:27:14
18793,10583,36982,CL-10-0-1845,suren,64,0.944,1.732240,2023-08-21,2024-09-11 00:00:00,2024-09-11 21:22:56,2024-11-14 11:27:14
18794,10584,36983,CL-10-0-1846,suren,64,0.225,0.412875,2023-08-21,2024-09-11 00:00:00,2024-09-11 21:22:56,2024-11-14 11:27:14


In [54]:
# Display the tree_biomasses table as loaded from the database
# (presumably for comparison with tree_biomasses_python computed above).
tree_biomasses

Unnamed: 0,id,monitoring_id,tree_id,tree_species,taxonomy_id,result,carbon,date_monitoring,submission_time,created_at,updated_at
0,1,32192,BB-06-0-1981,Mango,63,0.13072,0.06536,2024-08-15,2024-09-11 00:00:00,2024-09-11 22:42:35,2024-09-11 22:42:35
1,2,1253,P-1-051880062,Jelutung,17,,,2019-08-25,2021-07-28 00:00:00,2024-09-11 22:42:35,2024-09-11 22:42:35
2,3,2064,P-1-051880063,Jelutung,17,,,2020-09-19,2021-07-28 00:00:00,2024-09-11 22:42:35,2024-09-11 22:42:35
3,4,1254,P-1-051880063,Jelutung,17,,,2019-08-25,2021-07-28 00:00:00,2024-09-11 22:42:35,2024-09-11 22:42:35
4,5,2065,P-1-051880064,Jelutung,17,,,2020-09-19,2021-07-28 00:00:00,2024-09-11 22:42:35,2024-09-11 22:42:35
...,...,...,...,...,...,...,...,...,...,...,...
10639,10640,37979,CL-10-0-1844,Suren,64,,,2024-08-19,2024-09-11 00:00:00,2024-09-11 22:42:55,2024-09-11 22:42:55
10640,10641,37980,CL-10-0-1845,Suren,64,,,2024-08-19,2024-09-11 00:00:00,2024-09-11 22:42:55,2024-09-11 22:42:55
10641,10642,37981,CL-10-0-1846,Suren,64,,,2024-08-19,2024-09-11 00:00:00,2024-09-11 22:42:55,2024-09-11 22:42:55
10642,10643,37982,CL-10-0-1847,Suren,64,,,2024-08-19,2024-09-11 00:00:00,2024-09-11 22:42:55,2024-09-11 22:42:55
