# Library and Setup

In [67]:
import pandas as pd # Data management tools
import numpy as np # Mathematical operations
import psycopg2 # Access to SQL
import re # Regex and other lookup tools
import datetime
from sqlalchemy import create_engine

In [24]:
def fetch_table_to_df(conn, query):
    """"Extracts entire table(s) from an SQL database. 
    
    conn: Should be a connect function from psycopg2.
    query: Tables to be extracted from the database.

    """
    cur = conn.cursor()
    cur.execute(query)
    rows = cur.fetchall()
    column_names = [desc[0] for desc in cur.description]
    df = pd.DataFrame(rows, columns=column_names).replace({np.nan}, None)
    cur.close()
    return df

In [25]:
conn = psycopg2.connect(
    database="testing",
    user="postgres",
    password="forest25",
    host="localhost",
    port="5432"
)

In [26]:
tables = "tree_monitorings", "measurement_informations", "biomass_formulas", "tree_biomasses", 'taxonomies', 'tree_samplings', 'plot_informations', "site_informations"
for table in tables:
    table_name = f"{table}"
    globals()[table_name] = fetch_table_to_df(conn, f"SELECT * FROM {table}")

# Biomass Calculation

# Process

Python supports $pow()$, only need to remove $ sign from the equation

In [27]:
#replacements = {'pow' : '', ',' : '**'}
#biomass_formulas['formula_python'] = biomass_formulas['formula'].replace(replacements, regex = True).str.replace('$','')

biomass_formulas['formula_python'] = biomass_formulas['formula'].str.replace('$','') #Simpler, pow() exists in python as well

Changing data types for smooth operations

In [28]:
tree_monitorings[['tree_height', 'tree_dbh']] = tree_monitorings[['tree_height', 'tree_dbh']].astype('float')
taxonomies['id'] = taxonomies['id'].astype('int')
taxonomies['wood_density'] = taxonomies['wood_density'].astype('float')
tree_monitorings['taxonomy_id'] = tree_monitorings['taxonomy_id'].astype('int')

Current data shows combination of hasHeight+noDBH, noHeight+noDBH, hasHeight+hasDBH but no noHeight+hasDBH

## Suggested tree_monitoring changes

In [29]:
tree_species_replace = {65  :'Suren', 
                        7   :'Kopi Liberika', 
                        70  :'Citrus', 
                        72  :'Casuarina',
                        0   :'Other', 
                        5   :'Meranti',
                        9   :'Soursop',
                        4   :'Gaharu',
                        63  :'Mango',
                        85  :'Rosewood',
                        78  :'Orange',
                        3   :'Tengkurung',
                        10  :'Durian',
                        65  :'Cajuput',
                        68  :'Jackfruit',
                        66  :'Rambutan',
                        71  :'Clove',
                        80  :'Coffee',
                        61  :'Lamtoro',
                        12  :'Meranti Bakau', 
                        6   :'Meranti Bunga', 
                        13  :'Asam',
                        67  :'Avocado',
                        14  :'Cempedak',
                        8   :'Tampui', 
                        9   :'Sirsak', 
                        999 :'Kuras',
                        69  :'Bitterbean',
                        11  :'Mentangor'}

In [30]:
tree_spec = []
for index_used in tree_monitorings['taxonomy_id']:
    res = tree_species_replace.get(index_used)
    tree_spec.append(res)


In [31]:
tree_monitorings['tree_species'] = tree_spec

## Full Census Biomass Calculation

Adding column of biomass_formula_id for simpler referencing in upcoming operations

In [32]:
dbh_exist = tree_monitorings['tree_dbh'].notna() # check which is not NA
height_exist = tree_monitorings['tree_height'].notna() # check which is not NA

trees_wood_density = [taxonomies.loc[taxonomies['id'] == tax_id, 'wood_density'].values[0] 
                if tax_id in taxonomies['id'].values else None
                for tax_id in tree_monitorings['taxonomy_id']]

In [33]:
for index, (dbh, height, wood) in enumerate(zip(dbh_exist, height_exist, trees_wood_density)):
    if dbh == 1 and height == 1:
        if (wood != wood or wood == None):
            tree_monitorings.loc[index, 'biomass_formulas_id'] = 7
        else:
            tree_monitorings.loc[index, 'biomass_formulas_id'] = 3
    elif (dbh == 1 and height == 0):
        tree_monitorings.loc[index, 'biomass_formulas_id'] = 5
    elif (dbh == 0 and height == 1):
        tree_monitorings.loc[index, 'biomass_formulas_id'] = 4
    else:
        tree_monitorings.loc[index, 'biomass_formulas_id'] = None

Filtering data from measurement_information which also exist in tree_monitorings

In [34]:
type1_measurement = measurement_informations[(measurement_informations['monitoring_id'].isin(tree_monitorings['id'])== True) & measurement_informations['monitoring_type'] == 1]

In [35]:
tree_monitorings['tree_cond'] = tree_monitorings['tree_cond'].fillna("0")

In [36]:
taxonomy_dict = taxonomies.set_index('id')['wood_density'].to_dict() #Set wood identity for referral from taxonomy_id

biomass_index = [] #empty list for storing valid biomass ids
result = [] #empty list for storing results
for index_used in type1_measurement['monitoring_id']:
    tree_dbh     = tree_monitorings.loc[tree_monitorings['id'] == index_used,'tree_dbh'].values[0]
    tree_height  = tree_monitorings.loc[tree_monitorings['id'] == index_used,'tree_height'].values[0]
    biomass_id   = tree_monitorings.loc[tree_monitorings['id'] == index_used,'biomass_formulas_id'].values[0]
    tax_id       = tree_monitorings.loc[tree_monitorings['id'] == index_used,'taxonomy_id'].values[0]
    wood_density = taxonomy_dict.get(tax_id)
    if (biomass_id != biomass_id or biomass_id == None):
        continue
    else:
        biomass_form = biomass_formulas.loc[biomass_formulas['id'] == biomass_id, 'formula_python']
        for biomass in biomass_form:
            biomass_result = round(eval(biomass), 5)
            result.append(biomass_result)
            biomass_index.append(index_used)
#            print("dbh:", tree_dbh,
#                  "height:", tree_height,
##                  "index:", index_used, 
 #                 "formula", biomass_form,
 #                 "result:", biomass_result)

### Creating Biomass Table/Dataframe

In [37]:
tree_biomasses_python = type1_measurement[type1_measurement['monitoring_id'].isin(biomass_index) == True]

In [38]:
tree_biomasses_python = tree_biomasses_python.drop(['id','timenow','start', 'end','username','notes','_xform_id','_xform_id_string','monitoring_order','month_monitoring','phase'], axis = 1)


In [39]:
tree_biomasses_python['result'] = result

In [40]:
tree_biomasses_python['taxonomy_id'] = [tree_monitorings.loc[tree_monitorings['id'] == mon_id, 'taxonomy_id'].values[0] 
                if mon_id in tree_monitorings['id'].values else None
                for mon_id in tree_biomasses_python['monitoring_id']]

In [41]:
tree_biomasses_python['tree_species'] = [tree_monitorings.loc[tree_monitorings['id'] == mon_id, 'tree_species'].values[0] 
                if mon_id in tree_monitorings['id'].values else None
                for mon_id in tree_biomasses_python['monitoring_id']]

In [42]:
tree_biomasses_python['tree_id'] = [tree_monitorings.loc[tree_monitorings['id'] == mon_id, 'tree_id'].values[0] 
                if mon_id in tree_monitorings['id'].values else None
                for mon_id in tree_biomasses_python['monitoring_id']]

In [43]:
tree_biomasses_python['submission_time'] = [tree_monitorings.loc[tree_monitorings['id'] == mon_id, 'submission_time'].values[0] 
                if mon_id in tree_monitorings['id'].values else None
                for mon_id in tree_biomasses_python['monitoring_id']]

In [44]:
tree_biomasses_python['tree_cond'] = [tree_monitorings.loc[tree_monitorings['id'] == mon_id, 'tree_cond'].values[0] 
                if mon_id in tree_monitorings['id'].values else None
                for mon_id in tree_biomasses_python['monitoring_id']]

In [45]:
tree_biomasses_python['updated_at'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')


In [46]:
carbon = []
for res in result:
    if (res != res or res == None):
        continue
    else:
        carbon.append(round((res * 0.5 * 3.67),5)) # biomass * 0.5 * 3.67
tree_biomasses_python['carbon'] = carbon

In [47]:
tree_biomasses_python['id'] = range(1,len(tree_biomasses_python)+1)

In [48]:
tree_biomasses_python = tree_biomasses_python[['id','monitoring_id','tree_id','tree_species','taxonomy_id','result','carbon','date_monitoring','submission_time','created_at','updated_at']]

## Sampling Biomass Calculation

In [49]:
sampling_dbh_exist = tree_samplings['tree_dbh'].notna() # check which is not NA
sampling_height_exist = tree_samplings['tree_height'].notna() # check which is not NA

sampling_trees_wood_density = [taxonomies.loc[taxonomies['id'] == tax_id, 'wood_density'].values[0] 
                if tax_id in taxonomies['id'].values else None
                for tax_id in tree_samplings['taxonomy_id']]

In [50]:
for index, (dbh, height, wood) in enumerate(zip(sampling_dbh_exist, sampling_height_exist, sampling_trees_wood_density)):
    if dbh == 1 and height == 1:
        if (wood != wood or wood == None):
            tree_samplings.loc[index, 'biomass_formulas_id'] = 7
        else:
            tree_samplings.loc[index, 'biomass_formulas_id'] = 3
    elif (dbh == 1 and height == 0):
        tree_samplings.loc[index, 'biomass_formulas_id'] = 5
    elif (dbh == 0 and height == 1):
        tree_samplings.loc[index, 'biomass_formulas_id'] = 4
    else:
        tree_samplings.loc[index, 'biomass_formulas_id'] = None

In [51]:
tree_samplings[['tree_height','tree_dbh']] = tree_samplings[['tree_height','tree_dbh']].fillna('0')

In [52]:
taxonomy_dict = taxonomies.set_index('id')['wood_density'].to_dict() #Set wood identity for referral from taxonomy_id

sampling_results = [] #empty list for storing results
for index, row in tree_samplings.iterrows():
    tree_dbh     = float(row['tree_dbh'])
    tree_height  = float(row['tree_height'])
    biomass_id   = row['biomass_formulas_id']
    tax_id       = row['taxonomy_id']
    wood_density = taxonomy_dict.get(tax_id)
    if (biomass_id != biomass_id or biomass_id == None):
        sampling_results.append(None)
    else:
        biomass_form = biomass_formulas.loc[biomass_formulas['id'] == biomass_id, 'formula_python']
        for biomass in biomass_form:
            biomass_result = eval(biomass)
            sampling_results.append(biomass_result)

### Creating Sampling Biomass Table/Dataframe

In [53]:
tree_samplings['month_monitoring'] = pd.to_datetime(tree_samplings['date_monitoring'], format="%Y-%M-%d").dt.strftime("%b-%y")

In [54]:
sample_tree_biomasses_python = tree_samplings[['plot_information_id', 'subplot_id', 'tree_species', 'taxonomy_id', 'month_monitoring',
       'created_at', 'updated_at', 'tree_id', 'tree_height', 'tree_cond']]

In [55]:
sample_tree_biomasses_python['result'] = sampling_results

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample_tree_biomasses_python['result'] = sampling_results


In [56]:
tree_biomasses_python['updated_at'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')


In [57]:
carbon = []
for res in sampling_results:
    if (res != res or res == None):
        carbon.append(None)
    else:
        carbon.append(res * 0.5 * 3.67) # biomass * 0.5 * 3.67
sample_tree_biomasses_python['carbon'] = carbon

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample_tree_biomasses_python['carbon'] = carbon


In [58]:
sample_tree_biomasses_python['id'] = range(1,len(sample_tree_biomasses_python)+1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample_tree_biomasses_python['id'] = range(1,len(sample_tree_biomasses_python)+1)


In [68]:
sampling_biomasses_python = sample_tree_biomasses_python[['id', 'tree_id', 'plot_information_id', 'subplot_id', 'tree_species','taxonomy_id', 'tree_height', 'result', 'carbon','month_monitoring','created_at','updated_at']]

# Comparisons

In [60]:
tree_biomasses_python['tree_cond'] = [tree_monitorings.loc[tree_monitorings['id'] == mon_id, 'tree_cond'].values[0] 
                if mon_id in tree_monitorings['id'].values else None
                for mon_id in tree_biomasses_python['monitoring_id']]

In [61]:
census_biomasses_comparison = tree_biomasses_python.merge(tree_biomasses, on = ['monitoring_id','tree_id','taxonomy_id','tree_species','submission_time'], how = 'inner')

In [62]:
census_biomasses_comparison['result_y'] = census_biomasses_comparison['result_y'].astype('float')

In [63]:
census_biomasses_comparison.tree_cond.value_counts(dropna= False)

tree_cond
0    1824
2      19
Name: count, dtype: int64

In [64]:
census_biomasses_comparison[['result_x','result_y']]

Unnamed: 0,result_x,result_y
0,0.00000,
1,0.00000,
2,0.03073,0.03073
3,0.03073,0.03073
4,0.05722,0.05722
...,...,...
1838,0.05580,0.05580
1839,0.00032,0.00032
1840,0.05166,0.05166
1841,0.08850,0.08850


In [65]:
census_biomasses_comparison[census_biomasses_comparison['result_x'] != census_biomasses_comparison['result_y']][['tree_id','result_x','result_y','tree_cond']].head(50)

Unnamed: 0,tree_id,result_x,result_y,tree_cond
0,AA01T0180,0.0,,0
1,AA01T0180,0.0,,0
106,AA01T0078,0.0,,0
107,AA01T0078,0.0,,0
116,AA01T0144,0.0,,0
117,AA01T0144,0.0,,0
118,AA01T0145,0.0,,0
119,AA01T0145,0.0,,0
190,AA01T0079,0.0,,0
191,AA01T0079,0.0,,0


# Upload dataframe

In [None]:
# Create engine format 'method:// username:password @ server : port / database name'
engine = create_engine('postgresql://postgres:forest25@localhost:5432/testing')


In [None]:
tree_biomasses_python.to_sql('tree_biomasses_python', engine)

585

In [69]:
sampling_biomasses_python.to_sql('sampling_biomasses_python', engine)

847