# Tree Growth and Yield Analysis

### Import all the necessary libraries

In [35]:
import numpy as np # this for specific mathematical functions such pi, random, etc.
import pandas as pd  # this for read data and to create dataframes
import matplotlib.pyplot as plt # this for visualization of data

import pyodbc # this is to connect to external database (eg. SQL, MS Access, etc)

In [40]:
# LOAD THE DATASET
def connect_to_database(conn, db_driver, db_path):
    conn = conn.connect(
        rf"DRIVER={db_driver};"
        rf"DBQ={db_path};"
    )
    return conn, conn.cursor()

file_path = r"C:\Users\GILBERT FG\Desktop\Readings\PSP_database.accdb"
ms_access_driver = r"Microsoft Access Driver (*.mdb, *.accdb)"

conn, db = connect_to_database(pyodbc, ms_access_driver, file_path)

# list all tables
for table in db.tables(tableType="TABLE"):
    print(table.table_name)

Block register
Plot monitoring history
Plot-block relation
PSPs
Slope correction factors
Thinning history
TreeData


### Query the database to select the dataset for teak in Tain II plantation

In [62]:
query = "SELECT * FROM TreeData WHERE AreaType = 'Teak' AND Plantations = 'Tain II'"
teak_df = pd.read_sql(query, conn)
teak_df.head()

  teak_df = pd.read_sql(query, conn)


Unnamed: 0,Plantations,AreaType,Monitoring year,Monitoring month,Monitoring day,PLOT,TREE NR,Tree SPECIES,Species scientific name,H (m),DBH (cm),Merchantable height (m),REMARKS,Incorrect DBH,Incorrect H,Incorrect H / DBH,Exclude,Justification for exclusion
0,Tain II,Teak,2018.0,1.0,13,1,71.0,Teak,Tectona grandis,7.25,12.0,,Fire scars,False,False,,False,
1,Tain II,Teak,2018.0,1.0,13,1,76.0,Teak,Tectona grandis,8.25,12.0,,Fire scars,False,False,,False,
2,Tain II,Teak,2018.0,1.0,13,1,68.0,Teak,Tectona grandis,7.0,11.8,,Fire scars,False,False,,False,
3,Tain II,Teak,2018.0,1.0,13,1,53.0,Teak,Tectona grandis,7.25,11.5,,Fire scars,False,False,,False,
4,Tain II,Teak,2018.0,1.0,13,1,58.0,Teak,Tectona grandis,7.0,10.5,,Fire scars,False,False,,False,


In [63]:
teak_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191860 entries, 0 to 191859
Data columns (total 18 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   Plantations                  191860 non-null  object 
 1   AreaType                     191860 non-null  object 
 2   Monitoring year              191860 non-null  float64
 3   Monitoring month             191860 non-null  float64
 4   Monitoring day               191860 non-null  int64  
 5   PLOT                         191860 non-null  object 
 6   TREE NR                      191860 non-null  float64
 7   Tree SPECIES                 191860 non-null  object 
 8   Species scientific name      191851 non-null  object 
 9   H (m)                        191804 non-null  float64
 10  DBH (cm)                     191800 non-null  float64
 11  Merchantable height (m)      0 non-null       object 
 12  REMARKS                      70254 non-null   object 
 13 

In [64]:
teak_df.rename(columns={'PLOT': 'Plot no'}, inplace=True)

In [75]:
teak_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191860 entries, 0 to 191859
Data columns (total 19 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   Plantations                  191860 non-null  object 
 1   AreaType                     191860 non-null  object 
 2   Monitoring year              191860 non-null  float64
 3   Monitoring month             191860 non-null  float64
 4   Monitoring day               191860 non-null  int64  
 5   Plot no                      191860 non-null  object 
 6   TREE NR                      191860 non-null  float64
 7   Tree SPECIES                 191860 non-null  object 
 8   Species scientific name      191851 non-null  object 
 9   H (m)                        191804 non-null  float64
 10  DBH (cm)                     191800 non-null  float64
 11  Merchantable height (m)      0 non-null       object 
 12  REMARKS                      70254 non-null   object 
 13 

In [76]:
teak_df_trim = teak_df[['Plantations', 'AreaType', 'Monitoring year', 'Plot no', 'H (m)', 'DBH (cm)']]
teak_df_trim.head()

Unnamed: 0,Plantations,AreaType,Monitoring year,Plot no,H (m),DBH (cm)
0,Tain II,Teak,2018.0,1,7.25,12.0
1,Tain II,Teak,2018.0,1,8.25,12.0
2,Tain II,Teak,2018.0,1,7.0,11.8
3,Tain II,Teak,2018.0,1,7.25,11.5
4,Tain II,Teak,2018.0,1,7.0,10.5


In [67]:
query = "SELECT * FROM [Plot-block relation] WHERE AreaType = 'Teak' AND Plantations = 'Tain II'"
plt_blk_df = pd.read_sql(query, conn)
plt_blk_df.head()

  plt_blk_df = pd.read_sql(query, conn)


Unnamed: 0,RelationID,Plantations,Plot no,AreaType,Deviating plant year,Block,Start date,End date
0,74,Tain II,1370,Teak,,PF80,2020-05-01,9999-12-31 00:00:00
1,76,Tain II,1372,Teak,,C17,2020-05-01,9999-12-31 00:00:00
2,77,Tain II,1366,Teak,,C17,2020-05-01,9999-12-31 00:00:00
3,78,Tain II,1363,Teak,,C17,2020-05-01,9999-12-31 00:00:00
4,79,Tain II,1374,Teak,,C17,2020-05-01,9999-12-31 00:00:00


In [68]:
plt_blk_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1111 entries, 0 to 1110
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   RelationID            1111 non-null   int64         
 1   Plantations           1111 non-null   object        
 2   Plot no               1111 non-null   object        
 3   AreaType              1111 non-null   object        
 4   Deviating plant year  7 non-null      float64       
 5   Block                 1111 non-null   object        
 6   Start date            1111 non-null   datetime64[ns]
 7   End date              1111 non-null   object        
dtypes: datetime64[ns](1), float64(1), int64(1), object(5)
memory usage: 69.6+ KB


In [74]:
plt_blk_df_trim = plt_blk_df[['Plantations', 'Plot no', 'AreaType', 'Block']]
plt_blk_df_trim.head()

Unnamed: 0,Plantations,Plot no,AreaType,Block
0,Tain II,1370,Teak,PF80
1,Tain II,1372,Teak,C17
2,Tain II,1366,Teak,C17
3,Tain II,1363,Teak,C17
4,Tain II,1374,Teak,C17


In [72]:
query = "SELECT * FROM [Block register] WHERE Plantation = 'Tain II'"
block_register_df = pd.read_sql(query, conn)
block_register_df.head()

  block_register_df = pd.read_sql(query, conn)


Unnamed: 0,PLANTATION,BLOCK,STATUS,ROTATION,OWNERSHIP,RESERVE,YEAR PLANTED,BLOCK AREA (ha),PRODUCTIVE,UNPRODUCTIVE,TEAK,NATURAL INDIGENOUS,PLANTED INDIGENOUS,GMELINA,OPEN LAND,ROCK,REMARK
0,Tain II,A17,PLANTED,1,FORM GHANA,TAIN II,2023,61.95713,58.858151,,58.858151,,,,,3.098979,
1,Tain II,A19,PLANTED,1,FORM GHANA,TAIN II,2019,67.587663,64.984407,0.15,64.984407,,,,0.0,2.453256,
2,Tain II,A20,PLANTED,1,FORM GHANA,TAIN II,2019,70.331435,70.197867,0.0,70.197867,,,,0.0,0.133569,
3,Tain II,A21,PLANTED,1,FORM GHANA,TAIN II,2019,48.65717,47.743954,0.4,47.743954,,,,0.0,0.513216,
4,Tain II,A22,PLANTED,1,FORM GHANA,TAIN II,2019,50.624752,50.110498,0.0,50.110498,,,,0.0,0.514254,


In [73]:
block_register_df_trim = block_register_df[['PLANTATION', 'BLOCK', 'YEAR PLANTED', 'PRODUCTIVE']]
block_register_df_trim.head()

Unnamed: 0,PLANTATION,BLOCK,YEAR PLANTED,PRODUCTIVE
0,Tain II,A17,2023,58.858151
1,Tain II,A19,2019,64.984407
2,Tain II,A20,2019,70.197867
3,Tain II,A21,2019,47.743954
4,Tain II,A22,2019,50.110498


In [86]:
block_register_df_trim.rename(columns={'BLOCK': 'Block'}, inplace=True)
block_register_df_trim.rename(columns={'PLANTATION': 'Plantations'}, inplace=True)
block_register_df_trim.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  block_register_df_trim.rename(columns={'BLOCK': 'Block'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  block_register_df_trim.rename(columns={'PLANTATION': 'Plantations'}, inplace=True)


Unnamed: 0,Plantations,Block,YEAR PLANTED,PRODUCTIVE
0,Tain II,A17,2023,58.858151
1,Tain II,A19,2019,64.984407
2,Tain II,A20,2019,70.197867
3,Tain II,A21,2019,47.743954
4,Tain II,A22,2019,50.110498


In [82]:
teak_blk_merge = pd.merge(teak_df_trim, plt_blk_df_trim, on=['Plot no', 'Plantations', 'AreaType'], how='outer')

In [84]:
teak_blk_merge.head()

Unnamed: 0,Plantations,AreaType,Monitoring year,Plot no,H (m),DBH (cm),Block
0,Tain II,Teak,2018.0,1,7.25,12.0,B39
1,Tain II,Teak,2018.0,1,8.25,12.0,B39
2,Tain II,Teak,2018.0,1,7.0,11.8,B39
3,Tain II,Teak,2018.0,1,7.25,11.5,B39
4,Tain II,Teak,2018.0,1,7.0,10.5,B39


In [87]:
result_df = pd.merge(teak_blk_merge, block_register_df_trim, on=['Plantations', 'Block'], how='outer')
result_df.head()

Unnamed: 0,Plantations,AreaType,Monitoring year,Plot no,H (m),DBH (cm),Block,YEAR PLANTED,PRODUCTIVE
0,Tain II,,,,,,A17,2023.0,58.858151
1,Tain II,Teak,2023.0,1141.0,4.0,5.2,A19,2019.0,64.984407
2,Tain II,Teak,2023.0,1141.0,4.25,5.5,A19,2019.0,64.984407
3,Tain II,Teak,2023.0,1141.0,3.75,3.7,A19,2019.0,64.984407
4,Tain II,Teak,2023.0,1141.0,3.5,3.2,A19,2019.0,64.984407


In [89]:
result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191903 entries, 0 to 191902
Data columns (total 9 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   Plantations      191903 non-null  object 
 1   AreaType         191860 non-null  object 
 2   Monitoring year  191860 non-null  float64
 3   Plot no          191860 non-null  object 
 4   H (m)            191804 non-null  float64
 5   DBH (cm)         191800 non-null  float64
 6   Block            191646 non-null  object 
 7   YEAR PLANTED     191608 non-null  float64
 8   PRODUCTIVE       191608 non-null  float64
dtypes: float64(5), object(4)
memory usage: 13.2+ MB


In [93]:
result_df.dropna()

Unnamed: 0,Plantations,AreaType,Monitoring year,Plot no,H (m),DBH (cm),Block,YEAR PLANTED,PRODUCTIVE
1,Tain II,Teak,2023.0,1141,4.00,5.2,A19,2019.0,64.984407
2,Tain II,Teak,2023.0,1141,4.25,5.5,A19,2019.0,64.984407
3,Tain II,Teak,2023.0,1141,3.75,3.7,A19,2019.0,64.984407
4,Tain II,Teak,2023.0,1141,3.50,3.2,A19,2019.0,64.984407
5,Tain II,Teak,2023.0,1141,4.50,5.5,A19,2019.0,64.984407
...,...,...,...,...,...,...,...,...,...
191641,Tain II,Teak,2023.0,1370,4.00,5.5,PF80,2020.0,115.368242
191642,Tain II,Teak,2023.0,1370,4.25,5.8,PF80,2020.0,115.368242
191643,Tain II,Teak,2023.0,1370,2.00,2.0,PF80,2020.0,115.368242
191644,Tain II,Teak,2023.0,1370,5.25,6.7,PF80,2020.0,115.368242


In [94]:
result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191903 entries, 0 to 191902
Data columns (total 9 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   Plantations      191903 non-null  object 
 1   AreaType         191860 non-null  object 
 2   Monitoring year  191860 non-null  float64
 3   Plot no          191860 non-null  object 
 4   H (m)            191804 non-null  float64
 5   DBH (cm)         191800 non-null  float64
 6   Block            191646 non-null  object 
 7   YEAR PLANTED     191608 non-null  float64
 8   PRODUCTIVE       191608 non-null  float64
dtypes: float64(5), object(4)
memory usage: 13.2+ MB


In [95]:
result_df['Age (years)'] = result_df['Monitoring year'] - result_df['YEAR PLANTED']
result_df.head()

Unnamed: 0,Plantations,AreaType,Monitoring year,Plot no,H (m),DBH (cm),Block,YEAR PLANTED,PRODUCTIVE,Age (years)
0,Tain II,,,,,,A17,2023.0,58.858151,
1,Tain II,Teak,2023.0,1141.0,4.0,5.2,A19,2019.0,64.984407,4.0
2,Tain II,Teak,2023.0,1141.0,4.25,5.5,A19,2019.0,64.984407,4.0
3,Tain II,Teak,2023.0,1141.0,3.75,3.7,A19,2019.0,64.984407,4.0
4,Tain II,Teak,2023.0,1141.0,3.5,3.2,A19,2019.0,64.984407,4.0


In [96]:
result_df.dropna(subset=['Plot no'], inplace=True)

In [97]:
result_df.head()

Unnamed: 0,Plantations,AreaType,Monitoring year,Plot no,H (m),DBH (cm),Block,YEAR PLANTED,PRODUCTIVE,Age (years)
1,Tain II,Teak,2023.0,1141,4.0,5.2,A19,2019.0,64.984407,4.0
2,Tain II,Teak,2023.0,1141,4.25,5.5,A19,2019.0,64.984407,4.0
3,Tain II,Teak,2023.0,1141,3.75,3.7,A19,2019.0,64.984407,4.0
4,Tain II,Teak,2023.0,1141,3.5,3.2,A19,2019.0,64.984407,4.0
5,Tain II,Teak,2023.0,1141,4.5,5.5,A19,2019.0,64.984407,4.0


In [98]:
result_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 191860 entries, 1 to 191902
Data columns (total 10 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   Plantations      191860 non-null  object 
 1   AreaType         191860 non-null  object 
 2   Monitoring year  191860 non-null  float64
 3   Plot no          191860 non-null  object 
 4   H (m)            191804 non-null  float64
 5   DBH (cm)         191800 non-null  float64
 6   Block            191603 non-null  object 
 7   YEAR PLANTED     191565 non-null  float64
 8   PRODUCTIVE       191565 non-null  float64
 9   Age (years)      191565 non-null  float64
dtypes: float64(6), object(4)
memory usage: 16.1+ MB


In [104]:
result_df['Age (years)'].dropna()

1         4.0
2         4.0
3         4.0
4         4.0
5         4.0
         ... 
191641    3.0
191642    3.0
191643    3.0
191644    3.0
191645    3.0
Name: Age (years), Length: 191565, dtype: float64

In [105]:
result_df.isnull()

Unnamed: 0,Plantations,AreaType,Monitoring year,Plot no,H (m),DBH (cm),Block,YEAR PLANTED,PRODUCTIVE,Age (years)
1,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...
191898,False,False,False,False,False,False,True,True,True,True
191899,False,False,False,False,False,False,True,True,True,True
191900,False,False,False,False,False,False,True,True,True,True
191901,False,False,False,False,False,False,True,True,True,True


#### Predicting future DBH, Height and Volume base on age using regression models

#### Chapman-Richards growth function

In [106]:
# Define the chapman-richards growth function
def chapman_richards(age, a, b, c):
    return a * (1 - np.exp(-b*age))**c

# Logistic growth function
def logistic(age, Hmax, k, t0):
    return Hmax / (1 + np.exp(-k * (age-t0)))


In [107]:
from scipy.optimize import curve_fit

In [108]:
# Fit model to DBH vs Age
proj_dbh, _ = curve_fit(chapman_richards, result_df['Age (years)'], result_df['DBH (cm)'], p0=[30, 0.05, 1.5])
result_df['Height_Pred'] = chapman_richards(result_df['Age (years)', *proj_dbh])

result_df.head()

ValueError: array must not contain infs or NaNs