In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Show up to 200 rows and 200 columns when pandas displays a frame.
for _display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(_display_option, 200)

In [None]:
# Read the tree table and the FIA tree-name table from the working directory.
mt_tree, fia_trees = (
    pd.read_csv(csv_name) for csv_name in ("MT_TREE.csv", "fia_treenames.csv")
)
# all_tree = pd.read_csv("ENTIRE_TREE.csv")

In [None]:
# Show the fia_trees row(s) whose "FIA Code" is 202.
fia_trees.loc[fia_trees["FIA Code"].eq(202)]

In [None]:
# Column names of the tree table, as a plain Python list.
mt_tree.columns.tolist()

In [None]:
# Number of rows per SPCD value, most frequent first.
mt_tree["SPCD"].value_counts()

In [None]:
# Subset of the tree table restricted to SPCD 202.
mt_tree_202 = mt_tree.loc[mt_tree["SPCD"].eq(202)]

In [None]:
# Keep only the rows that have a VOLCFNET value (drop rows where it is NaN).
# mt_tree_202 = mt_tree_202[mt_tree_202["VOLCFNET"].notna()]
mt_tree = mt_tree.dropna(subset=["VOLCFNET"])
# all_tree = all_tree[all_tree["VOLCFNET"].notna()]
# all_tree = all_tree[all_tree["VOLCFGRS"].notna()]
# all_tree = all_tree[all_tree["HT"].notna()]

In [None]:
# Number of rows with a missing CARBON_AG value.
mt_tree["CARBON_AG"].isna().sum()

In [None]:
# Missing-value count for every column of the tree table.
mt_tree.isna().sum(axis="index")

In [None]:
# Plot D^2 * H (DIA squared times HT) vs VOLCFNET for the SPCD 202 subset.
import matplotlib.pyplot as plt

# D2H must include the height term; DIA ** 2 alone is only D^2.
# Rebinding through .assign() instead of writing a column onto the filtered
# slice avoids pandas' SettingWithCopyWarning.
mt_tree_202 = mt_tree_202.assign(
    D2H=mt_tree_202["DIA"] ** 2 * mt_tree_202["HT"]
)

# Plot the derived D2H column (not raw DIA) and label the axes so the figure
# can be read on its own.
fig, ax = plt.subplots()
ax.scatter(mt_tree_202["VOLCFNET"], mt_tree_202["D2H"], s=1)
ax.set_xlabel("VOLCFNET")
ax.set_ylabel("D2H (DIA^2 * HT)")
ax.set_title("D2H vs VOLCFNET, SPCD 202")
plt.show()

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

def train_random_forest(
    df: pd.DataFrame,
    feature_cols: list,
    target_col: str,
    test_size: float = 0.1,
    n_estimators: int = 200,
    random_state: int = 42,
) -> "Pipeline":
    """
    Train a Random Forest regressor, print hold-out metrics, and return the fitted pipeline.

    Numeric (and boolean) feature columns are passed through unchanged;
    object/category columns are one-hot encoded. Feature columns of any other
    dtype are not used by the model and are reported in the printed output.

    Args:
        df (pd.DataFrame): The input dataframe containing the data.
        feature_cols (list): Column names to be used as features.
        target_col (str): The name of the target column.
        test_size (float): Fraction of rows held out for evaluation.
        n_estimators (int): Number of trees in the forest.
        random_state (int): Seed used for both the train/test split and the forest.

    Returns:
        The fitted scikit-learn Pipeline (preprocessor followed by regressor).

    Raises:
        KeyError: If a requested feature or target column is missing from ``df``.
    """
    print("--- Starting Model Training ---")

    # 1. Define Features (X) and Target (y)
    X = df[feature_cols]
    y = df[target_col]

    # 2. Identify categorical and numerical features
    # Any numeric dtype counts as numerical (int32, float32, bool, nullable ints, ...),
    # not just int64/float64: a column that lands in neither group is dropped
    # by the ColumnTransformer below.
    categorical_features = list(
        X.select_dtypes(include=['object', 'category']).columns
    )
    numerical_features = [
        col for col, dtype in X.dtypes.items()
        if pd.api.types.is_numeric_dtype(dtype)
    ]

    print(f"Identified {len(numerical_features)} numerical features: {list(numerical_features)}")
    print(f"Identified {len(categorical_features)} categorical features: {list(categorical_features)}")

    # Make silently-dropped features visible instead of training without them unnoticed.
    handled_features = set(numerical_features) | set(categorical_features)
    ignored_features = [col for col in X.columns if col not in handled_features]
    if ignored_features:
        print(f"WARNING: {len(ignored_features)} feature(s) have an unsupported dtype "
              f"and will be ignored: {ignored_features}")

    # 3. Create a preprocessor object
    # Numerical features are passed through as they are; categorical features
    # are one-hot encoded. handle_unknown='ignore' prevents errors if a
    # category appears in test data but not in training data.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', 'passthrough', numerical_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
        ])

    # 4. Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    print(f"\nData split complete. Training set size: {len(X_train)}, Test set size: {len(X_test)}")

    # 5. Define the model
    # A fixed random_state keeps the fitted forest reproducible.
    rf_model = RandomForestRegressor(
        n_estimators=n_estimators, random_state=random_state, n_jobs=-1
    )

    # 6. Create the full pipeline
    # The pipeline first preprocesses the data, then feeds it to the model.
    model_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                                     ('regressor', rf_model)])

    # 7. Train the model
    print("\nTraining the model...")
    model_pipeline.fit(X_train, y_train)
    print("Training complete.")

    # 8. Make predictions on the test set
    y_pred = model_pipeline.predict(X_test)

    # 9. Evaluate the model
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("\n--- Model Evaluation ---")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"R-squared (R²): {r2:.4f}")
    print("------------------------\n")

    return model_pipeline

# --- Example Usage ---
if __name__ == "__main__":

    # Regression target and the columns used to predict it
    target = 'VOLCFGRS'
    features = ['HT', 'DIA', 'SPCD']

    # Fit and evaluate; the returned pipeline is kept in trained_model
    trained_model = train_random_forest(mt_tree, features, target)

--- Starting Model Training ---
Identified 3 numerical features: ['HT', 'DIA', 'SPCD']
Identified 0 categorical features: []

Data split complete. Training set size: 286595, Test set size: 31844

Training the model...
Training complete.

--- Model Evaluation ---
Mean Absolute Error (MAE): 0.4683
Mean Squared Error (MSE): 3.6448
R-squared (R²): 0.9942
------------------------



In [None]:
# One hypothetical tree, described with exactly the columns the model was
# trained on (HT, DIA, SPCD). A column the model was not fitted with (such as
# PLOT) would be discarded by the pipeline's preprocessor, so it is not
# included here.
new_tree_data = pd.DataFrame({
    'HT': [88],
    'DIA': [15.8],
    'SPCD': [202],
})

prediction = trained_model.predict(new_tree_data)
print(f"Predicted value for the new tree: {prediction[0]:,.2f}")

Predicted value for the new tree: 9.48


In [16]:
# Rows for SPCD 202 with DIA 7.3 and HT 34.
# Optional narrower view: append [["DIA", "HT", "SPCD", "VOLCFGRS", "VOLCFNET"]]
mt_tree.loc[
    mt_tree["SPCD"].eq(202) & mt_tree["DIA"].eq(7.3) & mt_tree["HT"].eq(34)
]

Unnamed: 0,CN,PLT_CN,PREV_TRE_CN,INVYR,STATECD,UNITCD,COUNTYCD,PLOT,SUBP,TREE,CONDID,PREVCOND,STATUSCD,SPCD,SPGRPCD,DIA,DIAHTCD,HT,HTCD,ACTUALHT,TREECLCD,CR,CCLCD,TREEGRCD,AGENTCD,CULL,DAMLOC1,DAMTYP1,DAMSEV1,DAMLOC2,DAMTYP2,DAMSEV2,DECAYCD,STOCKING,WDLDSTEM,VOLCFNET,VOLCFGRS,VOLCSNET,VOLCSGRS,VOLBFNET,VOLBFGRS,VOLCFSND,DIACHECK,MORTYR,SALVCD,UNCRCD,CPOSCD,CLIGHTCD,CVIGORCD,CDENCD,CDIEBKCD,TRANSCD,TREEHISTCD,BHAGE,TOTAGE,CULLDEAD,CULLFORM,CULLMSTOP,CULLBF,CULLCF,BFSND,CFSND,SAWHT,BOLEHT,FORMCL,HTCALC,HRDWD_CLUMP_CD,SITREE,CREATED_DATE,MODIFIED_DATE,MORTCD,HTDMP,ROUGHCULL,MIST_CL_CD,CULL_FLD,RECONCILECD,PREVDIA,P2A_GRM_FLG,TREECLCD_NERS,TREECLCD_SRS,TREECLCD_NCRS,TREECLCD_RMRS,STANDING_DEAD_CD,PREV_STATUS_CD,PREV_WDLDSTEM,TPA_UNADJ,DRYBIO_BOLE,DRYBIO_STUMP,DRYBIO_BG,CARBON_AG,CARBON_BG,CYCLE,SUBCYCLE,BORED_CD_PNWRS,DAMLOC1_PNWRS,DAMLOC2_PNWRS,DIACHECK_PNWRS,DMG_AGENT1_CD_PNWRS,DMG_AGENT2_CD_PNWRS,DMG_AGENT3_CD_PNWRS,MIST_CL_CD_PNWRS,SEVERITY1_CD_PNWRS,SEVERITY1A_CD_PNWRS,SEVERITY1B_CD_PNWRS,SEVERITY2_CD_PNWRS,SEVERITY2A_CD_PNWRS,SEVERITY2B_CD_PNWRS,SEVERITY3_CD_PNWRS,UNKNOWN_DAMTYP1_PNWRS,UNKNOWN_DAMTYP2_PNWRS,PREV_PNTN_SRS,DISEASE_SRS,DIEBACK_SEVERITY_SRS,DAMAGE_AGENT_CD1,DAMAGE_AGENT_CD2,DAMAGE_AGENT_CD3,CENTROID_DIA,CENTROID_DIA_HT,CENTROID_DIA_HT_ACTUAL,UPPER_DIA,UPPER_DIA_HT,VOLCSSND,DRYBIO_SAWLOG,DAMAGE_AGENT_CD1_SRS,DAMAGE_AGENT_CD2_SRS,DAMAGE_AGENT_CD3_SRS,DRYBIO_AG,ACTUALHT_CALC,ACTUALHT_CALC_CD,CULL_BF_ROTTEN,CULL_BF_ROTTEN_CD,CULL_BF_ROUGH,CULL_BF_ROUGH_CD,PREVDIA_FLD,TREECLCD_31_NCRS,TREE_GRADE_NCRS,BOUGHS_AVAILABLE_NCRS,BOUGHS_HRVST_NCRS,TREECLCD_31_NERS,AGENTCD_NERS,BFSNDCD_NERS,AGECHKCD_RMRS,PREV_AGECHKCD_RMRS,PREV_BHAGE_RMRS,PREV_TOTAGE_RMRS,PREV_TREECLCD_RMRS,RADAGECD_RMRS,RADGRW_RMRS,VOLBSGRS,VOLBSNET,SAPLING_FUSIFORM_SRS,EPIPHYTE_PNWRS,ROOT_HT_PNWRS,CAVITY_USE_PNWRS,CORE_LENGTH_PNWRS,CULTURALLY_KILLED_PNWRS,DIA_EST_PNWRS,GST_PNWRS,INC10YR_PNWRS,INC5YRHT_PNWRS,INC5YR_PNWRS,RING_COUNT_INNER_2INCHES_PNWRS,RING_COUNT_PNWRS,SNAG_DIS_CD_PNWRS,CONEPRESCD1
,CONEPRESCD2,CONEPRESCD3,MASTCD,VOLTSGRS,VOLTSGRS_BARK,VOLTSSND,VOLTSSND_BARK,VOLCFGRS_STUMP,VOLCFGRS_STUMP_BARK,VOLCFSND_STUMP,VOLCFSND_STUMP_BARK,VOLCFGRS_BARK,VOLCFGRS_TOP,VOLCFGRS_TOP_BARK,VOLCFSND_BARK,VOLCFSND_TOP,VOLCFSND_TOP_BARK,VOLCFNET_BARK,VOLCSGRS_BARK,VOLCSSND_BARK,VOLCSNET_BARK,DRYBIO_STEM,DRYBIO_STEM_BARK,DRYBIO_STUMP_BARK,DRYBIO_BOLE_BARK,DRYBIO_BRANCH,DRYBIO_FOLIAGE,DRYBIO_SAWLOG_BARK,PREV_ACTUALHT_FLD,PREV_HT_FLD,UTILCLCD,SPCD_ID_CD,ABNORMAL_TERMINATION
63034,3024024010690,3024010010690,,1989,30,4,49,16,1,3,1,,1,202.0,10,7.3,1,34.0,1.0,,2.0,55.0,3.0,,79.0,,,,,,,,,7.3417,,3.255261,3.260207,0.0,0.0,0.0,0.0,3.255261,,,,,,,,,,,,,,,,,,,,,,,,,,,2004-05-27 00:00:00,2025-02-19 13:50:00,,,,,,,,,,,,,,,,27.524002,91.407729,6.252964,31.402807,69.624715,16.203849,1,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,134.931618,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.057724,15.462125,,,,,,,,
75274,2946663010690,2946653010690,,1989,30,5,1,87,2,1,1,,1,202.0,10,7.3,1,34.0,1.0,,2.0,35.0,3.0,,0.0,,,,,,,,,6.4,,3.255261,3.260207,0.0,0.0,0.0,0.0,3.255261,,,,,,,,,,,,,,,,,,,,,,,,,,,2004-05-27 00:00:00,2025-02-19 13:49:19,,,,,,,,,,,,,,,,27.524002,91.407729,6.252964,31.402807,69.624715,16.203849,1,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,134.931618,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.057724,15.462125,,,,,,,,
91930,2328544010690,2328527010690,,2003,30,1,53,83229,2,1,2,,1,202.0,10,7.3,1,34.0,1.0,34.0,2.0,75.0,4.0,,,0.0,,,,,,,,1.9766,,3.228051,3.228051,,,,,3.228051,0.0,,,,,,,,,,,,,0.0,,0.0,,,,,,,,,,,2011-08-23 18:05:07,2025-02-19 13:50:10,,,,0.0,0.0,,,,,,,1.0,,,,6.018046,76.786552,7.524123,31.140321,82.083677,16.068405,2,1,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,159.076893,,,,,,,,,,,,,,,0.0,,,,,1.0,11.0,,,,,,,,,,,,,,,,,,,,,3.889611,1.445995,3.889611,1.445995,0.316309,0.11759,0.316309,0.11759,1.200055,0.345251,0.12835,1.200055,0.345251,0.12835,1.200055,,,,92.52325,6.138179,0.499165,5.094174,60.415464,31.399463,,,,,,
119513,4761960010690,4761933010690,,2004,30,5,31,86293,3,3,1,,1,202.0,10,7.3,1,34.0,1.0,34.0,2.0,35.0,3.0,,,0.0,,,,,,,,0.9883,,3.228051,3.228051,,,,,3.228051,0.0,,,,,,,,,,,,,0.0,,0.0,,,,,,,,,,,2011-08-23 18:11:32,2025-02-19 13:49:42,,,,0.0,0.0,,,,,,,1.0,,,,6.018046,76.786552,7.524123,31.140321,82.083677,16.068405,2,2,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,159.076893,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,3.889611,1.445995,3.889611,1.445995,0.316309,0.11759,0.316309,0.11759,1.200055,0.345251,0.12835,1.200055,0.345251,0.12835,1.200055,,,,92.52325,6.138179,0.499165,5.094174,60.415464,31.399463,,,,,,
123065,5438938010690,5438923010690,,2005,30,1,29,94010,2,2,1,,1,202.0,10,7.3,1,34.0,1.0,34.0,2.0,75.0,3.0,,,0.0,,,,,,,,0.9883,,3.228051,3.228051,,,,,3.228051,0.0,,,92.0,4.0,5.0,,35.0,0.0,15.0,,56.0,,0.0,,0.0,,,,,,,,,,,2011-08-23 18:18:34,2025-02-19 13:49:39,,,,0.0,0.0,,,,,,,1.0,,,,6.018046,76.786552,7.524123,31.140321,82.083677,16.068405,2,3,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,159.076893,,,,,,,,,,,,,,,0.0,,,,,2.0,7.0,,,,,,,,,,,,,,,,,,,,,3.889611,1.445995,3.889611,1.445995,0.316309,0.11759,0.316309,0.11759,1.200055,0.345251,0.12835,1.200055,0.345251,0.12835,1.200055,,,,92.52325,6.138179,0.499165,5.094174,60.415464,31.399463,,,,,,
132133,5443830010690,5443791010690,,2005,30,4,43,80912,4,8,1,,1,202.0,10,7.3,1,34.0,1.0,34.0,2.0,45.0,3.0,,,0.0,,,,,,,,1.2354,,3.228051,3.228051,,,,,3.228051,0.0,,,,,,,,,,,,,0.0,,0.0,,,,,,,,,,,2011-08-23 18:17:38,2025-02-19 13:49:50,,,,0.0,0.0,,,,,,,1.0,,,,6.018046,76.786552,7.524123,31.140321,82.083677,16.068405,2,3,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,159.076893,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,3.889611,1.445995,3.889611,1.445995,0.316309,0.11759,0.316309,0.11759,1.200055,0.345251,0.12835,1.200055,0.345251,0.12835,1.200055,,,,92.52325,6.138179,0.499165,5.094174,60.415464,31.399463,,,,,,
168796,31434540010690,31434518010690,,2008,30,1,29,82685,4,2,1,,1,202.0,10,7.3,1,34.0,1.0,34.0,2.0,50.0,4.0,,,0.0,,,,,,,,0.4942,,3.228051,3.228051,,,,,3.228051,0.0,,,85.0,,,,,,,,,,0.0,,0.0,,,,,,,,,,,2011-08-25 18:55:37,2025-02-19 13:49:39,,,,0.0,0.0,,,,,,,1.0,,,,6.018046,76.786552,7.524123,31.140321,82.083677,16.068405,2,6,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,159.076893,,,,,,,,,,,,,,,0.0,,,,,1.0,7.0,,,,,,,,,,,,,,,,,,,,,3.889611,1.445995,3.889611,1.445995,0.316309,0.11759,0.316309,0.11759,1.200055,0.345251,0.12835,1.200055,0.345251,0.12835,1.200055,,,,92.52325,6.138179,0.499165,5.094174,60.415464,31.399463,,,,,,
177091,31430656010690,31430601010690,,2008,30,4,13,81274,4,10,1,,1,202.0,10,7.3,1,34.0,1.0,34.0,2.0,40.0,3.0,,,0.0,,,,,,,,0.9883,,3.228051,3.228051,,,,,3.228051,0.0,,,50.0,,,,,,,,,,0.0,,0.0,,,,,,,,,,,2011-08-25 18:56:24,2025-02-19 13:49:25,,,,0.0,0.0,,,,,,,1.0,,,,6.018046,76.786552,7.524123,31.140321,82.083677,16.068405,2,6,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,159.076893,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,3.889611,1.445995,3.889611,1.445995,0.316309,0.11759,0.316309,0.11759,1.200055,0.345251,0.12835,1.200055,0.345251,0.12835,1.200055,,,,92.52325,6.138179,0.499165,5.094174,60.415464,31.399463,,,,,,
194033,39605973010690,35376429010690,,2009,30,4,49,88487,4,2,1,,1,202.0,10,7.3,1,34.0,1.0,34.0,2.0,28.0,3.0,,,0.0,,,,,,,,0.9883,,3.228051,3.228051,,,,,3.228051,0.0,,,40.0,,,,,,,,,,0.0,,0.0,,,,,,,,,,,2011-08-25 19:01:16,2025-02-19 13:50:00,,,,0.0,0.0,,,,,,,1.0,,,,6.018046,76.786552,7.524123,31.140321,82.083677,16.068405,2,7,,,,,,,,,,,,,,,,,,,,,26000.0,0.0,,,,,,,,,,,,159.076893,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,3.889611,1.445995,3.889611,1.445995,0.316309,0.11759,0.316309,0.11759,1.200055,0.345251,0.12835,1.200055,0.345251,0.12835,1.200055,,,,92.52325,6.138179,0.499165,5.094174,60.415464,31.399463,,,,,,
221592,49315496020004,40395778010690,,2011,30,3,61,82367,1,3,1,,1,202.0,10,7.3,1,34.0,1.0,34.0,2.0,80.0,3.0,,,0.0,,,,,,,,1.3178,,3.228051,3.228051,,,,,3.228051,1.0,,,90.0,,,,,,,,,,0.0,,0.0,,,,,,,,,,,2012-09-11 17:00:26,2025-02-19 13:50:19,,5.0,,0.0,0.0,,,,,,,1.0,,,,6.018046,76.786552,7.524123,31.140321,82.083677,16.068405,2,9,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,159.076893,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,3.889611,1.445995,3.889611,1.445995,0.316309,0.11759,0.316309,0.11759,1.200055,0.345251,0.12835,1.200055,0.345251,0.12835,1.200055,,,,92.52325,6.138179,0.499165,5.094174,60.415464,31.399463,,,,,,
