In [None]:
# Import the required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error, r2_score

# Load data
# NOTE(review): absolute path (looks like a Colab upload location) — adjust it
# when running this notebook in another environment.
file_path = "/content/Q3_F4F.csv"
df = pd.read_csv(file_path)

# Show the last ten rows as a quick check that the file parsed into columns.
df.tail(10)


Unnamed: 0,Tree species,TreeHeight_foot,TreeCrown_foot,TreeDBH_cm
190,Orange,9,6.0,9.872611
191,Orange,10,6.0,11.783439
192,Orange,9,5.0,8.917197
193,Orange,7,4.0,8.917197
194,Orange,9,6.0,9.55414
195,Orange,10,7.0,10.509554
196,Orange,10,7.0,8.917197
197,Orange,9,6.0,10.509554
198,Orange,10,6.0,13.694268
199,Orange,7,4.0,8.917197


In [None]:
# Count the missing values in each column of the loaded frame.
df.isnull().sum()

Unnamed: 0,0
Tree species,0
TreeHeight_foot,0
TreeCrown_foot,0
TreeDBH_cm,0


In [None]:
# Summary statistics for the numeric columns.
# NOTE(review): the recorded output shows max TreeHeight_foot = 99 against a
# 75th percentile of 10 (and TreeCrown_foot max 18.5 vs 7) — these look like
# outliers or data-entry errors; confirm before trusting the model fit.
df.describe()

Unnamed: 0,TreeHeight_foot,TreeCrown_foot,TreeDBH_cm
count,200.0,200.0,200.0
mean,9.095,5.94,10.534365
std,6.832966,2.126384,4.489946
min,3.0,1.0,2.547771
25%,7.0,4.375,7.002817
50%,9.0,6.0,10.191083
75%,10.0,7.0,13.136943
max,99.0,18.5,31.847134


In [None]:
# Target is the trunk diameter; every other column stays in X. Columns that are
# not named in either group below are not passed to the model, because
# ColumnTransformer drops unlisted columns by default (remainder="drop").
y = df["TreeDBH_cm"]
X = df.drop(columns="TreeDBH_cm")

# Column groups, each routed to its own preprocessing step
categorical_cols = ["Tree species"]
numerical_cols = ["TreeHeight_foot", "TreeCrown_foot"]

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=4
)

# Preprocessing: standardise the numeric features, one-hot encode the species.
# NOTE(review): OneHotEncoder is left at its defaults, so predicting on a
# species that was absent at fit time raises instead of being ignored.
preprocessor = ColumnTransformer(
    [
        ("num", StandardScaler(), numerical_cols),
        ("cat", OneHotEncoder(), categorical_cols),
    ]
)

# Full pipeline: preprocessing followed by a seeded random-forest regressor
model = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("regressor", RandomForestRegressor(random_state=42)),
    ]
)
model.fit(X_train, y_train)

# Score the held-out rows
y_pred = model.predict(X_test)

# Regression metrics for evaluation; RMSE is the square root of the same MSE
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"RMSE: {rmse}, R²: {r2} , MSE: {mse}")


# Combine test features, true target, and predictions into one frame.
# X_test.copy() already contains the "Tree species" column, so it does not
# need to be re-attached.
results_df = X_test.copy()
results_df["True_TreeDBH_cm"] = y_test.values
results_df["Predicted_TreeDBH_cm"] = y_pred

# RMSE per species = sqrt(mean squared error) within each species group.
# Aggregating the squared-error Series directly avoids calling
# DataFrameGroupBy.apply on a frame that still holds the grouping column,
# which newer pandas versions warn about as deprecated.
squared_error = (
    results_df["True_TreeDBH_cm"] - results_df["Predicted_TreeDBH_cm"]
) ** 2
category_metrics = (
    squared_error.groupby(results_df["Tree species"])
    .mean()
    .pow(0.5)
    .to_frame("RMSE")
)

# Display RMSE for each category
print("Metrics for each Tree species category:")
print(category_metrics)

# Print the mean predicted TreeDBH_cm for each Tree species
print("\nPredicted TreeDBH_cm values for each Tree species:")
predictions_by_species = results_df.groupby("Tree species")["Predicted_TreeDBH_cm"].mean()
print(predictions_by_species)


RMSE: 2.0328402171514464, R²: 0.669280740512957 , MSE: 4.13243934846834
Metrics for each Tree species category:
                   RMSE
Tree species           
Custard apple  1.525566
Lemon          2.224519
Mango          3.049725
Orange         1.113125

Predicted TreeDBH_cm values for each Tree species:
Tree species
Custard apple    11.589472
Lemon             6.999547
Mango            16.669731
Orange           10.412526
Name: Predicted_TreeDBH_cm, dtype: float64


  category_metrics = results_df.groupby("Tree species").apply(


In [None]:
# Helper that predicts the trunk diameter for a single tree of a given species.
def predict_tree_dbh(Tree_species: str, tree_height_foot: float, tree_crown_foot: float) -> float:
    """Predict TreeDBH_cm for one tree using the notebook-level ``model``.

    The three inputs are packed into a one-row DataFrame whose column names
    ("Tree species", "TreeHeight_foot", "TreeCrown_foot") match the feature
    columns the pipeline was built with, and that row is passed to
    ``model.predict``.

    Args:
        Tree_species: Species label placed in the "Tree species" column.
        tree_height_foot: Value placed in the "TreeHeight_foot" column.
        tree_crown_foot: Value placed in the "TreeCrown_foot" column.

    Returns:
        The first (and only) element of the prediction returned by the model.
    """
    record = {
        "Tree species": Tree_species,
        "TreeHeight_foot": tree_height_foot,
        "TreeCrown_foot": tree_crown_foot,
    }
    single_row = pd.DataFrame([record])

    # `model` is read from the enclosing module scope and must already be fitted.
    return model.predict(single_row)[0]

# Species the fitted one-hot encoder knows about. Anything else would be
# rejected inside the pipeline with a much less helpful error, so the typed
# name is checked up front.
known_species = list(
    model.named_steps["preprocessor"].named_transformers_["cat"].categories_[0]
)

# Take the tree's attributes from the user
species = input("Write Name of  actual species:")
if species not in known_species:
    # Tolerate stray whitespace around an otherwise valid name.
    species = species.strip()
if species not in known_species:
    raise ValueError(
        f"Unknown tree species {species!r}; expected one of: "
        f"{', '.join(map(str, known_species))}"
    )

height = float(input("Write Height of Tree in foot:"))  # height in feet
crown = float(input("Write Crown of Tree foot:"))  # crown width in feet

predicted_dbh = predict_tree_dbh(species, height, crown)
print(f"Predicted Tree diameter at breast height_cm for {species} with height {height} feet and crown {crown} feet: {predicted_dbh:.2f} cms")

Write Name of  actual species:Orange
Write Height of Tree in foot:5
Write Crown of Tree foot:3
Predicted Tree diameter at breast height_cm for Orange with height 5.0 feet and crown 3.0 feet: 5.08 cms
