In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the vertex-coordinate table exported from ArcGIS Pro.
# The path is a raw string so the Windows backslashes are taken literally:
# in a normal string "\Z", "\I", "\H", ... are invalid escape sequences.
data = pd.read_csv(r"S:\Zack\Imagery\Hazelnut\WPP_Farm_LiDAR\ArcGIS_Pro_WPP_Farm_LiDAR\WPP_BlockB_Hazelnut_SfM_VertexCoords.csv")

In [2]:
# Show which columns the loaded table provides
column_index = data.columns
print(column_index)

Index(['treeID', 'area', 'diameter', 'ORIG_FID', 'volume', 'xmin', 'xmax',
       'width', 'y_xmax', 'y_xmin', 'POINT_Y', 'ymin', 'ymax', 'height',
       'x_ymax', 'x_ymin', 'POINT_X'],
      dtype='object')


In [3]:
# Build the per-tree table from a copy of the loaded data:
#   1. keep only the first row found for each treeID
#   2. discard the POINT_X / POINT_Y columns
new_data = (
    data
    .copy()
    .drop_duplicates(subset='treeID')
    .drop(columns=['POINT_X', 'POINT_Y'])
)

# Preview the first 5 rows
first_rows = new_data.head()
print(first_rows)


       treeID       area  diameter  ORIG_FID     volume         xmin  \
0          99   7.422937  3.403003         0  10.574112  396329.1404   
2409      100   7.522573  0.068171         1  10.169464  396338.0000   
5240      102   6.049380  0.031915         2   7.830704  396343.1434   
7843      104  12.339207  4.096281         3  15.086003  396348.6164   
12773     105   4.883722  1.741400         4   6.130027  396353.3100   

              xmax   width  y_xmax  y_xmin         ymin         ymax  height  \
0      396332.2313  3.0909       0       0  4437268.275  4437271.515   3.240   
2409   396341.8550  3.8550       0       0  4437268.195  4437271.595   3.400   
5240   396346.3851  3.2417       0       0  4437268.280  4437271.070   2.790   
7843   396353.5800  4.9636       0       0  4437267.770  4437271.850   4.080   
12773  396355.9338  2.6238       0       0  4437268.395  4437271.400   3.005   

       x_ymax  x_ymin  
0         NaN     NaN  
2409      NaN     NaN  
5240      NaN 

In [8]:
# Calculate the Euclidean width and length of every tree.
#
# For each treeID, locate the vertices lying on the extremes of its extent:
#   x_ymin / x_ymax : POINT_X of the vertex with the lowest / highest POINT_Y
#   y_xmin / y_xmax : POINT_Y of the vertex with the lowest / highest POINT_X
# When several vertices tie on an extreme, the smallest coordinate is kept for
# the *_min columns and the largest for the *_max columns.
#
# Everything is done with groupby aggregation instead of a Python loop, so the
# vertex table is not re-filtered once per tree.
vertex_x = data['POINT_X']
vertex_y = data['POINT_Y']
by_tree = data.groupby('treeID')

# transform() broadcasts each tree's extreme back onto its own vertex rows, so
# each comparison flags the vertices sitting on that tree's extreme.
at_ymin = vertex_y == by_tree['POINT_Y'].transform('min')
at_ymax = vertex_y == by_tree['POINT_Y'].transform('max')
at_xmin = vertex_x == by_tree['POINT_X'].transform('min')
at_xmax = vertex_x == by_tree['POINT_X'].transform('max')

# One row per treeID, one column per extreme coordinate.
extremes = pd.DataFrame({
    'x_ymin': data.loc[at_ymin].groupby('treeID')['POINT_X'].min(),
    'x_ymax': data.loc[at_ymax].groupby('treeID')['POINT_X'].max(),
    'y_xmin': data.loc[at_xmin].groupby('treeID')['POINT_Y'].min(),
    'y_xmax': data.loc[at_xmax].groupby('treeID')['POINT_Y'].max(),
}).astype(float)

# Replace the placeholder columns wholesale (looked up by treeID). Assigning a
# complete column avoids writing floats element-wise into the integer
# placeholder columns, which relies on implicit upcasting that pandas has
# deprecated.
tree_ids = new_data['treeID']
for column in extremes.columns:
    new_data[column] = tree_ids.map(extremes[column])

# Width: distance between the vertex at xmin and the vertex at xmax.
new_data['euclidean_width'] = np.sqrt(
    (new_data['xmax'] - new_data['xmin'])**2
    + (new_data['y_xmax'] - new_data['y_xmin'])**2
)
# Length: distance between the vertex at ymin and the vertex at ymax.
new_data['euclidean_length'] = np.sqrt(
    (new_data['ymax'] - new_data['ymin'])**2
    + (new_data['x_ymax'] - new_data['x_ymin'])**2
)

# NOTE: 'diameter' (the longest Euclidean distance between any two vertices of
# a tree) is NOT recomputed here. The brute-force pairwise search that used to
# do it was disabled because it takes a long time to run, so the column keeps
# whatever values it already holds.
# TODO(review): confirm which 'diameter' values are expected in the saved CSV.

In [9]:
# Preview the per-tree table after the Euclidean columns have been added
preview_rows = new_data.head()
print(preview_rows)

       treeID       area  diameter  ORIG_FID     volume         xmin  \
0          99   7.422937  3.557081         0  10.574112  396329.1404   
2409      100   7.522573  4.134737         1  10.169464  396338.0000   
5240      102   6.049380  3.598017         2   7.830704  396343.1434   
7843      104  12.339207  4.977972         3  15.086003  396348.6164   
12773     105   4.883722  3.367555         4   6.130027  396353.3100   

              xmax   width       y_xmax       y_xmin         ymin  \
0      396332.2313  3.0909  4437270.643  4437269.153  4437268.275   
2409   396341.8550  3.8550  4437268.795  4437270.205  4437268.195   
5240   396346.3851  3.2417  4437269.586  4437270.633  4437268.280   
7843   396353.5800  4.9636  4437269.035  4437269.390  4437267.770   
12773  396355.9338  2.6238  4437268.932  4437269.580  4437268.395   

              ymax  height      x_ymax      x_ymin  euclidean_width  \
0      4437271.515   3.240  396330.555  396330.580         3.431292   
2409   443

In [10]:
# Save the per-tree table (without the DataFrame index) next to the input data.
# The path is a raw string so the Windows backslashes are taken literally:
# in a normal string "\Z", "\I", "\H", ... are invalid escape sequences.
new_data.to_csv(r"S:\Zack\Imagery\Hazelnut\WPP_Farm_LiDAR\ArcGIS_Pro_WPP_Farm_LiDAR\WPP_BlockB_Hazelnut_SfM_Euclidean_HW.csv", index=False)